Python Programming with a COVID-19 Acute Hypoxemic Respiratory Failure Dataset
Link to Data Set DOI: 10.7717/peerj.15174/supp-15
Appendix: Supplementary Python (ver 3.8) program code
#-[Python Code #1]-------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import IPython.core.display as display
import IPython.display
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

# pip install openpyxl  (required by pd.read_excel for .xlsx files)
# You may need to restart the kernel to use updated packages.
#-[END]------------------------------------------------------------
#-[Python Code #2]-------------------------------------------------
# Read the Excel database raw_data_f16.xlsx and preprocess it
df = pd.read_excel('raw_data_f16.xlsx')
print(df.columns)

df_copy = df.copy()
# Rename columns to short names (the spreadsheet headers contain the
# misspellings 'ResipratoryTherapy' and 'Days to Hostital' as written)
df_copy.rename(columns={'ResipratoryTherapy': 'Therapy',
                        'ROX index': 'ROX',
                        'LIV (%)': 'LIV',
                        'Gender': 'Gender',
                        'Age': 'Age',
                        'BMI': 'BMI',
                        'WBC (/µL)': 'WBC',
                        'Cr (mg/dL)': 'Cr',
                        'CRP (mg//dL)': 'CRP',
                        'LDH (U/L)': 'LDH',
                        'd-dimer (µg/L)': 'd-dimer',
                        'PSI': 'PSI',
                        'CCI': 'CCI',
                        'Days to Hostital (days)': 'DtoH',
                        'Days to HFNC (days)': 'DaystoHFNC',
                        'Mortality': 'Mortality'}, inplace=True)
print("")
print(df_copy.columns)

# Keep only the columns used in the analysis
# (.copy() avoids pandas SettingWithCopyWarning on the assignments below)
data = df_copy[["Therapy", "ROX", "LIV", "Mortality"]].copy()
print("")
print(data.head(3))
print("")
print(data.tail(3))

# Therapy data: simplify to ['HFNC', 'MV']
def therapy_convert(x):
    if x == 'High-flow Nasal Cannulation':
        return 'HFNC'
    elif x == 'Mechanical Ventilation':
        return 'MV'
    return x  # leave any other value unchanged

# Mortality: fix misspelled 'survivied' to 'survived'
def mortality_convert(x):
    if x == 'survivied':
        return 'survived'
    return x  # 'died' and other values pass through unchanged

# Therapy: simplify the data description
data["Therapy"] = data["Therapy"].apply(therapy_convert)
# Fix misspelling in Mortality
data["Mortality"] = data["Mortality"].apply(mortality_convert)
print("")
print(data.head(3))
print("")
print(data.tail(3))
#-[END]------------------------------------------------------------
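A quick sanity check (not part of the original listing) can confirm that the string conversions in Code #2 produced only the expected categories and no missing values; this is a minimal sketch assuming `data` from Code #2 is in scope.

#-[Optional check: conversion results]------------------------------
# Assumes `data` from Code #2.
print(data["Therapy"].value_counts(dropna=False))    # expect HFNC / MV
print(data["Mortality"].value_counts(dropna=False))  # expect survived / died
assert data[["Therapy", "Mortality"]].notna().all().all()
#-[END]------------------------------------------------------------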
#-[Python Code #3]-------------------------------------------------
# Pair plot of the selected features, colored by therapy
sns.set(style="ticks")
sns.pairplot(data, hue='Therapy', markers=['o', ','],
             palette={'HFNC': 'green', 'MV': 'red'})
plt.show()
#-[END]-------------------------------------------------------------
#-[Python Code #4]--------------------------------------------------
# Numerical encoding of the categorical data
# (data_0 is an alias of data, not a copy, so data is modified in place)
data_0 = data
class_mapping1 = {'HFNC': 0, 'MV': 1}
data_0['Therapy'] = data['Therapy'].map(class_mapping1)
class_mapping2 = {'survived': 0, 'died': 1}
data_0['Mortality'] = data['Mortality'].map(class_mapping2)
data_0.head(3)
#-[END]--------------------------------------------------------------
#-[Python Code #5]---------------------------------------------------
# Feature subsets (column order in data_0: Therapy, ROX, LIV, Mortality)
data_1 = data_0.iloc[:, 1:3]   # ROX and LIV
data_2 = data_0.iloc[:, 1:2]   # ROX alone
data_3 = data_0.iloc[:, 2:3]   # LIV alone
Mortality = data_0.iloc[:, 3:4]
Therapy = data_0.iloc[:, 0:1]
IPython.display.display(data_1.head(3))
IPython.display.display(data_2.head(3))
IPython.display.display(data_3.head(3))
IPython.display.display(Mortality.head(3))
IPython.display.display(Therapy.head(3))
#-[END]------------------------------------------------------------
#-[Python Code #6]-------------------------------------------------
# Classify patients into 3 subgroups
df_group_1 = data_0[(data_0['Therapy'] == 0) & (data_0['Mortality'] == 0)]
df_group_2 = data_0[(data_0['Therapy'] == 1) & (data_0['Mortality'] == 0)]
df_group_3 = data_0[(data_0['Therapy'] == 1) & (data_0['Mortality'] == 1)]

# Plot the data
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.xlim(0, 15)
plt.ylim(0, 80)
plt.title("Data ROX-LIV", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
#-[Python Code #7]-------------------------------------------------
###########################
# Logistic Regression ROX
###########################
from sklearn.linear_model import LogisticRegression

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_2, Therapy, test_size=0.2, random_state=0)
X_train = data_2
Y_train = Therapy
X_test = data_2
Y_test = Therapy

# Logistic regression
logreg_2 = LogisticRegression()
logreg_2.fit(X_train, Y_train)

# Prediction
Y_pred_2 = logreg_2.predict(X_test)

# Evaluation
# mean absolute error (MAE)
mae_2 = mean_absolute_error(Y_test, Y_pred_2)
# root mean squared error (RMSE)
rmse_2 = np.sqrt(mean_squared_error(Y_test, Y_pred_2))
# Score
score_2 = logreg_2.score(X_test, Y_test)
coef_2 = logreg_2.coef_[0]
intercept_2 = logreg_2.intercept_

print("MAE = %.3f, RMSE = %.3f" % (mae_2, rmse_2))
print("score = %.3f" % (score_2))
print("Coef = ", coef_2)
print("Intercept = ", intercept_2)
#-[END]------------------------------------------------------------
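Codes #7, #11, #15, #20, and #30 train and evaluate on the same data, as the commented-out train_test_split line suggests. For reference, a minimal sketch of a held-out evaluation, assuming data_2 and Therapy from Code #5 are in scope (variable names here are illustrative, not from the original listing):

#-[Optional: held-out evaluation sketch]----------------------------
# Assumes data_2 and Therapy from Code #5.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X_tr, X_te, y_tr, y_te = train_test_split(
    data_2, Therapy, test_size=0.2, random_state=0,
    stratify=Therapy)                    # keep class balance in both splits
model = LogisticRegression()
model.fit(X_tr, y_tr.values.ravel())     # ravel() avoids the column-vector warning
print("held-out score = %.3f" % model.score(X_te, y_te))
#-[END]------------------------------------------------------------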
#-[Python Code #8]-------------------------------------------------
# Plot the logistic regression decision boundary
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
line = np.linspace(0, 70)
plt.plot(line * 0 - intercept_2 / coef_2, line, c='b',
         label="LogisticRegression")
print("Cut-off value = %.3f" % float(-intercept_2 / coef_2))
plt.ylim(0, 70)
plt.xlim(0, 15)
plt.title("Logistic Regression: ROX index alone", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
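The vertical line in Code #8 marks where the fitted model is undecided: a single-feature logistic model gives P(MV) = 1/(1 + exp(-(coef*x + intercept))), so P = 0.5 exactly where coef*x + intercept = 0, i.e. x = -intercept/coef. A minimal check of that identity, assuming logreg_2, coef_2, and intercept_2 from Code #7:

#-[Optional: cut-off derivation check]------------------------------
# Assumes logreg_2, coef_2, intercept_2 from Code #7.
# At x = -intercept/coef the predicted probability should be 0.5.
cutoff = float(-intercept_2 / coef_2)
p = logreg_2.predict_proba([[cutoff]])[0, 1]
print("P(MV) at ROX = %.3f is %.3f" % (cutoff, p))   # expect ~0.500
#-[END]------------------------------------------------------------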
#-[Python Code #9]-------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_2)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

from sklearn.metrics import confusion_matrix, accuracy_score, \
    precision_score, recall_score, f1_score
print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_2))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_2)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_2)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_2)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_2)))
# The AUC for this model is computed in Code #10, after Y_score_2 is defined.
#-[END]------------------------------------------------------------
#-[Python Code #10]------------------------------------------------
from sklearn.metrics import roc_curve, auc
Y_score_2 = logreg_2.predict_proba(data_2)[:, 1]
fpr_2, tpr_2, thresholds_2 = roc_curve(y_true=Therapy, y_score=Y_score_2)

plt.figure(figsize=(2, 2))
plt.plot(fpr_2, tpr_2,
         label='roc curve (area = %0.3f)' % auc(fpr_2, tpr_2))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()

from sklearn.metrics import roc_auc_score
print("auc = %.3f"
      % (roc_auc_score(y_true=Therapy, y_score=Y_score_2)))
#-[END]------------------------------------------------------------
#-[Python Code #11]------------------------------------------------
###########################
# Logistic Regression LIV
###########################
X_train = data_3
Y_train = Therapy
X_test = data_3
Y_test = Therapy

# Logistic regression
logreg_3 = LogisticRegression()
logreg_3.fit(X_train, Y_train)

# Prediction
Y_pred_3 = logreg_3.predict(X_test)

# Evaluation
# mean absolute error (MAE)
mae_3 = mean_absolute_error(Y_test, Y_pred_3)
# root mean squared error (RMSE)
rmse_3 = np.sqrt(mean_squared_error(Y_test, Y_pred_3))
# Score
score_3 = logreg_3.score(X_test, Y_test)
coef_3 = logreg_3.coef_[0]
intercept_3 = logreg_3.intercept_

print("MAE = %.3f, RMSE = %.3f" % (mae_3, rmse_3))
print("score = %.3f" % (score_3))
print("Coef =", coef_3)
print("Intercept =", intercept_3)
print("Threshold of LIV = %.3f" % float(-intercept_3 / coef_3))
#-[END]------------------------------------------------------------
#-[Python Code #12]------------------------------------------------
# Plot the logistic regression decision boundary
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
line = np.linspace(0, 15)
plt.plot(line, line * 0 - intercept_3 / coef_3, c='b',
         label="LogisticRegression")
print("Cut-off value = %.3f" % float(-intercept_3 / coef_3))
plt.ylim(0, 70)
plt.xlim(0, 15)
plt.title("Logistic Regression: LIV alone", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
#-[Python Code #13]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_3)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_3))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_3)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_3)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_3)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_3)))
#-[END]------------------------------------------------------------
#-[Python Code #14]------------------------------------------------
Y_score_3 = logreg_3.predict_proba(data_3)[:, 1]
fpr_3, tpr_3, thresholds_3 = roc_curve(y_true=Therapy, y_score=Y_score_3)

plt.figure(figsize=(2, 2))
plt.plot(fpr_3, tpr_3,
         label='roc curve LIV (area = %0.3f)' % auc(fpr_3, tpr_3))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()

print("auc = %.3f"
      % (roc_auc_score(y_true=Therapy, y_score=Y_score_3)))
#-[END]------------------------------------------------------------
#-[Python Code #15]------------------------------------------------
#################################
# Logistic Regression ROX & LIV
#################################
from sklearn.linear_model import LogisticRegression

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_1, Therapy, test_size=0.2, random_state=0)
X_train = data_1
Y_train = Therapy
X_test = data_1
Y_test = Therapy

# Logistic regression
# logreg_1 = LogisticRegression(penalty='l2', solver="sag")
logreg_1 = LogisticRegression()
logreg_1.fit(X_train, Y_train)

# Prediction
Y_pred_1 = logreg_1.predict(X_test)

# Evaluation
# mean absolute error (MAE)
mae_1 = mean_absolute_error(Y_test, Y_pred_1)
# root mean squared error (RMSE)
rmse_1 = np.sqrt(mean_squared_error(Y_test, Y_pred_1))
# Score
score_1 = logreg_1.score(X_test, Y_test)
coef_1 = logreg_1.coef_[0]
intercept_1 = logreg_1.intercept_

print("MAE = %.3f, RMSE = %.3f" % (mae_1, rmse_1))
print("score = %.3f" % (score_1))
print("Coef = ", coef_1)
print("Intercept = ", intercept_1)
#-[END]------------------------------------------------------------
#-[Python Code #16]------------------------------------------------
# Plot the logistic regression decision boundary
line = np.linspace(0, 15)
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.plot(line, -(line * coef_1[0] + intercept_1) / coef_1[1],
         c='b', label="LogisticRegression")
plt.ylim(0, 70)
plt.xlim(0, 15)
plt.title("Logistic Regression: ROX index & LIV", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
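The boundary line in Code #16 follows from setting the model's linear term to zero: coef_ROX*ROX + coef_LIV*LIV + intercept = 0, hence LIV = -(coef_ROX*ROX + intercept)/coef_LIV. A minimal sketch verifying that a point on the plotted line receives probability about 0.5, assuming logreg_1, coef_1, and intercept_1 from Code #15 (the ROX value 5.0 is arbitrary):

#-[Optional: boundary-line check]-----------------------------------
# Assumes logreg_1, coef_1, intercept_1 from Code #15.
rox = 5.0                                             # any ROX value
liv = float(-(rox * coef_1[0] + intercept_1) / coef_1[1])
p = logreg_1.predict_proba([[rox, liv]])[0, 1]
print("P(MV) on the boundary = %.3f" % p)             # expect ~0.500
#-[END]------------------------------------------------------------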
#-[Python Code #17]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_1)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_1))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_1)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_1)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_1)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_1)))
#-[END]------------------------------------------------------------
#-[Python Code #18]------------------------------------------------
Y_score_1 = logreg_1.predict_proba(data_1)[:, 1]
fpr_1, tpr_1, thresholds_1 = roc_curve(y_true=Therapy, y_score=Y_score_1)

plt.figure(figsize=(2, 2))
plt.plot(fpr_1, tpr_1,
         label='roc curve ROX & LIV (area = %0.3f)' % auc(fpr_1, tpr_1))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()

print("auc = %.3f"
      % (roc_auc_score(y_true=Therapy, y_score=Y_score_1)))
#-[END]------------------------------------------------------------
#-[Python Code #19]------------------------------------------------
plt.figure(figsize=(2, 2))
plt.plot(fpr_1, tpr_1,
         label='roc curve ROX&LIV (area = %0.3f)' % auc(fpr_1, tpr_1))
plt.plot(fpr_2, tpr_2,
         label='roc curve ROX (area = %0.3f)' % auc(fpr_2, tpr_2))
plt.plot(fpr_3, tpr_3,
         label='roc curve LIV (area = %0.3f)' % auc(fpr_3, tpr_3))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #20]------------------------------------------------
########################
# Soft Margin Linear SVM
########################
from sklearn.svm import SVC

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_1, Therapy, test_size=0.2, random_state=0)
X_train = data_1
Y_train = Therapy
X_test = data_1
Y_test = Therapy

soft_linear_svc = SVC(C=0.1, kernel='linear')
soft_linear_svc.fit(X_train, Y_train)
y_test_pred = soft_linear_svc.decision_function(X_test)
y_test_len = len(y_test_pred)
Y_pred_S = soft_linear_svc.predict(X_test)

plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.ylim(0, 70)
plt.xlim(0, 15)
xlim = plt.xlim()
ylim = plt.ylim()

# Evaluate the decision function on a grid to draw the margin lines
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
XX, YY = np.meshgrid(xx, yy)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = soft_linear_svc.decision_function(xy).reshape(XX.shape)
plt.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1],
            alpha=0.5, linestyles=['--', '-', '--'])
plt.title("Soft-margin Linear SVM", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
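The dashed contours at levels -1 and +1 in Code #20 are the SVM margins; training points on or inside them are the support vectors. A minimal sketch for inspecting them, assuming soft_linear_svc fitted in Code #20 (n_support_ and support_vectors_ are standard fitted attributes of sklearn.svm.SVC):

#-[Optional: support-vector inspection]-----------------------------
# Assumes soft_linear_svc fitted in Code #20.
print("support vectors per class:", soft_linear_svc.n_support_)
print("first few support vectors (ROX, LIV):")
print(soft_linear_svc.support_vectors_[:5])
#-[END]------------------------------------------------------------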
#-[Python Code #21]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_S)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_S))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_S)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_S)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_S)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_S)))
#-[END]------------------------------------------------------------
#-[Python Code #22]------------------------------------------------
########################
# Non-Linear SVM
########################
import sklearn.model_selection
from sklearn.svm import SVC

def pt_plot(x1_x2, label):
    plt.figure(figsize=(4, 2.5))
    plt.grid(True)
    # plt.ylim(0, 70)
    # plt.xlim(0, 13)
    plt.title("Non-linear SVM", fontsize=16)
    plt.xlabel("ROX index", fontsize=16)
    plt.ylabel("LIV (%)", fontsize=16)
    datalen = len(x1_x2)
    for cnt in range(datalen):
        if label[cnt] == 0:
            plt.scatter(x1_x2[:, 0][cnt], x1_x2[:, 1][cnt], label="HFNC",
                        color='white', s=20, marker="o", edgecolor='green')
        elif label[cnt] == 1:
            plt.scatter(x1_x2[:, 0][cnt], x1_x2[:, 1][cnt], label="MV survived",
                        color='white', s=20, marker=",", edgecolor='red')

def pt_plot_mortality(x1_x2, label, mortality):
    # Scatter plot of the three subgroups in (scaled) ROX-LIV space.
    # label[:, 0] holds the Therapy code; mortality[:, 0] the Mortality code.
    plt.figure(figsize=(4, 2.5))
    plt.grid(True)
    plt.title("Non-linear SVM", fontsize=16)
    plt.xlabel("ROX index", fontsize=16)
    plt.ylabel("LIV (%)", fontsize=16)
    x1_x2_1 = np.empty(2)
    x1_x2_2 = np.empty(2)
    x1_x2_3 = np.empty(2)
    group_1_cnt = 0
    group_2_cnt = 0
    group_3_cnt = 0
    datalen = len(x1_x2)
    for cnt in range(datalen):
        if label[cnt, 0] == 0:
            x1_x2_1 = np.append(x1_x2_1, x1_x2[cnt], axis=0)
            group_1_cnt += 1
        if label[cnt, 0] == 1:
            x1_x2_2 = np.append(x1_x2_2, x1_x2[cnt], axis=0)
            group_2_cnt += 1
        if mortality[cnt, 0] == 1:
            x1_x2_3 = np.append(x1_x2_3, x1_x2[cnt], axis=0)
            group_3_cnt += 1
    # Reshape the accumulated pairs, then drop the np.empty placeholder row
    x1_x2_1 = x1_x2_1.reshape(group_1_cnt + 1, 2)
    x1_x2_2 = x1_x2_2.reshape(group_2_cnt + 1, 2)
    x1_x2_3 = x1_x2_3.reshape(group_3_cnt + 1, 2)
    x1_x2_1 = np.delete(x1_x2_1, 0, 0)
    x1_x2_2 = np.delete(x1_x2_2, 0, 0)
    x1_x2_3 = np.delete(x1_x2_3, 0, 0)
    plt.scatter(x1_x2_1[:, 0], x1_x2_1[:, 1], label="HFNC survived",
                color='white', s=20, marker="o", edgecolor='green')
    plt.scatter(x1_x2_2[:, 0], x1_x2_2[:, 1], label="MV survived",
                color='white', s=20, marker=",", edgecolor='red')
    plt.scatter(x1_x2_3[:, 0], x1_x2_3[:, 1], label="MV died",
                color='black', s=20, marker=",", edgecolor='black')

def false_positive_rate(y_true, y_pred):
    # FPR = FP / (TN + FP); ravel order for a 2x2 matrix is tn, fp, fn, tp
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).flatten()
    return fp / (tn + fp)
#-[END]------------------------------------------------------------
#-[Python Code #23]------------------------------------------------
X_train = data_0
Y_train = Therapy

# Classify patients into 3 subgroups
X_train_1 = X_train[(X_train['Therapy'] == 0) & (X_train['Mortality'] == 0)]
X_train_2 = X_train[(X_train['Therapy'] == 1) & (X_train['Mortality'] == 0)]
X_train_3 = X_train[(X_train['Therapy'] == 1) & (X_train['Mortality'] == 1)]

# Standardize ROX and LIV (columns 1:3 of data_0) to zero mean, unit variance
scaler = StandardScaler()
scaled_features = scaler.fit_transform(X_train.values[:, 1:3])
scaled_features_train = np.hstack((scaled_features,
                                   X_train['Therapy'].values.reshape(-1, 1),
                                   X_train['Mortality'].values.reshape(-1, 1)))
pt_plot_mortality(scaled_features,
                  X_train.values, scaled_features_train[:, 3:4])
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
#-[END]------------------------------------------------------------
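StandardScaler transforms each feature as z = (x - mean)/std, so the SVM plots that follow are in standardized units rather than raw ROX/LIV values. A minimal sketch for mapping standardized coordinates back to the original scale, assuming the scaler fitted in Code #23 (mean_, scale_, and inverse_transform are standard StandardScaler attributes):

#-[Optional: back-transform to original units]----------------------
# Assumes scaler fitted in Code #23 on the (ROX, LIV) columns.
print("means:", scaler.mean_)     # per-feature mean (ROX, LIV)
print("stds:", scaler.scale_)     # per-feature standard deviation
# Map a standardized point, e.g. the origin, back to (ROX, LIV) units:
print(scaler.inverse_transform([[0.0, 0.0]]))   # equals the feature means
#-[END]------------------------------------------------------------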
#-[Python Code #24]------------------------------------------------
# Step 2. Training the model
# kernel = sigmoid
knl = 'sigmoid'
# kernel options
# knl = 'rbf'
# knl = 'poly'
gma = 5
dgr = 2
# cost parameter C
c_values = [0.01, 0.1, 1.0, 10, 100]

# Step 2: Split the training data
k = 5
skf = sklearn.model_selection.StratifiedKFold(n_splits=k,
                                              random_state=0, shuffle=True)

# Step 2: Cross validation
# List of evaluation metrics
scoring = ['f1_macro', 'precision_macro', 'recall_macro']
scores = None
for c in c_values:
    if knl == 'rbf':
        clf = SVC(kernel=knl, C=c, gamma=gma)
    elif knl == 'poly':
        clf = SVC(kernel=knl, C=c, gamma=gma, degree=dgr)
    else:
        clf = SVC(kernel=knl, C=c)
    score = sklearn.model_selection.cross_validate(
        clf, scaled_features, Y_train, scoring=scoring, cv=skf)
    x = pd.concat([pd.Series(np.full(k, c), name='C'),
                   pd.DataFrame(score)], axis=1)
    scores = pd.concat([scores, x])
print(scores)

# Step 2: Training using the training data set
best_c = scores.loc[scores.loc[:, 'test_f1_macro']
                    == max(scores.loc[:, 'test_f1_macro']), 'C'].values[0]
if knl == 'rbf':
    clf = SVC(kernel=knl, C=best_c, gamma=gma)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('gamma=', gma)
elif knl == 'poly':
    clf = SVC(kernel=knl, C=best_c, degree=dgr)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('degree=', dgr)
    print('gamma=', gma)
elif knl == 'sigmoid':
    clf = SVC(kernel=knl, C=best_c)
    print('kernel=', knl)
    print('bestC=', best_c)
clf.fit(scaled_features, Y_train)
#-[END]------------------------------------------------------------
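Codes #24 and #27 hand-roll the search over C with StratifiedKFold and cross_validate. For comparison only (this is an alternative, not the method used above), the same selection can be written with sklearn's GridSearchCV; a minimal sketch assuming scaled_features and Y_train from Code #23:

#-[Optional: GridSearchCV alternative]------------------------------
# Assumes scaled_features, Y_train from Code #23.
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
grid = GridSearchCV(SVC(kernel='sigmoid'),
                    param_grid={'C': [0.01, 0.1, 1.0, 10, 100]},
                    scoring='f1_macro', cv=cv)
grid.fit(scaled_features, Y_train.values.ravel())
print("best C =", grid.best_params_['C'])
print("best f1_macro = %.3f" % grid.best_score_)
#-[END]------------------------------------------------------------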
#-[Python Code #25]------------------------------------------------
# Prediction on the training data
Y_pred_SM2 = clf.predict(scaled_features)

# Display the decision boundary
plt.figure(figsize=(4, 2.5))
_x0 = np.linspace(-3, 3, 600)
_x1 = np.linspace(-3, 4, 700)
x0, x1 = np.meshgrid(_x0, _x1)
X = np.hstack((x0.ravel().reshape(-1, 1), x1.ravel().reshape(-1, 1)))
y_decision = clf.predict(X).reshape(x0.shape)

# Plot the training data
pt_plot_mortality(scaled_features,
                  Y_train.values, scaled_features_train[:, 3:4])
plt.contourf(x0, x1, y_decision,
             levels=[y_decision.min(), 0.1, y_decision.max()], alpha=0.3)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #26]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Y_train, Y_pred_SM2)
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Y_train, y_pred=Y_pred_SM2))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Y_train, y_pred=Y_pred_SM2)))
print("precision = %.3f"
      % (precision_score(y_true=Y_train, y_pred=Y_pred_SM2)))
print("recall = %.3f"
      % (recall_score(y_true=Y_train, y_pred=Y_pred_SM2)))
print("f1 score = %.3f"
      % (f1_score(y_true=Y_train, y_pred=Y_pred_SM2)))
#-[END]------------------------------------------------------------
#-[Python Code #27]------------------------------------------------
# Step 2. Training the model
# kernel = Gaussian (RBF)
knl = 'rbf'
# kernel options
# knl = 'poly'
# knl = 'sigmoid'
gma = 5
dgr = 2
# cost parameter C
c_values = [0.01, 0.1, 1.0, 10, 100]

# Step 2: Split the training data
k = 5
skf = sklearn.model_selection.StratifiedKFold(n_splits=k,
                                              random_state=0, shuffle=True)

# Step 2: Cross validation
# List of evaluation metrics
scoring = ['f1_macro', 'precision_macro', 'recall_macro']
scores = None
for c in c_values:
    if knl == 'rbf':
        clf = SVC(kernel=knl, C=c, gamma=gma)
    elif knl == 'poly':
        clf = SVC(kernel=knl, C=c, gamma=gma, degree=dgr)
    else:
        clf = SVC(kernel=knl, C=c)
    score = sklearn.model_selection.cross_validate(
        clf, scaled_features, Y_train, scoring=scoring, cv=skf)
    x = pd.concat([pd.Series(np.full(k, c), name='C'),
                   pd.DataFrame(score)], axis=1)
    scores = pd.concat([scores, x])
print(scores)

# Step 2: Training using the training data set
best_c = scores.loc[scores.loc[:, 'test_f1_macro']
                    == max(scores.loc[:, 'test_f1_macro']), 'C'].values[0]
if knl == 'rbf':
    clf = SVC(kernel=knl, C=best_c, gamma=gma)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('gamma=', gma)
elif knl == 'poly':
    clf = SVC(kernel=knl, C=best_c, degree=dgr)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('degree=', dgr)
    print('gamma=', gma)
elif knl == 'sigmoid':
    clf = SVC(kernel=knl, C=best_c)
    print('kernel=', knl)
    print('bestC=', best_c)
clf.fit(scaled_features, Y_train)
#-[END]------------------------------------------------------------
#-[Python Code #28]------------------------------------------------
# Prediction on the training data
Y_pred_SM1 = clf.predict(scaled_features)

# Display the decision boundary
plt.figure(figsize=(4, 2.5))
_x0 = np.linspace(-3, 3, 600)
_x1 = np.linspace(-3, 4, 700)
x0, x1 = np.meshgrid(_x0, _x1)
X = np.hstack((x0.ravel().reshape(-1, 1),
               x1.ravel().reshape(-1, 1)))
y_decision = clf.predict(X).reshape(x0.shape)

# Plot the training data
pt_plot_mortality(scaled_features, Y_train.values,
                  scaled_features_train[:, 3:4])
plt.contourf(x0, x1, y_decision,
             levels=[y_decision.min(), 0.1, y_decision.max()], alpha=0.3)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #29]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Y_train, Y_pred_SM1)
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Y_train, y_pred=Y_pred_SM1))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Y_train, y_pred=Y_pred_SM1)))
print("precision = %.3f"
      % (precision_score(y_true=Y_train, y_pred=Y_pred_SM1)))
print("recall = %.3f"
      % (recall_score(y_true=Y_train, y_pred=Y_pred_SM1)))
print("f1 score = %.3f"
      % (f1_score(y_true=Y_train, y_pred=Y_pred_SM1)))
#-[END]------------------------------------------------------------
#-[Python Code #30]------------------------------------------------
########################
# Gaussian Naive Bayes
########################
from sklearn.naive_bayes import GaussianNB

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_1, Therapy, test_size=0.2, random_state=0)
X_train = data_1
Y_train = Therapy
X_test = data_1
Y_test = Therapy

# Classification
clf = GaussianNB()
clf.fit(X_train, Y_train)

# Prediction
Y_pred_G1 = clf.predict(X_test)

# Evaluation
score_G = clf.score(X_test, Y_test)
print("score = %.3f" % (score_G))
#-[END]------------------------------------------------------------
#-[Python Code #31]------------------------------------------------
# Data, initial values, preprocessing
# X, Y values
XY = data_1
# Label values
L = Therapy.astype(int)
# Maximum value of X (ROX)
A = int(max(data_1['ROX']))
# Maximum value of Y (LIV)
B = int(max(data_1['LIV']))
C, D = np.meshgrid(np.arange(A), np.arange(B))
# All (X, Y) grid combinations
E = np.hstack((C.reshape(A * B, 1), D.reshape(A * B, 1)))

# Classification
clf = GaussianNB()
clf.fit(XY, L)
Y_pred_G2 = clf.predict(E)
#-[END]------------------------------------------------------------
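Code #31 evaluates the classifier on an integer-spaced grid (step 1 in both ROX and LIV), which is what produces the blocky decision regions in Code #32. A finer grid is a one-line change; a minimal sketch, assuming clf, A, and B from Code #31 (the step of 0.1 and the suffixed names are illustrative):

#-[Optional: finer prediction grid]---------------------------------
# Assumes clf, A, B from Code #31. Grid step 0.1 instead of 1.
C_f, D_f = np.meshgrid(np.arange(0, A, 0.1), np.arange(0, B, 0.1))
E_f = np.hstack((C_f.reshape(-1, 1), D_f.reshape(-1, 1)))
Y_pred_fine = clf.predict(E_f)
# Then plot with plt.pcolormesh(C_f, D_f, Y_pred_fine.reshape(C_f.shape))
#-[END]------------------------------------------------------------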
#-[Python Code #32]------------------------------------------------
# Plot Gaussian Naive Bayes
plt.figure(figsize=(4, 2.5))
# Plot the decision regions
plt.pcolormesh(C, D, Y_pred_G2.reshape(C.shape), alpha=0.3)
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.grid(True)
plt.ylim(0, 70)
plt.xlim(0, 13)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.title("Gaussian Naive Bayes", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #33]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_G1)
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_G1))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_G1)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_G1)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_G1)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_G1)))
#-[END]------------------------------------------------------------