Python Programming with a COVID-19 Acute Hypoxemic Respiratory Failure Dataset
Link to Data Set DOI: 10.7717/peerj.15174/supp-15
Appendix: Supplementary Python (ver 3.8) program code
#-[Python Code #1]-------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import IPython.core.display as display
import IPython.display
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

# pip install openpyxl  (required by pd.read_excel for .xlsx files)
# You may need to restart the kernel to use updated packages.
#-[END]------------------------------------------------------------
#-[Python Code #2]-------------------------------------------------
# Read the Excel database raw_data_f16.xlsx and preprocess it
df = pd.read_excel('raw_data_f16.xlsx')
print(df.columns)

df_copy = df.copy()
# Rename columns to short names (the spreadsheet headers contain the
# misspellings 'ResipratoryTherapy' and 'Days to Hostital' as written)
df_copy.rename(columns={'ResipratoryTherapy': 'Therapy',
                        'ROX index': 'ROX',
                        'LIV (%)': 'LIV',
                        'Gender': 'Gender',
                        'Age': 'Age',
                        'BMI': 'BMI',
                        'WBC (/µL)': 'WBC',
                        'Cr (mg/dL)': 'Cr',
                        'CRP (mg//dL)': 'CRP',
                        'LDH (U/L)': 'LDH',
                        'd-dimer (µg/L)': 'd-dimer',
                        'PSI': 'PSI',
                        'CCI': 'CCI',
                        'Days to Hostital (days)': 'DtoH',
                        'Days to HFNC (days)': 'DaystoHFNC',
                        'Mortality': 'Mortality'}, inplace=True)
print("")
print(df_copy.columns)

# Keep only the columns used in the analysis
# (.copy() avoids pandas SettingWithCopyWarning on the assignments below)
data = df_copy[["Therapy", "ROX", "LIV", "Mortality"]].copy()
print("")
print(data.head(3))
print("")
print(data.tail(3))

# Therapy data: simplify to ['HFNC', 'MV']
def therapy_convert(x):
    if x == 'High-flow Nasal Cannulation':
        return 'HFNC'
    elif x == 'Mechanical Ventilation':
        return 'MV'
    return x  # leave any other value unchanged

# Mortality: fix misspelled 'survivied' to 'survived'
def mortality_convert(x):
    if x == 'survivied':
        return 'survived'
    return x  # 'died' and other values pass through unchanged

# Therapy: simplify the data description
data["Therapy"] = data["Therapy"].apply(therapy_convert)
# Fix misspelling in Mortality
data["Mortality"] = data["Mortality"].apply(mortality_convert)
print("")
print(data.head(3))
print("")
print(data.tail(3))
#-[END]------------------------------------------------------------
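A quick sanity check (not part of the original listing) can confirm that the string conversions in Code #2 produced only the expected categories and no missing values; this is a minimal sketch assuming `data` from Code #2 is in scope.

#-[Optional check: conversion results]------------------------------
# Assumes `data` from Code #2.
print(data["Therapy"].value_counts(dropna=False))    # expect HFNC / MV
print(data["Mortality"].value_counts(dropna=False))  # expect survived / died
assert data[["Therapy", "Mortality"]].notna().all().all()
#-[END]------------------------------------------------------------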
#-[Python Code #3]-------------------------------------------------
# Pair plot of the selected features, colored by therapy
sns.set(style="ticks")
sns.pairplot(data, hue='Therapy', markers=['o', ','],
             palette={'HFNC': 'green', 'MV': 'red'})
plt.show()
#-[END]-------------------------------------------------------------
#-[Python Code #4]--------------------------------------------------
# Numerical encoding of the categorical data
# (data_0 is an alias of data, not a copy, so data is modified in place)
data_0 = data
class_mapping1 = {'HFNC': 0, 'MV': 1}
data_0['Therapy'] = data['Therapy'].map(class_mapping1)
class_mapping2 = {'survived': 0, 'died': 1}
data_0['Mortality'] = data['Mortality'].map(class_mapping2)
data_0.head(3)
#-[END]--------------------------------------------------------------
#-[Python Code #5]---------------------------------------------------
# Feature subsets (column order in data_0: Therapy, ROX, LIV, Mortality)
data_1 = data_0.iloc[:, 1:3]   # ROX and LIV
data_2 = data_0.iloc[:, 1:2]   # ROX alone
data_3 = data_0.iloc[:, 2:3]   # LIV alone
Mortality = data_0.iloc[:, 3:4]
Therapy = data_0.iloc[:, 0:1]
IPython.display.display(data_1.head(3))
IPython.display.display(data_2.head(3))
IPython.display.display(data_3.head(3))
IPython.display.display(Mortality.head(3))
IPython.display.display(Therapy.head(3))
#-[END]------------------------------------------------------------
#-[Python Code #6]-------------------------------------------------
# Classify patients into 3 subgroups
df_group_1 = data_0[(data_0['Therapy'] == 0) & (data_0['Mortality'] == 0)]
df_group_2 = data_0[(data_0['Therapy'] == 1) & (data_0['Mortality'] == 0)]
df_group_3 = data_0[(data_0['Therapy'] == 1) & (data_0['Mortality'] == 1)]

# Plot the data
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.xlim(0, 15)
plt.ylim(0, 80)
plt.title("Data ROX-LIV", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
#-[Python Code #7]-------------------------------------------------
###########################
# Logistic Regression ROX
###########################
from sklearn.linear_model import LogisticRegression

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_2, Therapy, test_size=0.2, random_state=0)
X_train = data_2
Y_train = Therapy
X_test = data_2
Y_test = Therapy

# Logistic regression
logreg_2 = LogisticRegression()
logreg_2.fit(X_train, Y_train)

# Prediction
Y_pred_2 = logreg_2.predict(X_test)

# Evaluation
# mean absolute error (MAE)
mae_2 = mean_absolute_error(Y_test, Y_pred_2)
# root mean squared error (RMSE)
rmse_2 = np.sqrt(mean_squared_error(Y_test, Y_pred_2))
# Score
score_2 = logreg_2.score(X_test, Y_test)
coef_2 = logreg_2.coef_[0]
intercept_2 = logreg_2.intercept_

print("MAE = %.3f, RMSE = %.3f" % (mae_2, rmse_2))
print("score = %.3f" % (score_2))
print("Coef = ", coef_2)
print("Intercept = ", intercept_2)
#-[END]------------------------------------------------------------
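Codes #7, #11, #15, #20, and #30 train and evaluate on the same data, as the commented-out train_test_split line suggests. For reference, a minimal sketch of a held-out evaluation, assuming data_2 and Therapy from Code #5 are in scope (variable names here are illustrative, not from the original listing):

#-[Optional: held-out evaluation sketch]----------------------------
# Assumes data_2 and Therapy from Code #5.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X_tr, X_te, y_tr, y_te = train_test_split(
    data_2, Therapy, test_size=0.2, random_state=0,
    stratify=Therapy)                    # keep class balance in both splits
model = LogisticRegression()
model.fit(X_tr, y_tr.values.ravel())     # ravel() avoids the column-vector warning
print("held-out score = %.3f" % model.score(X_te, y_te))
#-[END]------------------------------------------------------------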
#-[Python Code #8]-------------------------------------------------
# Plot the logistic regression decision boundary
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
line = np.linspace(0, 70)
plt.plot(line * 0 - intercept_2 / coef_2, line, c='b',
         label="LogisticRegression")
print("Cut-off value = %.3f" % float(-intercept_2 / coef_2))
plt.ylim(0, 70)
plt.xlim(0, 15)
plt.title("Logistic Regression: ROX index alone", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
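The vertical line in Code #8 marks where the fitted model is undecided: a single-feature logistic model gives P(MV) = 1/(1 + exp(-(coef*x + intercept))), so P = 0.5 exactly where coef*x + intercept = 0, i.e. x = -intercept/coef. A minimal check of that identity, assuming logreg_2, coef_2, and intercept_2 from Code #7:

#-[Optional: cut-off derivation check]------------------------------
# Assumes logreg_2, coef_2, intercept_2 from Code #7.
# At x = -intercept/coef the predicted probability should be 0.5.
cutoff = float(-intercept_2 / coef_2)
p = logreg_2.predict_proba([[cutoff]])[0, 1]
print("P(MV) at ROX = %.3f is %.3f" % (cutoff, p))   # expect ~0.500
#-[END]------------------------------------------------------------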
#-[Python Code #9]-------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_2)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

from sklearn.metrics import confusion_matrix, accuracy_score, \
    precision_score, recall_score, f1_score
print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_2))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_2)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_2)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_2)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_2)))
# The AUC for this model is computed in Code #10, after Y_score_2 is defined.
#-[END]------------------------------------------------------------
#-[Python Code #10]------------------------------------------------
from sklearn.metrics import roc_curve, auc
Y_score_2 = logreg_2.predict_proba(data_2)[:, 1]
fpr_2, tpr_2, thresholds_2 = roc_curve(y_true=Therapy, y_score=Y_score_2)

plt.figure(figsize=(2, 2))
plt.plot(fpr_2, tpr_2,
         label='roc curve (area = %0.3f)' % auc(fpr_2, tpr_2))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()

from sklearn.metrics import roc_auc_score
print("auc = %.3f"
      % (roc_auc_score(y_true=Therapy, y_score=Y_score_2)))
#-[END]------------------------------------------------------------
#-[Python Code #11]------------------------------------------------
###########################
# Logistic Regression LIV
###########################
X_train = data_3
Y_train = Therapy
X_test = data_3
Y_test = Therapy

# Logistic regression
logreg_3 = LogisticRegression()
logreg_3.fit(X_train, Y_train)

# Prediction
Y_pred_3 = logreg_3.predict(X_test)

# Evaluation
# mean absolute error (MAE)
mae_3 = mean_absolute_error(Y_test, Y_pred_3)
# root mean squared error (RMSE)
rmse_3 = np.sqrt(mean_squared_error(Y_test, Y_pred_3))
# Score
score_3 = logreg_3.score(X_test, Y_test)
coef_3 = logreg_3.coef_[0]
intercept_3 = logreg_3.intercept_

print("MAE = %.3f, RMSE = %.3f" % (mae_3, rmse_3))
print("score = %.3f" % (score_3))
print("Coef =", coef_3)
print("Intercept =", intercept_3)
print("Threshold of LIV = %.3f" % float(-intercept_3 / coef_3))
#-[END]------------------------------------------------------------
#-[Python Code #12]------------------------------------------------
# Plot the logistic regression decision boundary
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
line = np.linspace(0, 15)
plt.plot(line, line * 0 - intercept_3 / coef_3, c='b',
         label="LogisticRegression")
print("Cut-off value = %.3f" % float(-intercept_3 / coef_3))
plt.ylim(0, 70)
plt.xlim(0, 15)
plt.title("Logistic Regression: LIV alone", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
#-[Python Code #13]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_3)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_3))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_3)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_3)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_3)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_3)))
#-[END]------------------------------------------------------------
#-[Python Code #14]------------------------------------------------
Y_score_3 = logreg_3.predict_proba(data_3)[:, 1]
fpr_3, tpr_3, thresholds_3 = roc_curve(y_true=Therapy, y_score=Y_score_3)

plt.figure(figsize=(2, 2))
plt.plot(fpr_3, tpr_3,
         label='roc curve LIV (area = %0.3f)' % auc(fpr_3, tpr_3))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()

print("auc = %.3f"
      % (roc_auc_score(y_true=Therapy, y_score=Y_score_3)))
#-[END]------------------------------------------------------------
#-[Python Code #15]------------------------------------------------
#################################
# Logistic Regression ROX & LIV
#################################
from sklearn.linear_model import LogisticRegression

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_1, Therapy, test_size=0.2, random_state=0)
X_train = data_1
Y_train = Therapy
X_test = data_1
Y_test = Therapy

# Logistic regression
# logreg_1 = LogisticRegression(penalty='l2', solver="sag")
logreg_1 = LogisticRegression()
logreg_1.fit(X_train, Y_train)

# Prediction
Y_pred_1 = logreg_1.predict(X_test)

# Evaluation
# mean absolute error (MAE)
mae_1 = mean_absolute_error(Y_test, Y_pred_1)
# root mean squared error (RMSE)
rmse_1 = np.sqrt(mean_squared_error(Y_test, Y_pred_1))
# Score
score_1 = logreg_1.score(X_test, Y_test)
coef_1 = logreg_1.coef_[0]
intercept_1 = logreg_1.intercept_

print("MAE = %.3f, RMSE = %.3f" % (mae_1, rmse_1))
print("score = %.3f" % (score_1))
print("Coef = ", coef_1)
print("Intercept = ", intercept_1)
#-[END]------------------------------------------------------------
#-[Python Code #16]------------------------------------------------
# Plot the logistic regression decision boundary
line = np.linspace(0, 15)
plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.plot(line, -(line * coef_1[0] + intercept_1) / coef_1[1],
         c='b', label="LogisticRegression")
plt.ylim(0, 70)
plt.xlim(0, 15)
plt.title("Logistic Regression: ROX index & LIV", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
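The boundary line in Code #16 follows from setting the model's linear term to zero: coef_ROX*ROX + coef_LIV*LIV + intercept = 0, hence LIV = -(coef_ROX*ROX + intercept)/coef_LIV. A minimal sketch verifying that a point on the plotted line receives probability about 0.5, assuming logreg_1, coef_1, and intercept_1 from Code #15 (the ROX value 5.0 is arbitrary):

#-[Optional: boundary-line check]-----------------------------------
# Assumes logreg_1, coef_1, intercept_1 from Code #15.
rox = 5.0                                             # any ROX value
liv = float(-(rox * coef_1[0] + intercept_1) / coef_1[1])
p = logreg_1.predict_proba([[rox, liv]])[0, 1]
print("P(MV) on the boundary = %.3f" % p)             # expect ~0.500
#-[END]------------------------------------------------------------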
#-[Python Code #17]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_1)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_1))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_1)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_1)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_1)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_1)))
#-[END]------------------------------------------------------------
#-[Python Code #18]------------------------------------------------
Y_score_1 = logreg_1.predict_proba(data_1)[:, 1]
fpr_1, tpr_1, thresholds_1 = roc_curve(y_true=Therapy, y_score=Y_score_1)

plt.figure(figsize=(2, 2))
plt.plot(fpr_1, tpr_1,
         label='roc curve ROX & LIV (area = %0.3f)' % auc(fpr_1, tpr_1))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()

print("auc = %.3f"
      % (roc_auc_score(y_true=Therapy, y_score=Y_score_1)))
#-[END]------------------------------------------------------------
#-[Python Code #19]------------------------------------------------
plt.figure(figsize=(2, 2))
plt.plot(fpr_1, tpr_1,
         label='roc curve ROX&LIV (area = %0.3f)' % auc(fpr_1, tpr_1))
plt.plot(fpr_2, tpr_2,
         label='roc curve ROX (area = %0.3f)' % auc(fpr_2, tpr_2))
plt.plot(fpr_3, tpr_3,
         label='roc curve LIV (area = %0.3f)' % auc(fpr_3, tpr_3))
plt.plot([0, 1], [0, 1], linestyle='--', label='random')
plt.plot([0, 0, 1], [0, 1, 1], linestyle='--', label='ideal')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #20]------------------------------------------------
########################
# Soft Margin Linear SVM
########################
from sklearn.svm import SVC

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_1, Therapy, test_size=0.2, random_state=0)
X_train = data_1
Y_train = Therapy
X_test = data_1
Y_test = Therapy

soft_linear_svc = SVC(C=0.1, kernel='linear')
soft_linear_svc.fit(X_train, Y_train)
y_test_pred = soft_linear_svc.decision_function(X_test)
y_test_len = len(y_test_pred)
Y_pred_S = soft_linear_svc.predict(X_test)

plt.figure(figsize=(4, 2.5))
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.ylim(0, 70)
plt.xlim(0, 15)
xlim = plt.xlim()
ylim = plt.ylim()

# Evaluate the decision function on a grid to draw the margin lines
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
XX, YY = np.meshgrid(xx, yy)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = soft_linear_svc.decision_function(xy).reshape(XX.shape)
plt.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1],
            alpha=0.5, linestyles=['--', '-', '--'])
plt.title("Soft-margin Linear SVM", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.grid()
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#-[END]------------------------------------------------------------
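The dashed contours at levels -1 and +1 in Code #20 are the SVM margins; training points on or inside them are the support vectors. A minimal sketch for inspecting them, assuming soft_linear_svc fitted in Code #20 (n_support_ and support_vectors_ are standard fitted attributes of sklearn.svm.SVC):

#-[Optional: support-vector inspection]-----------------------------
# Assumes soft_linear_svc fitted in Code #20.
print("support vectors per class:", soft_linear_svc.n_support_)
print("first few support vectors (ROX, LIV):")
print(soft_linear_svc.support_vectors_[:5])
#-[END]------------------------------------------------------------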
#-[Python Code #21]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_S)   # rows = actual, columns = predicted
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_S))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_S)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_S)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_S)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_S)))
#-[END]------------------------------------------------------------
#-[Python Code #22]------------------------------------------------
########################
# Non-Linear SVM
########################
import sklearn.model_selection
from sklearn.svm import SVC

def pt_plot(x1_x2, label):
    plt.figure(figsize=(4, 2.5))
    plt.grid(True)
    # plt.ylim(0, 70)
    # plt.xlim(0, 13)
    plt.title("Non-linear SVM", fontsize=16)
    plt.xlabel("ROX index", fontsize=16)
    plt.ylabel("LIV (%)", fontsize=16)
    datalen = len(x1_x2)
    for cnt in range(datalen):
        if label[cnt] == 0:
            plt.scatter(x1_x2[:, 0][cnt], x1_x2[:, 1][cnt], label="HFNC",
                        color='white', s=20, marker="o", edgecolor='green')
        elif label[cnt] == 1:
            plt.scatter(x1_x2[:, 0][cnt], x1_x2[:, 1][cnt], label="MV survived",
                        color='white', s=20, marker=",", edgecolor='red')

def pt_plot_mortality(x1_x2, label, mortality):
    # Scatter plot of the three subgroups in (scaled) ROX-LIV space.
    # label[:, 0] holds the Therapy code; mortality[:, 0] the Mortality code.
    plt.figure(figsize=(4, 2.5))
    plt.grid(True)
    plt.title("Non-linear SVM", fontsize=16)
    plt.xlabel("ROX index", fontsize=16)
    plt.ylabel("LIV (%)", fontsize=16)
    x1_x2_1 = np.empty(2)
    x1_x2_2 = np.empty(2)
    x1_x2_3 = np.empty(2)
    group_1_cnt = 0
    group_2_cnt = 0
    group_3_cnt = 0
    datalen = len(x1_x2)
    for cnt in range(datalen):
        if label[cnt, 0] == 0:
            x1_x2_1 = np.append(x1_x2_1, x1_x2[cnt], axis=0)
            group_1_cnt += 1
        if label[cnt, 0] == 1:
            x1_x2_2 = np.append(x1_x2_2, x1_x2[cnt], axis=0)
            group_2_cnt += 1
        if mortality[cnt, 0] == 1:
            x1_x2_3 = np.append(x1_x2_3, x1_x2[cnt], axis=0)
            group_3_cnt += 1
    # Reshape the accumulated pairs, then drop the np.empty placeholder row
    x1_x2_1 = x1_x2_1.reshape(group_1_cnt + 1, 2)
    x1_x2_2 = x1_x2_2.reshape(group_2_cnt + 1, 2)
    x1_x2_3 = x1_x2_3.reshape(group_3_cnt + 1, 2)
    x1_x2_1 = np.delete(x1_x2_1, 0, 0)
    x1_x2_2 = np.delete(x1_x2_2, 0, 0)
    x1_x2_3 = np.delete(x1_x2_3, 0, 0)
    plt.scatter(x1_x2_1[:, 0], x1_x2_1[:, 1], label="HFNC survived",
                color='white', s=20, marker="o", edgecolor='green')
    plt.scatter(x1_x2_2[:, 0], x1_x2_2[:, 1], label="MV survived",
                color='white', s=20, marker=",", edgecolor='red')
    plt.scatter(x1_x2_3[:, 0], x1_x2_3[:, 1], label="MV died",
                color='black', s=20, marker=",", edgecolor='black')

def false_positive_rate(y_true, y_pred):
    # FPR = FP / (TN + FP); ravel order for a 2x2 matrix is tn, fp, fn, tp
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).flatten()
    return fp / (tn + fp)
#-[END]------------------------------------------------------------
#-[Python Code #23]------------------------------------------------
X_train = data_0
Y_train = Therapy

# Classify patients into 3 subgroups
X_train_1 = X_train[(X_train['Therapy'] == 0) & (X_train['Mortality'] == 0)]
X_train_2 = X_train[(X_train['Therapy'] == 1) & (X_train['Mortality'] == 0)]
X_train_3 = X_train[(X_train['Therapy'] == 1) & (X_train['Mortality'] == 1)]

# Standardize ROX and LIV (columns 1:3 of data_0) to zero mean, unit variance
scaler = StandardScaler()
scaled_features = scaler.fit_transform(X_train.values[:, 1:3])
scaled_features_train = np.hstack((scaled_features,
                                   X_train['Therapy'].values.reshape(-1, 1),
                                   X_train['Mortality'].values.reshape(-1, 1)))
pt_plot_mortality(scaled_features,
                  X_train.values, scaled_features_train[:, 3:4])
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
#-[END]------------------------------------------------------------
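StandardScaler transforms each feature as z = (x - mean)/std, so the SVM plots that follow are in standardized units rather than raw ROX/LIV values. A minimal sketch for mapping standardized coordinates back to the original scale, assuming the scaler fitted in Code #23 (mean_, scale_, and inverse_transform are standard StandardScaler attributes):

#-[Optional: back-transform to original units]----------------------
# Assumes scaler fitted in Code #23 on the (ROX, LIV) columns.
print("means:", scaler.mean_)     # per-feature mean (ROX, LIV)
print("stds:", scaler.scale_)     # per-feature standard deviation
# Map a standardized point, e.g. the origin, back to (ROX, LIV) units:
print(scaler.inverse_transform([[0.0, 0.0]]))   # equals the feature means
#-[END]------------------------------------------------------------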
#-[Python Code #24]------------------------------------------------
# Step 2. Training the model
# kernel = sigmoid
knl = 'sigmoid'
# kernel options
# knl = 'rbf'
# knl = 'poly'
gma = 5
dgr = 2
# cost parameter C
c_values = [0.01, 0.1, 1.0, 10, 100]

# Step 2: Split the training data
k = 5
skf = sklearn.model_selection.StratifiedKFold(n_splits=k,
                                              random_state=0, shuffle=True)

# Step 2: Cross validation
# List of evaluation metrics
scoring = ['f1_macro', 'precision_macro', 'recall_macro']
scores = None
for c in c_values:
    if knl == 'rbf':
        clf = SVC(kernel=knl, C=c, gamma=gma)
    elif knl == 'poly':
        clf = SVC(kernel=knl, C=c, gamma=gma, degree=dgr)
    else:
        clf = SVC(kernel=knl, C=c)
    score = sklearn.model_selection.cross_validate(
        clf, scaled_features, Y_train, scoring=scoring, cv=skf)
    x = pd.concat([pd.Series(np.full(k, c), name='C'),
                   pd.DataFrame(score)], axis=1)
    scores = pd.concat([scores, x])
print(scores)

# Step 2: Training using the training data set
best_c = scores.loc[scores.loc[:, 'test_f1_macro']
                    == max(scores.loc[:, 'test_f1_macro']), 'C'].values[0]
if knl == 'rbf':
    clf = SVC(kernel=knl, C=best_c, gamma=gma)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('gamma=', gma)
elif knl == 'poly':
    clf = SVC(kernel=knl, C=best_c, degree=dgr)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('degree=', dgr)
    print('gamma=', gma)
elif knl == 'sigmoid':
    clf = SVC(kernel=knl, C=best_c)
    print('kernel=', knl)
    print('bestC=', best_c)
clf.fit(scaled_features, Y_train)
#-[END]------------------------------------------------------------
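Codes #24 and #27 hand-roll the search over C with StratifiedKFold and cross_validate. For comparison only (this is an alternative, not the method used above), the same selection can be written with sklearn's GridSearchCV; a minimal sketch assuming scaled_features and Y_train from Code #23:

#-[Optional: GridSearchCV alternative]------------------------------
# Assumes scaled_features, Y_train from Code #23.
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
grid = GridSearchCV(SVC(kernel='sigmoid'),
                    param_grid={'C': [0.01, 0.1, 1.0, 10, 100]},
                    scoring='f1_macro', cv=cv)
grid.fit(scaled_features, Y_train.values.ravel())
print("best C =", grid.best_params_['C'])
print("best f1_macro = %.3f" % grid.best_score_)
#-[END]------------------------------------------------------------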
#-[Python Code #25]------------------------------------------------
# Prediction on the training data
Y_pred_SM2 = clf.predict(scaled_features)

# Display the decision boundary
plt.figure(figsize=(4, 2.5))
_x0 = np.linspace(-3, 3, 600)
_x1 = np.linspace(-3, 4, 700)
x0, x1 = np.meshgrid(_x0, _x1)
X = np.hstack((x0.ravel().reshape(-1, 1), x1.ravel().reshape(-1, 1)))
y_decision = clf.predict(X).reshape(x0.shape)

# Plot the training data
pt_plot_mortality(scaled_features,
                  Y_train.values, scaled_features_train[:, 3:4])
plt.contourf(x0, x1, y_decision,
             levels=[y_decision.min(), 0.1, y_decision.max()], alpha=0.3)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #26]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Y_train, Y_pred_SM2)
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Y_train, y_pred=Y_pred_SM2))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Y_train, y_pred=Y_pred_SM2)))
print("precision = %.3f"
      % (precision_score(y_true=Y_train, y_pred=Y_pred_SM2)))
print("recall = %.3f"
      % (recall_score(y_true=Y_train, y_pred=Y_pred_SM2)))
print("f1 score = %.3f"
      % (f1_score(y_true=Y_train, y_pred=Y_pred_SM2)))
#-[END]------------------------------------------------------------
#-[Python Code #27]------------------------------------------------
# Step 2. Training the model
# kernel = Gaussian (RBF)
knl = 'rbf'
# kernel options
# knl = 'poly'
# knl = 'sigmoid'
gma = 5
dgr = 2
# cost parameter C
c_values = [0.01, 0.1, 1.0, 10, 100]

# Step 2: Split the training data
k = 5
skf = sklearn.model_selection.StratifiedKFold(n_splits=k,
                                              random_state=0, shuffle=True)

# Step 2: Cross validation
# List of evaluation metrics
scoring = ['f1_macro', 'precision_macro', 'recall_macro']
scores = None
for c in c_values:
    if knl == 'rbf':
        clf = SVC(kernel=knl, C=c, gamma=gma)
    elif knl == 'poly':
        clf = SVC(kernel=knl, C=c, gamma=gma, degree=dgr)
    else:
        clf = SVC(kernel=knl, C=c)
    score = sklearn.model_selection.cross_validate(
        clf, scaled_features, Y_train, scoring=scoring, cv=skf)
    x = pd.concat([pd.Series(np.full(k, c), name='C'),
                   pd.DataFrame(score)], axis=1)
    scores = pd.concat([scores, x])
print(scores)

# Step 2: Training using the training data set
best_c = scores.loc[scores.loc[:, 'test_f1_macro']
                    == max(scores.loc[:, 'test_f1_macro']), 'C'].values[0]
if knl == 'rbf':
    clf = SVC(kernel=knl, C=best_c, gamma=gma)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('gamma=', gma)
elif knl == 'poly':
    clf = SVC(kernel=knl, C=best_c, degree=dgr)
    print('kernel=', knl)
    print('bestC=', best_c)
    print('degree=', dgr)
    print('gamma=', gma)
elif knl == 'sigmoid':
    clf = SVC(kernel=knl, C=best_c)
    print('kernel=', knl)
    print('bestC=', best_c)
clf.fit(scaled_features, Y_train)
#-[END]------------------------------------------------------------
#-[Python Code #28]------------------------------------------------
# Prediction on the training data
Y_pred_SM1 = clf.predict(scaled_features)

# Display the decision boundary
plt.figure(figsize=(4, 2.5))
_x0 = np.linspace(-3, 3, 600)
_x1 = np.linspace(-3, 4, 700)
x0, x1 = np.meshgrid(_x0, _x1)
X = np.hstack((x0.ravel().reshape(-1, 1),
               x1.ravel().reshape(-1, 1)))
y_decision = clf.predict(X).reshape(x0.shape)

# Plot the training data
pt_plot_mortality(scaled_features, Y_train.values,
                  scaled_features_train[:, 3:4])
plt.contourf(x0, x1, y_decision,
             levels=[y_decision.min(), 0.1, y_decision.max()], alpha=0.3)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #29]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Y_train, Y_pred_SM1)
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Y_train, y_pred=Y_pred_SM1))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Y_train, y_pred=Y_pred_SM1)))
print("precision = %.3f"
      % (precision_score(y_true=Y_train, y_pred=Y_pred_SM1)))
print("recall = %.3f"
      % (recall_score(y_true=Y_train, y_pred=Y_pred_SM1)))
print("f1 score = %.3f"
      % (f1_score(y_true=Y_train, y_pred=Y_pred_SM1)))
#-[END]------------------------------------------------------------
#-[Python Code #30]------------------------------------------------
########################
# Gaussian Naive Bayes
########################
from sklearn.naive_bayes import GaussianNB

# Split the data into training and test data sets
# X_train, X_test, Y_train, Y_test = train_test_split(
#     data_1, Therapy, test_size=0.2, random_state=0)
X_train = data_1
Y_train = Therapy
X_test = data_1
Y_test = Therapy

# Classification
clf = GaussianNB()
clf.fit(X_train, Y_train)

# Prediction
Y_pred_G1 = clf.predict(X_test)

# Evaluation
score_G = clf.score(X_test, Y_test)
print("score = %.3f" % (score_G))
#-[END]------------------------------------------------------------
#-[Python Code #31]------------------------------------------------
# Data, initial values, preprocessing
# X, Y values
XY = data_1
# Label values
L = Therapy.astype(int)
# Maximum value of X (ROX)
A = int(max(data_1['ROX']))
# Maximum value of Y (LIV)
B = int(max(data_1['LIV']))
C, D = np.meshgrid(np.arange(A), np.arange(B))
# All (X, Y) grid combinations
E = np.hstack((C.reshape(A * B, 1), D.reshape(A * B, 1)))

# Classification
clf = GaussianNB()
clf.fit(XY, L)
Y_pred_G2 = clf.predict(E)
#-[END]------------------------------------------------------------
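Code #31 evaluates the classifier on an integer-spaced grid (step 1 in both ROX and LIV), which is what produces the blocky decision regions in Code #32. A finer grid is a one-line change; a minimal sketch, assuming clf, A, and B from Code #31 (the step of 0.1 and the suffixed names are illustrative):

#-[Optional: finer prediction grid]---------------------------------
# Assumes clf, A, B from Code #31. Grid step 0.1 instead of 1.
C_f, D_f = np.meshgrid(np.arange(0, A, 0.1), np.arange(0, B, 0.1))
E_f = np.hstack((C_f.reshape(-1, 1), D_f.reshape(-1, 1)))
Y_pred_fine = clf.predict(E_f)
# Then plot with plt.pcolormesh(C_f, D_f, Y_pred_fine.reshape(C_f.shape))
#-[END]------------------------------------------------------------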
#-[Python Code #32]------------------------------------------------
# Plot Gaussian Naive Bayes
plt.figure(figsize=(4, 2.5))
# Plot the decision regions
plt.pcolormesh(C, D, Y_pred_G2.reshape(C.shape), alpha=0.3)
plt.scatter(df_group_1['ROX'], df_group_1['LIV'], label="HFNC survived",
            color='white', s=20, marker="o", edgecolor='green')
plt.scatter(df_group_2['ROX'], df_group_2['LIV'], label="MV survived",
            color='white', s=20, marker=",", edgecolor='red')
plt.scatter(df_group_3['ROX'], df_group_3['LIV'], label="MV died",
            color='black', s=20, marker=",", edgecolor='black')
plt.grid(True)
plt.ylim(0, 70)
plt.xlim(0, 13)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.title("Gaussian Naive Bayes", fontsize=16)
plt.xlabel("ROX index", fontsize=16)
plt.ylabel("LIV (%)", fontsize=16)
plt.show()
#-[END]------------------------------------------------------------
#-[Python Code #33]------------------------------------------------
# Heat map of the confusion matrix
cm = confusion_matrix(Therapy, Y_pred_G1)
plt.figure(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel("Predicted", fontsize=13)
plt.ylabel("Actual", fontsize=13)
plt.show()

print("confusion matrix = \n",
      confusion_matrix(y_true=Therapy, y_pred=Y_pred_G1))
print("accuracy = %.3f"
      % (accuracy_score(y_true=Therapy, y_pred=Y_pred_G1)))
print("precision = %.3f"
      % (precision_score(y_true=Therapy, y_pred=Y_pred_G1)))
print("recall = %.3f"
      % (recall_score(y_true=Therapy, y_pred=Y_pred_G1)))
print("f1 score = %.3f"
      % (f1_score(y_true=Therapy, y_pred=Y_pred_G1)))
#-[END]------------------------------------------------------------