https://github.com/arvinsoepriatna/AP_Analysis_Routines_Cardiotoxicity_Microtissues
Tip revision: 3172027cbd78883c5c6e464524afea6fa213cfad authored by bumrak on 18 May 2023, 19:50:22 UTC
Update README.md
Update README.md
Tip revision: 3172027
LogisticRegression_CardiotoxicityAlgorithms.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#Load libraries\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.linear_model import LogisticRegressionCV \n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Logistic Regression with Experimental Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#Load experimental data\n",
"def _read_dat(stem):\n",
"    \"\"\"Read one headerless recording file 'experiment_data/<stem>.dat' into a DataFrame.\"\"\"\n",
"    return pd.read_csv('experiment_data/' + stem + '.dat', header=None)\n",
"\n",
"#One before/after pair of recordings per compound\n",
"E4031_before, E4031_after = _read_dat('E4031_before'), _read_dat('E4031_after')\n",
"Flecainide_before, Flecainide_after = _read_dat('Flecainide_before'), _read_dat('Flecainide_after')\n",
"Nifedipine_before, Nifedipine_after = _read_dat('Nifedipine_before'), _read_dat('Nifedipine_after')\n",
"TTX_before, TTX_after = _read_dat('TTX-mold2-before'), _read_dat('TTX-mold2-after')\n",
"\n",
"#Metric labels are the header row of DataLabels.txt, with the first character of every label dropped\n",
"col_names = [label[1:] for label in pd.read_csv('experiment_data/DataLabels.txt').columns.tolist()]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#Calculate metric changes after compound administration and label each dataset\n",
"def _relative_change(before, after, pharm, channel):\n",
"    \"\"\"Return (after - before)/before with named metric columns plus compound and channel labels.\n",
"\n",
"    The result is a fraction of the baseline value (it is not multiplied by 100).\n",
"    \"\"\"\n",
"    diff = (after - before)/before\n",
"    #The last entry of col_names is not used as a column label here\n",
"    diff.columns = col_names[:-1]\n",
"    diff['pharm'] = pharm\n",
"    diff['channel_blocked'] = channel\n",
"    return diff\n",
"\n",
"E4031_diff = _relative_change(E4031_before, E4031_after, 'E4031', 'kr')\n",
"Flecainide_diff = _relative_change(Flecainide_before, Flecainide_after, 'Flec', 'nakr')\n",
"Nifedipine_diff = _relative_change(Nifedipine_before, Nifedipine_after, 'Nif', 'ca')\n",
"TTX_diff = _relative_change(TTX_before, TTX_after, 'TTX', 'na')\n",
"\n",
"#Concatenate datasets for all compounds\n",
"all_data_exp = pd.concat([E4031_diff, Nifedipine_diff, TTX_diff, Flecainide_diff])\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#Predictor variable names\n",
"X_col = ['APD30', 'APD50', 'APD80', 'APDmxr', 'tri', 'rise time', 'stim delay',]\n",
"\n",
"#Channel classes scored individually; the order fixes the row order of channel_scores_exp\n",
"channels_exp = ['kr', 'ca', 'na', 'nakr']\n",
"\n",
"#Containers for results: one overall accuracy per split, and one list of per-split accuracies per channel\n",
"test_scores = []\n",
"channel_scores_exp = [[] for _ in channels_exp]\n",
"\n",
"#Repeat over 100 random stratified 80/20 splits (no random_state is set, so numbers change between runs)\n",
"for i in range(100):\n",
"    #Divide training and test datasets\n",
"    train_exp, test_exp = train_test_split(all_data_exp, test_size = 0.2, stratify = all_data_exp.channel_blocked)\n",
"\n",
"    #Separate predictors and response variables\n",
"    X_train_exp, y_train_exp = train_exp[X_col], train_exp['channel_blocked']\n",
"    X_test_exp, y_test_exp = test_exp[X_col], test_exp['channel_blocked']\n",
"\n",
"    #Find scaler using training dataset then apply to test dataset\n",
"    scaler_exp = StandardScaler().fit(X_train_exp)\n",
"    scaled_X_train_exp = scaler_exp.transform(X_train_exp)\n",
"    scaled_X_test_exp = scaler_exp.transform(X_test_exp)\n",
"\n",
"    #Cross-validate to tune model parameters\n",
"    clf_exp = LogisticRegressionCV(Cs = 5, cv = 5, multi_class = 'multinomial').fit(scaled_X_train_exp, y_train_exp)\n",
"\n",
"    #Save results\n",
"    test_scores.append(clf_exp.score(scaled_X_test_exp, y_test_exp))\n",
"\n",
"    #Predict once per split and reuse the predictions for every channel\n",
"    pred_exp = clf_exp.predict(scaled_X_test_exp)\n",
"\n",
"    #Obtain model performance for individual ion channels:\n",
"    #fraction of test rows of a channel that were predicted as that channel\n",
"    for idx, channel in enumerate(channels_exp):\n",
"        is_channel = np.array(y_test_exp == channel)\n",
"        num = np.sum(pred_exp[is_channel] == channel)\n",
"        denom = np.sum(is_channel)\n",
"        channel_scores_exp[idx].append(num/denom)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean Model Accuracy on Full Dataset: 0.9467857142857141\n",
"Standard Deviation of Model Accuracy on Full Dataset: 0.03829763940079687\n",
"\n",
"Model Accuracy for Individual Ion Channels:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model Accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>kr</th>\n",
" <td>0.951429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ca</th>\n",
" <td>0.971429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>na</th>\n",
" <td>0.921429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nakr</th>\n",
" <td>0.942857</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model Accuracy\n",
"kr 0.951429\n",
"ca 0.971429\n",
"na 0.921429\n",
"nakr 0.942857"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Print results: overall accuracy across the repeated splits of the experimental data\n",
"print(\"Mean Model Accuracy on Full Dataset: \", np.mean(test_scores))\n",
"print(\"Standard Deviation of Model Accuracy on Full Dataset: \", np.std(test_scores))\n",
"print()\n",
"\n",
"# Per-channel accuracy, averaged over the splits (rows follow the order used when scoring)\n",
"print(\"Model Accuracy for Individual Ion Channels:\")\n",
"pd.DataFrame({'Model Accuracy': [np.mean(score) for score in channel_scores_exp]},\n",
"             index = ['kr', 'ca', 'na', 'nakr'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Logistic Regression with Simulation Data - 5 Ion Channels"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#Load the simulation data table (sim_df_joined.xlsx) used by the simulation models below\n",
"all_sims = pd.read_excel(\n",
"    'dfsfromcardiotoxsims/fixedrisetimes25/sim_df_joined.xlsx'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#Keep only the simulations whose APDmxr is less than twice their APD80\n",
"all_sims = all_sims.loc[all_sims['APDmxr'].lt(2*all_sims['APD80'])]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Predictor variable names (column naming of the simulation table)\n",
"X_col = ['APD30', 'APD50', 'APD80', 'APDmxr', 'APDtri', 'rise_time', 'delay',]\n",
"\n",
"#Channel classes scored individually; the order fixes the row order of channel_scores\n",
"channels_sim = ['kr', 'ca', 'na','ks', 'to']\n",
"\n",
"#Containers for results: one overall accuracy per split, and one list of per-split accuracies per channel\n",
"test_scores_sim = []\n",
"channel_scores = [[] for _ in channels_sim]\n",
"\n",
"#Repeat over 5 random stratified 80/20 splits (no random_state is set, so numbers change between runs)\n",
"for i in range(5):\n",
"    #Divide training and test datasets\n",
"    train, test = train_test_split(all_sims, test_size = 0.2, stratify = all_sims.channel_blocked)\n",
"\n",
"    #Separate predictors and response variables\n",
"    X_train, y_train = train[X_col], train['channel_blocked']\n",
"    X_test, y_test = test[X_col], test['channel_blocked']\n",
"\n",
"    #Find scaler using training dataset then apply to test dataset\n",
"    scaler = StandardScaler().fit(X_train)\n",
"    scaled_X_train = scaler.transform(X_train)\n",
"    scaled_X_test = scaler.transform(X_test)\n",
"\n",
"    #Cross-validate to tune model parameters\n",
"    clf = LogisticRegressionCV(Cs = 3, cv = 5, solver = 'sag', penalty = 'l2',\n",
"                               multi_class = 'multinomial', max_iter = 300,\n",
"                               tol = 1e-3).fit(scaled_X_train, y_train)\n",
"\n",
"    #Save results\n",
"    test_scores_sim.append(clf.score(scaled_X_test, y_test))\n",
"\n",
"    #Predict once per split and reuse the predictions for every channel\n",
"    pred = clf.predict(scaled_X_test)\n",
"\n",
"    #Obtain model performance for individual ion channels:\n",
"    #fraction of test rows of a channel that were predicted as that channel\n",
"    for idx, channel in enumerate(channels_sim):\n",
"        #Convert the boolean Series to a plain array before indexing the prediction array\n",
"        is_channel = np.array(y_test == channel)\n",
"        num = np.sum(pred[is_channel] == channel)\n",
"        denom = np.sum(is_channel)\n",
"        channel_scores[idx].append(num/denom)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print results for the 5-channel simulation model.\n",
"# The overall accuracies of this model are stored in test_scores_sim\n",
"# (test_scores holds the accuracies of the experimental-data model).\n",
"print(\"Mean Model Accuracy on Full Dataset: \", np.mean(test_scores_sim))\n",
"print(\"Standard Deviation of Model Accuracy on Full Dataset: \", np.std(test_scores_sim))\n",
"print()\n",
"\n",
"# Per-channel mean and standard deviation of accuracy over the splits, one row per channel\n",
"print(\"Model Accuracy for Individual Ion Channels:\")\n",
"pd.DataFrame([[np.mean(score) for score in channel_scores], [np.std(score) for score in channel_scores]], \n",
"             columns = ['kr', 'ca', 'na', 'ks', 'to'],\n",
"             index = ['Model Accuracy', 'Model SD']).T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Logistic Regression with Simulation Data - IKr, ICa, INa Only (Mimics Experimental Data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Drop the 'to' and 'ks' simulations so only the kr, ca and na classes remain\n",
"all_sims_short = all_sims[~all_sims.channel_blocked.isin(['to', 'ks'])]\n",
"\n",
"#Predictor variable names (column naming of the simulation table)\n",
"X_col = ['APD30', 'APD50', 'APD80', 'APDmxr', 'APDtri', 'rise_time', 'delay',]\n",
"\n",
"#Channel classes scored individually; the order fixes the row order of channel_scores_short\n",
"channels_short = ['kr', 'ca', 'na']\n",
"\n",
"#Containers for results: one overall accuracy per split, and one list of per-split accuracies per channel\n",
"test_scores_sim_short = []\n",
"channel_scores_short = [[] for _ in channels_short]\n",
"\n",
"#Repeat over 5 random stratified 80/20 splits (no random_state is set, so numbers change between runs)\n",
"for i in range(5):\n",
"    #Divide training and test datasets\n",
"    train, test = train_test_split(all_sims_short, test_size = 0.2, stratify = all_sims_short.channel_blocked)\n",
"\n",
"    #Separate predictors and response variables\n",
"    X_train, y_train = train[X_col], train['channel_blocked']\n",
"    X_test, y_test = test[X_col], test['channel_blocked']\n",
"\n",
"    #Find scaler using training dataset then apply to test dataset\n",
"    scaler = StandardScaler().fit(X_train)\n",
"    scaled_X_train = scaler.transform(X_train)\n",
"    scaled_X_test = scaler.transform(X_test)\n",
"\n",
"    #Cross-validate to tune model parameters\n",
"    clf = LogisticRegressionCV(Cs = 3, cv = 5, solver = 'sag', penalty = 'l2',\n",
"                               multi_class = 'multinomial', max_iter = 300,\n",
"                               tol = 1e-3).fit(scaled_X_train, y_train)\n",
"\n",
"    #Save results\n",
"    test_scores_sim_short.append(clf.score(scaled_X_test, y_test))\n",
"\n",
"    #Predict once per split and reuse the predictions for every channel\n",
"    pred = clf.predict(scaled_X_test)\n",
"\n",
"    #Obtain model performance for individual ion channels:\n",
"    #fraction of test rows of a channel that were predicted as that channel\n",
"    for idx, channel in enumerate(channels_short):\n",
"        #Convert the boolean Series to a plain array before indexing the prediction array\n",
"        is_channel = np.array(y_test == channel)\n",
"        num = np.sum(pred[is_channel] == channel)\n",
"        denom = np.sum(is_channel)\n",
"        channel_scores_short[idx].append(num/denom)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print results: overall accuracy across the repeated splits of the 3-channel simulation data\n",
"print(\"Mean Model Accuracy on Full Dataset: \", np.mean(test_scores_sim_short))\n",
"print(\"Standard Deviation of Model Accuracy on Full Dataset: \", np.std(test_scores_sim_short))\n",
"print()\n",
"\n",
"# Per-channel mean and standard deviation over the splits; only the first three score lists are used\n",
"print(\"Model Accuracy for Individual Ion Channels:\")\n",
"pd.DataFrame({'Model Accuracy': [np.mean(score) for score in channel_scores_short[:3]],\n",
"              'Model SD': [np.std(score) for score in channel_scores_short[:3]]},\n",
"             index = ['kr', 'ca', 'na'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
