{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Epilepsy Comorbidity Analysis using SCAIView\n", "\n", "This notebook contains the quantification of gene overlap comparing Epilepsy with other disorders using text mining presented in Hoyt and Domingo-Fernandez *et al.*, 2018." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import csv\n", "import os\n", "import sys\n", "import time\n", "\n", "from operator import itemgetter\n", "\n", "from matplotlib import pyplot as plt\n", "from matplotlib_venn import venn3, venn2\n", "import numpy as np\n", "import pandas as pd\n", "import scipy.stats as stats\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "%config InlineBackend.figure_format = 'svg'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.6.5 (default, Apr 20 2018, 08:54:42) \n", "[GCC 4.8.5 20150623 (Red Hat 4.8.5-16)]\n" ] } ], "source": [ "print(sys.version)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fri Apr 20 11:07:01 2018\n" ] } ], "source": [ "print(time.asctime())" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "current_path = os.getcwd() # Notebook abs path" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Queries\n", "\n", "The following two sets of queries were used in this analysis:\n", "\n", "## Reference Queries\n", "\n", "- `[MeSH Disease:\"Epilepsy\"]`\n", "- `[MeSH Disease:\"Alzheimer Disease\"]`\n", "- `[MeSH Disease:\"Tuberculosis\"]`\n", "- `[MeSH Disease:\"Parkinson Disease\"]`\n", "- `[MeSH Disease:\"Dementia\"]`\n", "- `[MeSH Disease:\"Migraine Disorders\"]`\n", "- `[MeSH Disease:\"Diabetes Mellitus\"]`\n", "- `[MeSH Disease:\"Pulmonary Disease 
Chronic Obstructive\"]`\n", "- `[MeSH Disease:\"Peptic Ulcer\"]`\n", "- `[MeSH Disease:\"Anxiety Disorders\"]`\n", "- `[MeSH Disease:\"Urinary Incontinence\"]`\n", "- `[MeSH Disease:\"Cataract\"]`\n", "- `[MeSH Disease:\"Hypertension\"]`\n", "- `[MeSH Disease:\"Arthritis\"]`\n", "- `[MeSH Disease:\"Asthma\"]`\n", "- `[MeSH Disease:\"Bronchitis Chronic\"]`\n", "- `[MeSH Disease:\"Emphysema\"]`\n", "- `[MeSH Disease:\"Fibromyalgia\"]`\n", "- `[MeSH Disease:\"Glaucoma\"]`\n", "- `[MeSH Disease:\"Intestinal Diseases\"]`\n", "- `[MeSH Disease:\"Thyroid Diseases\"]`\n", "- `[MeSH Disease:\"Depressive Disorder, Major\"]`\n", "- `[MeSH Disease:\"Back Pain\"]`\n", "- `[MeSH Disease:\"Stroke\"]`\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Queries used for calculating pleiotropy rates\n", "\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Alzheimer Disease\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Parkinson Disease\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Dementia\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Migraine Disorders\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Diabetes Mellitus\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Pulmonary Disease Chronic Obstructive\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Anxiety Disorders\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Urinary Incontinence\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Cataract\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Hypertension\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Arthritis\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Asthma\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Bronchitis Chronic\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Emphysema\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Fibromyalgia\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH 
Disease:\"Glaucoma\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Intestinal Diseases\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Thyroid Diseases\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Depressive Disorder, Major\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Back Pain\"]`\n", "- `[MeSH Disease:\"Epilepsy\"] AND [MeSH Disease:\"Stroke\"]`\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The queries were retrieved using SCAIView version 1.7.3,\n", "corresponding to the indexing of MEDLINE on 2016-07-14T13:50:07.797575Z.\n", "\n", "**Note:** The reference queries might take a while to run, since thousands of articles need to be analyzed." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Notebook results" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Condition | \n", "MeSH ID | \n", "Reference Query | \n", "Associated documents | \n", "Disease-associated genes | \n", "Comorbidity-associated genes | \n", "Normalized pleitropy rate (%) | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "Epilepsy | \n", "D004827 | \n", "[MeSH Disease:\"Epilepsy\"] | \n", "192245 | \n", "2901.000 | \n", "- | \n", "- | \n", "
1 | \n", "Stroke | \n", "D020521 | \n", "[MeSH Disease:\"Stroke\"] | \n", "210846 | \n", "4533.000 | \n", "633 | \n", "17.78 | \n", "
2 | \n", "Alzheimer's Disease | \n", "D000544 | \n", "[MeSH Disease:\"Alzheimer Disease\"] | \n", "109495 | \n", "4968.000 | \n", "396 | \n", "13.65 | \n", "
3 | \n", "Migraine | \n", "D008881 | \n", "[MeSH Disease:\"Migraine Disorders\"] | \n", "30928 | \n", "1230.000 | \n", "306 | \n", "10.54 | \n", "
4 | \n", "Parkinson's Disease | \n", "D010300 | \n", "[MeSH Disease:\"Parkinson Disease\"] | \n", "79103 | \n", "3646.000 | \n", "258 | \n", "8.89 | \n", "
5 | \n", "Hypertension | \n", "D006973 | \n", "[MeSH Disease:\"Hypertension\"] | \n", "391190 | \n", "5574.000 | \n", "252 | \n", "8.68 | \n", "
6 | \n", "Dementia | \n", "D003704 | \n", "[MeSH Disease:\"Dementia\"] | \n", "183802 | \n", "5833.000 | \n", "220 | \n", "7.58 | \n", "
7 | \n", "Diabetes Mellitus | \n", "D003920 | \n", "[MeSH Disease:\"Diabetes Mellitus\"] | \n", "394411 | \n", "6661.000 | \n", "184 | \n", "6.34 | \n", "
8 | \n", "Intestinal Diseases | \n", "D007410 | \n", "[MeSH Disease:\"Intestinal Diseases\"] | \n", "629691 | \n", "9.093 | \n", "166 | \n", "5.72 | \n", "
9 | \n", "Thyroid Diseases | \n", "D013959 | \n", "[MeSH Disease:\"Thyroid Diseases\"] | \n", "153025 | \n", "4366.000 | \n", "133 | \n", "4.58 | \n", "
10 | \n", "Anxiety | \n", "D001007 | \n", "[MeSH Disease:\"Anxiety Disorders\"] | \n", "84138 | \n", "1782.000 | \n", "124 | \n", "4.27 | \n", "
11 | \n", "Arthritis | \n", "D001168 | \n", "[MeSH Disease:\"Arthritis\"] | \n", "259327 | \n", "5367.000 | \n", "122 | \n", "4.2 | \n", "
12 | \n", "Cataract | \n", "D002386 | \n", "[MeSH Disease:\"Cataract\"] | \n", "52150 | \n", "2238.000 | \n", "119 | \n", "4.1 | \n", "
13 | \n", "Asthma | \n", "D001249 | \n", "[MeSH Disease:\"Asthma\"] | \n", "147697 | \n", "3761.000 | \n", "86 | \n", "2.96 | \n", "
14 | \n", "Glaucoma | \n", "D005901 | \n", "[MeSH Disease:\"Glaucoma\"] | \n", "56679 | \n", "2303.000 | \n", "48 | \n", "1.65 | \n", "
15 | \n", "Depressive Disorder, Major | \n", "D003865 | \n", "[MeSH Disease:\"Depressive Disorder, Major\"] | \n", "15706 | \n", "1249.000 | \n", "46 | \n", "1.58 | \n", "
16 | \n", "Urinary Incontinence | \n", "D014549 | \n", "[MeSH Disease:\"Urinary Incontinence\"] | \n", "34170 | \n", "720.000 | \n", "24 | \n", "0.82 | \n", "
17 | \n", "Peptic Ulcer | \n", "D010437 | \n", "[MeSH Disease:\"Peptic Ulcer\"] | \n", "68234 | \n", "1445.000 | \n", "21 | \n", "0.72 | \n", "
18 | \n", "Back Pain | \n", "D001416 | \n", "[MeSH Disease:\"Back Pain\"] | \n", "48516 | \n", "1191.000 | \n", "17 | \n", "0.58 | \n", "
19 | \n", "Pulmonary Disease, Chronic Obstructive | \n", "D029424 | \n", "[MeSH Disease:\"Pulmonary Disease Chronic Obstr... | \n", "35627 | \n", "2244.000 | \n", "15 | \n", "0.51 | \n", "
20 | \n", "Fibromyalgia | \n", "D005356 | \n", "[MeSH Disease:\"Fibromyalgia\"] | \n", "9021 | \n", "468.000 | \n", "10 | \n", "0.34 | \n", "
21 | \n", "Emphysema | \n", "D004646 | \n", "[MeSH Disease:\"Emphysema\"] | \n", "25511 | \n", "1261.000 | \n", "9 | \n", "0.31 | \n", "
22 | \n", "Bronchitis Chronic | \n", "D029481 | \n", "[MeSH Disease:\"Bronchitis Chronic\"] | \n", "9085 | \n", "580.000 | \n", "2 | \n", "0.06 | \n", "