{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/colleenmclaughlin/opt/anaconda3/lib/python3.7/site-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
      " import pandas.util.testing as tm\n"
     ]
    }
   ],
   "source": [
    "from __future__ import division\n",
    "import os\n",
    "import sys\n",
    "import random\n",
    "import copy\n",
    "import math\n",
    "import json\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy\n",
    "\n",
    "\n",
    "%matplotlib inline\n",
    "from matplotlib import pyplot as plt\n",
    "import matplotlib as mpl\n",
    "\n",
    "import seaborn as sns\n",
    "\n",
    "# ../resources/ is put on sys.path before importing sct_py3\n",
    "# (presumably a project-local module living there -- confirm).\n",
    "sys.path.append(\"../resources/\")\n",
    "import sct_py3\n",
    "sns.set_style(\"ticks\")\n",
    "sns.set_context(\"talk\")\n",
    "\n",
    "# Figure export settings; save_figure() reads these globals at call time.\n",
    "output_dir = \"out/\"\n",
    "output_suffix = \"\"\n",
    "output_formats = [\".png\", \".pdf\"]\n",
    "\n",
    "# Resolution used by every fig.savefig() call.\n",
    "mpl.rc('savefig', dpi=300)\n",
    "\n",
    "\n",
    "def save_figure(fig, name):\n",
    "    \"\"\"Save `fig` once for every extension listed in `output_formats`.\n",
    "\n",
    "    Each file is written to output_dir/<name><output_suffix><extension>.\n",
    "    The destination directory is created when it does not exist yet.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    fig : object exposing savefig(path), e.g. a matplotlib Figure\n",
    "    name : str\n",
    "        Base file name, without extension.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "    \"\"\"\n",
    "    for output_format in output_formats:\n",
    "        # normpath collapses the doubled separator produced when output_dir\n",
    "        # already ends with a slash (\"out/\" + \"/\" + name).\n",
    "        path = os.path.normpath(\n",
    "            output_dir + \"/\" + name + output_suffix + output_format\n",
    "        )\n",
    "        target_dir = os.path.dirname(path)\n",
    "        if target_dir:\n",
    "            os.makedirs(target_dir, exist_ok=True)\n",
    "        fig.savefig(path)\n",
    "\n",
    "\n",
    "# Turn off the pandas chained-assignment warning.\n",
    "pd.options.mode.chained_assignment = None  # default='warn'\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load matrices\n",
    "\n",
    "These data include: All 42h APF data from Li et al., 2020; 24h APF elav cells (including one cluster of auditory neurons); Adult nsyb>unc84-GFP nuclei\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((17472, 2829), (17472, 1892), (15908, 4203))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def load_log_cpm(path):\n",
    "    \"\"\"Read a tab-separated table: first row = column labels, first column = row index.\"\"\"\n",
    "    return pd.read_csv(path, sep=\"\\t\", header=0, index_col=0)\n",
    "\n",
    "\n",
    "df_ORN_nSyb_adult = load_log_cpm('../data/htseq_ORN_nuclei_LogCPM_17plates_addintron_hq50k_neuron2-5.tab.gz')\n",
    "\n",
    "df_ORN_24h = load_log_cpm('../data/htseq_ORN_cell_LogCPM_10plates_hq50k_neuron2-5.tab.gz')\n",
    "\n",
    "df_ORN_42h = load_log_cpm('../data/htseq_logCPM_hq.tab.gz')\n",
    "\n",
    "df_ORN_nSyb_adult.shape, df_ORN_24h.shape, df_ORN_42h.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "combined (gene,cell):\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(17474, 8924)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column-wise concat aligns the three tables on their row index (pandas'\n",
    "# default outer join); rows missing from a table come back as NaN there\n",
    "# and are filled with 0.\n",
    "df_log_CPM_all = pd.concat(\n",
    "    [df_ORN_24h, df_ORN_42h, df_ORN_nSyb_adult], axis=1, sort=False\n",
    ").fillna(0)\n",
    "\n",
    "print('combined (gene,cell):')\n",
    "df_log_CPM_all.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | ORNcell_24h_ElavCD8GFP_P1_A10 | \n", "ORNcell_24h_ElavCD8GFP_P1_A11 | \n", "ORNcell_24h_ElavCD8GFP_P1_A12 | \n", "ORNcell_24h_ElavCD8GFP_P1_A15 | \n", "ORNcell_24h_ElavCD8GFP_P1_A16 | \n", "ORNcell_24h_ElavCD8GFP_P1_A17 | \n", "ORNcell_24h_ElavCD8GFP_P1_A18 | \n", "ORNcell_24h_ElavCD8GFP_P1_A19 | \n", "ORNcell_24h_ElavCD8GFP_P1_A1 | \n", "ORNcell_24h_ElavCD8GFP_P1_A20 | \n", "... | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_I6 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_J4 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_L14 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_N15 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_N8 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_O1 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_P2 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_P6 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_P7 | \n", "ORNnuclei_adult_AM29UNC84GFP_P3_P9 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
7SLRNA:CR32864 | \n", "0.000000 | \n", "0.953444 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "2.68672 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
a | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
abd-A | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
Abd-B | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
Abl | \n", "9.458788 | \n", "10.285869 | \n", "0.0 | \n", "9.541255 | \n", "7.627055 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "8.482902 | \n", "10.318245 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.0 | \n", "0.0 | \n", "10.899275 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
5 rows × 8924 columns
\n", "\n", " | pooled_library | \n", "SampleID | \n", "Index | \n", "num | \n", "experiment | \n", "plate | \n", "well | \n", "num_cells | \n", "num_mapped_reads | \n", "color | \n", "genotype | \n", "colorHL | \n", "label | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
library | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N447Barcode_701-502 | \n", "N477 | \n", "1000100701-A1-flybrain-1 | \n", "TAAGGCGA-ATAGAGAG | \n", "2477.0 | \n", "13.0 | \n", "1000100701 | \n", "A1 | \n", "1.0 | \n", "NaN | \n", "#a3eb13 | \n", "nSyb_48h_ORN | \n", "#e31a1c | \n", "nSyb_48h_ORN_P0 | \n", "
N447Barcode_702-502 | \n", "N447 | \n", "1000100701-A2-flybrain-1 | \n", "CGTACTAG-ATAGAGAG | \n", "2478.0 | \n", "13.0 | \n", "1000100701 | \n", "A2 | \n", "1.0 | \n", "NaN | \n", "#a3eb13 | \n", "nSyb_48h_ORN | \n", "#e31a1c | \n", "nSyb_48h_ORN_P0 | \n", "
N447Barcode_703-502 | \n", "N447 | \n", "1000100701-A3-flybrain-1 | \n", "AGGCAGAA-ATAGAGAG | \n", "2479.0 | \n", "13.0 | \n", "1000100701 | \n", "A3 | \n", "1.0 | \n", "NaN | \n", "#a3eb13 | \n", "nSyb_48h_ORN | \n", "#e31a1c | \n", "nSyb_48h_ORN_P0 | \n", "
N447Barcode_704-502 | \n", "N447 | \n", "1000100701-A4-flybrain-1 | \n", "TCCTGAGC-ATAGAGAG | \n", "2480.0 | \n", "13.0 | \n", "1000100701 | \n", "A4 | \n", "1.0 | \n", "NaN | \n", "#a3eb13 | \n", "nSyb_48h_ORN | \n", "#e31a1c | \n", "nSyb_48h_ORN_P0 | \n", "
N447Barcode_705-502 | \n", "N447 | \n", "1000100701-A5-flybrain-1 | \n", "GGACTCCT-ATAGAGAG | \n", "2481.0 | \n", "13.0 | \n", "1000100701 | \n", "A5 | \n", "1.0 | \n", "NaN | \n", "#a3eb13 | \n", "nSyb_48h_ORN | \n", "#e31a1c | \n", "nSyb_48h_ORN_P0 | \n", "