#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 22 18:19:27 2019
@author: kantundpeterpan
"""
import os
import pandas as pd
import mapply
#from .Species.Species_class import Species
from .helper_functions import *
from .MS_data_class import *
from .MS_analysis_class import *
from .mzml_Parser import *
from .test_file_dialog import *
from . import fastmzml
from . IsoDB import IsoDB
# Configure mapply once, at import time of this module.
# MSanalyzer.add_species_obj calls `DataFrame.mapply`; presumably that method is
# only available on DataFrames after this init call -- confirm against the mapply docs.
# NOTE(review): n_workers = -1 presumably means "use all available cores" -- confirm.
mapply.init(n_workers = -1, chunk_size = 100, max_chunks_per_worker = 8, progressbar = True)
class MSanalyzer(object):
    """Kind of superclass containing analysis and data modules.

    Class-level reference tables are loaded once, when the class body is
    executed, from CSV files located in the ``data`` directory next to this
    module:

    * ``isotope_masses`` -- ``IsotopeDef/isotopes_atomic_masses_processed.csv``
      (first column used as index, rows containing NaN dropped)
    * ``residues`` -- ``ResidueDef/residues.csv`` indexed by ``Residue``
    * ``mol_db`` -- ``SpeciesDef/species_coli_lac.csv``

    Instances wrap one data file and expose ``self.data`` (``MS_data``) and
    ``self.analysis`` (``MS_analysis``); for mzML input a parser is stored in
    ``self.mzml_parser`` as well.
    """

    module_path = os.path.dirname(os.path.abspath(__file__))

    # https://physics.nist.gov/cgi-bin/Compositions/stand_alone.pl
    IsoPath = os.path.join(module_path, 'data', 'IsotopeDef')
    isotope_masses = pd.read_csv(
        os.path.join(IsoPath, 'isotopes_atomic_masses_processed.csv'),
        index_col=0)
    isotope_masses.dropna(inplace=True)

    ResPath = os.path.join(module_path, 'data', 'ResidueDef')
    residues = pd.read_csv(os.path.join(ResPath, 'residues.csv'))
    residues.set_index('Residue', inplace=True)

    # additional databases for different labelings?
    # dictionary of dataframes with labeling as keys?
    SpecPath = os.path.join(module_path, 'data', 'SpeciesDef')
    # 'Species' stays a regular column here; add_species_obj() turns it into
    # the index after it has built the Species objects.
    mol_db = pd.read_csv(os.path.join(SpecPath, 'species_coli_lac.csv'))

    # Isotope masses looked up by mass number in IsoDB.
    m_h = IsoDB.H[1]        # 1.0078250322
    m_na = IsoDB.Na[23]     # 22.98976928
    m_k_39 = IsoDB.K[39]    # 38.96370649

    @classmethod
    def add_species_obj(cls):
        """Add a ``species_obj`` column to ``mol_db`` and index it by species.

        For every row of ``cls.mol_db`` a ``Species`` is created from the
        ``Species``, ``sequence`` and ``labeling_variants`` columns, with the
        class itself passed as ``msanalyzer``. Afterwards ``cls.mol_db`` is
        replaced by a copy indexed by ``Species``.

        This is not called automatically by ``__init__`` and is meant to be
        called once: after the call ``Species`` is the index, not a column,
        so the per-row lookup used here would no longer find it.
        """
        # Imported locally; the module-level import is commented out
        # (presumably to avoid a circular import -- confirm).
        from .Species.Species_class import Species

        # SECURITY NOTE: eval() executes whatever expression is stored in the
        # 'labeling_variants' cell. That is acceptable only while the species
        # CSV is a trusted file shipped with the package; if every cell is a
        # plain literal, ast.literal_eval would be the safe replacement.
        cls.mol_db['species_obj'] = cls.mol_db.mapply(
            lambda x: Species(name=x.Species,
                              residue_string=x.sequence,
                              msanalyzer=cls,
                              labeling_variants=eval(x.labeling_variants)),
            axis=1)

        cls.mol_db = cls.mol_db.set_index('Species')

    def find_labelings_variants(self, mass, precision_in_ppm, df_to_search):
        """Select database rows whose monoisotopic mass matches ``mass``.

        Parameters
        ----------
        mass : number
            Target mass.
        precision_in_ppm : number
            Relative tolerance in parts per million; the accepted window is
            the open interval ``mass * (1 -/+ precision_in_ppm * 1e-6)``.
        df_to_search : DataFrame-like
            Must expose a ``monoisotopic_mass`` attribute/column.

        Returns
        -------
        The rows of ``df_to_search`` inside the window, or ``None`` (after
        printing a message) when ``monoisotopic_mass`` is missing.
        """
        # Explicit check instead of assert/bare-except: asserts are removed
        # under ``python -O`` and a bare except would hide unrelated errors.
        if not hasattr(df_to_search, 'monoisotopic_mass'):
            print('database not correctly formatted')
            return None

        tolerance = precision_in_ppm * 1e-06
        ll = mass * (1 - tolerance)
        ul = mass * (1 + tolerance)

        masses = df_to_search.monoisotopic_mass
        ind = (masses > ll) & (masses < ul)
        return df_to_search[ind]

    def __init__(self, filename='filedialog', plot_tic=True,
                 fast=False, fast_stream=False):
        """Load a data file and set up the data and analysis modules.

        Parameters
        ----------
        filename : str
            Path of the file to load. The default ``'filedialog'`` opens a
            ``FileDialog`` to ask for one. If the resulting name is the empty
            string nothing is loaded and no attributes are set.
        plot_tic : bool
            For mzML input, call ``self.analysis.plot_tic()`` after loading.
        fast : bool
            For mzML input, parse with ``fastmzml.FastMZml`` instead of
            ``mzML_parser``.
        fast_stream : bool
            Forwarded as ``stream`` to ``fastmzml.FastMZml``.

        The file type is detected by looking for ``.txt``, ``.csv``,
        ``.mzml`` or ``.mzML`` anywhere in the absolute path.
        """
        # add_species_obj() is deliberately not invoked here.

        if filename == 'filedialog':
            fd = FileDialog()
            filename = fd.openFileNameDialog()

        if filename == '':
            # No file chosen: leave the instance without data, as before.
            return

        self.path = os.path.abspath(filename)
        # Strip the real extension; a fixed 5-character slice only fits
        # '.mzML' and cut one character off the stem of '.txt'/'.csv' names.
        self.name = os.path.splitext(filename)[0]

        if '.txt' in self.path:
            self.raw_data = load_mz_txt(filename)
            self.data = MS_data(self)
            self.analysis = MS_analysis(self)

        if '.csv' in self.path:
            self.raw_data = pd.read_csv(filename, header=0)
            # A three-column file keeps only its last two columns.
            if self.raw_data.shape[1] == 3:
                self.raw_data = self.raw_data.iloc[:, 1:3]
            # NOTE(review): the rename is applied to every CSV, not only the
            # three-column case -- confirm against the original layout.
            self.raw_data.columns = ['x', 'y']
            self.data = MS_data(self)
            self.analysis = MS_analysis(self)

        if '.mzml' in self.path or '.mzML' in self.path:
            # The data module is created before the parser, which receives
            # this instance as its second argument.
            self.data = MS_data(self)
            if fast:
                self.mzml_parser = fastmzml.FastMZml(filename, self,
                                                     stream=fast_stream)
            else:
                self.mzml_parser = mzML_parser(filename, self)
            self.analysis = MS_analysis(self)
            # NOTE(review): TIC plotting is limited to mzML input here --
            # confirm against the original layout.
            if plot_tic:
                self.analysis.plot_tic()

    def preparePickle(self):
        """Set a fixed list of attributes on ``self.analysis`` to ``None``.

        Judging by the method name these are handles that should not be
        pickled (presumably figure/GUI objects -- confirm in MS_analysis).
        """
        for attribute in ('fig', 'ax', 'root',
                          'data_line', 'peak_label_dialog',
                          'cursor'):
            setattr(self.analysis, attribute, None)