#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Tue Jan 22 18:19:27 2019 @author: kantundpeterpan """ import os import pandas as pd import mapply #from .Species.Species_class import Species from .helper_functions import * from .MS_data_class import * from .MS_analysis_class import * from .mzml_Parser import * from .test_file_dialog import * from . import fastmzml from . IsoDB import IsoDB mapply.init(n_workers = -1, chunk_size = 100, max_chunks_per_worker = 8, progressbar = True) class MSanalyzer(object): '''Kind of superclass containing analysis and data modules''' module_path = os.path.dirname(os.path.abspath(__file__)) #https://physics.nist.gov/cgi-bin/Compositions/stand_alone.pl IsoPath = os.path.join(module_path, 'data', 'IsotopeDef') isotope_masses = pd.read_csv(IsoPath + '/isotopes_atomic_masses_processed.csv', index_col = 0) #isotope_masses.set_index('element_symbol', inplace = True) isotope_masses.dropna(inplace = True) ResPath = os.path.join(module_path, 'data', 'ResidueDef') residues = pd.read_csv(ResPath + '/residues.csv') residues.set_index('Residue', inplace = True) #additional databases for different labelings? #dictionary of dataframes with labeling as keys? SpecPath = os.path.join(module_path, 'data', 'SpeciesDef') mol_db = pd.read_csv(SpecPath + '/species_coli_lac.csv') #mol_db.set_index('Species') m_h = IsoDB.H[1] #1.0078250322 m_na = IsoDB.Na[23] #22.98976928 m_k_39 = IsoDB.K[39] #38.96370649 @classmethod def add_species_obj(self): from .Species.Species_class import Species self.mol_db['species_obj'] = self.mol_db.mapply(lambda x: Species(name = x.Species, residue_string = x.sequence, msanalyzer = self, labeling_variants = eval(x.labeling_variants)), axis = 1) self.mol_db = self.mol_db.set_index('Species') def find_labelings_variants(self, mass, precision_in_ppm, df_to_search): ll = mass * (1 - precision_in_ppm * 1e-06) ul = mass * (1 + precision_in_ppm * 1e-06) try: assert hasattr(df_to_search, 'monoisotopic_mass') except: print('database not correctly formatted') return ind = (df_to_search.monoisotopic_mass>ll) & (df_to_search.monoisotopic_mass