Raw File
Tip revision: c45a7e66a8a89c436d41b8431938b35acfa4e53b authored by Heiner Atze on 17 August 2021, 11:38:48 UTC
Tip revision: c45a7e6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
Created on Fri Jun 28 11:09:33 2019

@author: kantundpeterpan

Substract two spectra after scaling one of them with respect to the other.

Spectra are first "aligned" by binning with fixed bins with binsize set to 
the min(min_mz(ms1), min_mz(ms2))


import sys
import argparse
import gc
import pymzml
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from MSanalyzer_class import *
from PyQt5 import QtWidgets

def get_min_delta_mz(ms, tmin, tmax):
        ind = ( >= tmin) & ( < tmax)
        mzs =[ind].values
        ints =[ind].values
        array_s = np.array([np.hstack([mz, val]) for mz, val in zip(mzs, ints)])
        run =    
        scans = (s for s in run if s.scan_time[0]/60>tmin and s.scan_time[0]/60<tmax)
        array_s = np.array([ms.mzml_parser.get_non_zero(s.peaks('raw')) for s in scans])

    binsize = ms.mzml_parser.get_min_mz_delta(array_s)
    return binsize

parser = argparse.ArgumentParser()

parser.add_argument("-f1", "--file1", type=str, help="provide path to first file")
parser.add_argument("-f2", "--file2", type=str, help="provide path to second file")
parser.add_argument("-is", "--tic_integration_start",
                    type=float, help="start for TIC integration to m/z")
parser.add_argument("-ie", "--tic_integration_end",
                    type=float, help="start for TIC integration to m/z")
parser.add_argument("-minmz", "--mz_lower_limit",
                    type=float, help="lower limit for rebinning")
parser.add_argument("-maxmz", "--mz_upper_limit",
                    type=float, help="upper limit for rebinning")
parser.add_argument("-bs", "--binsize",
                    type=str, help="binsize for rebinning")
parser.add_argument("-sp", "--scaling_peak",
                    type=float, help="provide m/z to scale on")
parser.add_argument("-o", "--order",
                    type=str, help="specify order for processing")
parser.add_argument("-f", "--filter",
                    action='store_true', help="activate to crop data to [minmz:maxmz] to reduce filesize and processing time")
parser.add_argument("-ff", "--fast",
                    action='store_true', help="Use (experimental) fast mzML parser")
args = parser.parse_args()

tic_start = args.tic_integration_start
tic_end = args.tic_integration_end
min_mz = args.mz_lower_limit
max_mz = args.mz_upper_limit

scaling_peak = args.scaling_peak

if args.binsize:
    if args.binsize != 'a':
            raise TypeError('binsize has to be provided as float')

    elif args.binsize ==  'a':
        binsize = 'a'

    binsize = 'a'
app = QtWidgets.QApplication(sys.argv)

if args.file1:
    ms1 = MSanalyzer(args.file1, plot_tic=False,
    ms1 = MSanalyzer(plot_tic=False,
if args.file2:
    ms2 = MSanalyzer(args.file2, plot_tic=False,
    ms2 = MSanalyzer(plot_tic=False,
ms = [ms1, ms2]

if binsize == 'a':
    print('Determining binsize...')
    binsize = min([get_min_delta_mz(m, tic_start, tic_end) for m in ms])*1.05

print('Binsize for rebinning set to: ', binsize)

rebin = []

if args.filter:
    for m in ms:
        temp = m.mzml_parser.combine_spectra_fixed_bins(tic_start, tic_end,
                                                        min_mz, max_mz,
                                                        apply_filter = True)
    for m in ms:
        temp = m.mzml_parser.combine_spectra_fixed_bins(tic_start, tic_end,
                                                        min_mz, max_mz,
order = args.order.split(',')
order = [int(i)-1 for i in order]

ind = np.searchsorted(rebin[0][:,0], scaling_peak, 'right')
    assert (rebin[first][ind,1] != 0) and (rebin[second][ind,1] != 0)
    ind = ind+1
print('ind: ', ind)

first = order[0]
second = order[1]

print('int val 1: ', rebin[first][ind,1])
print('int val 2: ', rebin[second][ind,1])

factor = (rebin[first][ind,1])/(rebin[second][ind,1])

scaled = rebin[second][:,1]*factor
print('Scaling succeeded.')
substracted = rebin[first][:,1] - scaled
print('Subtraction succeeded')

print('factor: ', factor)
print('scaled: ', scaled.shape)
print('substracted: ', substracted.shape)

fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True)
axes = axes.ravel()

axes[0].plot(rebin[first][:,0], rebin[first][:,1], label = 'first - before',
axes[0].plot(rebin[second][:,0], scaled, label = 'second - before',
axes[0].set_title('Overlay - Spectrum to process and scaled spec to substract')

axes[1].plot(rebin[first][:,0], substracted, label = 'first - substracted')
axes[1].set_title('New spectrum')

#df = pd.DataFrame({'m_z':rebin[0][:,0],
 #                 'spectrum_before_processing':rebin[first][:,1],
  #                'scaled_spec_to_substract':scaled,
   #               'new_spectrum':substracted})

spec_before_proc = rebin[first][np.nonzero(rebin[first][:,1])].astype(np.float64)
scaled_spec_to_substract = np.array([rebin[second][:,0],scaled]).transpose()
scaled_spec_to_substract = scaled_spec_to_substract[np.nonzero(scaled)]
new_spectrum = np.array([rebin[0][:,0], substracted]).transpose()
new_spectrum = new_spectrum[np.nonzero(substracted)]

np.savetxt('unprocessed.csv', spec_before_proc, delimiter = ',')
np.savetxt('scaled_to_substract.csv', scaled_spec_to_substract, delimiter = ',')
np.savetxt('new.csv', new_spectrum, delimiter = ',')

new_cutoff = new_spectrum[new_spectrum[:,1]>0,:]
new_cutoff = new_cutoff[np.nonzero(new_cutoff[:,1])]
np.savetxt('new_cutoff.csv', new_cutoff, delimiter=',')

back to top