Content - eff7b3fa152641258927b31dd17897336477d18e - 86fc108/substract_spectra_commandline.py

visit type:
Tip revision: c45a7e66a8a89c436d41b8431938b35acfa4e53b authored by Heiner Atze on 17 August 2021, 11:38:48 UTC
Update README.md
Tip revision: c45a7e6
substract_spectra_commandline.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 28 11:09:33 2019

@author: kantundpeterpan

Substract two spectra after scaling one of them with respect to the other.

Spectra are first "aligned" by binning with fixed bins with binsize set to 
the min(min_mz(ms1), min_mz(ms2))


"""

import sys
import argparse
import gc
import pymzml
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from MSanalyzer_class import *
from PyQt5 import QtWidgets

def get_min_delta_mz(ms, tmin, tmax):
    
    if args.fast:
        ind = (ms.mzml_parser.data.scan_time >= tmin) & (ms.mzml_parser.data.scan_time < tmax)
        mzs = ms.mzml_parser.data.mz_array[ind].values
        ints = ms.mzml_parser.data.int_array[ind].values
        array_s = np.array([np.hstack([mz, val]) for mz, val in zip(mzs, ints)])
    
    else:
        run = pymzml.run.Reader(ms.mzml_parser.file)    
        scans = (s for s in run if s.scan_time[0]/60>tmin and s.scan_time[0]/60<tmax)
        array_s = np.array([ms.mzml_parser.get_non_zero(s.peaks('raw')) for s in scans])

    binsize = ms.mzml_parser.get_min_mz_delta(array_s)
    
    return binsize

parser = argparse.ArgumentParser()

parser.add_argument("-f1", "--file1", type=str, help="provide path to first file")
parser.add_argument("-f2", "--file2", type=str, help="provide path to second file")
parser.add_argument("-is", "--tic_integration_start",
                    type=float, help="start for TIC integration to m/z")
parser.add_argument("-ie", "--tic_integration_end",
                    type=float, help="start for TIC integration to m/z")
parser.add_argument("-minmz", "--mz_lower_limit",
                    type=float, help="lower limit for rebinning")
parser.add_argument("-maxmz", "--mz_upper_limit",
                    type=float, help="upper limit for rebinning")
parser.add_argument("-bs", "--binsize",
                    type=str, help="binsize for rebinning")
parser.add_argument("-sp", "--scaling_peak",
                    type=float, help="provide m/z to scale on")
parser.add_argument("-o", "--order",
                    type=str, help="specify order for processing")
parser.add_argument("-f", "--filter",
                    action='store_true', help="activate to crop data to [minmz:maxmz] to reduce filesize and processing time")
parser.add_argument("-ff", "--fast",
                    action='store_true', help="Use (experimental) fast mzML parser")
args = parser.parse_args()

tic_start = args.tic_integration_start
tic_end = args.tic_integration_end
min_mz = args.mz_lower_limit
max_mz = args.mz_upper_limit

scaling_peak = args.scaling_peak

if args.binsize:
    if args.binsize != 'a':
        try:
            binsize=float(args.binsize)
        except:
            raise TypeError('binsize has to be provided as float')

    elif args.binsize ==  'a':
        binsize = 'a'

else:
    binsize = 'a'
    
app = QtWidgets.QApplication(sys.argv)

if args.file1:
    ms1 = MSanalyzer(args.file1, plot_tic=False, fast=args.fast)
else:
    ms1 = MSanalyzer(plot_tic=False, fast=args.fast)
if args.file2:
    ms2 = MSanalyzer(args.file2, plot_tic=False, fast=args.fast)
else:
    ms2 = MSanalyzer(plot_tic=False, fast=args.fast)
    
ms = [ms1, ms2]

if binsize == 'a':
    print('Determining binsize...')
    binsize = min([get_min_delta_mz(m, tic_start, tic_end) for m in ms])*1.05

print('Binsize for rebinning set to: ', binsize)

rebin = []

if args.filter:
    
    for m in ms:
        temp = m.mzml_parser.combine_spectra_fixed_bins(tic_start, tic_end,
                                                        min_mz, max_mz,
                                                        binsize,
                                                        apply_filter = True)
        rebin.append(temp)
else:
    for m in ms:
        temp = m.mzml_parser.combine_spectra_fixed_bins(tic_start, tic_end,
                                                        min_mz, max_mz,
                                                        binsize)
    
        rebin.append(temp)
    
order = args.order.split(',')
order = [int(i)-1 for i in order]

ind = np.searchsorted(rebin[0][:,0], scaling_peak, 'right')
try:
    assert (rebin[first][ind,1] != 0) and (rebin[second][ind,1] != 0)
except:
    ind = ind+1
print('ind: ', ind)

first = order[0]
second = order[1]

print('int val 1: ', rebin[first][ind,1])
print('int val 2: ', rebin[second][ind,1])

factor = (rebin[first][ind,1])/(rebin[second][ind,1])

scaled = rebin[second][:,1]*factor
print('Scaling succeeded.')
substracted = rebin[first][:,1] - scaled
print('Subtraction succeeded')

print('factor: ', factor)
print('scaled: ', scaled.shape)
print('substracted: ', substracted.shape)

fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True)
axes = axes.ravel()

axes[0].plot(rebin[first][:,0], rebin[first][:,1], label = 'first - before',
    linewidth=1)
axes[0].plot(rebin[second][:,0], scaled, label = 'second - before',
    linewidth=1)
axes[0].set_title('Overlay - Spectrum to process and scaled spec to substract')
axes[0].legend()

axes[1].plot(rebin[first][:,0], substracted, label = 'first - substracted')
axes[1].set_title('New spectrum')

#df = pd.DataFrame({'m_z':rebin[0][:,0],
 #                 'spectrum_before_processing':rebin[first][:,1],
  #                'scaled_spec_to_substract':scaled,
   #               'new_spectrum':substracted})

spec_before_proc = rebin[first][np.nonzero(rebin[first][:,1])].astype(np.float64)
scaled_spec_to_substract = np.array([rebin[second][:,0],scaled]).transpose()
scaled_spec_to_substract = scaled_spec_to_substract[np.nonzero(scaled)]
new_spectrum = np.array([rebin[0][:,0], substracted]).transpose()
new_spectrum = new_spectrum[np.nonzero(substracted)]

np.savetxt('unprocessed.csv', spec_before_proc, delimiter = ',')
np.savetxt('scaled_to_substract.csv', scaled_spec_to_substract, delimiter = ',')
np.savetxt('new.csv', new_spectrum, delimiter = ',')

new_cutoff = new_spectrum[new_spectrum[:,1]>0,:]
new_cutoff = new_cutoff[np.nonzero(new_cutoff[:,1])]
np.savetxt('new_cutoff.csv', new_cutoff, delimiter=',')

plt.show()
app.exit()
Browse the archive

https://gitlab.com/kantundpeterpan/masseltof