https://github.com/jhbadger/scripts
Tip revision: 6ad694835e70be1c38b4cc13806c5b4361f669fc authored by Jonathan Badger on 12 January 2024, 20:33:42 UTC
report misses in virtualPCR
report misses in virtualPCR
Tip revision: 6ad6948
buildTCGATaxSummaryMetaphlan
#!/usr/bin/env ruby
require 'optimist'
require 'csv'
# Command-line handling: running the script with no arguments at all is
# treated as a request for the usage message.
ARGV << "--help" if ARGV.empty?

# Parse the options with Optimist. --input (one or more files) and --metadata
# are mandatory; --num defaults to 100000; --csv is a plain on/off switch that
# selects the output format further down.
opts = Optimist.options do
  banner File.basename($PROGRAM_NAME)
  opt :input, "input file(s)", required: true, type: :strings
  opt :metadata, "metadata.tsv", required: true, type: :string
  opt :num, "normalizing number", default: 100_000
  opt :csv, "output standard normalized csv"
end
# Build the lookup Analysis_Id => Sample from the tab-separated metadata table
# (the first line of the table is used as the column header).
#
# The parser options are passed as real keyword arguments. In current releases
# of the csv library the second positional parameter of CSV.foreach is the file
# mode, so a positional options hash is not applied as parser options there;
# keyword arguments are accepted by both the old and the new signature.
samples = {}
CSV.foreach(opts.metadata, col_sep: "\t", headers: true) do |row|
  samples[row["Analysis_Id"]] = row["Sample"]
end
# Collect the normalized counts from every input file.
#   counts[taxon][sample] - percentage from the file scaled so that 100%
#                           corresponds to opts.num, truncated to an integer
#   usedSamples[sample]   - true for each sample that will be reported
#                           (samples whose name contains VALIDATION are left out)
counts = {}
usedSamples = {}
opts.input.each do |file|
  # Files of 50 bytes or less are ignored
  # (presumably empty or header-only results -- TODO confirm).
  next unless File.size(file) > 50

  # The sample is looked up by the file name without its fixed suffix.
  name = File.basename(file, "_metaphlan.complex.trimmed.fasta.metaphlan")
  sample = samples[name]
  if sample.nil?
    # A nil sample would become a nil key in usedSamples and break the sorted
    # output stage, so report the problem and move on to the next file.
    warn "#{file}: no sample name found in metadata for #{name}, skipping"
    next
  end
  usedSamples[sample] = true if sample !~ /VALIDATION/

  # File.foreach closes the file handle once the last line has been read.
  File.foreach(file) do |line|
    # Only lines containing "__" are taken as taxon records.
    next unless line =~ /__/

    taxon, per = line.chomp.split("\t")
    count = (per.to_f * opts.num / 100.0).to_i
    # Keep only the last element of the "|"-separated lineage.
    lev = taxon.split("|").last
    counts[lev] ||= {}
    counts[lev][sample] = count
  end
end
# Write the summary table to standard output.
# Both key lists are sorted once here instead of being re-sorted for every row.
taxa = counts.keys.sort
sample_names = usedSamples.keys.sort

if opts.csv
  # Comma-separated layout: one row per sample, one column per taxon.
  header = ["Sample", "Patient", "Sample_Type", "Library_Type"] + taxa
  print header.to_csv
  sample_names.each do |sample|
    # NOTE(review): the header expects the sample name to split on "_" into
    # Patient, Sample_Type and Library_Type -- confirm against the metadata.
    row = [sample] + sample.split("_")
    # A taxon never seen for this sample has no entry; nil.to_i turns it into 0.
    row += taxa.map { |taxon| counts[taxon][sample].to_i }
    print row.to_csv
  end
else
  # Tab-separated layout: one row per taxon, one column per sample, preceded
  # by the row total. Only the taxon and total columns are filled in; the
  # other leading columns are left empty.
  header = ["taxlevel", " rankID", " taxon", " daughterlevels", " total"] + sample_names
  print header.to_csv(col_sep: "\t")
  taxa.each do |taxon|
    per_sample = sample_names.map { |sample| counts[taxon][sample].to_i }
    # Start the fold at 0 so the total is 0, not nil, when no sample is reported.
    row = ["", "", taxon, "", per_sample.reduce(0, :+)] + per_sample
    print row.to_csv(col_sep: "\t")
  end
end