#!usr/bin/perl -w -s #11/2/09 #per-sample count per million normalization on processed mapping output data #06/12/2013 # #input file format: #SampleID \t chromosome \t Coordinate \t L_read \t R_read \t Total_reads \n #04/09/2015 #Adjusted the cutoff to 3 if (!@ARGV){ print "\nUsage: perl $0 \n"; exit; } open IN, "$ARGV[0]"; open OUT, ">$ARGV[0]_filter_cpm.txt"; my $logfile; if ($ARGV[0]=~/(\w+\.scarf)/){ $logfile=$1.".log"; } open LOG,">>$logfile"; print LOG "SampleID\tRawReadsAfterFiltered\tCoverage\tScale factor\n"; #print OUT "SampleID\tLocation\tCPM-normalized count\n"; #read file into 2D hash my $data; # $data->{sampleID}->{location} = count my $sample; while (my $line = ){ chomp $line; my @temp = split /\s+/, $line; $data->{$temp[0]}->{$temp[1]} = $temp[4]; } my @sites; my @counts; my @norm_counts; foreach my $chromosome (keys %$data){ foreach my $location (keys %{$data->{$chromosome}}){ #for each location within that sampleID if ($data->{$chromosome}->{$location}>=3){ push (@sites, $location); push (@counts, $data->{$chromosome}->{$location}); } } } my $sum=0; my $coverage=scalar @sites; for (my $i=0; $i{$chromosome}}){ #for each location within that sampleID if ($data->{$chromosome}->{$location}>5){ print OUT"$chromosome\t$location\t".$data->{$chromosome}->{$location}*$scale_factor."\n"; } } } close LOG;