#!/usr/bin/perl -w ###This script takes the .ptt and the filter_cpm file for mapping insertions to the gene ###The format of the .ptt file is the standard annotation file format from NCBI, or you can refer to the .ptt file in the demo section if (!@ARGV){ print "$0 <.ptt> >$logfile"; my $gene; my $start; my $end; my @temp; my @gene; my $name; my $strand; my $annotation; open PTT,$ptt; while (){ chomp; if (m/^(\d+)\.\.(\d+)/){ @temp=split /\t/,$_; $start->{$temp[5]}=$1; $end->{$temp[5]}=$2; $strand->{$temp[5]}=$temp[1]; $length->{$temp[5]}=abs($end->{$temp[5]}-$start->{$temp[5]}); $annotation->{$temp[5]}=$temp[-1]; } } foreach my $gene (sort keys %$start){ for (my $i=$start->{$gene};$i<=$end->{$gene};$i++){ $target->{$i}=$gene; } } my $count; my $hit; my $inside_ORF=0; my $total_in=0; my $insertion; open CPM,$file; while (){ @temp=split /\s+/,$_; # if ($temp[4]>3){ $total_in++; if ($target->{$temp[1]}){ if ($strand->{$target->{$temp[1]}} eq "+"){ $position=abs(($temp[1]-$start->{$target->{$temp[1]}})/$length->{$target->{$temp[1]}}); }elsif ($strand->{$target->{$temp[1]}} eq "-"){ $position=abs(($temp[1]-$end->{$target->{$temp[1]}})/$length->{$target->{$temp[1]}}); } if ($position<=$percent){ my $po=sprintf ("%.3f",$position); $inside_ORF++; $hit->{$target->{$temp[1]}}++; $count->{$target->{$temp[1]}}+=$temp[2]; my $insert_info=$po.":".$temp[1].":".$temp[2]; push @{$insertion->{$target->{$temp[1]}}},$insert_info; } } # } } open OUT,">$out"; print OUT"Gene ID\tUnique insertions\tCount\tAnnotation\n"; foreach $gene (sort keys %$start){ if ($hit->{$gene}){ my @out; foreach my $item(sort @{$insertion->{$gene}}){ push @out,$item; } my $details=join"\t",@out; print OUT "$gene\t$hit->{$gene}\t$count->{$gene}\t$annotation->{$gene}\t$details\n"; }else{ $hit->{$gene}=0; $count->{$gene}=0; print OUT "$gene\t$hit->{$gene}\t$count->{$gene}\t$annotation->{$gene}\n"; } } $intergenic=$total_in-$inside_ORF; print LOG "Sample\tTotal insertions\t Insertions inside of ORF\tInsertions located intergenic region\n"; print LOG "$file\t$total_in\t$inside_ORF\t$intergenic\n"; close LOG;