https://github.com/gantzgraf/vcfhacks
Tip revision: 3b569742cedc2eded1d60bdaba1c2ee91134c8e1 authored by David A. Parry on 10 July 2020, 10:56:40 UTC
Update readme.md
Update readme.md
Tip revision: 3b56974
selectSampleCalls.pl
#!/usr/bin/env perl
#David Parry June 2015
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use Data::Dumper;
use FindBin qw($RealBin);
use lib "$RealBin/lib/dapPerlGenomicLib";
use VcfReader 0.3;
my @samples = ();
my %opts = (s => \@samples);
GetOptions(
\%opts,
'i|input=s',
'o|output=s',
'f|file=s', #list of names
's|sample=s{,}',
'h|?|help' ,
) or usage("Syntax error");
usage() if $opts{h};
usage( "--input is required.") if not $opts{i};
usage( "--sample or --file argument is required.") if not $opts{s} and not $opts{f};
if ($opts{f}){
open (my $SAMP, $opts{f}) or die "Can't open $opts{f} for reading: $!\n";
while (my $line = <$SAMP>){
chomp $line;
push @samples, $line;
}
}
my ($header, $first_var, $IN) = VcfReader::getHeaderAndFirstVariant($opts{i});
die "Bad header!\n" if (not VcfReader::checkHeader(header => $header));
my %samples_to_col = VcfReader::getSamples(
header => $header,
get_columns => 1
);
foreach my $s (@samples){
if (not exists $samples_to_col{$s}){
die "ERROR: Sample $s not found in VCF!\n";
}
}
my $OUT = \*STDOUT;
if ($opts{o}){
open ($OUT, ">$opts{o}") or die "Couldn't open $opts{o} for writing: $!\n";
}
print $OUT join("\n", @$header[0..$#{$header}-1]) . "\n";
print $OUT "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t";
print $OUT join("\t", @samples) . "\n";
my $line = $first_var;
while (defined $line){
chomp $line;
my @split = split("\t", $line);
my @output;
foreach my $f (qw / CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /){
push @output, VcfReader::getVariantField(\@split, $f);
}
foreach my $s (@samples){
push @output, VcfReader::getSampleVariant(\@split, $samples_to_col{$s});
}
print $OUT join("\t", @output) . "\n";
$line = <$IN>;
}
close $IN;
close $OUT;
#################################################
sub usage{
my $error = shift;
if ($error){
print "ERROR: $error\n";
}
print <<EOT
Retrieve variant calls for select samples from a VCF.
Usage:
$0 -i input.vcf -s sample1 [sample2... ] [-o output.vcf]
Options:
-i,--input [vcf input]
-o,--output [optional output file. Defaults to printing to STDOUT]
-s,--sample [one or more samples to retrieve]
-f,--file [text file with one sample per line]
-h,-?,--help [show this help message and exit]
This program will output the calls for samples specified by --sample or --file arguments from a VCF in VCF format. The idea is just to quickly output the call information for a handful of samples from a multisample VCF. However INFO fields will not be modified so INFO fields such as allele counts and frequencies will be inaccurate after processing with this program.
EOT
;
exit 1 if $error;
exit;
}