https://github.com/gantzgraf/vcfhacks
Tip revision: 3b569742cedc2eded1d60bdaba1c2ee91134c8e1 authored by David A. Parry on 10 July 2020, 10:56:40 UTC
Update readme.md
Update readme.md
Tip revision: 3b56974
getHetVariants.pl
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use FindBin qw($RealBin);
use lib "$RealBin/lib";
use ParseVCF;
use Pod::Usage;
use Getopt::Long;
my $vcf;
my $out;
my @samples = ();
my $min_gq = 0;
my @ab = ();
my $hom;
my $help;
my $man;
GetOptions(
"input=s" => \$vcf,
"output=s" => \$out,
"reverse" => \$hom,
"samples=s{,}" => \@samples,
"gq=f" => \$min_gq,
"ab=f{,}" => \@ab,
"help|?" => \$help,
"manual" => \$man
) or pod2usage( -exitval => 2, -message => "Syntax error" );
pod2usage( -verbose => 2, -exitval => 0 ) if $man;
pod2usage( -verbose => 1, -exitval => 0 ) if $help;
pod2usage( -exitval => 2, -message => "Syntax error" ) if not $vcf;
foreach my $bal (@ab){
die "--ab argument must be between 0 and 1\n" if $bal > 1 or $bal < 0;
}
my $OUT;
if ($out) {
open( $OUT, ">$out" ) || die "Can't open $out for writing: $!\n";
}
else {
$OUT = \*STDOUT;
}
my $obj = ParseVCF->new( file => $vcf );
if ( not @samples ) {
@samples = $obj->getSampleNames;
}
print $OUT join( "", @{ $obj->get_metaHeader } );
print $OUT $obj->get_header . "\n";
while ( my $line = $obj->readLine ) {
if ($hom) {
my $hom =
$obj->sampleIsHomozygous( multiple => \@samples, minGQ => $min_gq );
print $OUT "$line\n" if $hom;
}
else {
my $het;
if (@ab){
$het = $obj->sampleIsHeterozygous(allele_balance => \@ab, multiple => \@samples, minGQ => $min_gq );
}else{
$het = $obj->sampleIsHeterozygous(multiple => \@samples, minGQ => $min_gq );
}
print $OUT "$line\n" if $het;
}
}
=head1 NAME
getHetVariants.pl - print heterozygous variants from VCF file
=head1 SYNOPSIS
getHetVariants.pl -i [vcf_file] [options]
=head1 ARGUMENTS
=over 8
=item B<-i --input>
Input VCF file.
=item B<-o --output>
Output filename.
=item B<-s --samples>
One or more samples to check variants for. Default is to check all samples in vcf.
=item B<-g --gq>
Minimum genotype quality to specify for het variants. Anything less will be counted as a no call.
=item B<-a --ab>
Minimum and optional maximum allele balance for heterozygous variants. For
example, use "--ab 0.3" to only consider heterozygous variants where the ALT
allele makes up at least 30% of reads for a sample. To only consider
heterozygous variants with an allele balance in the range of 40-60% use
"--ab 0.4 0.6".
This option is ignored when the -r/--reverse option is in effect.
=item B<-r --reverse>
Use this flag to print homozygous variants rather than heterozygous variants.
=item B<-h --help>
Show help message.
=item B<-m --manual>
Show manual page.
=back
=cut
=head1 DESCRIPTION
Prints only heterozygous variants from a VCF file. Will only print if all variants or all variants for samples selected with --samples argument are heterozygous. More sophisticated implementations may follow.
=head1 AUTHOR
David A. Parry
=head1 COPYRIGHT AND LICENSE
AUTHOR
David A. Parry
University of Leeds
COPYRIGHT AND LICENSE
Copyright 2013,2014,2015 David A. Parry
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
=cut