https://github.com/minillinim/APP
Tip revision: d7e8cbba2fdba8705527a141848c6f5b30118d7b authored by Askars on 20 March 2012, 03:22:08 UTC
Merge branch 'master' of git://github.com/minillinim/APP
Merge branch 'master' of git://github.com/minillinim/APP
Tip revision: d7e8cbb
app_csv2epi.pl
#!/usr/bin/perl
###############################################################################
#
# app_csv2epi.pl
#
# Convert a csv file to another csv file which is formatted for use in the robot
# Input file should look like this!
# A1,9.4,pyroL803Fmix
# A2,8.1,pyroL803Fmix
# Bleg...
#
# NOTE:
# All units are in ng and uL
#
# Copyright (C) 2011 Michael Imelfort and Paul Dennis
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################
#pragmas
use strict;
use warnings;
#core Perl modules
use Getopt::Long;
#CPAN modules
#locally-written modules
use AppPrimers;
BEGIN {
    # Turn on autoflush ($| = 1) for both standard streams so that progress
    # messages and errors appear immediately and in order.
    # STDOUT is handled last so that it is the selected (default) handle
    # for every plain "print" in the rest of the script.
    for my $stream (\*STDERR, \*STDOUT) {
        select($stream);
        $| = 1;
    }
}
# get input params and print copyright
printAtStart();
my $options = checkParams();

######################################################################
# CODE HERE
######################################################################

#### Globals / Defaults
# NOTE: all volumes are in uL and all DNA amounts are in ng

# set the upper limit for the pooled tube
my $global_pool_tube_capacity = 2000;        # uL
my $global_dilutant_tube_capacity = 2000;    # uL

# set the amount of DNA we need for each sample
my $global_sample_DNA_required_total = 4;    # ng

# we need to monitor the maximum amount of fluid in each original tube
my $global_sample_tube_volume = 13;          # uL
my $global_sample_tube_capacity = 300;       # uL

# where is all this stuff going?
my $global_pool_tube_position = "A1";
my $global_total_pool_volume = 0;

# set the minimum volume we should take from any one well
my $global_pool_minimum_volume = 1;          # uL

# where is the dilutant stored
my $global_dilutant_tube_position = "A2";
my $global_total_dilutant_volume = 0;        # uL

# we won't bother diluting if the amount to add is below this
my $global_dilutant_minimum_volume = 2;      # uL

# prefix for output files
my $global_out_prefix = "";

#### OVERRIDE DEFAULTS
# Map each command line option name onto the variable it overrides. An option
# only replaces the default when the user actually supplied it.
my %override_target_for = (
    'sample_DNA_required_total' => \$global_sample_DNA_required_total,
    'sample_tube_volume'        => \$global_sample_tube_volume,
    'sample_tube_capacity'      => \$global_sample_tube_capacity,
    'pool_tube_position'        => \$global_pool_tube_position,
    'pool_minimum_volume'       => \$global_pool_minimum_volume,
    'pool_tube_capacity'        => \$global_pool_tube_capacity,
    'dilutant_tube_position'    => \$global_dilutant_tube_position,
    'dilutant_minimum_volume'   => \$global_dilutant_minimum_volume,
    'dilutant_tube_capacity'    => \$global_dilutant_tube_capacity,
    'prefix'                    => \$global_out_prefix,
);
foreach my $option_name (keys %override_target_for)
{
    next if (!exists $options->{$option_name});
    ${ $override_target_for{$option_name} } = $options->{$option_name};
}

#### PRINT INFORMATION FOR THE USER
print "sample_DNA_required_total: $global_sample_DNA_required_total ng\n";
print "sample_tube_volume: $global_sample_tube_volume uL\n";
print "sample_tube_capacity: $global_sample_tube_capacity uL\n";
print "pool_tube_position: $global_pool_tube_position\n";
print "pool_minimum_volume: $global_pool_minimum_volume uL\n";
print "pool_tube_capacity: $global_pool_tube_capacity uL\n";
print "dilutant_tube_position: $global_dilutant_tube_position\n";
print "dilutant_minimum_volume: $global_dilutant_minimum_volume uL\n";
print "dilutant_tube_capacity: $global_dilutant_tube_capacity uL\n";

# but which primers have we seen?
my %global_seen_primers_hash = ();
# concentration per well location
my %global_well_conc_hash = ();
# primer per well location
my %global_well_primer_hash = ();
# sometimes we need to dilute the stronger samples
my %well_dilute_hash = ();
# need a list of valid wells (filled in by populateVars)
my %global_well2id = ();
my %global_id2well = ();
populateVars();

# every output file starts with this header; lines are CRLF terminated
my $global_csv_header = "Rack,Source,Rack,Destination,Volume,Tool\r\n";

# open files -- say which file failed, there are three candidates
my $global_pool_file_name = $global_out_prefix."_pool.csv";
my $global_dilute_file_name = $global_out_prefix."_dilute.csv";
open my $global_input_fh, "<", $options->{'in'}
    or die "**ERROR: Cannot open input file \"$options->{'in'}\": $!\n";
open my $global_pool_fh, ">", $global_pool_file_name
    or die "**ERROR: Cannot open output file \"$global_pool_file_name\": $!\n";
open my $global_dilute_fh, ">", $global_dilute_file_name
    or die "**ERROR: Cannot open output file \"$global_dilute_file_name\": $!\n";
#### PARSE THE INPUT FILE
# Each non-empty line is expected to look like: WELL,CONCENTRATION,PRIMER
# e.g. "A1,9.4,pyroL803Fmix". Blank lines are skipped.
while(my $line = <$global_input_fh>)
{
    # remove ALL whitespace. This takes care of the line ending too, including
    # the "\r" that chomp would leave behind for Windows (CRLF) files and which
    # would otherwise end up glued to the primer name.
    $line =~ s/\s+//g;
    next if($line eq "");

    my @line_fields = split /,/, $line;
    if(scalar(@line_fields) < 3)
    {
        die "**ERROR: Expected WELL,CONCENTRATION,PRIMER but found \"$line\" on line $.\n";
    }
    my ($well, $concentration, $primer) = @line_fields;

    # check we know this well
    if(!exists $global_well2id{$well})
    {
        die "**ERROR: Unkown well: \"$well\"\n";
    }

    # the concentration is used as a divisor later on, so it has to be a
    # plain positive number
    if($concentration !~ /\A(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?\z/ or $concentration <= 0)
    {
        die "**ERROR: Concentration for well $well must be a positive number, found: \"$concentration\" on line $.\n";
    }

    # check we know this primer:
    # (%APP_prim_len_hash is not declared in this file -- presumably exported
    # by AppPrimers and keyed by primer name; verify against that module)
    if(!exists $APP_prim_len_hash{$primer})
    {
        die "**ERROR: Unkown primer: \"$primer\"\n";
    }

    # for now, just take the specified concentrations and store the primer
    $global_well_conc_hash{$well} = $concentration;
    $global_well_primer_hash{$well} = $APP_prim_len_hash{$primer};
    $global_seen_primers_hash{$primer} = $APP_prim_len_hash{$primer};
}

# close the input file
close $global_input_fh;
#### MUNGE STUFF UP
# multiple length primers? Find the shortest one.
# One entry per distinct primer name seen in the input file.
my @primers = values %global_seen_primers_hash;

# the smallest value (undef when no primer was seen at all)
my ($shortest) = sort { $a <=> $b } @primers;

# A lone primer is taken as-is; otherwise the shortest value wins as long as
# it is below the 1000000000 starting point, which is also what is reported
# when nothing was seen.
my $min_primer_len = 1000000000;
if(1 == scalar(@primers) or (defined $shortest and $shortest < 1000000000))
{
    $min_primer_len = $shortest;
}

print "------------------------------\n";
print "File contains: " . scalar(@primers) . " different primers with a min length of: $min_primer_len\n";
print "------------------------------\n";
print "SANITY CHECK\n";

# now normalise for the primer lengths: every well's stored value becomes a
# ratio relative to the shortest one (values() aliases the hash entries, so
# this updates the hash in place)
$_ /= $min_primer_len for (values %global_well_primer_hash);
#### SANITY CHECK
# Run through once and check to see if the parameters chosen match up nicely.
# Problems are collected per category and reported together after the loop;
# anything that cannot work at all (overflow, exceeded capacities) is fatal.
my $over_warnings = "";
my $under_warnings = "";

# the required amount is a divisor when working out dilutions
if($global_sample_DNA_required_total <= 0)
{
    die "**ERROR: sample_DNA_required_total must be greater than 0 (got: $global_sample_DNA_required_total)\n";
}

# visit the wells in plate order so the warnings come out in a stable order
foreach my $well (sort { $global_well2id{$a} <=> $global_well2id{$b} } keys %global_well_conc_hash)
{
    # the concentration is a divisor below, refuse anything that is not positive
    if($global_well_conc_hash{$well} <= 0)
    {
        die "**ERROR: Sample in well $well has a non-positive concentration: \"$global_well_conc_hash{$well}\"\n";
    }

    # calculate how much volume to take from the well
    my $volume = ($global_sample_DNA_required_total / $global_well_conc_hash{$well}) * $global_well_primer_hash{$well};

    # is this less than the robot likes to take at a minimum?
    if($volume < $global_pool_minimum_volume)
    {
        $under_warnings .= "Warning: Sample in well $well with concentration $global_well_conc_hash{$well} has volume ".roundVolume($volume)." \n";

        # calculate the amount of dilution needed
        my $dil_amnt = (($global_well_conc_hash{$well} * $global_sample_tube_volume) / $global_sample_DNA_required_total) - $global_sample_tube_volume;

        # no point in diluting petty amounts
        if($dil_amnt >= $global_dilutant_minimum_volume)
        {
            $under_warnings .= "\tWill dilute with ".roundVolume($dil_amnt)."\n";

            # we are starting with finite sized tubes. So we need to make sure we don't overflow.
            if(($dil_amnt + $global_sample_tube_volume) > $global_sample_tube_capacity)
            {
                $under_warnings .= "\tOVERFLOW!!!\n";
                die $under_warnings;
            }

            # store what we calculated
            $well_dilute_hash{$well} = $dil_amnt;
            $global_total_dilutant_volume += $dil_amnt;
        }
        else
        {
            $under_warnings .= "Dilution amount: ".roundVolume($dil_amnt)." is less than $global_dilutant_minimum_volume so won't dilute\n";
        }

        # set the concentration to match the amount needed...
        # NOTE(review): this overwrite also happens when the dilution above was
        # skipped as too small -- confirm that is intended
        $global_well_conc_hash{$well} = $global_sample_DNA_required_total;
    }
    # do we require more than is available in the tube?
    # (we will fix this later)
    elsif($volume > $global_sample_tube_volume)
    {
        $over_warnings .= "Warning: Sample in well $well with concentration $global_well_conc_hash{$well} requires ".roundVolume($volume)." \n";
    }

    $global_total_pool_volume += $volume;
}

if($global_total_pool_volume > $global_pool_tube_capacity)
{
    die "**ERROR: Total volume (".roundVolume($global_total_pool_volume).") exceeds global limit of $global_pool_tube_capacity\n";
}

if($global_total_dilutant_volume > $global_dilutant_tube_capacity)
{
    die "**ERROR: Total dilutant volume (".roundVolume($global_total_dilutant_volume).") exceeds global limit of $global_dilutant_tube_capacity\nTry setting a higher value for sample_DNA_required_total\n";
}

if("" ne $over_warnings)
{
    # the limit being exceeded here is what each sample tube holds
    print "\n\tWARNING!\n\n\tTaking amounts which are more than contained in each tube: $global_sample_tube_volume\n\n";
    print $over_warnings;
    print "\n";
}

if("" ne $under_warnings)
{
    print "\n\tWARNING!\n\n\tTaking amounts which are lower than min specified: $global_pool_minimum_volume\n\n";
    print $under_warnings;
    print "\n";
}
#### PRINT THE OUTPUT FILE
# Two files are produced: the pooling instructions (well -> pool tube) and the
# dilution instructions (dilutant tube -> well). Both start with the header.
print {$global_pool_fh} $global_csv_header;
print {$global_dilute_fh} $global_csv_header;

# restart the running totals, they are recounted from what is really written
$global_total_pool_volume = 0;
$global_total_dilutant_volume = 0;

# walk the plate in well-id order so the robot gets the wells in sequence
foreach my $well_num (sort {$a <=> $b} keys %global_id2well)
{
    my $well = $global_id2well{$well_num};

    # only wells which appeared in the input file produce any output
    next if(!exists $global_well_conc_hash{$well});

    my $volume = ($global_sample_DNA_required_total / $global_well_conc_hash{$well}) * $global_well_primer_hash{$well};

    # never ask for more than the stated sample tube volume
    if ($volume > $global_sample_tube_volume) { $volume = $global_sample_tube_volume; }

    if(exists $well_dilute_hash{$well})
    {
        # do a dilution first (if needed)
        print {$global_dilute_fh} printLine($global_dilutant_tube_position,$well,$well_dilute_hash{$well});
        $global_total_dilutant_volume += $well_dilute_hash{$well};
    }

    $global_total_pool_volume += $volume;
    print {$global_pool_fh} printLine($well, $global_pool_tube_position,$volume);
}

# close files -- buffered write errors (e.g. a full disk) only show up here,
# so a failed close means the instructions on disk cannot be trusted
close $global_pool_fh
    or die "**ERROR: Problem writing \"".$global_out_prefix."_pool.csv\": $!\n";
close $global_dilute_fh
    or die "**ERROR: Problem writing \"".$global_out_prefix."_dilute.csv\": $!\n";

#### TELL THE USER WHAT HAPPENED
print "------------------------------\n";
print "Pooling will produce: ".roundVolume($global_total_pool_volume)." uL\n";
print "Diluting will use: ".roundVolume($global_total_dilutant_volume)." uL\n";
print "------------------------------\n";
######################################################################
# CUSTOM SUBS
######################################################################
sub printLine
{
    #-----
    # Build one CRLF-terminated CSV record:
    #   1,FROM,1,TO,VOLUME,TOOL
    # The two rack columns are always 1, the volume is formatted by
    # roundVolume and the tool number comes from chooseTool.
    # Despite the name nothing is printed here; the caller writes the
    # returned string to the relevant file.
    #
    my ($position_from, $position_to, $volume) = @_;

    my @record_fields = (
        1, $position_from,
        1, $position_to,
        roundVolume($volume),
        chooseTool($volume),
    );

    return join(",", @record_fields) . "\r\n";
}
sub roundVolume
{
    #-----
    # Format a volume as a string with exactly two decimal places
    #
    my $raw_volume = shift;
    return sprintf('%.2f', $raw_volume);
}
sub chooseTool
{
    #-----
    # Pick the tool number for a transfer of the given volume:
    #   up to and including 50   -> tool 1
    #   above 50, up to 300      -> tool 2
    # Anything larger is fatal.
    #
    my ($volume) = @_;

    return 1 if($volume <= 50);
    return 2 if($volume <= 300);

    die "OMG!!!! Wrong pipette for volume $volume\n";
}
sub populateVars
{
    #-----
    # Fill the two well lookup tables:
    #   %global_id2well   1 .. 96      -> "A1" .. "H12"
    #   %global_well2id   "A1" .. "H12" -> 1 .. 96
    # Ids run along a row first (A1..A12 are 1..12, B1..B12 are 13..24, ...)
    #
    my $well_id = 0;
    foreach my $row ("A" .. "H")
    {
        foreach my $column (1 .. 12)
        {
            $well_id++;
            my $well = $row . $column;
            $global_id2well{$well_id} = $well;
            $global_well2id{$well} = $well_id;
        }
    }

    # 96 at this point, the same value the last hand-written assignment
    # used to leave as the result of the sub
    return $well_id;
}
######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line with Getopt::Long.
    #
    # Returns a reference to a hash holding only the options which were
    # actually supplied. Shows the usage (via pod2usage) instead of returning
    # when the command line is invalid or empty, when -help is given, or when
    # no input file is named.
    #
    # Amounts and volumes are "=f": a value is required and may be fractional.
    # (With ":i" a value like 12.5 was not accepted as the option's value, so
    # the option silently became 0.)
    #
    my @standard_options = (
        "prefix|p:s",
        "help|h+",
        "in|i:s",
        "sample_tube_volume=f",
        "sample_tube_capacity=f",
        "sample_DNA_required_total=f",
        "pool_tube_position:s",
        "pool_minimum_volume=f",
        "pool_tube_capacity=f",
        "dilutant_tube_position:s",
        "dilutant_minimum_volume=f",
        "dilutant_tube_capacity=f",
    );
    my %options;

    # Add any other command line options, and the code to handle them
    #
    # GetOptions has already warned about whatever it did not like when it
    # returns false, so just point the user at the usage
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );
    _showUsage() if (!$parsed_ok);

    # if no arguments supplied print the usage and exit
    #
    _showUsage() if (0 == (keys (%options) ));

    # If the -help option is set, print the usage and exit
    #
    _showUsage() if $options{'help'};

    # Compulsory items (a bare "-in" leaves an empty file name behind)
    if(!exists $options{'in'} or $options{'in'} eq "") { print "**ERROR: You need to supply a csv file to parse\n"; _showUsage(); }
    #if(!exists $options{''} ) { print "**ERROR: \n"; _showUsage(); }

    return \%options;
}

sub _showUsage {
    #-----
    # Replace this process with "pod2usage <this script>". The list form of
    # exec keeps a script path containing spaces or shell characters intact.
    # exec only comes back when pod2usage could not be started; stop in that
    # case rather than carrying on with an unusable set of options.
    #
    exec("pod2usage", $0) or die "**ERROR: Could not run pod2usage: $!\n";
}
sub printAtStart {
    #-----
    # Print the start-up banner: the script name as invoked ($0) and the
    # copyright / warranty notice, framed by two rules
    #
    my $rule = "----------------------------------------------------------------";
    my @banner_lines = (
        $rule,
        $0,
        "Copyright (C) 2011 Michael Imelfort and Paul Dennis",
        "This program comes with ABSOLUTELY NO WARRANTY;",
        "This is free software, and you are welcome to redistribute it",
        "under certain conditions: See the source for more details.",
        $rule,
    );
    print join("\n", @banner_lines) . "\n";
}
__DATA__
=head1 NAME
app_csv2epi.pl
=head1 COPYRIGHT
copyright (C) 2011 Michael Imelfort and Paul Dennis
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 DESCRIPTION
Convert a csv file to another csv file which is formatted for use in the robot
=head1 SYNOPSIS
app_csv2epi.pl -in|i CSV_FILE [-help|h]
Convert a csv file to another csv file which is formatted for use in the robot
-in -i CSV_FILE File to parse -- DO NOT INCLUDE HEADER IN FILE
-prefix -p NAME Prefix to attach to output files (default: NONE)
[-help -h] Displays basic usage information
Source rack options:
[-sample_tube_volume AMOUNT (uL) ] The amount of material in the sample tube (default: 13 uL)
[-sample_tube_capacity AMOUNT (uL) ] The capacity of the sample tube (default: 300 uL)
[-sample_DNA_required_total AMOUNT (ng) ] The amount of DNA required in the pool for EACH SAMPLE (default: 4 ng)
Pool options:
[-pool_tube_position POSITION ] The position in the rack of the pooling tube (default: A1)
[-pool_minimum_volume AMOUNT (uL) ] The minimum amount of material we take from each well during pooling (default 1 uL)
[-pool_tube_capacity AMOUNT (uL) ] The capacity of the pooling tube (default: 2000 uL)
Dilution options:
[-dilutant_tube_position POSITION ] The position in the rack of the dilution tube (default: A2)
[-dilutant_minimum_volume AMOUNT (uL) ] The minimum amount we add to dilute (default 2 uL)
[-dilutant_tube_capacity AMOUNT (uL) ] The capacity of the dilution tube (default: 2000 uL)
=cut