https://github.com/Naima16/dbd.git
Tip revision: ecb4f844264b72eaa8cbd708244ecd32d414c7dd authored by Naima16 on 02 June 2020, 17:03:15 UTC
Update README.md
Update README.md
Tip revision: ecb4f84
permute_ASVs_synthetic.pl
#!/usr/bin/perl -w
# example usage:
# perl permute_ASVs_synthetic.pl 84 10 1000 > permute.ASVs.Phyla84.ratio10.seq1000.txt
use strict;
my $N_genera = $ARGV[0]; # the number of genera present in the sample. Here we will use genera, but this could be any taxonomic level (the higher of the two)
my $ASV_per_g = $ARGV[1]; # the number of ASVs per genus. Here we will use ASVs, but this could be any taxonomic level nested within a higher level
my $N_reads = $ARGV[2]; # the number of reads sampled
my %taxonomy; #keys=ASV_ID, values=taxonomy
my @ASVs; # list of all ASVs
# Assign ASVs to genera
my $ASV_ID = 0;
for (my $g=1; $g<=$N_genera; $g++) {
for (my $n=1; $n<=$ASV_per_g; $n++) {
$taxonomy{$ASV_ID} = $g;
$ASV_ID++;
}
}
# Print headers
print join "\t", "true_ASVs", "true_G", "obs_ASVs", "obs_G";
print "\n";
# Print the true number of ASVs and genera in each sample, in a range of samples containing from 1 to $N_genera
for (my $g=1; $g<=$N_genera; $g++) {
my @ASVs;
for (my $genus=1; $genus<=$g; $genus++) {
for (my $n=0; $n<$ASV_ID; $n++) {
next unless ($taxonomy{$n} == $genus);
push @ASVs, $n;
}
}
print join "\t", (@ASVs+0), $g;
print "\t";
# in each sample, count the total number of observd unique ASVs and observed unique genera at a given sampling effort ($N_reads)
my $N_ASV = 0;
my $N_genus = 0;
my %seen_ASV;
my %seen_genus;
for (my $j=0; $j<$N_reads; $j++) { # sample a set number of reads at random from the ASVs in the sample
my $j = int rand ( @ASVs+0 );
my $ASV = $ASVs[$j];
my $genus = $taxonomy{$ASV};
unless (exists $seen_ASV{$ASV}) {
$seen_ASV{$ASV} = 1;
$N_ASV++;
}
unless (exists $seen_genus{$genus}) {
$seen_genus{$genus} = 1;
$N_genus++
}
}
print join "\t", $N_ASV, $N_genus;
print "\n";
}