#!/bin/bash
# Martial Marbouty
# RSG - Meta3C TEAM
# this script will generate data based on the louvain partitioning procedure
#usage --> bash louvain_data_treatment.sh project iterations targeted_iteration threshold assembly
################# input ###################
# Positional arguments, captured in call order. A missing argument simply
# yields an empty variable.
# 1: project root; the script reads "$project"/binning and writes under
#    "$project"/temp, "$project"/binning and "$project"/fasta_it*_over*/
project="${1-}"
# 2: not referenced anywhere in the visible part of this script
iterations="${2-}"
# 3: suffix selecting the input tables (louvain_mat_weighted_<it>.txt,
#    bin_data_<it>.txt, contig_data_<it>.txt)
targeted_iteration="${3-}"
# 4: minimum value of column 3 for a louvain_mat_weighted row to be kept
threshold="${4-}"
# 5: handed unchanged to extract_contig.py as its first argument
#    (presumably the assembly FASTA -- confirm against that script)
assembly="${5-}"
# --- Build the bin -> overlapping-community table ---------------------------
# Refuse to run with an empty project: every path below would otherwise be
# resolved against the filesystem root.
: "${project:?project directory (argument 1) is required}"

# Output directory for the per-community FASTA files, plus the scratch
# directory that receives every intermediate file.
mkdir -p "$project"/fasta_it"$targeted_iteration"_over"$threshold"/ "$project"/temp

# Keep columns 1 and 2 of the rows whose third column reaches the threshold,
# append a constant "1" and de-duplicate the result.
awk -v var3="$threshold" '$3 >= var3 {print $1,$2,"1"}' \
    "$project"/binning/louvain_mat_weighted_"$targeted_iteration".txt \
  | sort -u > "$project"/temp/overlapping_comm.txt

# connected.py reads the pair list (1st path) and is expected to write the
# file given as 2nd path, which is consumed right below. Abort on failure so
# that a stale file from a previous run is never processed by mistake.
if ! python secondary_scripts/connected.py \
    "$project"/temp/overlapping_comm.txt \
    "$project"/temp/connected_"$threshold".txt; then
  echo "connected.py failed; aborting" >&2
  exit 1
fi

# Drop the first line of the result (presumably a header -- confirm against
# connected.py).
sed '1d' "$project"/temp/connected_"$threshold".txt > "$project"/temp/data_connect.txt

# One row per line: "bin <line number> overcomm <first field of that line>".
awk '{print "bin",NR,"overcomm",$1}' "$project"/temp/data_connect.txt \
  > "$project"/temp/data_overcomm_it"$targeted_iteration"_over"$threshold".txt

# Both reports are filled in append mode later on, so both must be cleared
# here; -f keeps the first run (no previous report) silent.
rm -f -- \
  "$project"/binning/data_overcomm_it"$targeted_iteration"_over"$threshold".txt \
  "$project"/binning/data_overcomm_it"$targeted_iteration"_over"$threshold"_contig.txt
# --- Per-community reports and FASTA extraction -----------------------------
# For each overlapping-community id, collect its bins, annotate the matching
# rows of bin_data / contig_data with the community id and the community
# totals, append them to the two reports under "$project"/binning, and ask
# extract_contig.py to write the community FASTA file.
: "${project:?project directory (argument 1) is required}"

overcomm_table="$project"/temp/data_overcomm_it"$targeted_iteration"_over"$threshold".txt
bin_report="$project"/binning/data_overcomm_it"$targeted_iteration"_over"$threshold".txt
contig_report="$project"/binning/data_overcomm_it"$targeted_iteration"_over"$threshold"_contig.txt

# Make sure both reports exist afterwards, even when no community has a bin.
: >> "$bin_report"
: >> "$contig_report"

# Community ids 1..max_overcomm are scanned. The default of 100 is the
# historical hard-coded limit; set MAX_OVERCOMM to scan further.
max_overcomm="${MAX_OVERCOMM:-100}"

for ((over = 1; over <= max_overcomm; over++)); do
  # Bins (column 2) assigned to this community (column 4). Appending "" forces
  # a string comparison, as the id was compared as a string literal before.
  awk -v id="$over" '$4 == (id "") {print $2}' "$overcomm_table" \
    > "$project"/temp/temp_bin.txt

  # An id without any bin has nothing to report and nothing to extract.
  [[ -s "$project"/temp/temp_bin.txt ]] || continue

  # Start from empty scratch files for this community.
  : > "$project"/temp/overcomm_data.txt
  : > "$project"/temp/overcomm_data_V2.txt
  : > "$project"/temp/overcomm_data_V3.txt

  # Rows are gathered bin by bin so the output keeps the bin order.
  while IFS= read -r bin; do
    [[ -n "$bin" ]] || continue
    # bin_data rows of this bin (bin id in column 1)
    awk -v b="$bin" -v id="$over" '$1 == (b "") {print $0,"overlapping",id}' \
      "$project"/binning/bin_data_"$targeted_iteration".txt \
      >> "$project"/temp/overcomm_data.txt
    # contig_data rows of this bin (bin id in column 5)
    awk -v b="$bin" -v id="$over" '$5 == (b "") {print $0,"overlapping",id}' \
      "$project"/binning/contig_data_"$targeted_iteration".txt \
      >> "$project"/temp/overcomm_data_V2.txt
    # contig names rebuilt as NODE_<col1>_length_<col2> for the extraction
    awk -v b="$bin" '$5 == (b "") {print "NODE_"$1"_length_"$2}' \
      "$project"/binning/contig_data_"$targeted_iteration".txt \
      >> "$project"/temp/overcomm_data_V3.txt
  done < "$project"/temp/temp_bin.txt

  # Community totals: sums of columns 3 and 2 of the collected bin_data rows.
  size_overcomm=$(awk '{sum+=$3} END {print sum}' "$project"/temp/overcomm_data.txt)
  contigs_overcomm=$(awk '{sum+=$2} END {print sum}' "$project"/temp/overcomm_data.txt)

  # Append every collected row, followed by the two totals, to the reports.
  awk -v n="$contigs_overcomm" -v s="$size_overcomm" '{print $0,n,s}' \
    "$project"/temp/overcomm_data.txt >> "$bin_report"
  awk -v n="$contigs_overcomm" -v s="$size_overcomm" '{print $0,n,s}' \
    "$project"/temp/overcomm_data_V2.txt >> "$contig_report"

  # A failed extraction is reported but does not stop the other communities.
  python secondary_scripts/extract_contig.py "$assembly" \
      "$project"/temp/overcomm_data_V3.txt \
      "$project"/fasta_it"$targeted_iteration"_over"$threshold"/over"$over".fa \
    || echo "extract_contig.py failed for overlapping community $over" >&2
done
#var=$(cat "$project"/binning/data_overcomm_it"$targeted_iteration"_over"$threshold".txt | awk '{print $4}' | sort -u)
#for over in $var
#do
#cat "$project"/binning/data_overcomm_it"$targeted_iteration"_over"$threshold".txt | awk '$4=="'$over'" {print $2}' > "$project"/temp/temp.core.txt
#python extract_contig.py "$assembly" "$project"/temp/temp.core.txt "$project"/fasta_it"$targeted_iteration"_over"$threshold"/over"$over".fa
#done