https://github.com/grenaud/libbam
Tip revision: c210c1ce1d4f0e1fe07fcbb4a438f9a7a2e3cf9b authored by grenaud on 24 June 2022, 09:00:06 UTC
subsample
subsample
Tip revision: c210c1c
tallyByRG.cpp
/*
* tallyByRG
* Date: Apr-16-2013
* Author : Gabriel Renaud gabriel.reno [at sign here ] gmail.com
*
*/
#include <iostream>
#include "api/BamMultiReader.h"
#include "api/BamReader.h"
#include "api/BamWriter.h"
#include "api/BamAux.h"
#include "libgab.h"
using namespace std;
using namespace BamTools;
int main (int argc, char *argv[]) {
if( (argc!= 2) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cerr<<"Usage:tallyByRG [in bam]"<<endl<<"this program computes how many reads mapped\nto various chromosomes on a per RG basis"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
// if(!strEndsWith(bamfiletopen,".bam")){
// }
// string bamDirOutPrefix = string(argv[2]);
map<string,vector<unsigned int> > rg2Tally;
// if(!isDirectory(bamDirOut)){
// cerr<<"ERROR: the out directory does not exist"<<endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
vector<RefData> refData=reader.GetReferenceData();
SamReadGroupDictionary srgd=header.ReadGroups;
for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
srgci<srgd.ConstEnd();
srgci++){
//cout<<*srgci<<endl;
const SamReadGroup rg = (*srgci);
//cout<<rg.ID<<endl;
rg2Tally[rg.ID] = vector<unsigned int> (references.size(),0);
}
// return 1;
// BamWriter unmapped;
// cout<<header.ToString()<<endl;
// return 1;
// if ( !unmapped.Open(bamDirOutPrefix+".unmapped.bam",header,references) ) {
// cerr << "Could not open output BAM file "<< bamDirOutPrefix+".unmapped.bam" << endl;
// return 1;
// }
BamAlignment al;
unsigned int total=0;
while ( reader.GetNextAlignment(al) ) {
if(!al.IsMapped () )//do not care about unmapped
continue;
if(al.HasTag("RG")){
string rgTag;
al.GetTag("RG",rgTag);
//cout<<rgTag<<endl;
if(rg2Tally.find(rgTag) == rg2Tally.end()){ //new
cerr<<"Unfound new RG, creating it: "<<rgTag<<endl;
rg2Tally[rgTag] = vector<unsigned int> (references.size(),0);
rg2Tally[rgTag][al.RefID]++;
}else{
rg2Tally[rgTag][al.RefID]++;
}
}else{
cerr << "Cannot get RG tag for " << al.Name<<endl;
//return 1;
}
total++;
} //while al
reader.Close();
// writer.Close();
// unmapped.Close();
cout<<"\t";
vector<string> toprint;
for(unsigned i =0;i<references.size();i++){
toprint.push_back(refData[i].RefName);
}
cout<<vectorToString(toprint,"\t")<<endl;
map<string, vector<unsigned int> >::iterator rg2TallyIt;
for (rg2TallyIt =rg2Tally.begin();
rg2TallyIt!=rg2Tally.end();
rg2TallyIt++){
cout<<rg2TallyIt->first<<"\t"<<vectorToString(rg2Tally[rg2TallyIt->first],"\t")<<endl;
//rg2TallyIt->second->Close();
}
cerr<<"Finished succesfully"<<endl;
return 0;
}