https://github.com/ekg/freebayes
Tip revision: 9755e43db74f9fa8ad2653d6a771d7a8d148ee66 authored by Erik Garrison on 10 December 2013, 00:24:08 UTC
Setting Release-Version v0.9.10
Setting Release-Version v0.9.10
Tip revision: 9755e43
bamfiltertech.cpp
#include <iostream>
#include <getopt.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <signal.h>
#include <stdlib.h>
#include <cmath>
#include <algorithm>
#include <map>
#include "BamAlignment.h"
#include "BamReader.h"
#include "BamWriter.h"
#include "split.h"
using namespace std;
using namespace BamTools;
int main(int argc, char** argv) {
if (argc == 1) {
cerr << "usage: " << argv[0] << " [technology name] [ [technology name] ... ]" << endl;
cerr << "filters BAM file piped on stdin for reads generated by a sequencing technology listed on the command line" << endl;
return 1;
}
map<string, bool> technologies;
for (int i = 1; i < argc; ++i) {
technologies[argv[i]] = true;
}
BamReader reader;
if (!reader.Open("stdin")) {
cerr << "Could not open stdin for reading" << endl;
return 1;
}
// retrieve header information
map<string, string> readGroupToTechnology;
string bamHeader = reader.GetHeaderText();
vector<string> headerLines = split(bamHeader, '\n');
for (vector<string>::const_iterator it = headerLines.begin(); it != headerLines.end(); ++it) {
// get next line from header, skip if empty
string headerLine = *it;
if ( headerLine.empty() ) { continue; }
// lines of the header look like:
// "@RG ID:- SM:NA11832 CN:BCM PL:454"
// ^^^^^^^\ is our sample name
if ( headerLine.find("@RG") == 0 ) {
vector<string> readGroupParts = split(headerLine, "\t ");
string tech;
string readGroupID;
for (vector<string>::const_iterator r = readGroupParts.begin(); r != readGroupParts.end(); ++r) {
vector<string> nameParts = split(*r, ":");
if (nameParts.at(0) == "PL") {
tech = nameParts.at(1);
} else if (nameParts.at(0) == "ID") {
readGroupID = nameParts.at(1);
}
}
if (tech.empty()) {
cerr << " could not find PL: in @RG tag " << endl << headerLine << endl;
return 1;
}
if (readGroupID.empty()) {
cerr << " could not find ID: in @RG tag " << endl << headerLine << endl;
return 1;
}
//string name = nameParts.back();
//mergedHeader.append(1, '\n');
//cerr << "found read group id " << readGroupID << " containing sample " << name << endl;
readGroupToTechnology[readGroupID] = tech;
}
}
// open writer, uncompressed BAM
BamWriter writer;
bool writeUncompressed = true;
if ( !writer.Open("stdout", bamHeader, reader.GetReferenceData(), writeUncompressed) ) {
cerr << "Could not open stdout for writing." << endl;
return 1;
}
BamAlignment alignment;
while (reader.GetNextAlignment(alignment)) {
string name;
if (alignment.GetTag("RG", name)) {
if (technologies.find(readGroupToTechnology[name]) != technologies.end()) {
//cout << name << " " << readGroupToTechnology[name] << endl;
writer.SaveAlignment(alignment);
}
}
}
reader.Close();
writer.Close();
return 0;
}