https://github.com/pmelsted/pizzly
Tip revision: 8617a24a7fc353a99121431d92c6fd64439fcd71 authored by Pall Melsted on 12 September 2017, 11:10:57 UTC
Merge pull request #14 from bgruening/patch-1
Merge pull request #14 from bgruening/patch-1
Tip revision: 8617a24
main.cpp
#include <iostream>
#include <seqan/arg_parse.h>
#include <seqan/seq_io.h>
#include "common.h"
#include "GeneModel.hpp"
#include "FilterFusions.hpp"
seqan::ArgumentParser::ParseResult
parseCommandLine(ProgramOptions & options, int argc, char const ** argv) {
seqan::ArgumentParser parser("pizzly");
// We require one argument.
seqan::addArgument(parser, seqan::ArgParseArgument(
seqan::ArgParseArgument::STRING, "FUSION"));
// Define Options
seqan::addOption(parser, seqan::ArgParseOption(
"k", "", "k-mer size used in kallisto",
seqan::ArgParseArgument::INTEGER, "K"));
seqan::addOption(parser, seqan::ArgParseOption(
"a", "align-score", "Maximum number of mismatches allowed (default: 2)",
seqan::ArgParseArgument::INTEGER, "ALIGN_SCORE"));
seqan::addOption(parser, seqan::ArgParseOption(
"i", "insert-size", "Maximum fragment size of library (default: 400)",
seqan::ArgParseArgument::INTEGER, "INSERT_SIZE"));
seqan::addOption(parser, seqan::ArgParseOption(
"o", "output", "Prefix for output files",
seqan::ArgParseArgument::STRING, "OUTPUT_PREFIX"));
seqan::addOption(parser, seqan::ArgParseOption(
"G", "gtf", "Annotation in GTF format",
seqan::ArgParseArgument::STRING, "GTF"));
seqan::addOption(parser, seqan::ArgParseOption(
"C", "cache", "File for caching annotation (created if not present, otherwise reused from previous runs)",
seqan::ArgParseArgument::STRING, "cache"));
seqan::addOption(parser, seqan::ArgParseOption(
"F", "fasta", "Fasta reference",
seqan::ArgParseArgument::STRING, "FASTA"));
seqan::addOption(parser, seqan::ArgParseOption(
"", "ignore-protein", "Ignore any protein coding information in annotation"));
seqan::setRequired(parser, "k");
seqan::setRequired(parser, "o");
seqan::setRequired(parser, "F");
seqan::setVersion(parser, std::string(PIZZLY_VERSION));
// Parse command line.
seqan::ArgumentParser::ParseResult res = seqan::parse(parser, argc, argv);
// Only extract options if the program will continue after parseCommandLine()
if (res != seqan::ArgumentParser::PARSE_OK)
return res;
// Extract option values.
getOptionValue(options.k, parser, "k");
getOptionValue(options.alignScore, parser, "align-score");
getOptionValue(options.insertSize, parser, "insert-size");
getOptionValue(options.gtf, parser, "gtf");
getOptionValue(options.fasta, parser, "fasta");
getOptionValue(options.cache, parser, "cache");
getOptionValue(options.outprefix, parser, "output");
getArgumentValue(options.fusionFile, parser, 0);
if (seqan::isSet(parser, "ignore-protein")) {
options.ignoreProtein = true;
}
return seqan::ArgumentParser::PARSE_OK;
}
int main(int argc, char const ** argv)
{
// Parse the command line.
ProgramOptions options;
seqan::ArgumentParser::ParseResult res = parseCommandLine(options, argc, argv);
// If parsing was not successful then exit with code 1 if there were errors.
// Otherwise, exit with code 0 (e.g. help was printed).
if (res != seqan::ArgumentParser::PARSE_OK)
return res == seqan::ArgumentParser::PARSE_ERROR;
// parse GTF file
Transcriptome trx;
if (options.cache.empty()) {
parseGTF(trx, options.gtf, options);
} else {
std::ifstream in(options.cache);
if (in.good()) {
std::cerr << "Opening cached file ... ";
loadTranscriptome(trx,in, options);
std::cerr << "loaded " << trx.genes.size() << " genes and " << trx.trxToGeneId.size() << " transcripts" << std::endl;
in.close();
} else {
parseGTF(trx, options.gtf, options);
in.close();
std::ofstream out(options.cache);
writeTranscriptome(trx,out);
out.close();
}
in.close();
}
parseFasta(trx, options.fasta);
int numTxpSeqFound = 0;
int numProt = 0;
for (const auto &it : trx.seqs) {
if (trx.trxToGeneId.find(it.first) != trx.trxToGeneId.end()) {
numTxpSeqFound++;
}
}
for (const auto &git : trx.genes) {
for (const auto &it : git.second.transcripts) {
if (it.second.type == BioType::PROTEIN) {
numProt++;
}
}
}
if (numTxpSeqFound == 0) {
std::cerr << "Error, could not find any transcript sequences check that the ids in the FASTA file and GTF file match" << std::endl;
exit(1);
} else if (numTxpSeqFound != trx.seqs.size()) {
std::cerr << "Warning: could not find annotations for " << (trx.seqs.size() - numTxpSeqFound) << " transcripts\n" << std::endl;
}
if (!options.ignoreProtein && numProt == 0) {
std::cerr << "Warning: the annotation contains no protein coding information\n"
<< " pizzly will probably not report any fusions, consider finding\n"
<< " a better annotation or run again with --ignore-protein option.\n" << std::endl;
}
// filter fusion edges
processFusions(trx,options);
return 0;
}