https://github.com/ekg/freebayes
Tip revision: ffcee0d8719ffdbbf0aa7059b83999fb3fc9d810 authored by Erik Garrison on 03 June 2019, 13:16:34 UTC
avoid hanging in super low entropy sequence
avoid hanging in super low entropy sequence
Tip revision: ffcee0d
Fasta.h
// ***************************************************************************
// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison@bc.edu>
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 5 February 2010 (EG)
// ---------------------------------------------------------------------------
#ifndef _FASTA_H
#define _FASTA_H
#include <map>
#include <iostream>
#include <fstream>
#include <vector>
#include <stdint.h>
#include <stdio.h>
#include <algorithm>
#include "LargeFileSupport.h"
#include "Utility.h"
#include <sys/stat.h>
#include "split.h"
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
using namespace std;
class FastaIndexEntry {
friend ostream& operator<<(ostream& output, const FastaIndexEntry& e);
public:
FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len);
FastaIndexEntry(void);
~FastaIndexEntry(void);
string name; // sequence name
int length; // length of sequence
long long offset; // bytes offset of sequence from start of file
int line_blen; // line length in bytes, sequence characters
int line_len; // line length including newline
void clear(void);
};
class FastaIndex : public map<string, FastaIndexEntry> {
friend ostream& operator<<(ostream& output, FastaIndex& i);
public:
FastaIndex(void);
~FastaIndex(void);
vector<string> sequenceNames;
void indexReference(string refName);
void readIndexFile(string fname);
void writeIndexFile(string fname);
ifstream indexFile;
FastaIndexEntry entry(string key);
void flushEntryToIndex(FastaIndexEntry& entry);
string indexFileExtension(void);
};
class FastaReference {
public:
void open(string reffilename);
string filename;
~FastaReference(void);
FILE* file;
FastaIndex* index;
vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart);
string getRawSequence(string seqname);
string getSequence(string seqname);
// potentially useful for performance, investigate
// void getSequence(string seqname, string& sequence);
string getRawSubSequence(string seqname, int start, int length);
string getSubSequence(string seqname, int start, int length);
string sequenceNameStartingWith(string seqnameStart);
long unsigned int sequenceLength(string seqname);
};
#endif