Revision 5424c50a79c72d9c4868148cee7731390f4fa14e authored by Prashant Pandey on 26 June 2018, 01:11:06 UTC, committed by Prashant Pandey on 26 June 2018, 01:11:06 UTC
1 parent 3c95ad4
reader.h
/*
* =====================================================================================
*
* Filename: reader.h
*
* Description:
*
* Version: 1.0
* Created: 05/06/2016 09:56:26 PM
* Revision: none
* Compiler: gcc
*
* Author: Prashant Pandey (), ppandey@cs.stonybrook.edu
* Organization: Stony Brook University
*
* =====================================================================================
*/
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <zlib.h>
#include <bzlib.h>
#ifndef _READER_H_
#define _READER_H_
struct file_pointer;
class reader {
public:
reader();
reader(FILE *in, gzFile in_gzip, BZFILE *in_bzip2, int bzerror);
bool is_eof(int mode);
static bool skip_next_eol(char *part, int64_t &pos, int64_t max_pos);
static bool fastq_read_parts(int mode, file_pointer *fp);
static bool getFileReader(int mode, const char* fastq_file, reader*
file_reader);
FILE *in = nullptr;
gzFile in_gzip = nullptr;
BZFILE *in_bzip2 = nullptr;
int bzerror;
};
struct file_pointer {
std::unique_ptr<reader> freader{nullptr};
char* part{nullptr};
char* part_buffer{nullptr};
int mode{0};
uint64_t size{0};
uint64_t part_filled{0};
};
reader::reader()
{
in = nullptr;
in_gzip = nullptr;
in_bzip2 = nullptr;
bzerror = 0;
}
reader::reader(FILE *_in, gzFile _in_gzip, BZFILE *_in_bzip2, int _bzerror)
{
in = _in;
in_gzip = _in_gzip;
in_bzip2 = _in_bzip2;
bzerror = _bzerror;
}
/* check if it's the end of the file. */
bool reader::is_eof(int mode) {
if (mode == 0)
return feof(in) != 0;
else if (mode == 1)
return gzeof(in_gzip) != 0;
else if (mode == 2)
return bzerror == BZ_STREAM_END;
return true;
}
/* move the pointer to the end of the next newline. */
bool reader::skip_next_eol(char *part, int64_t &pos, int64_t max_pos) {
int64_t i;
for(i = pos; i < max_pos-2; ++i)
if((part[i] == '\n' || part[i] == '\r') && !(part[i+1] == '\n' ||
part[i+1] == '\r'))
break;
if(i >= max_pos-2)
return false;
pos = i+1;
return true;
}
/* read a part of the fastq file. */
bool reader::fastq_read_parts(int mode, file_pointer *fp) {
char *& _part = (fp->part);
uint64_t& _size = fp->size;
char*& part_buffer = (fp->part_buffer);
uint64_t& part_filled = fp->part_filled;
reader& file_reader = *(fp->freader.get());
uint32_t OVERHEAD_SIZE = 65535;
uint64_t part_size = 1ULL << 23;
char *part = (char *)malloc((part_size + OVERHEAD_SIZE)*sizeof(char));
memcpy(part, part_buffer, part_filled);
if(file_reader.is_eof(mode))
return false;
uint64_t readed = 0;
if (mode == 0)
readed = fread(part+part_filled, 1, part_size, file_reader.in);
else if (mode == 1)
readed = gzread(file_reader.in_gzip, part+part_filled, (int) part_size);
else if (mode == 2)
readed = BZ2_bzRead(&file_reader.bzerror, file_reader.in_bzip2,
part+part_filled, (int) part_size);
else
readed = 0;
int64_t total_filled = part_filled + readed;
int64_t i;
if(part_filled >= OVERHEAD_SIZE)
{
std::cout << "Error: Wrong input file!" << std::endl;
exit(EXIT_FAILURE);
}
if(file_reader.is_eof(mode))
{
_part = part;
_size = total_filled;
part = NULL;
return true;
}
// Looking for a FASTQ record at the end of the area
{
int64_t line_start[9];
int32_t j;
i = total_filled - OVERHEAD_SIZE / 2;
for(j = 0; j < 9; ++j)
{
if(!skip_next_eol(part, i, total_filled))
break;
line_start[j] = i;
}
_part = part;
if(j < 9)
_size = 0;
else
{
int k;
for(k = 0; k < 4; ++k)
{
if(part[line_start[k]+0] == '@' && part[line_start[k+2]+0] == '+')
{
if(part[line_start[k+2]+1] == '\n' || part[line_start[k+2]+1] == '\r')
break;
if(line_start[k+1]-line_start[k] == line_start[k+3]-line_start[k+2] &&
memcmp(part+line_start[k]+1, part+line_start[k+2]+1,
line_start[k+3]-line_start[k+2]-1) == 0)
break;
}
}
if(k == 4)
_size = 0;
else
_size = line_start[k];
}
}
std::copy(_part+_size, _part+total_filled, part_buffer);
part_filled = total_filled - _size;
return true;
}
bool reader::getFileReader(int mode, const char* fastq_file, reader*
file_reader) {
uint64_t gzip_buffer_size = 1ULL << 26;
uint64_t bzip2_buffer_size = 1ULL << 26;
if (mode == 0) {
if ((file_reader->in = fopen(fastq_file, "rb")) == NULL)
return false;
} else if (mode == 1) {
if ((file_reader->in_gzip = gzopen(fastq_file, "rb")) == NULL)
return false;
gzbuffer(file_reader->in_gzip, gzip_buffer_size);
} else if (mode == 2) {
file_reader->in = fopen(fastq_file, "rb");
if (!file_reader->in)
return false;
setvbuf(file_reader->in, NULL, _IOFBF, bzip2_buffer_size);
if ((file_reader->in_bzip2 = BZ2_bzReadOpen(&file_reader->bzerror,
file_reader->in, 0, 0, NULL,
0)) == NULL) {
fclose(file_reader->in);
return false;
}
}
return true;
}
#endif
Computing file changes ...