https://github.com/splatlab/squeakr
Revision f5d2feb45773a03ccea9cea87ac2234ff21068e2 authored by Prashant Pandey on 04 January 2019, 19:24:04 UTC, committed by Prashant Pandey on 04 January 2019, 19:24:04 UTC
1 parent 88589e9
Tip revision: f5d2feb45773a03ccea9cea87ac2234ff21068e2 authored by Prashant Pandey on 04 January 2019, 19:24:04 UTC
Changing info to mmap Squeakr file.
Changing info to mmap Squeakr file.
Tip revision: f5d2feb
query.cc
/*
* ============================================================================
*
* Authors: Prashant Pandey <ppandey@cs.stonybrook.edu>
* Rob Johnson <robj@vmware.com>
* Rob Patro (rob.patro@cs.stonybrook.edu)
*
* ============================================================================
*/
#include <iostream>
#include <algorithm>
#include <cstring>
#include <vector>
#include <set>
#include <bitset>
#include <cassert>
#include <fstream>
#include <boost/thread/thread.hpp>
#include <boost/lockfree/queue.hpp>
#include <boost/lockfree/spsc_queue.hpp>
#include <boost/atomic.hpp>
#include <time.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/mman.h>
#include "clipp.h"
#include "ProgOpts.h"
#include "gqf_cpp.h"
#include "kmer.h"
#include "squeakrconfig.h"
/*
 * ===  FUNCTION  =============================================================
 *         Name:  query_main
 *  Description:  Load a Squeakr index (a CQF followed by a trailing
 *                squeakrconfig record), parse the k-mers contained in the
 *                query file, look each one up in the CQF and write
 *                "<kmer>\t<count>" lines to the output file. K-mers whose
 *                count is 0 are logged through the console logger and
 *                tallied instead of being written.
 *
 *       Params:  opts - query options; this function reads opts.console,
 *                       opts.squeakr_file, opts.queryfile and
 *                       opts.output_file.
 *
 *      Returns:  EXIT_SUCCESS when the query ran to completion;
 *                a non-zero status when the index trailer cannot be read,
 *                the k-mer size does not match the CQF, or the output file
 *                cannot be opened. A version mismatch terminates the
 *                process via exit(1), as before.
 * ============================================================================
 */
int query_main(QueryOpts& opts)
{
	struct timeval start, end;
	struct timezone tzp;
	std::unordered_set<uint64_t> kmers;

	// Kept from the original implementation: seeds the C PRNG as a
	// process-wide side effect. Nothing in this function calls rand().
	srand(time(NULL));
	spdlog::logger* console = opts.console.get();

	//Initialize the QF
	console->info("Reading kmers into the QF off the disk.");
	CQF<KeyObject> cqf(opts.squeakr_file, FREAD);

	// The squeakrconfig record lives in the last sizeof(squeakrconfig) bytes
	// of the index file. Open in binary mode because a raw struct is read.
	std::ifstream squeakr_file(opts.squeakr_file,
														 std::ios::in | std::ios::binary);
	if (!squeakr_file.is_open()) {
		console->error("Could not open Squeakr file: {}", opts.squeakr_file);
		return EXIT_FAILURE;
	}
	squeakr_file.seekg(0, squeakr_file.end);
	// tellg() yields -1 on failure; a signed offset lets one comparison catch
	// both that case and a file too short to hold the trailer (the original
	// unsigned subtraction would silently wrap around).
	const std::streamoff file_size = squeakr_file.tellg();
	const std::streamoff config_size =
		static_cast<std::streamoff>(sizeof(squeakrconfig));
	if (file_size < config_size) {
		console->error("Squeakr file is too small to contain a valid config: {}",
									 opts.squeakr_file);
		return EXIT_FAILURE;
	}
	squeakrconfig config;
	squeakr_file.seekg(file_size - config_size);
	if (!squeakr_file.read(reinterpret_cast<char*>(&config), sizeof(config))) {
		console->error("Could not read config from Squeakr file: {}",
									 opts.squeakr_file);
		return EXIT_FAILURE;
	}
	squeakr_file.close();

	if (config.version != VERSION) {
		console->error("Squeakr index version is invalid. Expected: {} Available: {}",
									 VERSION, config.version);
		// NOTE(review): exit() is preserved here because the caller is not
		// visible from this file and may not propagate the return value.
		exit(1);
	}
	console->info("kmer size: {}, version: {}", config.kmer_size, config.version);

	// For an exact CQF the key width must be twice the k-mer size
	// (presumably 2 bits per base -- confirm against the Kmer encoding).
	if (cqf.is_exact() && config.kmer_size*2 != cqf.keybits()) {
		console->error("K-mer size is not correct.");
		return 1;
	}

	console->info("Parsing query file for {}-mers.", config.kmer_size);
	Kmer::parse_kmers(opts.queryfile.c_str(), config.kmer_size, kmers);
	console->info("Found {} k-mers", kmers.size());

	std::ofstream opfile(opts.output_file.c_str(), std::ofstream::out);
	if (!opfile.is_open()) {
		// Without this check every result would be silently discarded.
		console->error("Could not open output file: {}", opts.output_file);
		return EXIT_FAILURE;
	}

	console->info("Querying kmers in the QF.");
	uint64_t num_not_found = 0;
	gettimeofday(&start, &tzp);
	for (const uint64_t kmer : kmers) {
		const uint64_t count = cqf.query(KeyObject(kmer, 0, 0), 0);
		if (count == 0) {
			// Pass the k-mer as an argument rather than as the format string.
			console->error("{}", Kmer::int_to_str(kmer, config.kmer_size));
			++num_not_found;
		} else {
			// '\n' instead of std::endl: avoid flushing the stream once per
			// k-mer; the buffer is flushed when the file is closed below.
			opfile << Kmer::int_to_str(kmer, config.kmer_size) << '\t' << count
						 << '\n';
		}
	}
	gettimeofday(&end, &tzp);
	opfile.close();

	print_time_elapsed("", &start, &end, console);
	console->info("Not found: {}", num_not_found);

	return EXIT_SUCCESS;
}				/* ----------  end of function query_main  ---------- */
Computing file changes ...