Revision 8c40ff8c8d774941b520ba9e2a0ad9e1fdf1ce11 authored by Prashant Pandey on 01 August 2017, 01:15:07 UTC, committed by Prashant Pandey on 01 August 2017, 01:15:07 UTC
1 parent 8760147
kmer.h
/*
* =====================================================================================
*
* Filename: kmer.h
*
* Description:
*
* Version: 1.0
* Created: 04/18/2016 05:06:51 PM
* Revision: none
* Compiler: gcc
*
* Author: Prashant Pandey (ppandey@cs.stonybrook.edu)
* Rob Patro (rob.patro@cs.stonybrook.edu)
* Rob Johnson (rob@cs.stonybrook.edu)
* Organization: Stony Brook University
*
* =====================================================================================
*/
#ifndef _KMER_H_
#define _KMER_H_
#include <stdio.h>
#include <string>
#define K 55
//#define K 28
enum DNA_MAP {C, A, T, G}; // A=1, C=0, T=2, G=3
using namespace std;
namespace kmercounting {
class kmer {
public:
static inline char map_int(uint8_t base);
static inline uint8_t map_base(char base);
static __int128_t str_to_int(string str);
static string int_to_str(__int128_t kmer);
static inline int reverse_complement_base(int x);
static __int128_t reverse_complement(__int128_t kmer);
static inline bool compare_kmers(__int128_t kmer, __int128_t kmer_rev);
static inline unsigned __int128 word_reverse_complement(unsigned __int128 w);
static inline int64_t word_reverse_complement(uint64_t w);
static inline uint32_t word_reverse_complement(uint32_t w);
private:
kmer();
};
/*return the integer representation of the base */
inline char kmer::map_int(uint8_t base)
{
switch(base) {
case DNA_MAP::A: { return 'A'; }
case DNA_MAP::T: { return 'T'; }
case DNA_MAP::C: { return 'C'; }
case DNA_MAP::G: { return 'G'; }
default: { return DNA_MAP::G+1; }
}
}
/*return the integer representation of the base */
inline uint8_t kmer::map_base(char base)
{
switch(base) {
case 'A': { return DNA_MAP::A; }
case 'T': { return DNA_MAP::T; }
case 'C': { return DNA_MAP::C; }
case 'G': { return DNA_MAP::G; }
default: { return DNA_MAP::G+1; }
}
}
/**
* Converts a string of "ATCG" to a uint64_t
* where each character is represented by using only two bits
*/
__int128_t str_to_int(string str)
{
uint64_t strint = 0;
for (auto it = str.begin(); it != str.end(); it++) {
uint8_t curr = 0;
switch (*it) {
case 'A': { curr = DNA_MAP::A; break; }
case 'T': { curr = DNA_MAP::T; break; }
case 'C': { curr = DNA_MAP::C; break; }
case 'G': { curr = DNA_MAP::G; break; }
}
strint = strint | curr;
strint = strint << 2;
}
return strint >> 2;
}
/**
* Converts a uint64_t to a string of "ACTG"
* where each character is represented by using only two bits
*/
string int_to_str(__int128_t kmer)
{
uint8_t base;
string str;
for (int i=K; i>0; i--) {
base = (kmer >> (i*2-2)) & 3ULL;
char chr = kmer::map_int(base);
str.push_back(chr);
}
return str;
}
/* Return the reverse complement of a base */
inline int kmer::reverse_complement_base(int x) { return 3 - x; }
/* Calculate the revsese complement of a kmer */
__int128_t kmer::reverse_complement(__int128_t kmer)
{
__int128_t rc = 0;
uint8_t base = 0;
for (int i=0; i<K; i++) {
base = kmer & 3ULL;
base = reverse_complement_base(base);
kmer >>= 2;
rc |= base;
rc <<= 2;
}
rc >>=2;
return rc;
}
/* Compare the kmer and its reverse complement and return the result
* Return true if the kmer is greater than or equal to its
* reverse complement.
* */
inline bool kmer::compare_kmers(__int128_t kmer, __int128_t kmer_rev)
{
return kmer >= kmer_rev;
}
}
#endif
Computing file changes ...