/*
*
* nthash.hpp
* Author: Hamid Mohamadi
* Genome Sciences Centre,
* British Columbia Cancer Agency
*/
#ifndef NT_HASH_H
#define NT_HASH_H
#include <stdint.h>
// offset for the complement base in the random seeds table
const uint8_t cpOff = 0x07;
// shift for gerenerating multiple hash values
const int multiShift = 27;
// seed for gerenerating multiple hash values
static const uint64_t multiSeed = 0x90b45d39fb6da1fa;
// 64-bit random seeds corresponding to bases and their complements
static const uint64_t seedA = 0x3c8bfbb395c60474;
static const uint64_t seedC = 0x3193c18562a02b4c;
static const uint64_t seedG = 0x20323ed082572324;
static const uint64_t seedT = 0x295549f54be24456;
static const uint64_t seedN = 0x0000000000000000;
static const uint64_t vecA[64] = {
0x3c8bfbb395c60474,0x7917f7672b8c08e8,0xf22feece571811d0,0xe45fdd9cae3023a1,0xc8bfbb395c604743,0x917f7672b8c08e87,0x22feece571811d0f,0x45fdd9cae3023a1e,
0x8bfbb395c604743c,0x17f7672b8c08e879,0x2feece571811d0f2,0x5fdd9cae3023a1e4,0xbfbb395c604743c8,0x7f7672b8c08e8791,0xfeece571811d0f22,0xfdd9cae3023a1e45,
0xfbb395c604743c8b,0xf7672b8c08e87917,0xeece571811d0f22f,0xdd9cae3023a1e45f,0xbb395c604743c8bf,0x7672b8c08e87917f,0xece571811d0f22fe,0xd9cae3023a1e45fd,
0xb395c604743c8bfb,0x672b8c08e87917f7,0xce571811d0f22fee,0x9cae3023a1e45fdd,0x395c604743c8bfbb,0x72b8c08e87917f76,0xe571811d0f22feec,0xcae3023a1e45fdd9,
0x95c604743c8bfbb3,0x2b8c08e87917f767,0x571811d0f22feece,0xae3023a1e45fdd9c,0x5c604743c8bfbb39,0xb8c08e87917f7672,0x71811d0f22feece5,0xe3023a1e45fdd9ca,
0xc604743c8bfbb395,0x8c08e87917f7672b,0x1811d0f22feece57,0x3023a1e45fdd9cae,0x604743c8bfbb395c,0xc08e87917f7672b8,0x811d0f22feece571,0x023a1e45fdd9cae3,
0x04743c8bfbb395c6,0x08e87917f7672b8c,0x11d0f22feece5718,0x23a1e45fdd9cae30,0x4743c8bfbb395c60,0x8e87917f7672b8c0,0x1d0f22feece57181,0x3a1e45fdd9cae302,
0x743c8bfbb395c604,0xe87917f7672b8c08,0xd0f22feece571811,0xa1e45fdd9cae3023,0x43c8bfbb395c6047,0x87917f7672b8c08e,0x0f22feece571811d,0x1e45fdd9cae3023a
};
static const uint64_t vecC[64] = {
0x3193c18562a02b4c,0x6327830ac5405698,0xc64f06158a80ad30,0x8c9e0c2b15015a61,0x193c18562a02b4c3,0x327830ac54056986,0x64f06158a80ad30c,0xc9e0c2b15015a618,
0x93c18562a02b4c31,0x27830ac540569863,0x4f06158a80ad30c6,0x9e0c2b15015a618c,0x3c18562a02b4c319,0x7830ac5405698632,0xf06158a80ad30c64,0xe0c2b15015a618c9,
0xc18562a02b4c3193,0x830ac54056986327,0x06158a80ad30c64f,0x0c2b15015a618c9e,0x18562a02b4c3193c,0x30ac540569863278,0x6158a80ad30c64f0,0xc2b15015a618c9e0,
0x8562a02b4c3193c1,0x0ac5405698632783,0x158a80ad30c64f06,0x2b15015a618c9e0c,0x562a02b4c3193c18,0xac54056986327830,0x58a80ad30c64f061,0xb15015a618c9e0c2,
0x62a02b4c3193c185,0xc54056986327830a,0x8a80ad30c64f0615,0x15015a618c9e0c2b,0x2a02b4c3193c1856,0x54056986327830ac,0xa80ad30c64f06158,0x5015a618c9e0c2b1,
0xa02b4c3193c18562,0x4056986327830ac5,0x80ad30c64f06158a,0x015a618c9e0c2b15,0x02b4c3193c18562a,0x056986327830ac54,0x0ad30c64f06158a8,0x15a618c9e0c2b150,
0x2b4c3193c18562a0,0x56986327830ac540,0xad30c64f06158a80,0x5a618c9e0c2b1501,0xb4c3193c18562a02,0x6986327830ac5405,0xd30c64f06158a80a,0xa618c9e0c2b15015,
0x4c3193c18562a02b,0x986327830ac54056,0x30c64f06158a80ad,0x618c9e0c2b15015a,0xc3193c18562a02b4,0x86327830ac540569,0x0c64f06158a80ad3,0x18c9e0c2b15015a6
};
static const uint64_t vecG[64] = {
0x20323ed082572324,0x40647da104ae4648,0x80c8fb42095c8c90,0x0191f68412b91921,0x0323ed0825723242,0x0647da104ae46484,0x0c8fb42095c8c908,0x191f68412b919210,
0x323ed08257232420,0x647da104ae464840,0xc8fb42095c8c9080,0x91f68412b9192101,0x23ed082572324203,0x47da104ae4648406,0x8fb42095c8c9080c,0x1f68412b91921019,
0x3ed0825723242032,0x7da104ae46484064,0xfb42095c8c9080c8,0xf68412b919210191,0xed08257232420323,0xda104ae464840647,0xb42095c8c9080c8f,0x68412b919210191f,
0xd08257232420323e,0xa104ae464840647d,0x42095c8c9080c8fb,0x8412b919210191f6,0x08257232420323ed,0x104ae464840647da,0x2095c8c9080c8fb4,0x412b919210191f68,
0x8257232420323ed0,0x04ae464840647da1,0x095c8c9080c8fb42,0x12b919210191f684,0x257232420323ed08,0x4ae464840647da10,0x95c8c9080c8fb420,0x2b919210191f6841,
0x57232420323ed082,0xae464840647da104,0x5c8c9080c8fb4209,0xb919210191f68412,0x7232420323ed0825,0xe464840647da104a,0xc8c9080c8fb42095,0x919210191f68412b,
0x232420323ed08257,0x464840647da104ae,0x8c9080c8fb42095c,0x19210191f68412b9,0x32420323ed082572,0x64840647da104ae4,0xc9080c8fb42095c8,0x9210191f68412b91,
0x2420323ed0825723,0x4840647da104ae46,0x9080c8fb42095c8c,0x210191f68412b919,0x420323ed08257232,0x840647da104ae464,0x080c8fb42095c8c9,0x10191f68412b9192
};
static const uint64_t vecT[64] = {
0x295549f54be24456,0x52aa93ea97c488ac,0xa55527d52f891158,0x4aaa4faa5f1222b1,0x95549f54be244562,0x2aa93ea97c488ac5,0x55527d52f891158a,0xaaa4faa5f1222b14,
0x5549f54be2445629,0xaa93ea97c488ac52,0x5527d52f891158a5,0xaa4faa5f1222b14a,0x549f54be24456295,0xa93ea97c488ac52a,0x527d52f891158a55,0xa4faa5f1222b14aa,
0x49f54be244562955,0x93ea97c488ac52aa,0x27d52f891158a555,0x4faa5f1222b14aaa,0x9f54be2445629554,0x3ea97c488ac52aa9,0x7d52f891158a5552,0xfaa5f1222b14aaa4,
0xf54be24456295549,0xea97c488ac52aa93,0xd52f891158a55527,0xaa5f1222b14aaa4f,0x54be24456295549f,0xa97c488ac52aa93e,0x52f891158a55527d,0xa5f1222b14aaa4fa,
0x4be24456295549f5,0x97c488ac52aa93ea,0x2f891158a55527d5,0x5f1222b14aaa4faa,0xbe24456295549f54,0x7c488ac52aa93ea9,0xf891158a55527d52,0xf1222b14aaa4faa5,
0xe24456295549f54b,0xc488ac52aa93ea97,0x891158a55527d52f,0x1222b14aaa4faa5f,0x24456295549f54be,0x488ac52aa93ea97c,0x91158a55527d52f8,0x222b14aaa4faa5f1,
0x4456295549f54be2,0x88ac52aa93ea97c4,0x1158a55527d52f89,0x22b14aaa4faa5f12,0x456295549f54be24,0x8ac52aa93ea97c48,0x158a55527d52f891,0x2b14aaa4faa5f122,
0x56295549f54be244,0xac52aa93ea97c488,0x58a55527d52f8911,0xb14aaa4faa5f1222,0x6295549f54be2445,0xc52aa93ea97c488a,0x8a55527d52f89115,0x14aaa4faa5f1222b
};
static const uint64_t vecN[64] = {
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN,
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN,
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN,
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN,
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN,
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN,
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN,
seedN,seedN,seedN,seedN,seedN,seedN,seedN,seedN
};
static const uint64_t *msTab[256] = {
vecN, vecT, vecN, vecG, vecA, vecN, vecN, vecC, // 0..7
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 8..15
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 16..23
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 24..31
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 32..39
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 40..47
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 48..55
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 56..63
vecN, vecA, vecN, vecC, vecN, vecN, vecN, vecG, // 64..71
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 72..79
vecN, vecN, vecN, vecN, vecT, vecN, vecN, vecN, // 80..87
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 88..95
vecN, vecA, vecN, vecC, vecN, vecN, vecN, vecG, // 96..103
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 104..111
vecN, vecN, vecN, vecN, vecT, vecN, vecN, vecN, // 112..119
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 120..127
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 128..135
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 136..143
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 144..151
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 152..159
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 160..167
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 168..175
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 176..183
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 184..191
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 192..199
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 200..207
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 208..215
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 216..223
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 224..231
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 232..239
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN, // 240..247
vecN, vecN, vecN, vecN, vecN, vecN, vecN, vecN // 248..255
};
static const uint64_t seedTab[256] = {
seedN, seedT, seedN, seedG, seedA, seedN, seedN, seedC, // 0..7
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 8..15
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 16..23
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 24..31
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 32..39
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 40..47
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 48..55
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 56..63
seedN, seedA, seedN, seedC, seedN, seedN, seedN, seedG, // 64..71
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 72..79
seedN, seedN, seedN, seedN, seedT, seedN, seedN, seedN, // 80..87
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 88..95
seedN, seedA, seedN, seedC, seedN, seedN, seedN, seedG, // 96..103
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 104..111
seedN, seedN, seedN, seedN, seedT, seedN, seedN, seedN, // 112..119
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 120..127
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 128..135
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 136..143
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 144..151
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 152..159
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 160..167
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 168..175
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 176..183
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 184..191
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 192..199
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 200..207
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 208..215
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 216..223
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 224..231
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 232..239
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, // 240..247
seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN // 248..255
};
/*// assembly rol
inline uint64_t rol (uint64_t v, size_t n) {
asm("rol %b1, %0":"+r,r"(v):"i,c"(n));
return (v);
}
// assembly ror
inline uint64_t ror (uint64_t v, size_t n) {
asm("ror %b1, %0":"+r,r"(v):"i,c"(n));
return (v);
}*/
// rotate "v" to the left 1 position
inline uint64_t rol1(const uint64_t v) {
return (v << 1) | (v >> 63);
}
// rotate "v" to the left by "s" positions
inline uint64_t rol(const uint64_t v, int s) {
if ((s &= 63) == 0)
return v;
return (v << s) | (v >> (64 - s));
}
// rotate "v" to the right by 1 position
inline uint64_t ror1(const uint64_t v) {
return (v >> 1) | (v << 63);
}
// rotate "v" to the right by "s" positions
inline uint64_t ror(const uint64_t v, int s) {
if ((s &= 63) == 0)
return v;
return (v >> s) | (v << (64 - s));
}
// forward-strand hash value of the base kmer, i.e. fhval(kmer_0)
inline uint64_t getFhval(const char * kmerSeq, const unsigned k) {
uint64_t hVal=0;
for(unsigned i=0; i<k; i++)
hVal ^= rol(seedTab[(unsigned char)kmerSeq[i]], k-1-i);
return hVal;
}
// reverse-strand hash value of the base kmer, i.e. rhval(kmer_0)
inline uint64_t getRhval(const char * kmerSeq, const unsigned k) {
uint64_t hVal=0;
for(unsigned i=0; i<k; i++)
hVal ^= rol(seedTab[(unsigned char)kmerSeq[i]&cpOff], i);
return hVal;
}
// ntHash basic function, i.e. ntBase, using rotate ops
inline uint64_t NT64(const char * kmerSeq, const unsigned k) {
return getFhval(kmerSeq, k);
}
// ntHash for sliding k-mers, using rotate ops
inline uint64_t NT64(const uint64_t fhVal, const unsigned char charOut, const unsigned char charIn, const unsigned k) {
return(rol1(fhVal) ^ rol(seedTab[charOut], k) ^ seedTab[charIn]);
}
// ntHash with seeding option, using rotate ops
inline uint64_t NT64(const char * kmerSeq, const unsigned k, const unsigned seed) {
uint64_t hVal=0;
for(unsigned i=0; i<k; i++)
hVal ^= rol(seedTab[(unsigned char)kmerSeq[i]], k-1-i);
hVal *= seed ^ k * multiSeed;
hVal ^= hVal >> multiShift;
return hVal;
}
// canonical ntBase, using rotate ops
inline uint64_t NTC64(const char * kmerSeq, const unsigned k) {
uint64_t fhVal=0, rhVal=0;
fhVal=getFhval(kmerSeq, k);
rhVal=getRhval(kmerSeq, k);
return (rhVal<fhVal)? rhVal : fhVal;
}
// canonical ntHash, using rotate ops
inline uint64_t NTC64(const char * kmerSeq, const unsigned k, uint64_t& fhVal, uint64_t& rhVal) {
fhVal = getFhval(kmerSeq, k);
rhVal = getRhval(kmerSeq, k);
return (rhVal<fhVal)? rhVal : fhVal;
}
// canonical ntHash for sliding k-mers, using rotate ops
inline uint64_t NTC64(const unsigned char charOut, const unsigned char charIn, const unsigned k, uint64_t& fhVal, uint64_t& rhVal) {
fhVal = rol1(fhVal) ^ rol(seedTab[charOut], k) ^ seedTab[charIn];
rhVal = ror1(rhVal) ^ ror1(seedTab[charOut&cpOff]) ^ rol(seedTab[charIn&cpOff], k-1);
return (rhVal<fhVal)? rhVal : fhVal;
}
// canonical ntBase with seeding option, using rotate ops
inline uint64_t NTC64(const char * kmerSeq, const unsigned k, const unsigned seed) {
uint64_t hVal = NTC64(kmerSeq,k);
hVal *= seed ^ k * multiSeed;
hVal ^= hVal >> multiShift;
return hVal;
}
/*
* Using pre-computed seed matrix msTab instead of rotate opts
*/
// ntHash
inline uint64_t NTP64(const char * kmerSeq, const unsigned k) {
uint64_t hVal=0;
for(unsigned i=0; i<k; i++)
hVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
return hVal;
}
// ntHash for sliding k-mers
inline uint64_t NTP64(const uint64_t fhVal, const unsigned char charOut, const unsigned char charIn, const unsigned k) {
return(rol1(fhVal) ^ msTab[charOut][k%64] ^ msTab[charIn][0]);
}
// ntBase with seeding option
inline uint64_t NTP64(const char * kmerSeq, const unsigned k, const unsigned seed) {
uint64_t hVal=0;
for(unsigned i=0; i<k; i++)
hVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
hVal *= seed ^ k * multiSeed;
hVal ^= hVal >> multiShift;
return hVal;
}
// canonical ntBase
inline uint64_t NTPC64(const char * kmerSeq, const unsigned k) {
uint64_t fhVal=0, rhVal=0;
for(unsigned i=0; i<k; i++) {
fhVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
rhVal ^= msTab[(unsigned char)kmerSeq[i]&cpOff][i%64];
}
return (rhVal<fhVal)? rhVal : fhVal;
}
// canonical ntHash
inline uint64_t NTPC64(const char * kmerSeq, const unsigned k, uint64_t& fhVal, uint64_t& rhVal) {
fhVal=0, rhVal=0;
for(unsigned i=0; i<k; i++) {
fhVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
rhVal ^= msTab[(unsigned char)kmerSeq[i]&cpOff][i%64];
}
return (rhVal<fhVal)? rhVal : fhVal;
}
// canonical ntHash for sliding k-mers
inline uint64_t NTPC64(const unsigned char charOut, const unsigned char charIn, const unsigned k, uint64_t& fhVal, uint64_t& rhVal) {
fhVal = rol1(fhVal) ^ msTab[charOut][k%64] ^ msTab[charIn][0];
rhVal = ror1(rhVal) ^ msTab[charOut&cpOff][63] ^ msTab[charIn&cpOff][(k-1)%64];
return (rhVal<fhVal)? rhVal : fhVal;
}
// canonical ntBase with seeding option
inline uint64_t NTPC64(const char * kmerSeq, const unsigned k, const unsigned seed) {
uint64_t hVal = NTPC64(kmerSeq,k);
hVal *= seed ^ k * multiSeed;
hVal ^= hVal >> multiShift;
return hVal;
}
// multihash ntHash, ntBase
inline void NTM64(const char * kmerSeq, const unsigned k, const unsigned m, uint64_t *hVal) {
uint64_t bVal=0, tVal=0;
bVal = NTP64(kmerSeq, k);
hVal[0] = bVal;
for(unsigned i=1; i<m; i++) {
tVal = bVal * (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
hVal[i] = tVal;
}
}
// one extra hash for given base hash
inline uint64_t NTE64(const uint64_t hVal, const unsigned k, const unsigned i) {
uint64_t tVal = hVal;
tVal *= (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
return tVal;
}
// multihash ntHash for sliding k-mers
inline void NTM64(const unsigned char charOut, const unsigned char charIn, const unsigned k, const unsigned m, uint64_t *hVal) {
uint64_t bVal=0, tVal=0;
bVal = rol1(hVal[0]) ^ msTab[charOut][k%64] ^ msTab[charIn][0];
hVal[0] = bVal;
for(unsigned i=1; i<m; i++) {
tVal = bVal * (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
hVal[i] = tVal;
}
}
// canonical multihash ntBase
inline void NTMC64(const char * kmerSeq, const unsigned k, const unsigned m, uint64_t *hVal) {
uint64_t bVal=0, tVal=0;
bVal = NTPC64(kmerSeq, k);
hVal[0] = bVal;
for(unsigned i=1; i<m; i++) {
tVal = bVal * (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
hVal[i] = tVal;
}
}
// canonical multihash ntHash
inline void NTMC64(const char * kmerSeq, const unsigned k, const unsigned m, uint64_t& fhVal, uint64_t& rhVal, uint64_t *hVal) {
uint64_t bVal=0, tVal=0;
bVal = NTPC64(kmerSeq, k, fhVal, rhVal);
hVal[0] = bVal;
for(unsigned i=1; i<m; i++) {
tVal = bVal * (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
hVal[i] = tVal;
}
}
// canonical multihash ntHash for sliding k-mers
inline void NTMC64(const unsigned char charOut, const unsigned char charIn, const unsigned k, const unsigned m, uint64_t& fhVal, uint64_t& rhVal, uint64_t *hVal) {
uint64_t bVal=0, tVal=0;
bVal = NTPC64(charOut, charIn, k, fhVal, rhVal);
hVal[0] = bVal;
for(unsigned i=1; i<m; i++) {
tVal = bVal * (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
hVal[i] = tVal;
}
}
/*
* ignoring k-mers containing nonACGT using ntHash function
*/
// canonical ntBase
inline bool NTPC64(const char *kmerSeq, const unsigned k, uint64_t& hVal, unsigned& locN) {
hVal=0;
locN=0;
uint64_t fhVal=0,rhVal=0;
for(int i=k-1; i>=0; i--) {
if(msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64]==seedN) {
locN=i;
return false;
}
fhVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
rhVal ^= msTab[(unsigned char)kmerSeq[i]&cpOff][i%64];
}
hVal = (rhVal<fhVal)? rhVal : fhVal;
return true;
}
// canonical multihash ntBase
inline bool NTMC64(const char *kmerSeq, const unsigned k, const unsigned m, unsigned& locN, uint64_t* hVal) {
uint64_t bVal=0, tVal=0, fhVal=0, rhVal=0;
locN=0;
for(int i=k-1; i>=0; i--) {
if(msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64]==seedN) {
locN=i;
return false;
}
fhVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
rhVal ^= msTab[(unsigned char)kmerSeq[i]&cpOff][i%64];
}
bVal = (rhVal<fhVal)? rhVal : fhVal;
hVal[0] = bVal;
for(unsigned i=1; i<m; i++) {
tVal = bVal * (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
hVal[i] = tVal;
}
return true;
}
// canonical ntHash
inline bool NTPC64(const char *kmerSeq, const unsigned k, uint64_t& fhVal, uint64_t& rhVal, uint64_t& hVal, unsigned& locN) {
hVal=fhVal=rhVal=0;
locN=0;
for(int i=k-1; i>=0; i--) {
if(msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64]==seedN) {
locN=i;
return false;
}
fhVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
rhVal ^= msTab[(unsigned char)kmerSeq[i]&cpOff][i%64];
}
hVal = (rhVal<fhVal)? rhVal : fhVal;
return true;
}
// canonical multihash ntHash
inline bool NTMC64(const char *kmerSeq, const unsigned k, const unsigned m, uint64_t& fhVal, uint64_t& rhVal, unsigned& locN, uint64_t* hVal) {
fhVal=rhVal=0;
uint64_t bVal=0, tVal=0;
locN=0;
for(int i=k-1; i>=0; i--) {
if(msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64]==seedN) {
locN=i;
return false;
}
fhVal ^= msTab[(unsigned char)kmerSeq[i]][(k-1-i)%64];
rhVal ^= msTab[(unsigned char)kmerSeq[i]&cpOff][i%64];
}
bVal = (rhVal<fhVal)? rhVal : fhVal;
hVal[0] = bVal;
for(unsigned i=1; i<m; i++) {
tVal = bVal * (i ^ k * multiSeed);
tVal ^= tVal >> multiShift;
hVal[i] = tVal;
}
return true;
}
#endif