Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://github.com/splatlab/squeakr
09 January 2021, 06:17:16 UTC
  • Code
  • Branches (7)
  • Releases (3)
  • Visits
    • Branches
    • Releases
    • HEAD
    • refs/heads/development
    • refs/heads/exact
    • refs/heads/large-K
    • refs/heads/master
    • refs/heads/refactoring_new_cli
    • refs/tags/V0.6
    • refs/tags/V0.7
    • v0.5
    • v0.1.master
    • v0.1.large-K
  • 3a22b56
  • /
  • src
  • /
  • kmer.cc
Raw File Download Save again
Take a new snapshot of a software origin

If the archived software origin currently browsed is not synchronized with its upstream version (for instance when new commits have been issued), you can explicitly request Software Heritage to take a new snapshot of it.

Use the form below to proceed. Once a request has been submitted and accepted, it will be processed as soon as possible. You can then check its processing state by visiting this dedicated page.
swh spinner

Processing "take a new snapshot" request ...

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
  • revision
  • snapshot
origin badgecontent badge
swh:1:cnt:0e282cb41ebcf4fb8641f0122ed92179a35f5c16
origin badgedirectory badge
swh:1:dir:b04087d63176af24a40077a31ffaa281e802df3f
origin badgerevision badge
swh:1:rev:0d5813442b49454d35507ec4445170f1645ded6d
origin badgesnapshot badge
swh:1:snp:7c3618ec31c30a936757f656619fa38a7d7b80d8

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
  • revision
  • snapshot
(requires biblatex-software package)
Generating citation ...
(requires biblatex-software package)
Generating citation ...
(requires biblatex-software package)
Generating citation ...
(requires biblatex-software package)
Generating citation ...
Tip revision: 0d5813442b49454d35507ec4445170f1645ded6d authored by prashantpandey on 18 December 2020, 05:05:14 UTC
Merge branch 'development'
Tip revision: 0d58134
kmer.cc
/*
 * ============================================================================
 *
 *        Authors:  Prashant Pandey <ppandey@cs.stonybrook.edu>
 *                  Rob Johnson <robj@vmware.com>   
 *                  Rob Patro (rob.patro@cs.stonybrook.edu)
 *
 * ============================================================================
 */

#include <fstream>
#include "kmer.h"
#include <stdlib.h>
#include <iostream>

/*return the integer representation of the base */
char Kmer::map_int(uint8_t base)
{
	switch(base) {
		case DNA_MAP::A: { return 'A'; }
		case DNA_MAP::T: { return 'T'; }
		case DNA_MAP::C: { return 'C'; }
		case DNA_MAP::G: { return 'G'; }
		default:  { return DNA_MAP::G+1; }
	}
}

/*return the integer representation of the base */
uint8_t Kmer::map_base(char base)
{
	switch(base) {
		case 'A': { return DNA_MAP::A; }
		case 'T': { return DNA_MAP::T; }
		case 'C': { return DNA_MAP::C; }
		case 'G': { return DNA_MAP::G; }
		default:  { return DNA_MAP::G+1; }
	}
}

/**
 * Converts a string of "ATCG" to a uint64_t
 * where each character is represented by using only two bits
 */
__int128_t Kmer::str_to_int(std::string str)
{
	__int128_t strint = 0;
	for (auto it = str.begin(); it != str.end(); it++) {
		uint8_t curr = 0;
		switch (*it) {
			case 'A': { curr = DNA_MAP::A; break; }
			case 'T': { curr = DNA_MAP::T; break; }
			case 'C': { curr = DNA_MAP::C; break; }
			case 'G': { curr = DNA_MAP::G; break; }
		}
		strint = strint | curr;
		strint = strint << 2;
	}
	return strint >> 2;
}

/**
 * Converts a uint64_t to a string of "ACTG"
 * where each character is represented by using only two bits
 */
std::string Kmer::int_to_str(__int128_t kmer, uint64_t kmer_size)
{
	uint8_t base;
	std::string str;
	for (uint32_t i = kmer_size; i > 0; i--) {
		base = (kmer >> (i*2-2)) & 3ULL;
		char chr = Kmer::map_int(base);
		str.push_back(chr);
	}
	return str;
}

/* Return the reverse complement of a base */
int Kmer::reverse_complement_base(int x) { return 3 - x; }

/* Calculate the revsese complement of a kmer */
__int128_t Kmer::reverse_complement(__int128_t kmer, uint64_t kmer_size)
{
	__int128_t rc = 0;
	uint8_t base = 0;
	for (uint32_t i = 0; i < kmer_size; i++) {
		base = kmer & 3ULL;
		base = reverse_complement_base(base);
		kmer >>= 2;
		rc |= base;
		rc <<= 2;
	}
	rc >>=2;
	return rc;
}

/* Compare the kmer and its reverse complement and return the result 
 * Return true if the kmer is greater than or equal to its
 * reverse complement. 
 * */
bool Kmer::compare_kmers(__int128_t kmer, __int128_t kmer_rev)
{
	return kmer >= kmer_rev;
}

void Kmer::parse_kmers(const char *filename, uint64_t kmer_size,
											 std::unordered_set<uint64_t>& kmerset) {
	std::ifstream ipfile(filename);
	std::string read;
	while (ipfile >> read) {

start_read:
		if (read.length() < kmer_size)
			continue;
		{
			uint64_t first = 0;
			uint64_t first_rev = 0;
			uint64_t item = 0;
			bool flag = false;
			for(uint32_t i = 0; i < kmer_size; i++) { //First kmer
				uint8_t curr = Kmer::map_base(read[i]);
				if (curr > DNA_MAP::G) { // 'N' is encountered
					if (i + 1 < read.length())
						read = read.substr(i + 1, read.length());
					else {
						flag = true;
						break;
					}
					goto start_read;
				}
				first = first | curr;
				first = first << 2;
			}
			if (flag) continue;
			first = first >> 2;
			first_rev = Kmer::reverse_complement(first, kmer_size);

			if (Kmer::compare_kmers(first, first_rev))
				item = first;
			else
				item = first_rev;

			kmerset.insert(item);

			uint64_t next = (first << 2) & BITMASK(2*kmer_size);
			uint64_t next_rev = first_rev >> 2;

			for(uint32_t i=kmer_size; i<read.length(); i++) { //next kmers
				uint8_t curr = Kmer::map_base(read[i]);
				if (curr > DNA_MAP::G) { // 'N' is encountered
					if (i + 1 < read.length())
						read = read.substr(i + 1, read.length());
					else {
						flag = true;
						break;
					}
					goto start_read;
				}
				if (flag) continue;
				next |= curr;
				uint64_t tmp = Kmer::reverse_complement_base(curr);
				tmp <<= (kmer_size*2-2);
				next_rev = next_rev | tmp;
				if (Kmer::compare_kmers(next, next_rev))
					item = next;
				else
					item = next_rev;

				kmerset.insert(item);

				next = (next << 2) & BITMASK(2*kmer_size);
				next_rev = next_rev >> 2;
			}
		}
	}
}

back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API