https://github.com/lh3/bwa
Raw File
Tip revision: 139f68fc4c3747813783a488aef2adc86626b01b authored by Heng Li on 22 September 2022, 23:52:12 UTC
Merge pull request #367 from martin-g/github-actions-linux-aarch64
Tip revision: 139f68f
rle.c
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include "rle.h"

const uint8_t rle_auxtab[8] = { 0x01, 0x11, 0x21, 0x31, 0x03, 0x13, 0x07, 0x17 };

// insert symbol $a after $x symbols in $str; marginal counts added to $cnt; returns the size increase
int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6])
{
	uint16_t *nptr = (uint16_t*)block;
	int diff;

	block += 2; // skip the first 2 counting bytes
	if (*nptr == 0) {
		memset(cnt, 0, 48);
		diff = rle_enc1(block, a, rl);
	} else {
		uint8_t *p, *end = block + *nptr, *q;
		int64_t pre, z, l = 0, tot, beg_l;
		int c = -1, n_bytes = 0, n_bytes2, t = 0;
		uint8_t tmp[24];
		beg_l = bc[0] + bc[1] + bc[2] + bc[3] + bc[4] + bc[5];
		tot   = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5];
		if (x < beg_l) {
			beg_l = 0, *beg = 0;
			memset(bc, 0, 48);
		}
		if (x == beg_l) {
			p = q = block + (*beg); z = beg_l;
			memcpy(cnt, bc, 48);
		} else if (x - beg_l <= ((tot-beg_l)>>1) + ((tot-beg_l)>>3)) { // forward
			z = beg_l; p = block + (*beg);
			memcpy(cnt, bc, 48);
			while (z < x) {
				rle_dec1(p, c, l);
				z += l; cnt[c] += l;
			}
			for (q = p - 1; *q>>6 == 2; --q);
		} else { // backward
			memcpy(cnt, ec, 48);
			z = tot; p = end;
			while (z >= x) {
				--p;
				if (*p>>6 != 2) {
					l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3;
					z -= l; cnt[*p&7] -= l;
					l = 0; t = 0;
				} else {
					l |= (*p&0x3fL) << t;
					t += 6;
				}
			}
			q = p;
			rle_dec1(p, c, l);
			z += l; cnt[c] += l;
		}
		*beg = q - block;
		memcpy(bc, cnt, 48);
		bc[c] -= l;
		n_bytes = p - q;
		if (x == z && a != c && p < end) { // then try the next run
			int tc;
			int64_t tl;
			q = p;
			rle_dec1(q, tc, tl);
			if (a == tc)
				c = tc, n_bytes = q - p, l = tl, z += l, p = q, cnt[tc] += tl;
		}
		if (z != x) cnt[c] -= z - x;
		pre = x - (z - l); p -= n_bytes;
		if (a == c) { // insert to the same run
			n_bytes2 = rle_enc1(tmp, c, l + rl);
		} else if (x == z) { // at the end; append to the existing run
			p += n_bytes; n_bytes = 0;
			n_bytes2 = rle_enc1(tmp, a, rl);
		} else { // break the current run
			n_bytes2 = rle_enc1(tmp, c, pre);
			n_bytes2 += rle_enc1(tmp + n_bytes2, a, rl);
			n_bytes2 += rle_enc1(tmp + n_bytes2, c, l - pre);
		}
		if (n_bytes != n_bytes2 && end != p + n_bytes) // size changed
			memmove(p + n_bytes2, p + n_bytes, end - p - n_bytes);
		memcpy(p, tmp, n_bytes2);
		diff = n_bytes2 - n_bytes;
	}
	return (*nptr += diff);
}

int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6])
{
	int beg = 0;
	int64_t bc[6];
	memset(bc, 0, 48);
	return rle_insert_cached(block, x, a, rl, cnt, ec, &beg, bc);
}

void rle_split(uint8_t *block, uint8_t *new_block)
{
	int n = *(uint16_t*)block;
	uint8_t *end = block + 2 + n, *q = block + 2 + (n>>1);
	while (*q>>6 == 2) --q;
	memcpy(new_block + 2, q, end - q);
	*(uint16_t*)new_block = end - q;
	*(uint16_t*)block = q - block - 2;
}

void rle_count(const uint8_t *block, int64_t cnt[6])
{
	const uint8_t *q = block + 2, *end = q + *(uint16_t*)block;
	while (q < end) {
		int c;
		int64_t l;
		rle_dec1(q, c, l);
		cnt[c] += l;
	}
}

void rle_print(const uint8_t *block, int expand)
{
	const uint16_t *p = (const uint16_t*)block;
	const uint8_t *q = block + 2, *end = block + 2 + *p;
	while (q < end) {
		int c;
		int64_t l, x;
		rle_dec1(q, c, l);
		if (expand) for (x = 0; x < l; ++x) putchar("$ACGTN"[c]);
		else printf("%c%ld", "$ACGTN"[c], (long)l);
	}
	putchar('\n');
}

void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6])
{
	int a;
	int64_t tot, cnt[6];
	const uint8_t *p;

	y = y >= x? y : x;
	tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5];
	if (tot == 0) return;
	if (x <= (tot - y) + (tot>>3)) {
		int c = 0;
		int64_t l, z = 0;
		memset(cnt, 0, 48);
		p = block + 2;
		while (z < x) {
			rle_dec1(p, c, l);
			z += l; cnt[c] += l;
		}
		for (a = 0; a != 6; ++a) cx[a] += cnt[a];
		cx[c] -= z - x;
		if (cy) {
			while (z < y) {
				rle_dec1(p, c, l);
				z += l; cnt[c] += l;
			}
			for (a = 0; a != 6; ++a) cy[a] += cnt[a];
			cy[c] -= z - y;
		}
	} else {
#define move_backward(_x) \
		while (z >= (_x)) { \
			--p; \
			if (*p>>6 != 2) { \
				l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; \
				z -= l; cnt[*p&7] -= l; \
				l = 0; t = 0; \
			} else { \
				l |= (*p&0x3fL) << t; \
				t += 6; \
			} \
		} \

		int t = 0;
		int64_t l = 0, z = tot;
		memcpy(cnt, ec, 48);
		p = block + 2 + *(const uint16_t*)block;
		if (cy) {
			move_backward(y)
			for (a = 0; a != 6; ++a) cy[a] += cnt[a];
			cy[*p&7] += y - z;
		}
		move_backward(x)
		for (a = 0; a != 6; ++a) cx[a] += cnt[a];
		cx[*p&7] += x - z;

#undef move_backward
	}
}
back to top