https://github.com/sfu-compbio/mrsfast
Revision 6aa8bb74d3d327b01c0f52cee7a271efbaf5abe1 authored by Iman Sarrafi on 16 May 2013, 22:07:04 UTC, committed by Iman Sarrafi on 16 May 2013, 22:07:04 UTC
1 parent 39958d0
Tip revision: 6aa8bb74d3d327b01c0f52cee7a271efbaf5abe1 authored by Iman Sarrafi on 16 May 2013, 22:07:04 UTC
Bug Fix: Quality not reversed in single best mode
Bug Fix: Quality not reversed in single best mode
Tip revision: 6aa8bb7
SNPReader.c
/*
* Copyright (c) <2008 - 2020>, University of Washington, Simon Fraser University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list
* of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or other
* materials provided with the distribution.
* - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
* used to endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Author:
* Faraz Hach (fhach AT cs DOT sfu DOT ca)
* Iman Sarrafi (isarrafi AT cs DOT sfu DOT ca)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "Common.h"
#include "HashTable.h"
#include "SNPReader.h"
int MAX_SNIP_CNT = 3750000; // max num of 1bp SNPs in a chromosome
CompressedSeq *_snp_SNPMap = NULL;
int _snp_SNPMapLength = 0;
ChrSNPs *_snp_chrSNPs = NULL;
int _snp_chrCnt = 0;
int _snp_currentChr = 0;
int _snp_currentLoc = 0;
/**********************************************/
void initLoadingSNPs(char *fileName)
{
int i, j, loc, t, found;
char cname[CONTIG_NAME_SIZE]; // chromosome name
int ccnt; // chr count in the file
char **chrNames;
int *dummy = getMem(MAX_SNIP_CNT * sizeof(int));
_snp_chrCnt = getChrCnt();
chrNames = getChrNames();
_snp_chrSNPs = getMem(_snp_chrCnt * sizeof(ChrSNPs));
for (i = 0; i < _snp_chrCnt; i++)
{
_snp_chrSNPs[i].chrName = chrNames[i];
_snp_chrSNPs[i].locCnt = 0;
_snp_chrSNPs[i].locs = getMem(MAX_SNIP_CNT * sizeof(int));
}
_snp_SNPMapLength = (calculateCompressedLen(CONTIG_MAX_SIZE)+1) * sizeof(CompressedSeq);
_snp_SNPMap = getMem(_snp_SNPMapLength);
strcpy(cname, "chr");
FILE *fp = fopen(fileName, "rt");
t = fread(&ccnt, sizeof(int), 1, fp); // must be 25 for homosapien
for (i = 1; i <= ccnt; i++)
{
if (i < 23)
sprintf(cname+3, "%d", i);
else
{
switch (i)
{
case 23: cname[3] = 'X'; break;
case 24: cname[3] = 'Y'; break;
case 25: cname[3] = 'M'; break;
default: cname[3] = 'T'; break; // something invalid
}
cname[4] = '\0';
}
found = 0;
for (j = 0; j < _snp_chrCnt; j++)
{
if (!strcmp(cname, _snp_chrSNPs[j].chrName))
{
found = 1;
t = fread(&_snp_chrSNPs[j].locCnt, sizeof(int), 1, fp);
t = fread(_snp_chrSNPs[j].locs, sizeof(int), _snp_chrSNPs[j].locCnt, fp);
break;
}
}
if (!found)
{
t = fread(dummy, sizeof(int), 1, fp);
t = fread(dummy+1, sizeof(int), dummy[0], fp);
}
}
fclose(fp);
freeMem(dummy, MAX_SNIP_CNT * sizeof(int));
}
/**********************************************/
void finalizeSNPs()
{
int i;
for (i = 0; i < _snp_chrCnt; i++)
freeMem(_snp_chrSNPs[i].locs, MAX_SNIP_CNT * sizeof(int));
freeMem(_snp_chrSNPs, _snp_chrCnt * sizeof(ChrSNPs));
freeMem(_snp_SNPMap, _snp_SNPMapLength);
}
/**********************************************/
CompressedSeq *loadSNPMap(char *chrName, int contigStartIndex, int contigLength)
{
//memset(_snp_SNPMap, -1, calculateCompressedLen(contigLength) * sizeof(CompressedSeq));
memset(_snp_SNPMap, -1, _snp_SNPMapLength);
int contigEnd = contigStartIndex + contigLength;
int loc, offset;
CompressedSeq *snp, mask;
if ( strcmp(chrName, _snp_chrSNPs[_snp_currentChr].chrName) ) // new chr
{
_snp_currentChr++;
_snp_currentLoc = 0;
}
if (_snp_chrSNPs[_snp_currentChr].locCnt)
{
int i = _snp_currentChr; // just to make the code more readable
int pos = _snp_currentLoc;
while ( pos < _snp_chrSNPs[i].locCnt && _snp_chrSNPs[i].locs[pos] < contigStartIndex ) // this should never happen!
pos ++;
while ( pos < _snp_chrSNPs[i].locCnt && _snp_chrSNPs[i].locs[pos] < contigEnd )
{
loc = _snp_chrSNPs[i].locs[pos] - contigStartIndex - 1;
offset = loc % 21;
mask = 0x7000000000000000;
mask = ~(mask >> offset*3);
snp = _snp_SNPMap + (loc/21);
*snp &= mask;
pos ++;
}
_snp_currentLoc = pos;
}
return _snp_SNPMap;
}
Computing file changes ...