https://github.com/MoorjaniLab/DATES_v4010
Raw File
Tip revision: e034dc0d6fe8d41a828796f07791d50011b6bb04 authored by MoorjaniLab on 09 May 2022, 23:55:41 UTC
Add files via upload
Tip revision: e034dc0
mcio.c
#include <fcntl.h>
#include <ctype.h>
#include  <mcio.h>
#include  <xsearch.h>
#include  <ranmath.h>

/*! \file mcio.c 
 *
 * \brief Input/Output Library
*/



/* global data */

extern int numchrom;
int usecm = NO;			//!< genetic distances are in cMorgans
int plinkinputmode = NO;
static int snprawtab = NO;
static int debug = NO;
extern char *trashdir;
extern int qtmode;		//!< user parameter (phenotype is quantitative)
extern int verbose;		//!< user parameter (print additional output to stdout)
extern int familynames;		//!< user parameter (prepend PLINK family names with colon to individual names)
extern double lp1, lp2;
extern double a1, b1;

extern int packmode;		//!< flag - input {is not,is} in packed mode
extern char *packgenos;		//!< packed genotype data (packit.h)
extern char *packepath;
extern long packlen;		//!< allocated size of packgenos data space
extern long rlen;		//!< number of bytes in packgenos space that each SNP's data occupies
extern int malexhet;		//!< user parameter (retain het genotype data on male X chromosome)
extern int hashcheck;		//!< user parameter (check input file hashes against input data)
extern int outputall;
extern int sevencolumnped;
static int dofreeped = YES;

int tempnum = 0;
int tempfake = 0;

static int *snpord = NULL;	//!< snpord[i] == j if and only if  snpm[j] is ith SNP in input file
static int numsnpord = 0;	//!< current size of array snpord
static int *snporda[3];		//!< Copies of snpord for various data sets (used by mergeit)
static int numsnporda[3];	//!< Number of elements of snporda in use

static int badpedignore = NO;	//!< flag - ignore bad allele symbols in PED file 

static int maxgenolinelength = -1;
static int tersemode = NO;
int checksizemode = YES;
int pedignore = YES;
enum outputmodetype outputmode = PACKEDANCESTRYMAP;
static double maxgpos[MAXCH];
static int chrmode = NO;
static int chimpmode = NO;
static int pordercheck = YES;
static int snpordered;
static int isgdis = YES;	// no means gsid 0 in input 
// fails if packed and out of order 
static int familypopnames = NO;
// in .fam output use popnames (egroup) 


SNPDATA *tsdpt;

/* local function prototypes */

int getbedgenos (char *gname, SNP ** snpmarkers, Indiv ** indivmarkers,
		 int numsnps, int numindivs, int nignore);

void freeped ();

static char x2base (int x);
static void gtox (int g, char *cvals, int *p1, int *p2);

int ancval (int x);
static int setskipit (char *sx);	// ignore lines in snp, map files
int calcishash (SNP ** snpm, Indiv ** indiv, int numsnps, int numind,
		int *pihash, int *pshash);

/* ---------------------------------------------------------------------------------------------------- */

void
setfamilypopnames (int fpop)
{

  familypopnames = fpop;

}


void
clearsnpord ()
{

  free (snpord);
  snpord = NULL;
  numsnpord = 0;

}

void
snpsortit (int **spos, int *indx, int n)
{
  long *lkode;
  int i, base[3];

  base[0] = 1;
  base[1] = 10 ^ 8;
  base[2] = 10 ^ 9;

  ZALLOC (lkode, n, long);
  for (i = 0; i < n; i++) {
    lkode[i] = lkodeitbb (spos[i], 3, base);
  }

  lsortit (lkode, indx, n);

  free (lkode);
  return;

}

int
getsnps (char *snpfname, SNP *** snpmarkpt, double spacing,
	 char *badsnpname, int *numignore, int numrisks)
{
  // returns number of SNPS
  // numrisks 
  /* read file of real SNPS store in temporary structure */

  SNPDATA **snpraw, *sdpt;
  static SNP **snpmarkers;
  SNP *cupt;
  int **snppos;
  int nreal, nfake, numsnps = 0, i, t, j;
  int *snpindx;
  double xspace;
  int failx = 0;

  if (snpfname == NULL)
    fatalx ("(getsnps) null snpname");
  xspace = spacing;
  nreal = getsizex (snpfname);
  if (nreal <= 0)
    fatalx ("no snps found: snpfname: %s\n", snpfname);
  ZALLOC (snpraw, nreal, SNPDATA *);

  if (snpord == NULL) {
    ZALLOC (snpord, nreal, int);
    ivclear (snpord, -1, nreal);
    numsnpord = nreal;
  }
  for (i = 0; i < nreal; i++) {
    ZALLOC (snpraw[i], 1, SNPDATA);
    charclear (snpraw[i]->cchrom, CNULL, 7);
    snpraw[i]->inputrow = -1;
    snpraw[i]->alleles[0] = '1';
    snpraw[i]->alleles[1] = '2';
  }
  nreal = readsnpdata (snpraw, snpfname);
  dobadsnps (snpraw, nreal, badsnpname);

  ZALLOC (snppos, nreal, int *);
  for (i = 0; i < nreal; i++) {
    ZALLOC (snppos[i], 3, int);
  }

  for (i = 0; i < nreal; i++) {
    sdpt = snpraw[i];
    snppos[i][0] = sdpt->chrom;
    if ((sdpt->ignore) && (plinkinputmode)) {
      snppos[i][0] = 99;
      if (pordercheck == YES) {
	pordercheck = NO;
	printf ("PLINK input. No check on SNP order\n");
      }
    }
    t = snppos[i][1] = nnint ((sdpt->gpos) * GDISMUL);
    if (isgdis)
      snppos[i][1] = 0;
    snppos[i][2] = nnint (sdpt->ppos);
    // sdpt -> gpos = ((double) t)/ GDISMUL ;
  }

/**
  for (i=nreal-10; i<nreal; i++) { 
   printf("zzyy: %d ", i) ; printimat(snppos[i], 1, 3) ;
  }
*/

  ZALLOC (snpindx, nreal, int);
//snpsortit(snppos, snpindx, nreal) ;
  ipsortit (snppos, snpindx, nreal, 3);

  snpordered = YES;

  for (i = 0; i < nreal; ++i) {
    j = snpindx[i];
    sdpt = snpraw[j];

    //printf("zzz %d %d %s ",   i, j, sdpt -> ID) ;
    //printimat(snppos[j], 1, 3) ;

    if (j != i) {
      snpordered = NO;
      ++failx;
      if (failx < 10) {
	printf
	  ("snp order check fail; snp list not ordered: %s (processing continues)",
	   snpfname);
	printimat (snppos[i], 1, 3);
	printf ("zzz %d %d\n", i, j);
      }
    }
  }

  if ((usecm) && (xspace > 0.5)) {
    printf ("*** warning fake spacing given in cM\n");
    xspace /= 100.0;
  }

  // get number of fakes

  nfake = numfakes (snpraw, snpindx, nreal, xspace);
  numsnps = nreal + nfake;

  tempnum = numsnps;
  tempfake = nfake;

  // allocate storage

  ZALLOC (snpmarkers, numsnps, SNP *);
  for (i = 0; i < numsnps; i++) {
    ZALLOC (snpmarkers[i], 1, SNP);
    cupt = snpmarkers[i];
    clearsnp (cupt);
    ZALLOC (cupt->modelscores, numrisks, double);
    ZALLOC (cupt->totmodelscores, numrisks, double);
  }
  tsdpt = snpraw[0];
  *snpmarkpt = snpmarkers;
  numsnps = loadsnps (snpmarkers, snpraw, snpindx, nreal, xspace, numignore);

/**
  for (i=numsnps-10; i<numsnps; i++) { 
   cupt = snpmarkers[i] ;
   printf("zzyy3: %d %d %12.0f\n", i, cupt -> chrom, cupt -> physpos) ;
  }
*/




  // and free up temporary storage
  for (i = 0; i < nreal; i++) {
    free (snpraw[i]);
    free (snppos[i]);
  }
  free (snpraw);
  free (snppos);
  free (snpindx);

  /* printf("numsnps: %d\n", numsnps) ; */

  /* 
     if (snpord != NULL) { 
     printimat(snpord, 1, MIN(100, numsnps)) ;
     }
   */
  cupt = snpmarkers[0];
  if (isnumword (cupt->ID))
    printf
      ("*** warning: first snp %s is number.  perhaps you are using .map format\n",
       cupt->ID);

  return numsnps;
}




/* ---------------------------------------------------------------------------------------------------- */
int
getsizex (char *fname)
{
  char line[MAXSTR + 1], c;
  char *spt[MAXFF], *sx;
  int nsplit, num = 0;
  int skipit;
  int len;

  FILE *fff;
  openit (fname, &fff, "r");
  line[MAXSTR] = '\0';
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = setskipit (sx);	// comment line
    if (skipit == NO) {
      ++num;
    }

    // now flush the rest of the line if necessary.
    len = strlen (line);
    c = line[len - 1];
    if (c != '\n') {
      while ((c = fgetc (fff)) != EOF) {
	if (c == '\n')
	  break;
      }
    }
    freeup (spt, nsplit);
    continue;
  }
  fclose (fff);
  fflush (stdout);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
ismapfile (char *fname)
{
  // PLINK map file ? 
  // just look at file name (perhaps should look at format) 

  char *sx;
  int len;
  len = strlen (fname);
  if (len < 4)
    return NO;
  sx = fname + len - 4;

  if (strcmp (sx, ".map") == 0)
    return YES;
  if (strcmp (sx, ".bim") == 0)
    return YES;

  if (len < 7)
    return NO;
  sx = fname + len - 7;
  if (strcmp (sx, ".pedsnp") == 0)
    return YES;

  return NO;

}



/* ---------------------------------------------------------------------------------------------------- */
int
ispedfile (char *fname)
{
  // PLINK ped file ? 
  // just look at file name (perhaps should look at format) 
  char *sx;
  int len;
  len = strlen (fname);
  if (len < 4)
    return NO;
  sx = fname + len - 4;

  if (strcmp (sx, ".ped") == 0)
    return YES;
  if (strcmp (sx, ".fam") == 0)
    return YES;

  if (len < 7)
    return NO;
  sx = fname + len - 7;
  if (strcmp (sx, ".pedind") == 0)
    return YES;

  return NO;
}


/* ---------------------------------------------------------------------------------------------------- */
int
isbedfile (char *fname)
{
  // PLINK ped file ? 
  // just look at file name (perhaps should look at format) 

  char *sx;
  int len;
  len = strlen (fname);
  if (len < 4)
    return NO;
  sx = fname + len - 4;

  if (strcmp (sx, ".bed") == 0)
    return YES;
  return NO;

}

/* ---------------------------------------------------------------------------------------------------- */
int
readsnpdata (SNPDATA ** snpraw, char *fname)
{
  char line[LONGSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k;
  int skipit;
  SNPDATA *sdpt;

  double maxg = -9999.0;

  FILE *fff;
  int chrom;
  int nbad = 0;

  plinkinputmode = NO;
  // if this is a PLINK file, call PLINK input routine
  if (ismapfile (fname)) {
    plinkinputmode = YES;
    return readsnpmapdata (snpraw, fname);
  }
  usecm = NO;

  vclear (maxgpos, -9999.0, MAXCH);
  openit (fname, &fff, "r");
  while (fgets (line, LONGSTR, fff) != NULL) {

    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = setskipit (sx);
    if (skipit == NO) {
      if (nsplit < 4)
	fatalx ("(readsnpdata) bad line: %s\n", line);
      sdpt = snpraw[num];
      sdpt->inputrow = num;

      if (strlen (spt[0]) >= IDSIZE)
	fatalx ("snp ID too long: %s\n", spt[0]);
      strcpy (sdpt->ID, spt[0]);

      sdpt->chrom = chrom = str2chrom (spt[1]);
      strncpy (sdpt->cchrom, spt[1], 6);

      if ((chrom >= MAXCH) || (chrom <= 0)) {
	if (nbad < 10)
	  printf ("warning: bad chrom: %s", line);
	++nbad;

	sdpt->chrom = MIN (chrom, BADCHROM);
	sdpt->chrom = MAX (chrom, 0);
	sdpt->ignore = YES;
      }

      // the genetic positions will be converted to Morgans (assumed to be in cM) if and only if
      // any genetic position is greater than 100

      sdpt->gpos = atof (spt[2]);
      if (sdpt->gpos > 100) {
	if (sdpt->gpos > 1.0e6)
	  fatalx ("absurd genetic distance:\n%s\n", line);
	if (!usecm) {
	  printf ("*** warning.  genetic distances are in cM not Morgans\n");
	  printf ("%s\n", line);
	}
	usecm = YES;		// set flag to connvert to Morgans
      }

      maxgpos[chrom] = MAX (maxgpos[chrom], sdpt->gpos);
      maxg = MAX (maxg, maxgpos[chrom]);

      setsdpos (sdpt, atoi (spt[3]));
      if (nsplit < 8) {
	ivzero (sdpt->nn, 4);
	if (nsplit == 6) {
	  sx = spt[4];
	  sdpt->alleles[0] = toupper (sx[0]);
	  sx = spt[5];
	  sdpt->alleles[1] = toupper (sx[0]);
	}
      }
      else {			// QUESTION:  when does a SNP file have more than seven columns?
	for (k = 0; k < 4; k++) {
	  sdpt->nn[k] = atoi (spt[4 + k]);
	}
	if (nsplit == 10) {
	  sx = spt[8];
	  sdpt->alleles[0] = toupper (sx[0]);
	  sx = spt[9];
	  sdpt->alleles[1] = toupper (sx[0]);
	}
      }
      ++num;
    }
    freeup (spt, nsplit);
    continue;
  }				// elihw

  // if all genetic positions are set to zero, set from physical position 
  if (maxg <= 0.00001) {
    isgdis = NO;
    printf ("%s: genetic distance set from physical distance\n", fname);
    usecm = NO;
    for (k = 0; k < num; ++k) {
      snpraw[k]->gpos = 1.0e-8 * snpraw[k]->ppos;
    }
  }

  // convert to Morgans 
  if (usecm) {
    for (k = 0; k < num; ++k) {
      snpraw[k]->gpos /= 100.0;
    }
  }

  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
readsnpmapdata (SNPDATA ** snpraw, char *fname)
{
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k, t;
  int skipit, len;
  SNPDATA *sdpt;
  int nbad = 0;

  FILE *fff;
  int chrom;
  double maxg = -9999.0;

  vclear (maxgpos, -9999.0, MAXCH);
  openit (fname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = setskipit (sx);
    if (skipit == NO) {
      if (nsplit < 4)
	fatalx ("(readsnpmapdata) bad line: %s\n", line);
      sdpt = snpraw[num];
      if (strlen (spt[1]) >= IDSIZE)
	fatalx ("snp ID too long: %s\n", spt[1]);
      strcpy (sdpt->ID, spt[1]);

      if (nsplit >= 6) {	// alleles in .map file are optional
	sx = spt[4];
	sdpt->alleles[0] = sx[0];
	sx = spt[5];
	sdpt->alleles[1] = sx[0];
	if (sdpt->alleles[0] == '0')
	  sdpt->alleles[0] = 'X';	// unknown
	if (sdpt->alleles[1] == '0')
	  sdpt->alleles[1] = 'X';
      }
      else {
	charclear ( sdpt->alleles, CNULL, 2);
      }

      sx = spt[0];
      sdpt->chrom = chrom = str2chrom (sx);
      strncpy (sdpt->cchrom, sx, 6);

      if ((chrom >= MAXCH) || (chrom <= 0)) {
	if (nbad < 10)
	  printf ("warning (mapfile): bad chrom: %s", line);
	++nbad;

	sdpt->chrom = MIN (chrom, BADCHROM);
	sdpt->chrom = MAX (chrom, 0);
	sdpt->chrom = 99;
	strcpy (sdpt->cchrom, "99");
	sdpt->ignore = YES;
      }

      // the genetic positions will be converted to Morgans (assumed to be in cM) if and only if
      // any genetic position is greater than 100

      sdpt->gpos = atof (spt[2]);
      if (sdpt->gpos > 100) {
	if (sdpt->gpos > 1.0e6)
	  fatalx ("absurd genetic distance:\n%s\n", line);
	if (!usecm) {
	  printf ("*** warning.  genetic distances are in cM not Morgans\n");
	  printf ("%s\n", line);
	}
	usecm = YES;
      }
      maxgpos[chrom] = MAX (maxgpos[chrom], sdpt->gpos);
      maxg = MAX (maxg, maxgpos[chrom]);
      sdpt->ppos = atof (spt[3]);
      if (nsplit < 8) {
	ivzero (sdpt->nn, 4);
      }
      else {
	for (k = 0; k < 4; k++) {
	  sdpt->nn[k] = atoi (spt[4 + k]);
	}
      }
      sdpt->inputrow = num;
//    printf("zz %d %d %s %12.0f\n", num, sdpt -> chrom, sdpt -> ID, sdpt -> ppos) ;
      ++num;
    }
    freeup (spt, nsplit);
    continue;
  }

  if (maxg <= 0.00001) {
    printf ("genetic distance set from physical distance\n");
    usecm = NO;
    isgdis = NO;
    for (k = 0; k < num; ++k) {
      snpraw[k]->gpos = 1.0e-8 * snpraw[k]->ppos;
    }
  }

  if (usecm) {
    for (k = 0; k < num; ++k) {
      snpraw[k]->gpos /= 100.0;
    }
  }

  if (snpord == NULL) {
    ZALLOC (snpord, num, int);
    ivclear (snpord, -1, num);
    numsnpord = num;
  }

  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
numfakes (SNPDATA ** snpraw, int *snpindx, int nreal, double spacing)
{

  // it seems better for this internal routine 
  // to use the precomputed values

  int nignore, numsnps;
  int nfake = 0, i, k, indx;
  int num = 0;
  SNP *cupt;
  SNPDATA *sdpt;
  char *sname;
  int *sp;
  int xc = 0, chrom;
  double fakedis, realdis;	// gpos for fake marker 
  double yf, yr;
  double physpos;

  if (spacing <= 0.0)
    fakedis = 1.0e20;

  for (k = 0; k < nreal; k++) {

    indx = snpindx[k];
    sdpt = snpraw[indx];

    chrom = sdpt->chrom;
    realdis = sdpt->gpos;

    if (chrom != xc) {
      fakedis = nextmesh (realdis, spacing);
      xc = chrom;
    }
    while (fakedis < realdis) {
      fakedis += spacing;
      ++nfake;
    }
  }

  // nfake is number of multiples of fakedis in chromosome

  return nfake;
}



/* ---------------------------------------------------------------------------------------------------- */
double
nextmesh (double val, double spacing)
{
  double y;

  if (spacing == 0.0)
    return 1.0e8;
  y = ceil (val / spacing) * spacing;
  if (y < val)
    y += spacing;
  return y;
}


/* ---------------------------------------------------------------------------------------------------- */
/*! \fn int loadsnps(SNP **snpm, SNPDATA **snpraw, 
            int *snpindx, int nreal, double spacing, int *numignore)    
    \brief  Store raw SNP data in final array of type SNP *
    \param  snpm    Pointer to array of type SNP * in which to store data
    \param  snpraw  Pointer to array of type SNPDATA * in which preliminary data was stored
    \param  snpindx  On entry, kth element of snpindx is index of the kth SNP in snpraw (This is not
      the same as the value k itself if the SNPs were out of order in the file.)
    \param  nreal   Number of SNPs stored in snpraw
    \param  spacing    Maximum spacing between SNPs (not relevant to EIGENSOFT)
    \param  numignore  Return number of SNPs to ignore here
*/


int
loadsnps (SNP ** snpm, SNPDATA ** snpraw,
	  int *snpindx, int nreal, double spacing, int *numignore)
{

  // snppos, snpindx could be recalculated but 
  // it seems better for this internal routine 
  // to use the precomputed values
  // do NOT call externally

  int nignore, numsnps;
  int nfake = 0, i, k, indx;
  int num = 0, tnum;
  SNP *cupt = NULL, *lastcupt = NULL, *tcupt;
  SNPDATA *sdpt;
  char *sname;
  int *sp;
  int xc = 0, chrom;
  double fakedis, realdis, xrealdis;	// gpos for fake marker 
  double yf, yr;
  double physpos;
  double xl, xr, xmid, al, ar, fraw;
  double y;
  int nn[2], n0, n1;
  int cnum, t;
  int inputrow, chimpfudge, xchimpfudge;
  int ischimp = NO;
  char ss[6];

  if (spacing <= 0.0)
    fakedis = 1.0e20;
  strcpy (ss, "??");

  for (k = 0; k < nreal; k++) {

    indx = snpindx[k];
    sdpt = snpraw[indx];

    chrom = sdpt->chrom;
// defensive programming;  should not be needed:
    if (sdpt->cchrom[0] == CNULL) {
      sprintf (sdpt->cchrom, "%d", chrom);
    }
    sname = sdpt->ID;
    realdis = sdpt->gpos;
    physpos = sdpt->ppos;
    inputrow = sdpt->inputrow;
    if (sdpt->chimpfudge)
      ischimp = YES;

/**
    if (k>(nreal-10)) { 
     printf("zzyy2b %d %d %12.0f %d\n", k, chrom, physpos, inputrow) ;
    }
*/

    t = strcmp (ss, sdpt->cchrom);
    if (t != 0) {
      fakedis = nextmesh (realdis, spacing);
      xc = chrom;
      cnum = 0;
      strcpy (ss, sdpt->cchrom);
    }

    yf = fakedis;
    yr = realdis;

    // insert fake SNPs so the distance between SNPs is no greater than spacing
    while (fakedis < realdis) {

      if (cnum == 0)
	break;			// first SNP on chromosome
      if (sdpt->ignore)
	break;

      if (nfake >= tempfake)
	fatalx (" too many fake markers (bug) %d %d\n", num, nfake);
      if (num >= tempnum)
	fatalx (" too many markers (bug) %d %d\n", num, nfake);

      cupt = snpm[num];
      if (cupt == NULL)
	fatalx ("bad loadsnps\n");
      sprintf (cupt->ID, "fake-%d:%d", xc, nfake);
      cupt->estgenpos = cupt->genpos = fakedis;
      tcupt = lastcupt;
      for (;;) {
	xl = tcupt->genpos;
	if (xl < fakedis)
	  break;
	tnum = tcupt->markernum;
	--tnum;
	if (tnum < 0)
	  fatalx ("verybadbug\n");
	tcupt = snpm[tnum];
	if (tcupt->chrom != chrom)
	  fatalx ("badbug\n");
      }
      al = tcupt->physpos;
      xr = realdis;;
      ar = physpos;
      y = cupt->physpos = interp (xl, xr, fakedis, al, ar);
      if (chrom == -199) {
	printf
	  ("zzinterp %12.6f  %12.6f  %12.6f  %12.0f  %12.0f     %12.6f\n", xl,
	   xr, fakedis, al, ar, y);
      }
      cupt->markernum = num;
      cupt->isfake = YES;
      cupt->chrom = xc;
      strncpy (cupt->cchrom, ss, 6);
      fakedis += spacing;
      ++num;
      ++nfake;
    }

    cupt = snpm[num];
    if (cupt == NULL)
      fatalx ("bad loadsnps\n");
    strcpy (cupt->ID, sname);
    sdpt->cuptnum = num;
    cupt->estgenpos = cupt->genpos = realdis;
    cupt->physpos = physpos;
    cupt->markernum = num;
    cupt->isfake = NO;
    cupt->ignore = sdpt->ignore;
    // if ((cupt -> ignore == NO) && (cupt -> isfake == NO))   
    if ((cupt->isfake = NO)) {
      lastcupt = cupt;
      ++cnum;
    }
    cupt->isrfake = sdpt->isrfake;
    cupt->chrom = xc;
    strncpy (cupt->cchrom, ss, 6);
    cupt->tagnumber = inputrow;	// just used for pedfile 
    if (inputrow >= 0) {
      if (inputrow >= numsnpord)
	fatalx ("snpord overflow\n");
      snpord[inputrow] = num;
    }

    n0 = sdpt->nn[0];
    n1 = sdpt->nn[1];
    fraw = mknn (nn, n0, n1);
    copyiarr (nn, cupt->af_nn, 2);
    cupt->aftrue = cupt->af_freq = fraw;
    cupt->aa_aftrue = cupt->aa_af_freq = fraw;

    if (sdpt->alleles != NULL) {
      cupt->alleles[0] = sdpt->alleles[0];
      cupt->alleles[1] = sdpt->alleles[1];
    }
    else {
      cupt->alleles[0] = '1';
      cupt->alleles[1] = '2';
    }

    n0 = sdpt->nn[2];
    n1 = sdpt->nn[3];
    fraw = mknn (nn, n0, n1);
    copyiarr (nn, cupt->cauc_nn, 2);
    cupt->cftrue = cupt->cauc_freq = fraw;
    cupt->aa_cftrue = cupt->aa_cauc_freq = fraw;
    ++num;
  }

  // now make list of ignored snps used by loadgeno for check
  numsnps = num;
  for (k = 0; k < nreal; k++) {
    indx = snpindx[k];
    sdpt = snpraw[indx];
    if (sdpt->ignore == NO)
      continue;
    inputrow = sdpt->inputrow;
    chrom = sdpt->chrom;
    sname = sdpt->ID;
    realdis = sdpt->gpos;
    physpos = sdpt->ppos;
    cupt = snpm[sdpt->cuptnum];
    cupt->tagnumber = inputrow;	// just used for pedfile 
    /* 
       strncpy(cupt -> ID, sname, IDSIZE-1) ; 
       cupt -> genpos = realdis ;                          
       cupt -> physpos = physpos ;
       cupt -> markernum = num ;
       cupt -> isfake = NO ;
       cupt -> ignore = YES ;
       cupt -> chrom  = chrom ;
     */
    ++num;
  }
  nignore = 0;
  for (k = 0; k < numsnps; ++k) {
    cupt = snpm[k];
    if (ischimp && (cupt->chrom == 2))
      cupt->chimpfudge = YES;
    if (cupt->ignore)
      ++nignore;
  }
  *numignore = nignore;
  return numsnps;
}

/* ---------------------------------------------------------------------------------------------------- */
double
interp (double l, double r, double x, double al, double ar)
{
  // linearly interp ;
  double y, y1, y2;
  y = (r - l);
  if (y == 0.0)
    return 0.5 * (al + ar);
  y1 = (r - x) / y;
  y2 = (x - l) / y;
  return y1 * al + y2 * ar;
}

/* ---------------------------------------------------------------------------------------------------- */
int
getindivs (char *indivfname, Indiv *** indmarkpt)
{
  static Indiv **indivmarkers;
  int nindiv, i;

  if (indivfname == NULL)
    fatalx ("(getindivs) NULL indivfname\n");
  nindiv = getsizex (indivfname);
  if (nindiv <= 0)
    fatalx ("no indivs found: indivname: %s\n", indivfname);
  ZALLOC (indivmarkers, nindiv, Indiv *);

  for (i = 0; i < nindiv; i++) {
    ZALLOC (indivmarkers[i], 1, Indiv);
  }
  clearind (indivmarkers, nindiv);
  *indmarkpt = indivmarkers;
  readinddata (indivmarkers, indivfname);
  return nindiv;
}


/* ---------------------------------------------------------------------------------------------------- */
int
readinddata (Indiv ** indivmarkers, char *fname)
{
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k;
  int skipit;
  Indiv *indx;

  FILE *fff;

  // Call routine to read PLINK format file
  if (ispedfile (fname)) {
    plinkinputmode = YES;
    return readindpeddata (indivmarkers, fname);
  }

  // Read ANCESTRYMAP/EIGENSTRAT format individual file
  openit (fname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = setskipit (sx);
    if (skipit == NO) {
      if (nsplit < 3)
	fatalx ("%s bad line: %s", fname, line);
      indx = indivmarkers[num];
      if (strlen (sx) >= IDSIZE)
	fatalx ("Indiv ID too long: %s\n", sx);
      strcpy (indx->ID, sx);
      indx->idnum = num;
      sx = spt[1];
      indx->gender = sx[0];
      indx->affstatus = indx->ignore = NO;
      sx = spt[2];
      if (strcmp (sx, "Ignore") == 0)
	indx->ignore = YES;
      if ((qtmode) && (!indx->ignore)) {	// store quantitative phenotype in qval
	indx->egroup = strdup ("Case");
	indx->qval = indx->rawqval = atof (sx);
      }
      else {
	indx->egroup = strdup (sx);	// store discrete phenotype in egroup
      }
      // affstatus set by setstatus  
      ++num;
    }
    freeup (spt, nsplit);
    continue;
  }
  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
readindpeddata (Indiv ** indivmarkers, char *fname)
{
  char *line;
  char *spt[MAXFF], *sx, *sx0, gender;
  int nsplit, num = 0, k, i;
  int skipit;
  Indiv *indx;
  int nindiv;
  int maxnsplit = 0;
  char nnbuff[IDSIZE];
  int nok = 0;

  FILE *fff;

  maxgenolinelength = maxlinelength (fname);
  ZALLOC (line, maxgenolinelength + 1, char);
  openit (fname, &fff, "r");

  while (fgets (line, maxgenolinelength, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx0 = sx = spt[0];
    skipit = NO;
    if (sx[0] == '#')
      skipit = YES;
    if (skipit == NO) {
      if (nsplit < 6)
	fatalx ("%s bad line: %s", fname, line);
      indx = indivmarkers[num];

      maxnsplit = MAX (maxnsplit, nsplit);

      sx = spt[1];
      pedname (nnbuff, sx0, sx);
      strcpy (indx->ID, nnbuff);
      indx->idnum = num;
      sx = spt[4];
      k = atoi (sx);
      gender = 'U';
      if (k == 1)
	gender = 'M';
      if (k == 2)
	gender = 'F';
      indx->gender = gender;
      indx->affstatus = indx->ignore = NO;

      sx = spt[5];
      if (qtmode) {
	indx->egroup = strdup ("Case");
	indx->qval = indx->rawqval = atof (sx);
      }
      else {
	k = 99;
	if (strcmp (sx, "-9") == 0)
	  k = -9;
	if (strcmp (sx, "9") == 0)
	  k = 9;
	if (strcmp (sx, "0") == 0)
	  k = 0;
	if ((pedignore == NO) && (k == 0))
	  k = 3;
	if (strcmp (sx, "1") == 0)
	  k = 1;
	if (strcmp (sx, "2") == 0)
	  k = 2;
	switch (k) {
	case 9:
	  indx->ignore = YES;
	  printf ("%s ignored\n", indx->ID);
	  break;
	case -9:
	  indx->ignore = YES;
	  printf ("%s ignored\n", indx->ID);
	  break;
	case 0:
	  indx->ignore = YES;
	  printf ("%s ignored\n", indx->ID);
	  break;
	case 1:
	  indx->egroup = strdup ("Control");
	  break;
	case 2:
	  indx->egroup = strdup ("Case");
	  break;
	case 3:
	  indx->egroup = strdup ("???");
	  break;
	default:
	  indx->egroup = strdup (sx);
	}
      }

      // affstatus set by setstatus  
      if (indx->ignore == NO)
	++nok;
      ++num;
    }
    freeup (spt, nsplit);
    continue;
  }

  if (nok == 0) {
    printf ("all individuals set ignore.  Likely input problem (col 6)\n");
    printf ("resetting all individual...\n");
    for (i = 0; i < num; i++) {
      indx = indivmarkers[i];
      indx->ignore = NO;
      indx->egroup = strdup ("???");
    }
  }

  if (maxnsplit < 8)
    maxgenolinelength = -1;
  free (line);

  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
void
pedname (char *cbuff, char *sx0, char *sx1)
{
  int l0, l1, ll;

  l0 = strlen (sx0);
  l1 = strlen (sx1);
  ll = l0 + l1 + 1;
  if (familynames == NO)
    ll = l1;
  if (ll >= IDSIZE) {
    fatalx ("idnames too long: %s %s ll: %d limit: %d\n", sx0, sx1, ll,
	    IDSIZE - 1);
  }
  if (familynames == YES) {	// prepend family name to individual name
    strcpy (cbuff, sx0);
    cbuff[l0] = ':';
    strcpy (cbuff + l0 + 1, sx1);
    return;
  }
  strcpy (cbuff, sx1);

}

/* ---------------------------------------------------------------------------------------------------- */
int
readtldata (Indiv ** indivmarkers, int numindivs, char *inddataname)
{
  // warning printed if theta/lambda not in file
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k, ind, i;
  int skipit;
  Indiv *indx;
  double y;
  double gg[3];
  int *xcheck;

  FILE *fff;
  ZALLOC (xcheck, numindivs, int);
  openit (inddataname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = NO;
    if (strcmp (sx, "Indiv_Index") == 0) {
      // hack.  thetafile should be output with leading ## 
      freeup (spt, nsplit);
      continue;
    }
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }
    if (nsplit < 8)
      fatalx ("%s bad line: %s", inddataname, line);
    sx = spt[1];
    ind = indindex (indivmarkers, numindivs, sx);
    if (ind < 0)
      fatalx ("(readtldata) indiv: %s not found \n", sx);
    indx = indivmarkers[ind];

    indx->theta_mode = atof (spt[3]);
    indx->lambda_mode = atof (spt[7]);
    indx->Xtheta_mode = atof (spt[5]);
    indx->Xlambda_mode = atof (spt[9]);
    xcheck[ind] = 1;

    freeup (spt, nsplit);
    continue;
  }
  for (i = 0; i < numindivs; ++i) {
    indx = indivmarkers[i];
    if (indx->ignore)
      continue;
    if (xcheck[i] == 1)
      continue;
    printf ("*** warning (readtldata) ");
    printf ("%s not found in tlname file", indx->ID);
    printnl ();
  }

  free (xcheck);
  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
readfreqdata (SNP ** snpm, int numsnps, char *inddataname)
{
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k, ind;
  int skipit;
  SNP *cupt;

  FILE *fff;
  openit (inddataname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = NO;
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }
    if (nsplit < 6)
      fatalx ("%s bad line: %s", inddataname, line);
    sx = spt[2];
    ind = snpindex (snpm, numsnps, sx);
    if (ind < 0)
      fatalx ("(readfreqdata) snp %s not found \n", sx);
    cupt = snpm[ind];
    cupt->aa_af_freq = cupt->af_freq = atof (spt[3]);
    cupt->aa_cauc_freq = cupt->cauc_freq = atof (spt[5]);

    freeup (spt, nsplit);
    ++num;
    continue;
  }

  fclose (fff);
  return num;
}



/* ---------------------------------------------------------------------------------------------------- */
int
setstatus (Indiv ** indm, int numindivs, char *smatch)
{
  // return number set 
  // smatch = NULL => set everything 
  return setstatusv (indm, numindivs, smatch, YES);
}

/* ---------------------------------------------------------------------------------------------------- */
int
setstatusv (Indiv ** indm, int numindivs, char *smatch, int val)
{
  // return number set 
  // smatch = NULL => set everything 
  int i, n = 0;
  Indiv *indx;
  char *sx;
  for (i = 0; i < numindivs; i++) {
    indx = indm[i];
    if (indx->ignore)
      continue;
    sx = indx->egroup;
    if (smatch == NULL) {
      ++n;
      indx->affstatus = val;
      continue;
    }
    if (strcmp (sx, smatch) == 0) {
      ++n;
      indx->affstatus += val;
    }
    if (indx->affstatus > 1)
      fatalx ("aff2bug\n");
  }
  return n;
}


/* ---------------------------------------------------------------------------------------------------- */
long
getgenos (char *genoname, SNP ** snpmarkers, Indiv ** indivmarkers,
	  int numsnps, int numindivs, int nignore)
{
  // read genofile.  Use hashtable to improve search 
  // if genofile is gzipped decompress to trashdir
  char *gname, *genotmp = NULL;
  ENTRY *hashlist, *iteml;
  ENTRY item1;
  int k, num, indiv, lgt;
  int val;
  void *basept = 0;
  int bigoff;
  int tcheck;

  // we use a trick: want to store k 
  // store basept + k instead 
  // basept + k + bigoff for individual ID 

  SNP *cupt;
  Indiv *indx;

  char line[MAXSTR], cmd[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, nsnp;
  int skipit, kret, tpackmode, teigenstratmode;

  FILE *fff;
  int gnlen, ngenos = 0;

  double y;
  char *pbuff;

  item1.key = NULL;
  item1.data = NULL;

  if (genoname == NULL)
    fatalx ("(getgenos) NULL genoname\n");
  gname = genoname;
  gnlen = strlen (genoname);

  // Unzip file if necessary
  if (strcmp (genoname + gnlen - 3, ".gz") == 0) {
    makedir (trashdir);
    sprintf (line, "%s/genotmp:%d", trashdir, getpid ());
    genotmp = strdup (line);
    sprintf (cmd, "gunzip -c %s > %s", genoname, genotmp);
    printf ("unzip cmd: %s\n", cmd);
    system (cmd);
    kret = system (cmd);
    if (kret < 0) {
      perror ("gunzip failed\n");
      fatalx ("gunzip failed... probably out of disk space\n");
    }
    printf ("geno file unzipped\n");
    gname = genotmp;
  }

  // Enforce data size limits
  tcheck = checksize (numsnps, numindivs, outputmode);
  if (tcheck == -2)
    fatalx
      ("Data sets with more than 8 billion genotypes are not permitted\n");
  if (tcheck == -1)
    fatalx
      ("Output files of size >2GB are not permitted: use a more compact output data format. Also see documentation of chrom, badsnpname and checksizemode parameters.\n");

  // Call routine to read PLINK format unpacked genotype file
  if (ispedfile (gname)) {

    if (snpord == NULL)
      fatalx ("snpord not allocated (no map file ?)");
    getpedgenos (genoname, snpmarkers, indivmarkers, numsnps, numindivs,
		 nignore);
    freeped ();
    return numsnps * numindivs;
  }

  // Call routine to read PLINK format packed genotype file
  if (isbedfile (gname)) {
    return getbedgenos (genoname, snpmarkers, indivmarkers, numsnps,
			numindivs, nignore);
  }

  // Check whether file is packed ANCESTRYMAP format (packed EIGENSTRAT does not exist)
  tpackmode = ispack (gname);
  nsnp = numsnps;

  // Call routine to read packed ANCESTRYMAP format
  if (tpackmode) {
    inpack (gname, snpmarkers, indivmarkers, nsnp, numindivs);
    for (k = 0; k < nsnp; k++) {
      cupt = snpmarkers[k];
      if (cupt->ignore)
	continue;
      if ((cupt->isfake) && (!(cupt->isrfake)))
	continue;
      if (cupt->gtypes == NULL)
	ZALLOC (cupt->gtypes, 1, int);
      cupt->ngtypes = numindivs;
    }
    packmode = YES;
    return nsnp * numindivs;
  }

  teigenstratmode = iseigenstrat (gname);

  // Call routine to read EIGENSTRAT format
  if (teigenstratmode) {
    packmode = YES;
    ineigenstrat (gname, snpmarkers, indivmarkers, nsnp, numindivs);
    freeped ();
    return nsnp * numindivs;
  }


  // (If execution reaches here, the file is unpacked ANCESTRYMAP format)

  // rlen is number of bytes needed to store each SNP's genotype data
  y = (double) (numindivs * 2) / (8 * (double) sizeof (char));
  rlen = nnint (ceil (y));
  packlen = rlen * numsnps;
  if (packlen < 0)
    fatalx ("yuckk\n");
  if (packmode) {
    ZALLOC (packgenos, packlen, char);
    pbuff = packgenos;
    clearepath (packgenos);
  }

  // instantiate hash table
  num = nsnp + numindivs;
  xhcreate (5 * num);
  ZALLOC (hashlist, num, ENTRY);
  bigoff = nsnp + 100;

  // hash SNPs (key=name, value=index in snpmarkers)
  for (k = 0; k < nsnp; k++) {
    cupt = snpmarkers[k];
    if ((cupt->isfake) && (!(cupt->isrfake)))
      continue;
    iteml = hashlist + k;
    iteml->key = cupt->ID;
    iteml->data = basept + k;
    if (xhsearch (*iteml, FIND) != NULL)
      fatalx ("duplicate ID: %s\n", iteml->key);
    (void) xhsearch (*iteml, ENTER);
  }

  // hash individuals (key=name, value=index in indivmarkers)
  for (k = 0; k < numindivs; k++) {

    indx = indivmarkers[k];
    iteml = hashlist + numsnps + k;
    iteml->key = indx->ID;
    iteml->data = basept + k + bigoff;
    if (xhsearch (*iteml, FIND) != NULL)
      fatalx ("duplicate ID: %s\n", iteml->key);
    (void) xhsearch (*iteml, ENTER);
  }

  // read genotype file
  openit (gname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = NO;
    if (sx[0] == '#')
      skipit = YES;
    skipit = setskipit (sx);
    if (skipit == NO) {
      if (nsplit < 3)
	fatalx ("bad geno line.  missing field?\n", line);

      // Look up SNP and individual indices in hash table
      item1.key = spt[0];
      iteml = xhsearch (item1, FIND);
      if (iteml == NULL) {
	fatalx ("(genotypes) bad ID (SNP): %s\n", line);
      }
      k = (int) (iteml->data - basept);

      if (k >= numsnps) {
	fatalx ("bad genotype line: `snp' may be Indiv Id\n%s\n", line);
      }

      cupt = snpmarkers[k];
      if (cupt->ignore) {
	freeup (spt, nsplit);
	continue;
      }
      item1.key = spt[1];
      iteml = xhsearch (item1, FIND);
      if (iteml == NULL) {
	fatalx ("(genotypes) bad ID: (Indiv) %s\n", line);
      }
      indiv = (int) (iteml->data - basept);
      indiv -= bigoff;
      val = atoi (spt[2]);

      indx = indivmarkers[indiv];
      if (indx->ignore)
	val = -1;
      if (checkxval (cupt, indx, val) == NO)
	val = -1;
      if (val > 2) {
	printf ("*** warning invalid genotype: %s %s %d\n",
		cupt->ID, indx->ID, val);
	val = -1;
      }

      if (cupt->ngtypes == 0) {

	// If this is the first datum for this SNP, initialize
	// Set cupt->puff to point to the SNP's data in the genotype array.
	// Set cupt->gtypes to the number of individuals stored in the genotype.

	if (packmode == NO) {
	  ZALLOC (cupt->gtypes, numindivs, int);
	}
	else {
	  ZALLOC (cupt->gtypes, 1, int);
	  cupt->pbuff = pbuff;
	  pbuff += rlen;
	}
	cupt->ngtypes = numindivs;
	for (k = 0; k < numindivs; ++k) {
	  putgtypes (cupt, k, -1);	// initialize all individuals to "missing data"
	}
      }
      putgtypes (cupt, indiv, val);	// store this individual's genotype at this SNP
      ++ngenos;
    }
    freeup (spt, nsplit);
  }
  fclose (fff);

  // destroy hash table
  free (hashlist);
  xhdestroy ();


  // if this is a temporary file (gunzipped), delete it
  if (genotmp != NULL) {
    unlink (gname);
  }
  /* printf("genotype file processed\n") ; */
  freeped ();
  return ngenos;
}


/* ---------------------------------------------------------------------------------------------------- */
void
freeped ()
{
  // destructor for snpord 
  if (snpord == NULL)
    return;
  if (dofreeped == NO)
    return;
  free (snpord);
  snpord = NULL;
  numsnpord = 0;
  maxgenolinelength = -1;
}

/* ---------------------------------------------------------------------------------------------------- */
int
checkxval (SNP * cupt, Indiv * indx, int val)
{
  // check Male X marker not het
  if (cupt->chrom != numchrom + 1)
    return YES;
  if (indx->gender != 'M')
    return YES;
  if (val != 1)
    return YES;
  if (malexhet)
    return YES;
  return NO;
}

/* ---------------------------------------------------------------------------------------------------- */
void
clearsnp (SNP * cupt)
{

  cupt->af_freq = cupt->cauc_freq = -1;
  cupt->aa_af_freq = cupt->aa_cauc_freq = -1;
  cupt->estgenpos = 0;
  cupt->genpos = 0;
  cupt->physpos = 0;
  cupt->ngtypes = 0;
  cupt->pbuff = NULL;
  cupt->ebuff = NULL;
  cupt->gtypes = NULL;
  cupt->modelscores = NULL;
  cupt->totmodelscores = NULL;
  cupt->score = cupt->weight = 0.0;
  cupt->isfake = NO;
  cupt->ignore = NO;
  cupt->isrfake = NO;
  cupt->estdis = 0;
  cupt->dis = 0;
  cupt->esum = 0;
  cupt->lsum = 0;
  cupt->gpsum = 0;
  cupt->gpnum = 0;
  cupt->pcupt = NULL;
  cupt->tagnumber = -1;

  charclear (cupt->cchrom, CNULL, 7);
  strcpy (cupt->cchrom, "");
  cupt->chimpfudge = NO;
  charclear (cupt->alleles, CNULL, 2);
}

/* ---------------------------------------------------------------------------------------------------- */
int
rmindivs (SNP ** snpm, int numsnps, Indiv ** indivmarkers, int numindivs)
{
  // squeeze out ignore
  // dangerous bend.  Of course indivmarkers indexing will change
  int n = 0, g, i, k;
  int x;
  Indiv *indx;
  SNP *cupt;

  // n is index of next unused array element 

  for (k = 0; k < numindivs; ++k) {
    if (indivmarkers[k]->ignore == YES)
      continue;			// don't store
    if (n == k) {		// if no ignored found yet, 
      ++n;			//   next unused is next element
      continue;			//   and no need to copy
    }

    // copy k -> n 
    indx = indivmarkers[n];	// if kth element is not ignored, put it 
    indivmarkers[n] = indivmarkers[k];	//   into next unused element
    indx->idnum = n;
    for (i = 0; i < numsnps; i++) {
      cupt = snpm[i];
      if (cupt->gtypes == NULL)
	break;
      if (cupt->ignore)
	continue;		// copy only genotypes of non-ignored SNPs
      g = getgtypes (cupt, k);
      putgtypes (cupt, n, g);
    }
    ++n;
  }

  for (i = 0; i < numsnps; i++) {	// reset number of individuals 
    cupt = snpm[i];
    cupt->ngtypes = n;
  }
  return n;

}

/* ---------------------------------------------------------------------------------------------------- */
int
rmsnps (SNP ** snpm, int numsnps, char *deletesnpoutname)
{

  int i, x;
  SNP *cupt;
  int lastc, chrom;

  freesnpindex ();		// clear hash table

  // wipe out fakes not between real markers
  lastc = -1;
  for (i = 0; i < numsnps; i++) {
    cupt = snpm[i];
    if (cupt->ignore)
      continue;
    chrom = cupt->chrom;
    if ((cupt->isfake) && (chrom != lastc)) {
      cupt->ignore = YES;	// precedes first real SNP
      logdeletedsnp (cupt->ID, "isfake", deletesnpoutname);
    }
    if (!cupt->isfake)
      lastc = chrom;
  }

  lastc = -1;
  for (i = numsnps - 1; i >= 0; i--) {
    cupt = snpm[i];
    if (cupt->ignore)
      continue;
    chrom = cupt->chrom;
    if ((cupt->isfake) && (chrom != lastc)) {
      cupt->ignore = YES;	// follows last real SNP
      logdeletedsnp (cupt->ID, "isfake", deletesnpoutname);
    }
    if (!cupt->isfake)
      lastc = chrom;
  }

  x = 0;			// index of next retained SNP in the array 
  for (i = 0; i < numsnps; i++) {
    cupt = snpm[i];
    if (cupt->ignore) {
      freecupt (&cupt);
      continue;
    }
    snpm[x] = snpm[i];
    ++x;
  }

  // reset own-index field
  for (i = 0; i < x; i++) {
    cupt = snpm[i];
    cupt->markernum = i;
  }

  return x;
}


/* ---------------------------------------------------------------------------------------------------- */
void
freecupt (SNP ** cuppt)
{
  SNP *cupt;
  cupt = *cuppt;
  if (cupt->modelscores != NULL) {
    free (cupt->modelscores);
  }
  if (cupt->totmodelscores != NULL) {
    free (cupt->totmodelscores);
  }
  free (cupt);
  cupt = NULL;
}


/* ---------------------------------------------------------------------------------------------------- */
void
clearind (Indiv ** indm, int numind)
{
  Indiv *indx;
  double theta;
  int i;

  for (i = 0; i < numind; i++) {
    indx = indm[i];
    indx->egroup = NULL;
    indx->affstatus = indx->ignore = NO;
    indx->gender = 'U';
    indx = indm[i];
    indx->Xtheta_mode = indx->theta_mode = a1 / (a1 + b1);
    indx->Xlambda_mode = indx->lambda_mode = lp1 / lp2;
    indx->thetatrue = -1.0;	// silly value
    indx->qval = indx->rawqval = 0.0;
  }
  cleartg (indm, numind);
}

/* ---------------------------------------------------------------------------------------------------- */
void
cleartg (Indiv ** indm, int nind)
{
  int i;
  Indiv *indx;

  for (i = 0; i < nind; i++) {
    indx = indm[i];
    vzero (indx->totgamms, 3);
    indx->totscore = 0.0;
  }
}


/* ---------------------------------------------------------------------------------------------------- */
double
mknn (int *nn, int n0, int n1)
{
  double x;
  int t;

  nn[0] = n0 + 1;
  nn[1] = n1 + 1;

  // no clipping.  (Old code clipped here)
  t = intsum (nn, 2);
  x = ((double) nn[0]) / (double) t;

  return x;
}

/* ---------------------------------------------------------------------------------------------------- */
void
setug (Indiv ** indm, int numind, char gender)
{
  Indiv *indx;
  double theta;
  int i;

  for (i = 0; i < numind; i++) {
    indx = indm[i];
    if (indx->gender == 'U')
      indx->gender = gender;
  }
}


/* ---------------------------------------------------------------------------------------------------- */
void
dobadsnps (SNPDATA ** snpraw, int nreal, char *badsnpname)
{

  FILE *fff;
  char line[MAXSTR];
  char *spt[MAXFF];
  char *ss;
  int indx, nsplit, n;

  if (badsnpname == NULL)
    return;
  openit (badsnpname, &fff, "r");

  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    if (spt[0][0] == '#') {
      freeup (spt, nsplit);
      continue;
    }

    // look up index in snpraw
    indx = snprawindex (snpraw, nreal, spt[0]);
    if (indx >= 0) {
      snpraw[indx]->ignore = YES;
      if ((nsplit >= 2) && (checkfake (spt[1]))) {
	snpraw[indx]->ignore = NO;
	snpraw[indx]->isrfake = YES;
      }
    }
    freeup (spt, nsplit);
  }
  fclose (fff);
}


/* ---------------------------------------------------------------------------------------------------- */
int
checkfake (char *ss)
{
  // yes if string ss is "Fake" 
  // ss is overwritten

  ss[0] = tolower (ss[0]);
  if (strcmp (ss, "fake") == 0)
    return YES;
  return NO;

}

void
mkchrom (char *ss, int chrom, double *ppos, int fudge, int chrmode)
{
  char *sx;
  int big = 200 * 1000 * 1000;

  sx = ss;
  if (chrmode) {
    strcpy (ss, "chr");
    sx += 3;
  }
  if ((chrom != 2) || (fudge == NO)) {
    sprintf (sx, "%d", chrom);
    return;
  }

  if (*ppos <= big) {
    sprintf (sx, "2a");
  }

  if (*ppos > big) {
    sprintf (sx, "2b");
    *ppos -= big;
  }

}


/* ---------------------------------------------------------------------------------------------------- */
void
printsnps (char *snpoutfilename, SNP ** snpm, int num, Indiv ** indm,
	   int printfake, int printvalids)
{

  int i, chrom;
  double ppos;
  SNP *cupt;
  char ss[10];
  FILE *xfile;
  int numvcase, numvcontrol;
  char c;

  if ((snpoutfilename != NULL) && (strcmp (snpoutfilename, "NULL") == 0))
    return;
  if (snpoutfilename != NULL) {
    openit (snpoutfilename, &xfile, "w");
  }
  else
    xfile = stdout;

  if (tersemode == NO) {
    fprintf (xfile, "\n");
    fprintf (xfile, "###DETAILS ABOUT THE MARKERS\n");
    fprintf (xfile,
	     "##Gen_Pos: genetic position, Phys_pos: Physical position\n");
    fprintf (xfile,
	     "##Afr_vart: Parental African variant allele count, Afr_ref: Parental African reference allele count\n");
    fprintf (xfile,
	     "##Eur_vart: Parental European variant allele count, Eur:ref:Parental European reference allele count\n");

    fprintf (xfile, "\n");
    fprintf (xfile, "%20s %5s %10s %18s", "#SNP_Id", "Chr_Num", "Gen_Pos",
	     "Phys_Pos");
    fprintf (xfile, " %9s %9s %9s %9s", "Afr_vart", "Afr_ref", "Eur_vart",
	     "Eur_ref");
    fprintf (xfile, "\n");
  }
  for (i = 0; i < num; ++i) {
    cupt = snpm[i];
    if (outputall == NO) {
      if (!printfake && (ignoresnp (cupt)))
	continue;
      if (!printfake && (cupt->isrfake))
	continue;
    }

    ppos = cupt->physpos;

    mkchrom (ss, cupt->chrom, &ppos, cupt->chimpfudge, chrmode);
    fprintf (xfile, "%20s %5s ", cupt->ID, ss);

    if (cupt->genpos == 0.0) {
      fprintf (xfile, "%15.0f %15.0f", cupt->genpos, ppos);
    }
    else {
      fprintf (xfile, "%15.6f %15.0f", cupt->genpos, ppos);
    }

    if (tersemode) {
      printalleles (cupt, xfile);
      fprintf (xfile, "\n");
      continue;
    }

    fprintf (xfile, " %8d ", cupt->af_nn[0]);
    fprintf (xfile, "%8d ", cupt->af_nn[1]);
    fprintf (xfile, "%8d ", cupt->cauc_nn[0]);
    fprintf (xfile, "%8d", cupt->cauc_nn[1]);
    if (!printvalids) {
      printalleles (cupt, xfile);
      fprintf (xfile, "\n");
      continue;
    }
    numvcase = numvalidgtx (indm, cupt, 1);
    numvcontrol = numvalidgtx (indm, cupt, 0);
    fprintf (xfile, "   %6d %6d", numvcase, numvcontrol);
    fprintf (xfile, "    %d %d %d", cupt->ignore, cupt->isfake,
	     cupt->isrfake);
    printalleles (cupt, xfile);
    fprintf (xfile, "\n");
  }
  if (snpoutfilename != NULL)
    fclose (xfile);
}


/* ---------------------------------------------------------------------------------------------------- */
void
printalleles (SNP * cupt, FILE * fff)
{
  char c;
  if ((c = cupt->alleles[0]) != CNULL)
    fprintf (fff, " %c", c);
  if ((c = cupt->alleles[1]) != CNULL)
    fprintf (fff, " %c", c);

}

/* ---------------------------------------------------------------------------------------------------- */
void
printdata (char *genooutfilename, char *indoutfilename,
	   SNP ** snpm, Indiv ** indiv, int numsnps, int numind, int packem)
{

  FILE *gfile, *ifile;
  int i, j, t;
  SNP *cupt;
  Indiv *indx;
  char ss[MAXSTR];
  char *gfilename;
  int dogenos = YES;

  if (packem)
    printf ("packedancestrymap output\n");
  else
    printf ("ancestrymap output\n");

  if ((genooutfilename != NULL) && (strcmp (genooutfilename, "NULL") == 0))
    dogenos = NO;
  if (genooutfilename == NULL)
    dogenos = NO;

  if (dogenos) {
    gfilename = genooutfilename;
    if (packem) {
      outpack (genooutfilename, snpm, indiv, numsnps, numind);
      gfilename = NULL;
    }

    // print unpacked genotype output
    if (gfilename != NULL) {
      openit (gfilename, &gfile, "w");
      if (tersemode == NO)
	fprintf (gfile, "#SNP_ID,INDIV_ID,VART_ALLELE_CNT\n");
    }

    for (i = 0; i < numsnps; i++) {
      if (gfilename == NULL)
	break;
      cupt = snpm[i];

      if (outputall == NO) {
	if (ignoresnp (cupt))
	  continue;
	if (cupt->isrfake)
	  continue;
      }

      for (j = 0; j < cupt->ngtypes; j++) {
	indx = indiv[j];
	if (indx->ignore)
	  continue;
	fprintf (gfile, "%20s %20s %3d\n", cupt->ID, indx->ID,
		 getgtypes (cupt, j));
      }
    }

    if (gfilename != NULL)
      fclose (gfile);

  }

  if (indoutfilename == NULL)
    return;
  if ((indoutfilename != NULL) && (strcmp (indoutfilename, "NULL") == 0))
    return;
  if (indoutfilename != NULL)
    openit (indoutfilename, &ifile, "w");

  /* fprintf(ifile,"#INDIV,GENDER,POPULATION\n"); */
  for (i = 0; i < numind; i++) {
    indx = indiv[i];
    if (indx->ignore)
      continue;
    strcpy (ss, indx->egroup);
    if ((qtmode) && (!indx->ignore)) {
      sprintf (ss, "%9.3f", indx->rawqval);
    }
    if (tersemode) {
      fprintf (ifile, "%20s %c %10s", indx->ID, indx->gender, ss);
      fprintf (ifile, "\n");
      continue;
    }
    t = numvalids (indx, snpm, 0, numsnps - 1);
    fprintf (ifile, "%20s %c %10s %5d\n", indx->ID, indx->gender, ss, t);
  }

  if (indoutfilename != NULL)
    fclose (ifile);
}




/* ---------------------------------------------------------------------------------------------------- */
int
readindval (Indiv ** indivmarkers, int numindivs, char *inddataname)
{

  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k, ind;
  int skipit;
  Indiv *indx;
  double y;
  double gg[3];

  FILE *fff;
  openit (inddataname, &fff, "r");
  for (k = 0; k < numindivs; ++k) {
    indx = indivmarkers[k];
    indx->affstatus = NO;
    indx->qval = -999.0;
  }
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit < 2) {
      freeup (spt, nsplit);
      continue;
    }
    sx = spt[0];
    if (strcmp (sx, "Indiv_Index") == 0) {
      freeup (spt, nsplit);
      continue;
    }
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }
    ind = indindex (indivmarkers, numindivs, sx);
    if (ind < 0)
      fatalx ("(readindval) indiv: %s not found \n", sx);
    indx = indivmarkers[ind];
    indx->qval = atof (spt[1]);
    indx->affstatus = YES;
    freeup (spt, nsplit);
    continue;
  }

  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
readgdata (Indiv ** indivmarkers, int numindivs, char *gname)
     // only needed for logreg 
     // not correct for X chromosome 
     // Needs correction  for males 
{
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k, ind;
  int skipit;
  Indiv *indx;
  double y;
  double gg[3];

  FILE *fff;

  cleartg (indivmarkers, numindivs);
  openit (gname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = NO;
    skipit = setskipit (sx);
    if (skipit == NO) {
      if (nsplit < 4)
	fatalx ("%s bad line: %s", gname, line);
      ind = indindex (indivmarkers, numindivs, sx);
      if (ind < 0)
	fatalx ("(readgdata) indiv: %s not found \n", sx);
      indx = indivmarkers[ind];
      for (k = 0; k < 3; k++) {
	gg[k] = atof (spt[k + 1]);
      }
      y = asum (gg, 3);
      vst (gg, gg, 1.0 / y, 3);
      y = 0.5 * (gg[1] + 2.0 * gg[2]);	/* est caucasian ancestry */
      indx->thetatrue = y;
      copyarr (gg, indx->totgamms, 3);
    }
    freeup (spt, nsplit);
    continue;
  }

  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
putweights (char *fname, SNP ** snpm, int numsnps)
{
  int num = 0, k;
  SNP *cupt;
  double weight;

  FILE *fff;
  openit (fname, &fff, "w");

  for (k = 0; k < numsnps; ++k) {
    cupt = snpm[k];
    if (cupt->ignore)
      continue;
    fprintf (fff, "%20s ", cupt->ID);
    fprintf (fff, "%15.9f ", cupt->weight);
    fprintf (fff, "\n");
    ++num;
  }
  fclose (fff);
  return num;
}


/* ---------------------------------------------------------------------------------------------------- */
int
getindvals (char *fname, Indiv ** indivmarkers, int numindivs)
{
  // number of read lines 
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0;
  int skipit, k;
  double qval;

  FILE *fff;
  for (k = 0; k < numindivs; ++k) {
    indivmarkers[k] -> qval = 0.0;
  }
  if (fname == NULL) return 0 ;

  openit (fname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0) {
      continue;
    }
    sx = spt[0];
    skipit = NO;
    skipit = setskipit (sx);
    k = indindex (indivmarkers, numindivs, sx);
    if (k < 0)
      skipit = YES;
    if (skipit == NO) {
      if (nsplit > 1) {
	sx = spt[1];
	qval = atof (sx);
	indivmarkers[k]->qval = qval;
//      printf ("qval set: %20s %9.3f\n", indivmarkers[k]->ID, qval);
	++num;
      }
    }
    freeup (spt, nsplit);
    continue;
  }
  fclose (fff);
  fflush (stdout);
  return num;
}

int
getweights (char *fname, SNP ** snpm, int numsnps)
{
  // number of real lines 
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0;
  int skipit, k;
  double weight;

  FILE *fff;
  for (k = 0; k < numsnps; ++k) {
    snpm[k]->weight = 1.0;
  }
  if (fname == NULL) return 0 ;
  openit (fname, &fff, "r");
  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0) {
      continue;
    }
    sx = spt[0];
    skipit = NO;
    skipit = setskipit (sx);
    k = snpindex (snpm, numsnps, sx);
    if (k < 0)
      skipit = YES;
    if (skipit == NO) {
      if (nsplit > 1) {
	sx = spt[1];
	weight = atof (sx);
	snpm[k]->weight = weight;
//      printf ("weight set: %20s %9.3f\n", snpm[k]->ID, weight);
	++num;
      }
    }
    freeup (spt, nsplit);
    continue;
  }
  fclose (fff);
  fflush (stdout);
  return num;
}

/* ---------------------------------------------------------------------------------------------------- */
void
outpack (char *genooutfilename, SNP ** snpm, Indiv ** indiv, int numsnps,
	 int numind)
{
  char **arrx;
  int n, num, ihash, shash, i, g, j, k;
  int nind, nsnp, irec;
  Indiv *indx;
  SNP *cupt;
  double y;
  unsigned char *buff;
  int fdes, ret;
  char *packit;

  n = numind;
  ZALLOC (arrx, n, char *);

  num = 0;
  for (i = 0; i < n; i++) {
    indx = indiv[i];
    if ((outputall == NO) && indx->ignore)
      continue;
    arrx[num] = strdup (indx->ID);
    ++num;
  }

  // compute hash on individuals
  ihash = hasharr (arrx, num);
  nind = num;
  freeup (arrx, num);
  free (arrx);

  n = numsnps;
  ZALLOC (arrx, n, char *);
  num = 0;
  for (i = 0; i < n; i++) {
    cupt = snpm[i];
    if (outputall == NO) {
      if (ignoresnp (cupt))
	continue;
      if (cupt->isrfake)
	continue;
    }
    arrx[num] = strdup (cupt->ID);
    ++num;
  }

  // compute hash on SNPs
  shash = hasharr (arrx, num);
  nsnp = num;
  freeup (arrx, num);
  free (arrx);
  // printf("ihash:  %x   shash: %x\n", ihash, shash) ; 

  // rlen is number of bytes each SNP will occupy in packed format
  y = (double) (nind * 2) / (8 * (double) sizeof (char));
  rlen = nnint (ceil (y));
  rlen = MAX (rlen, 48);
  // printf("nind: %d  rlen:  %d\n", nind, rlen) ;
  ZALLOC (buff, rlen, unsigned char);
  sprintf ((char *) buff, "GENO %7d %7d %x %x", nind, nsnp, ihash, shash);

  ridfile (genooutfilename);
  fdes = open (genooutfilename, O_CREAT | O_TRUNC | O_RDWR, 0666);

  if (fdes < 0) {
    perror ("bad genoout");
    fatalx ("open failed for %s\n", genooutfilename);
  }
  if (verbose)
    printf ("file %s opened\n", genooutfilename);

  ret = write (fdes, buff, rlen);
  if (ret < 0) {
    perror ("write failure");
    fatalx ("(outpack) bad write");
  }

  irec = 1;
  for (i = 0; i < numsnps; i++) {
    cupt = snpm[i];
    if (outputall == NO) {
      if (ignoresnp (cupt))
	continue;
      if (cupt->isrfake)
	continue;
    }
    cclear (buff, 0X00, rlen);
    num = 0;
    for (j = 0; j < numind; j++) {
      indx = indiv[j];
      if (indx->ignore)
	continue;
      g = getgtypes (cupt, j);
      if (g < 0)
	g = 3;
      wbuff (buff, num, g);	// store two-bit genotype in packed data buffer
      ++num;
    }
    ret = write (fdes, buff, rlen);	// print out all SNPs in packed data buffer
    if (ret < 0) {
      perror ("write failure");
      fatalx ("(outpack) bad write");
    }
    if (verbose) {
      printf ("record: %4d ", irec);
      for (k = 0; k < rlen; ++k) {
	printf (" %02x", (unsigned char) buff[k]);
      }
      printf ("\n");
    }
    ++irec;
  }
  close (fdes);
  free (buff);
  // printf("check: %s %d\n", genooutfilename, ispack(genooutfilename)) ;
}



/* ---------------------------------------------------------------------------------------------------- */
int
ispack (char *gname)
{
  // checks if file is packed gfile 
  int fdes, t, ret;
  char buff[8];

  fdes = open (gname, O_RDONLY);
  if (fdes < 0) {
    perror ("open failure");
    fatalx ("(ispack) bad open %s\n", gname);
  }
  t = read (fdes, buff, 8);
  if (t < 0) {
    perror ("read failure");
    fatalx ("(ispack) bad read");
  }
  close (fdes);
  buff[4] = '\0';
  ret = strcmp (buff, "GENO");
  if (ret == 0)
    return YES;
  return NO;

}



/* ---------------------------------------------------------------------------------------------------- */
int
iseigenstrat (char *gname)
{

  FILE *fff;
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k;


  openit (gname, &fff, "r");

  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }
    freeup (spt, nsplit);
    fclose (fff);
    if (nsplit > 1)
      return NO;
    return YES;
  }
  fatalx ("(iseigenstrat) no genotyped data found\n");
  return NO ;

}



/* ---------------------------------------------------------------------------------------------------- */
int
ineigenstrat (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps,
	      int numind)
{
  // supports enhanced format fist character X => all missing data for SNP
  FILE *fff;
  char *line = NULL, c;
  char *spt[2], *sx;
  int nsplit, rownum = 0, k, num;
  int maxstr, maxff = 2;
  int nind, nsnp, len;
  double y;
  unsigned char *buff;
  char *packit, *pbuff;
  int g, g1, g2;
  SNP *cupt;
  Indiv *indx;
  int nbad = 0;


  packmode = YES;
  maxstr = numind + 10;
  ZALLOC (line, maxstr, char);

  nind = numind;
  nsnp = numsnps;

  // rlen is number of bytes used to store each SNP's genotype data
  y = (double) (nind * 2) / (8 * (double) sizeof (char));
  rlen = nnint (ceil (y));
  rlen = MAX (rlen, 48);
  ZALLOC (buff, rlen, unsigned char);

  packlen = rlen * nsnp;
  if (packgenos == NULL) {
    ZALLOC (packgenos, packlen, char);
    clearepath (packgenos);
  }

  openit (gname, &fff, "r");

  rownum = 0;
  pbuff = packgenos;
  while (fgets (line, maxstr, fff) != NULL) {
    nsplit = splitup (line, spt, maxff);
    if (nsplit == 0)
      continue;

    sx = spt[0];
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }

    if (nsplit > 1)
      fatalx ("(ineigenstrat) more than 1 field\n");	// white space not expected

    if (rownum >= numsnps)
      fatalx ("(ineigenstrat) too many lines in file %d %d\n", rownum,
	      numsnps);
    num = snpord[rownum];
    cupt = snpm[num];
    ++rownum;
    if (cupt == NULL)
      continue;

    if (cupt->ngtypes == 0) {
      if (packmode == NO) {
	ZALLOC (cupt->gtypes, numind, int);
	ivclear (cupt->gtypes, -1, numind);
      }
      else {
	ZALLOC (cupt->gtypes, 1, int);
	cupt->pbuff = pbuff;
	pbuff += rlen;
      }
      cupt->ngtypes = numind;
    }

    if (sx[0] == 'X') {
      freeup (spt, nsplit);
      continue;
    }

    len = strlen (sx);
    if (len != nind) {
      printf ("(ineigenstrat) bad line %d ::%s\n", rownum, line);
      fatalx ("(ineigenstrat) mismatch line length %d %d\n", len, nind);
    }

    for (k = 0; k < len; k++) {
      sscanf (sx + k, "%c", &c);
      g = -2;
      if (c == '0')
	g = 0;
      if (c == '1')
	g = 1;
      if (c == '2')
	g = 2;
      if (c == '9')
	g = -1;

      if (g == -2)
	fatalx ("(ineigenstrat) bad character %c\n", c);
      if (indiv[k]->ignore)
	g = -1;
      if (checkxval (cupt, indiv[k], g) == NO)
	g = -1;

      indx = indiv[k];
      if (checkxval (cupt, indx, g) == NO)
	g = -1;
      g2 = g;
      if (g2 < 0)
	continue;
      g1 = getgtypes (cupt, k);
      if ((g1 >= 0) && (g1 != g2))
	++nbad;			// something is already stored there
      putgtypes (cupt, k, g2);
    }
    freeup (spt, nsplit);
  }
  if (rownum != numsnps)
    fatalx ("(ineigenstrat) mismatch in numsnps %d and numlines %d\n",
	    numsnps, rownum);
  fclose (fff);
  freestring (&line);

  return nbad;
}

/* ---------------------------------------------------------------------------------------------------- */
int
calcishash (SNP ** snpm, Indiv ** indiv, int numsnps, int numind, int *pihash,
	    int *pshash)
{
  char **arrx;
  int ihash, shash, n, num;
  int i;
  Indiv *indx;
  SNP *cupt;

  n = numind;
  ZALLOC (arrx, n, char *);

  num = 0;
  for (i = 0; i < n; i++) {
    indx = indiv[i];
    arrx[num] = strdup (indx->ID);
    ++num;
  }
  *pihash = hasharr (arrx, num);

  freeup (arrx, num);
  free (arrx);

  n = numsnps;
  ZALLOC (arrx, n, char *);
  num = 0;
  for (i = 0; i < n; i++) {
    cupt = snpm[i];
    if (cupt->isfake)
      continue;
    arrx[num] = strdup (cupt->ID);
    cupt->ngtypes = numind;
    ++num;
  }
  *pshash = hasharr (arrx, num);
  freeup (arrx, num);
  free (arrx);
  return num;

}


long
bigread (int fdes, char *packg, long numbytes)
{
  long x;
  int xx;
  char *pt;
  long nb, t, nr = 0;
  int pswitch = NO;


  pt = packg;

  x = nnint (pow (2, 30));

  nb = numbytes;
  if (nb > x)
    pswitch = YES;

  for (;;) {
    xx = MIN (x, nb);
    t = read (fdes, pt, xx);
    if (t < 0) {
      perror ("read failure");
      fatalx ("(bigread) bad data read");
    }
    if (t != xx) {
      perror ("read failure (length mismatch)");
      fatalx ("(bigread) bad data read (length mismatch) %ld %ld\n", t, xx);
    }
    nb -= xx;
    nr += xx;
    if (pswitch)
      printf ("read %ld bytes\n", nr);
    if (nb == 0)
      break;
    pt += xx;
  }
  return nr;
}

int
getsnpordered ()
{
  return snpordered;
}

void
putsnpordered (int mode)
{
  snpordered = mode;
}

void
setpordercheck (int mode)
{
  pordercheck = mode;
}

void
failorder ()
{
  fatalx
    ("snps out of order and packed format.  Run convertf with pordercheck: NO\n");
}

/* ---------------------------------------------------------------------------------------------------- */
void
inpack (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps, int numind)
{

  char **arrx, junk[10];
  int n, num, ihash, shash, i, g, j, k;
  long t;
  int xihash, xshash, xnsnp, xnind;
  int nind, nsnp, irec;
  Indiv *indx;
  SNP *cupt;
  double y;
  unsigned char *buff;
  int fdes, ret;
  char *packit, *pbuff;

  nind = n = numind;
  nsnp = calcishash (snpm, indiv, numsnps, numind, &ihash, &shash);

  // rlen is the number of bytes needed to store one SNP's genotype data
  y = (double) (nind * 2) / (8 * (double) sizeof (char));
  rlen = nnint (ceil (y));
  rlen = MAX (rlen, 48);
  ZALLOC (buff, rlen, unsigned char);

  // open binary file and check readability
  fdes = open (gname, O_RDONLY);
  if (fdes < 0) {
    perror ("open failure");
    fatalx ("(ispack) bad open %s\n", gname);
  }
  t = read (fdes, buff, rlen);
  if (t < 0) {
    perror ("read failure");
    fatalx ("(inpack) bad read");
  }

  if (pordercheck && (snpordered == NO))
    failorder ();

  // check for file modification
  if (hashcheck) {
    sscanf ((char *) buff, "GENO %d %d %x %x", &xnind, &xnsnp, &xihash,
	    &xshash);
    if (xnind != nind)
      fatalx ("OOPS number of individuals %d != %d in input files\n", nind,
	      xnind);
    if (xnsnp != nsnp)
      fatalx ("OOPS number of SNPs %d != %d in input file: %s\n", nsnp, xnsnp,
	      gname);
    if (xihash != ihash)
      fatalx
	("OOPS indiv file has changed since genotype file was created\n");
    if (xshash != shash)
      fatalx ("OOPS snp file has changed since genotype file was created\n");
  }

  packlen = rlen * nsnp;
  ZALLOC (packgenos, packlen, char);
  clearepath (packgenos);

  // printf("packgenos: %x end: %x  len:  %d\n", packgenos, packgenos+packlen-1, packlen) ;

  t = bigread (fdes, packgenos, packlen);
  if (t < 0) {
    perror ("read failure");
    fatalx ("(inpack) bad data read");
  }
  if (t != packlen) {
    perror ("read failure (length mismatch)");
    printf ("numsnps: %d  nsnp (from geno file): %d\n", numsnps, nsnp);
    fatalx ("(inpack) bad data read (length mismatch) %ld %ld\n", t, packlen);
  }
  else
    printf ("packed geno read OK\n");

  // now set up pointers into packed data 
  pbuff = packgenos;
  for (i = 0; i < numsnps; i++) {
    j = snpord[i];
    if (snpordered == YES)
      j = i;
    if (j < 0)
      fatalx ("(inpack) bug\n");
    if (j > nsnp)
      fatalx ("(inpack) bug\n");
    cupt = snpm[j];
/**
    if ((i % 100000) == 0)  { 
     printf("zz %d %d %d\n", i, j, numsnps) ;  fflush(stdout) ;
    }
*/
    if (cupt->isfake)
      continue;
    cupt->pbuff = pbuff;
    pbuff += rlen;
    // now check xhets
    for (k = 0; k < numind; ++k) {
      indx = indiv[k];
      g = getgtypes (cupt, k);
      if (checkxval (cupt, indx, g) == NO) {
	putgtypes (cupt, k, -1);
      }
    }
  }

  free (buff);
  close (fdes);

  printf ("end of inpack\n");
  fflush (stdout);
}


/* ---------------------------------------------------------------------------------------------------- */
void
getsnpsc (char *snpscname, SNP ** snpm, int numsnps)
{

  FILE *fff;
  int score;
  SNP *cupt;
  char line[MAXSTR];
  char *spt[MAXFF], *sx;
  int nsplit, num = 0, k;
  double y;


  if (snpscname == NULL)
    fatalx ("no snpsc file\n");
  else
    openit (snpscname, &fff, "r");

  while (fgets (line, MAXSTR, fff) != NULL) {
    nsplit = splitup (line, spt, MAXFF);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }
    k = snpindex (snpm, numsnps, sx);
    if (k < 0) {
      printf ("*** warning.  snp %s in snpscname but not in main snp file\n",
	      spt[0]);
      freeup (spt, nsplit);
      continue;
    }
    y = atof (spt[1]);
    y += .1 * gauss ();		// dither 
    cupt = snpm[k];
    cupt->score = y;
    freeup (spt, nsplit);
  }

  if (snpscname != NULL)
    fclose (fff);
}

/* ---------------------------------------------------------------------------------------------------- */
void
setepath (SNP ** snpm, int nsnps)
{

  int i;
  SNP *cupt;
  char *pbuff;

  if (packlen == 0)
    fatalx ("(setepath) packlen unset\n");
  ZALLOC (packepath, packlen, char);
  printf ("setepath. packlen: %ld  rlen: %ld\n", packlen, rlen);
  pbuff = packepath;
  for (i = 0; i < nsnps; i++) {
    cupt = snpm[i];
    if (cupt->isfake)
      continue;
    cupt->ebuff = pbuff;
    pbuff += rlen;
  }
  clearepath (packepath);
}


/* ---------------------------------------------------------------------------------------------------- */
void
clearepath (char *packp)
{
  cclear ((unsigned char *) packp, 0XFF, packlen);
}


/* ---------------------------------------------------------------------------------------------------- */
int
getpedgenos (char *gname, SNP ** snpmarkers, Indiv ** indivmarkers,
	     int numsnps, int numindivs, int nignore)
{
  int val;
  int ngenos = 0;

  SNP *cupt;
  Indiv *indx;

  char *line;
  char **spt, *sx;
  char c;
  int nsplit, num = 0;
  int skipit;
  int numf, snpnumber, nsnp;
  int k, n, t, i;
  FILE *fff;
  int **gcounts, *gvar, *gref;
  int xvar, xref;
  int parity, colbase, ncols;
  int snpnum;
  int markernum = -99;
  int n1, n2;

  /* 
     markernum = snpindex(snpmarkers, numsnps, "rs3002685") ;
     if (markernum <0) fatalx("qq1") ;
   */

  maxgenolinelength = MAX (maxgenolinelength, maxlinelength (gname));

  // printf("maxlinelen %d\n", maxlinelength(gname)) ;
  ZALLOC (line, maxgenolinelength + 1, char);

  cleargdata (snpmarkers, numsnps, numindivs);
  nsnp = numsnps;

  ZALLOC (gcounts, nsnp, int *);
  for (i = 0; i < nsnp; i++) {
    ZALLOC (gcounts[i], 5, int);
  }
  genopedcnt (gname, gcounts, nsnp);

  ZALLOC (gvar, nsnp, int);
  ZALLOC (gref, nsnp, int);

  // designate ref and var alleles from counts
  setgref (gcounts, nsnp, gvar, gref);

  // Override improvised ref and var designations if they were in the .map file
  for (i = 0; i < nsnp; ++i) {
    cupt = snpmarkers[i];
    if (cupt->alleles[0] != CNULL) {
      c = cupt->alleles[0];
      gvar[i] = xpedval (c);
      c = cupt->alleles[1];
      gref[i] = xpedval (c);
    }
    else {
      c = x2base (gvar[i]);
      cupt->alleles[0] = c;
      c = x2base (gref[i]);
      cupt->alleles[1] = c;
    }
  }

  numf = 2 * nsnp + 10;
  ZALLOC (spt, numf, char *);

  // Read genotype file, one line per individual
  openit (gname, &fff, "r");
  while (fgets (line, maxgenolinelength, fff) != NULL) {

    nsplit = splitup (line, spt, numf);
    if (nsplit == 0)
      continue;
    sx = spt[0];
    skipit = NO;
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }

    // On first individual, set column base (6 or 7)
    if (num == 0) {
      parity = nsplit % 2;
      ncols = nsplit;
      colbase = 6 + parity;
    }
    if (nsplit != ncols)
      fatalx ("bad number of fields %d %d\n", ncols, nsplit);

    // Loop over SNPs 
    for (k = colbase; k < nsplit - 1; k += 2) {
      snpnumber = (k - colbase) / 2;

      if (snpnumber >= numsnpord)
	fatalx ("snpord overflow\n");
      snpnum = snpord[snpnumber];
      if (snpnum < 0)
	fatalx ("logic bug (bad snpord)\n");

      xvar = gvar[snpnum];
      xref = gref[snpnum];

      t = 0;

      n1 = n = pedval (spt[k]);
      n2 = pedval (spt[k + 1]);

      if ((n1 == 5) || (n2 == 5)) {	// Missing data or invalid
	val = -1;
	putgtypes (cupt, num, val);
	continue;
      }

      if ((n < 0) || (n > 4))
	fatalx ("(getpedgenos) %s bad geno %s\n", gname, spt[k]);
      if (n == xvar)
	++t;
      if ((n != xvar) && (n != xref))
	t = -10;

      n = n2;
      if ((n < 0) || (n > 4))
	fatalx ("(getpedgenos) %s bad geno %s\n", gname, spt[k + 1]);
      if (n == xvar)
	++t;
      if ((n != xvar) && (n != xref))
	t = -10;

      if (t < 0)
	t = -1;			// Any unexpected allele is stored as "missing"
      cupt = snpmarkers[snpnum];
      if (cupt->ignore)
	continue;
      val = t;
      if (checkxval (cupt, indivmarkers[num], val) == NO)
	val = -1;
      putgtypes (cupt, num, val);	// Store genotype 
      if (val >= 0)
	++ngenos;

    }				// rof (SNP)
    freeup (spt, nsplit);
    ++num;

  }				// elihw (individual)

  free (spt);
  fclose (fff);

  for (i = 0; i < nsnp; i++) {
    free (gcounts[i]);
  }

  free (gcounts);
  free (gref);
  free (gvar);
  free (line);

  printf ("genotype file processed\n");
  return ngenos;
}


/* ---------------------------------------------------------------------------------------------------- */
void
genopedcnt (char *gname, int **gcounts, int nsnp)
{
  char *line;
  char **spt, *sx;
  int nsplit, num = 0;
  int skipit;
  int numf, snpnumber, snpnum;
  int k, n;
  FILE *fff;
  int parity, ncols, colbase;

  // gcounts already zeroed 

  maxgenolinelength = MAX (maxgenolinelength, maxlinelength (gname));

  //  printf("maxlinelen %d\n", maxlinelength(gname)) ;
  ZALLOC (line, maxgenolinelength + 1, char);

  numf = 2 * nsnp + 10;
  ZALLOC (spt, numf, char *);

  openit (gname, &fff, "r");
  while (fgets (line, maxgenolinelength, fff) != NULL) {

    nsplit = splitup (line, spt, numf);
    if (nsplit == 0)
      continue;
    skipit = NO;
    sx = spt[0];
    if (sx[0] == '#') {
      freeup (spt, nsplit);
      continue;
    }
    if (num == 0) {
      parity = nsplit % 2;
      ncols = nsplit;
      colbase = 6 + parity;	// QUESTION:  what is the optional seventh column?
    }

    for (k = colbase; k < nsplit - 1; k += 2) {
      snpnumber = (k - colbase) / 2;
      if (snpnumber >= numsnpord)
	fatalx ("snpord overflow\n");
      snpnum = snpord[snpnumber];
      if (snpnum < 0)
	fatalx ("logic bug (bad snpord)\n");
      n = pedval (spt[k]);
      //  if ((n<0) || (n>4)) fatalx("(genopedcnt) %s bad geno %s\n", gname, spt[k]) ;
      if ((n < 0) || (n > 4))
	continue;
      if (n > 0) {
	++gcounts[snpnum][n];
	++num;
      }
      n = pedval (spt[k + 1]);
      //  if ((n<0) || (n>4)) fatalx("(genopedcnt) %s bad geno %s\n", gname, spt[k+1]) ;
      if ((n < 0) || (n > 4))
	continue;
      if (n > 0) {
	++gcounts[snpnum][n];
	++num;
      }
    }
    freeup (spt, nsplit);
    continue;
  }
  free (spt);
  free (line);
  fclose (fff);
  return;
}


/* ---------------------------------------------------------------------------------------------------- */
void
outfiles (char *snpname, char *indname, char *gname, SNP ** snpm,
	  Indiv ** indiv, int numsnps, int numindx, int packem, int ogmode)
{
  /*  call at end of main program usually  */

  int sizelimit = 10000000;
  int numind;

  // Squeeze out individuals with ignore flag set
  numind = rmindivs (snpm, numsnps, indiv, numindx);
  if (snpname == NULL) {
    printf ("*** warning output snpname NULL\n");
    printf ("snpname: %s %d\n", snpname, numsnps);
    printf ("indname:  %s %d\n", indname, numind);
    printf ("gname: %s\n", gname);
  }

  switch (outputmode) {

  case EIGENSTRAT:
    printf ("eigenstrat output\n");
    outeigenstrat (snpname, indname, gname, snpm, indiv, numsnps, numind);
    return;

  case PED:
    printf ("ped output\n");
    outped (snpname, indname, gname, snpm, indiv, numsnps, numind, ogmode);
    return;

  case PACKEDPED:
    printf ("packedped output\n");
    outpackped (snpname, indname, gname, snpm, indiv, numsnps, numind,
		ogmode);
    return;

  case PACKEDANCESTRYMAP:
    if (snpname != NULL)
      printsnps (snpname, snpm, numsnps, indiv, NO, NO);
    packem = YES;
    printdata (gname, indname, snpm, indiv, numsnps, numind, packem);
    return;

  case ANCESTRYMAP:
  default:
    if (snpname != NULL)
      printsnps (snpname, snpm, numsnps, indiv, NO, NO);
    packem = NO;
    if (numsnps > (sizelimit / numind))
      packem = YES;
    printdata (gname, indname, snpm, indiv, numsnps, numind, packem);
    return;
  }
}

/* ---------------------------------------------------------------------------------------------------- */
void
outeigenstrat (char *snpname, char *indname, char *gname, SNP ** snpm,
	       Indiv ** indiv, int numsnps, int numind)
{

  FILE *fff, *ifile;
  int g, i, k;
  SNP *cupt;
  Indiv *indx;
  char ss[MAXSTR];


  settersemode (YES);
  if (snpname != NULL)
    printsnps (snpname, snpm, numsnps, indiv, NO, NO);

  // Print individual data to .ind file
  if (indname != NULL) {
    openit (indname, &ifile, "w");
    for (i = 0; i < numind; i++) {
      indx = indiv[i];
      if (indx->ignore)
	continue;
      strcpy (ss, indx->egroup);
      if (qtmode) {
	sprintf (ss, "%9.3f", indx->rawqval);
      }
      fprintf (ifile, "%20s %c %10s", indx->ID, indx->gender, ss);
      fprintf (ifile, "\n");
      continue;
    }
    fclose (ifile);
  }

  if (gname == NULL)
    return;

  // Print genotypes to .geno file
  openit (gname, &fff, "w");
  for (k = 0; k < numsnps; k++) {
    cupt = snpm[k];
    if (outputall == NO) {
      if (ignoresnp (cupt))
	continue;
      if (cupt->isrfake)
	continue;
    }
    for (i = 0; i < numind; i++) {
      indx = indiv[i];
      if (indx->ignore)
	continue;
      g = getgtypes (cupt, i);
      if (g < 0)
	g = 9;
      fprintf (fff, "%1d", g);
    }
    fprintf (fff, "\n");
  }
  fclose (fff);
}



/* ---------------------------------------------------------------------------------------------------- */
void
setgref (int **gcounts, int nsnp, int *gvar, int *gref)
{
  int tt[5];
  int i, kmax;

  for (i = 0; i < nsnp; i++) {
    copyiarr (gcounts[i], tt, 5);
    tt[0] = -9999;		// Ensure "missing data" is not ref or var allele
    ivlmaxmin (tt, 5, &kmax, NULL);
    gvar[i] = kmax;		// designate major allele "variant"
    if (tt[kmax] == 0)
      gvar[i] = 5;
    tt[kmax] = -9999;
    ivlmaxmin (tt, 5, &kmax, NULL);
    gref[i] = kmax;		// designate minor allele "variant"
    if (tt[kmax] == 0)
      gref[i] = 5;
  }
}


/* ---------------------------------------------------------------------------------------------------- */
void
cleargdata (SNP ** snpmarkers, int numsnps, int numindivs)
{

  // wipe out all genotype data 
  int i, k;
  SNP *cupt;
  char *pbuff;
  double y;

  // rlen is number of bytes needed to store each SNP's genotype data in packed mode
  y = (double) (numindivs * 2) / (8 * (double) sizeof (char));
  rlen = nnint (ceil (y));
  rlen = MAX (rlen, 48);
  packlen = rlen * numsnps;

  if (packlen <= 0)
    fatalx ("bad packlen\n");

  if ((packmode) && (packgenos == NULL)) {
    ZALLOC (packgenos, packlen, char);
    clearepath (packgenos);
  }

  pbuff = packgenos;

  for (i = 0; i < numsnps; i++) {
    cupt = snpmarkers[i];
    //  if (cupt -> ignore) continue ;
    if (cupt->ngtypes == 0) {
      if (packmode == NO) {
	ZALLOC (cupt->gtypes, numindivs, int);
      }
      else {
	ZALLOC (cupt->gtypes, 1, int);
	cupt->pbuff = pbuff;
	pbuff += rlen;
      }
      cupt->ngtypes = numindivs;
      for (k = 0; k < numindivs; ++k) {
	putgtypes (cupt, k, -1);
      }
    }
  }
}


/* ---------------------------------------------------------------------------------------------------- */
void
setgenotypename (char **gname, char *iname)
{
  if (ispedfile (iname) == NO)
    return;
  if ((*gname != NULL) && strcmp (*gname, "NULL") == 0) {
    *gname = NULL;
    return;
  }
  if (*gname != NULL)
    return;
  *gname = strdup (iname);
}



/* ---------------------------------------------------------------------------------------------------- */
int
maxlinelength (char *fname)
{
  // linelength including \n 

  int len, maxlen;
  int nl, t;
  FILE *fff;

  maxlen = -1;

  len = 0;
  nl = (int) (unsigned char) '\n';

  openit (fname, &fff, "r");
  while ((t = fgetc (fff)) != EOF) {
    ++len;
    if (t == nl) {
      maxlen = MAX (maxlen, len);
      len = 0;
    }
  }
  return maxlen;
}


/* ---------------------------------------------------------------------------------------------------- */
void
settersemode (int mode)
{
  tersemode = mode;
}


/* ---------------------------------------------------------------------------------------------------- */
void
outindped (char *indname, Indiv ** indiv, int numind, int ogmode)
{

  FILE *fff, *ifile;
  int g, i, k;
  Indiv *indx;
  char c;
  int pgender, astatus;
  int dcode = 1;

  if (indname == NULL)
    return;

  openit (indname, &ifile, "w");
  for (i = 0; i < numind; i++) {
    indx = indiv[i];
    if (indx->ignore)
      continue;

    if (familypopnames != YES)
      fprintf (ifile, "%6d ", i + 1);
    if (familypopnames == YES)
      fprintf (ifile, "%20s ", indx->egroup);
    fprintf (ifile, "  %12s", indx->ID);
    fprintf (ifile, " %d %d", 0, 0);	// parents 
    c = indx->gender;
    pgender = 0;
    if (c == 'M')
      pgender = 1;
    if (c == 'F')
      pgender = 2;
    fprintf (ifile, " %d", pgender);
    if (ogmode == NO) {
      astatus = indx->affstatus + 1;
      if (qtmode) {
	fprintf (ifile, "%9.3f", indx->rawqval);
      }
      else {
	fprintf (ifile, " %d", astatus);
      }
    }
    if (ogmode == YES)
      fprintf (ifile, " %10s", indx->egroup);
    if (sevencolumnped)
      fprintf (ifile, " %d", dcode);
    fprintf (ifile, "\n");
  }
  fclose (ifile);
}


/* ---------------------------------------------------------------------------------------------------- */
void
outped (char *snpname, char *indname, char *gname, SNP ** snpm,
	Indiv ** indiv, int numsnps, int numind, int ogmode)
{

  FILE *fff, *ifile;
  int g, i, k;
  SNP *cupt;
  Indiv *indx;
  char c;
  int pgender, astatus;
  int g1, g2, dcode = 1;

  settersemode (YES);
  if (snpname != NULL)
    printmap (snpname, snpm, numsnps, indiv);	// print .map file

  if (indname != NULL)
    outindped (indname, indiv, numind, ogmode);	// print .pedind file

  // Here, printt the .ped file
  if (gname == NULL)
    return;
  openit (gname, &fff, "w");
  for (i = 0; i < numind; i++) {
    indx = indiv[i];
    if (indx->ignore)
      continue;
    fprintf (fff, "%6d %12s", i + 1, indx->ID);	// make up a family name (index) and print individual name
    fprintf (fff, " %d %d", 0, 0);	// set parents to "not in data set"  
    c = indx->gender;
    pgender = 0;
    if (c == 'M')
      pgender = 1;
    if (c == 'F')
      pgender = 2;
    fprintf (fff, " %d", pgender);
    if (ogmode == NO) {
      astatus = indx->affstatus + 1;
      if (qtmode) {
	fprintf (fff, "%9.3f", indx->rawqval);
      }
      else
	fprintf (fff, " %d", astatus);
    }
    if (ogmode == YES)
      fprintf (fff, " %10s", indx->egroup);
    if (sevencolumnped)
      fprintf (fff, " %d", dcode);
    for (k = 0; k < numsnps; k++) {
      cupt = snpm[k];
      if (outputall == NO) {
	if (ignoresnp (cupt))
	  continue;
	if (cupt->isrfake)
	  continue;
      }
      g = getgtypes (cupt, i);
      gtox (g, cupt->alleles, &g1, &g2);
      fprintf (fff, "  %d %d", g1, g2);
      if ((g1 > 4) || (g2 > 4)) {
	fprintf (fff, "\n");
	fflush (fff);
	fclose (fff);
	printf ("bad genotype for snp %s alleles: ", cupt->ID);
	printalleles (cupt, stdout);
	printf ("\n");
	fatalx ("trying to make invalid ped file %s\n", gname);
      }
    }
    fprintf (fff, "\n");
  }
  fclose (fff);
}


/* ---------------------------------------------------------------------------------------------------- */
void
gtox (int g, char *cvals, int *p1, int *p2)
{
  // output values for ped file using allele array
  int g1, g2;

  switch (g) {
  case -1:
    *p1 = *p2 = 0;
    return;
  case 0:
    g1 = 1;
    g2 = 1;
    break;
  case 1:
    g1 = 1;
    g2 = 2;
    break;
  case 2:
    g1 = 2;
    g2 = 2;
    break;
  default:
    fatalx ("(outped) bug %d\n", g);
  }

  if (cvals != NULL) {
    g1 = 3 - g1;
    g2 = 3 - g2;
    g1 = xpedval (cvals[g1 - 1]);
    g2 = xpedval (cvals[g2 - 1]);
  }

  *p1 = MIN (g1, g2);
  *p2 = MAX (g1, g2);

}


/* ---------------------------------------------------------------------------------------------------- */
void
outpackped (char *snpname, char *indname, char *gname, SNP ** snpm,
	    Indiv ** indiv, int numsnps, int numind, int ogmode)
{

  FILE *fff, *ifile;
  int g, i, k;
  SNP *cupt;
  Indiv *indx;
  char c;
  int pgender, astatus;
  int g1, g2, dcode = 1;
  unsigned char ibuff[3];
  unsigned char *buff;
  int fdes, ret, blen;
  int *gtypes;
  double y;

  settersemode (YES);
  if (snpname != NULL)
    printmap (snpname, snpm, numsnps, indiv);	// print .map (not .bim)

  if (indname != NULL)		// print .pedind file
    outindped (indname, indiv, numind, ogmode);

  if (gname == NULL)
    return;

  /*  magic constants for snp major bed file */
  ibuff[0] = 0x6C;
  ibuff[1] = 0x1B;
  ibuff[2] = 0x01;


  // blen is number of bytes each SNP's data requires
  y = (double) (numind * 2) / (8 * (double) sizeof (char));
  blen = nnint (ceil (y));
  ZALLOC (buff, blen, unsigned char);
  ZALLOC (gtypes, numind, int);

  // open output file and check readability
  fdes = open (gname, O_CREAT | O_TRUNC | O_RDWR, 0666);
  if (fdes < 0) {
    perror ("bad gname");
    fatalx ("open failed for %s\n", gname);
  }
  if (verbose)
    printf ("file %s opened\n", gname);

  if (fdes < 0) {
    perror ("bad genoout");
    fatalx ("open failed for %s\n", gname);
  }

  if (verbose)
    printf ("file %s opened\n", gname);

  ret = write (fdes, ibuff, 3);

  for (i = 0; i < numsnps; i++) {
    cupt = snpm[i];
    if (outputall == NO) {
      if (ignoresnp (cupt))
	continue;
      if (cupt->isrfake)
	continue;
    }
    for (k = 0; k < numind; ++k) {
      g = getgtypes (cupt, k);
      if (g >= 0)
	g = 2 - g;
      gtypes[k] = g;
    }
    setbedbuff ((char *) buff, gtypes, numind);
    ret = write (fdes, buff, blen);
    if (ret < 0) {
      perror ("write failure");
      fatalx ("(outpackped) bad write");
    }
  }

  free (buff);
  close (fdes);
}


/* ---------------------------------------------------------------------------------------------------- */
void
setbedbuff (char *buff, int *gtypes, int numind)
{
  int i, k;
  double y;
  int blen, wnum, wplace, bplace, t, g;
  unsigned char c;

  y = (double) (numind * 2) / (8 * (double) sizeof (char));
  blen = nnint (ceil (y));

  c = 0xAA;			// missing 
  cclear ((unsigned char *) buff, c, blen);	// initialize buffer to "missing"

  for (k = 0; k < numind; k++) {
    wnum = k / 4;
    t = k % 4;
    wplace = 3 - t;		// switch for bed 
    bplace = 4 * wnum + wplace;
    g = bedval (gtypes[k]);
    wbuff ((unsigned char *) buff, bplace, g);
  }
}


/* ---------------------------------------------------------------------------------------------------- */
int
bedval (int g)
{
  if (g < 0)
    return 1;
  if (g == 2)
    return 3;
  if (g == 1)
    return 2;
  if (g == 0)
    return 0;

  fatalx ("(bedval) bad g value %d\n", g);
  return -1 ;
}


/* ---------------------------------------------------------------------------------------------------- */
void
atopchrom (char *ss, int chrom)
{

  // ancestry chromosome -> map convention  

/**
  if ( chrom == numchrom+1 )  {
    strcpy(ss, "X") ;
    return ;
  }
  else if ( chrom == numchrom+2 )  {
    strcpy(ss, "Y") ;
    return ;
  }
*/
  sprintf (ss, "%d", chrom);
}

/* ---------------------------------------------------------------------------------------------------- */
int
ptoachrom (char *ss)
{
  // map -> ancestry  
  char c;
  c = ss[0];

  if (c == 'X')
    return (numchrom + 1);
  if (c == 'Y')
    return (numchrom + 2);
  return atoi (ss);
}


/* ---------------------------------------------------------------------------------------------------- */
void
printmap (char *snpname, SNP ** snpm, int numsnps, Indiv ** indiv)
{

  char ss[5];
  int i;
  FILE *fff;
  SNP *cupt;
  char c;

  if (snpname == NULL)
    return;
  openit (snpname, &fff, "w");
  for (i = 0; i < numsnps; i++) {
    cupt = snpm[i];
    if (outputall == NO) {
      if (ignoresnp (cupt))
	continue;
      if (cupt->isrfake)
	continue;
    }
    atopchrom (ss, cupt->chrom);
    fprintf (fff, "%-2s", ss);
    fprintf (fff, " %12s", cupt->ID);
    fprintf (fff, " %12.6f", cupt->genpos);
    fprintf (fff, " %12.0f", cupt->physpos);
    printalleles (cupt, fff);
    fprintf (fff, "\n");
  }
  fclose (fff);
}


/* ---------------------------------------------------------------------------------------------------- */
char
x2base (int x)
{
  // 12345 -> ACGTX
  char *blist = "?ACGT";
  if (x < 0)
    return '?';
  if (x > 4)
    return 'X';
  return blist[x];
}

/* ---------------------------------------------------------------------------------------------------- */
int
xpedval (char c)
{
  char bb[2];

  bb[1] = '\0';
  bb[0] = c;

  if (isdigit (c))
    return atoi (bb);
  return pedval (bb);
}


/* ---------------------------------------------------------------------------------------------------- */
int
pedval (char *sx)
{
  char c;

  c = sx[0];
  if (c == 'A')
    return 1;
  if (c == 'C')
    return 2;
  if (c == 'G')
    return 3;
  if (c == 'T')
    return 4;
  if (c == 'X')
    return 5;
  if (c == 'N')
    return 5;
  if (c == 'N')
    return 5;
  if (c == 'D')
    return 5;
  if (c == 'I')
    return 5;

  if (c == '1')
    return 1;
  if (c == '2')
    return 2;
  if (c == '3')
    return 3;
  if (c == '4')
    return 4;
  if (c == '0')
    return 0;

  if (badpedignore)
    return 5;

  return 9;

}


/* ---------------------------------------------------------------------------------------------------- */
int
getbedgenos (char *gname, SNP ** snpmarkers, Indiv ** indivmarkers,
	     int numsnps, int numindivs, int nignore)
{

  int val, i, k, x, j;
  int t, wnum, wplace;
  int nsnp;
  int ngenos = 0;

  SNP *cupt;
  Indiv *indx;

  unsigned char *buff, ibuff[3], jbuff[3];
  double y;
  int blen;
  int fdes;

  // magic numbers for BED identification
  ibuff[0] = 0x6C;
  ibuff[1] = 0x1B;
  ibuff[2] = 0x01;

  cleargdata (snpmarkers, numsnps, numindivs);
  nsnp = numsnps;

  if (pordercheck && (snpordered == NO))
    failorder ();

  // blen is number of bytes needed to store each SNP's genotype
  y = (double) (numindivs * 2) / (8 * (double) sizeof (char));
  blen = nnint (ceil (y));
  ZALLOC (buff, blen, unsigned char);

  // open binary file and check that it is readable
  fdes = open (gname, O_RDONLY);
  if (fdes < 0) {
    perror ("open failure");
    fatalx ("(getbedgenos) bad open %s\n", gname);
  }
  t = read (fdes, jbuff, 3);
  if (t < 0) {
    perror ("read failure");
    fatalx ("(getbedgenos) bad read");
  }

  // check magic
  for (k = 0; k < 3; k++) {
    if (ibuff[k] != jbuff[k]) {
      fprintf (stderr, "magic failure: ");
      fprintf (stderr, " %x %x %x", jbuff[0], jbuff[1], jbuff[2]);
      fprintf (stderr, " %x %x %x", ibuff[0], ibuff[1], ibuff[2]);
      fprintf (stderr, "\n");
      fatalx ("(getbedgenos) magic failure\n");
    }
  }

  // Read genotype data
  for (i = 0; i < nsnp; i++) {

    j = snpord[i];
    if (snpordered == YES)
      j = i;
    if (j < 0)
      fatalx ("(readbedgenos) bug\n");
    if (j > nsnp)
      fatalx ("(readbedgenos) bug\n");

    cupt = snpmarkers[j];
    t = read (fdes, buff, blen);

    if (t < 0) {
      perror ("read failure");
      fatalx ("(getbedgenos) bad read");
    }
    if (cupt->ignore)
      continue;

    for (k = 0; k < numindivs; k++) {
      indx = indivmarkers[k];
      wnum = k / 4;
      t = k % 4;
      wplace = 3 - t;		// switch for bed 
      wplace += 4 * wnum;
      x = rbuff (buff, wplace);
      val = ancval (x);
      if (checkxval (cupt, indx, val) == NO)
	val = -1;
      putgtypes (cupt, k, val);
      if (val >= 0)
	++ngenos;
    }
  }

  free (buff);
  printf ("genotype file processed\n");
  return ngenos;

}


/* ---------------------------------------------------------------------------------------------------- */
int
ancval (int x)
{
  // bed -> anc 
  // 1/22/07 allele flipped
  if (x == 1)
    return -1;
  if (x == 3)
    return 0;
  if (x == 2)
    return 1;
  if (x == 0)
    return 2;
  fatalx ("(ancval) bad value %d\n", x);
  return -99 ;
}



/* ---------------------------------------------------------------------------------------------------- */
void
setomode (enum outputmodetype *outmode, char *omode)
{

  char *ss;
  int len, i;

  if (outmode == NULL)
    return;
  *outmode = PACKEDANCESTRYMAP;
  if (omode == NULL)
    return;

  ss = strdup (omode);
  len = strlen (ss);
  for (i = 0; i < len; i++) {
    ss[i] = tolower (ss[i]);
  }

  if (strcmp (ss, "eigenstrat") == 0)
    *outmode = EIGENSTRAT;
  if (strcmp (ss, "ascii") == 0)
    *outmode = EIGENSTRAT;
  if (strcmp (ss, "alkes") == 0)
    *outmode = EIGENSTRAT;
  if (strcmp (ss, "ped") == 0)
    *outmode = PED;
  if (strcmp (ss, "packedped") == 0)
    *outmode = PACKEDPED;
  if (strcmp (ss, "packedancestrymap") == 0)
    *outmode = PACKEDANCESTRYMAP;
  if (strcmp (ss, "ancestrymap") == 0)
    *outmode = ANCESTRYMAP;

  free (ss);
}


/* ---------------------------------------------------------------------------------------------------- */
void
snpdecimate (SNP ** snpm, int nsnp, int decim, int mindis, int maxdis)
{
  int chrom = -1;
  SNP **cbuff, *cupt, *cupt2;
  int k, k2, n, t;

  printf ("snpdecimate called: decim: %d  mindis: %d  maxdis: %d\n", decim,
	  mindis, maxdis);
  ZALLOC (cbuff, nsnp, SNP *);
  for (k = 0; k < nsnp; ++k) {
    cupt = snpm[k];
    if (cupt->chrom != chrom) {
      chrom = cupt->chrom;
      n = 0;
      for (k2 = k; k2 < nsnp; ++k2) {
	cupt2 = snpm[k2];
	if (cupt2->chrom != chrom)
	  break;
	if (cupt2->ignore)
	  continue;
	if (cupt2->isfake)
	  continue;
	cbuff[n] = cupt2;
	++n;
      }
      if (n < decim)
	continue;
      decimate (cbuff, n, decim, mindis, maxdis);
    }
  }
}



/* ---------------------------------------------------------------------------------------------------- */
void
decimate (SNP ** cbuff, int n, int decim, int mindis, int maxdis)
{
  int k, t, u, dis, len;
  int *ttt;
  SNP *cupt;

  cupt = cbuff[0];
  if (n < 2)
    return;
  if (n < decim)
    return;
  ZALLOC (ttt, n, int);
  for (k = 1; k < n; ++k) {
    dis = (int) (cbuff[k]->physpos - cbuff[k - 1]->physpos);
    if (dis > maxdis) {
      decimate (cbuff, k - 1, decim, mindis, maxdis);
      decimate (cbuff + k, n - k, decim, mindis, maxdis);
      return;
    }
  }
  t = ranmod (decim);
  ttt[t] = 1;

  u = t + decim;

  for (;;) {
    if (u >= n)
      break;
    dis = (int) (cbuff[u]->physpos - cbuff[t]->physpos);
    if (dis < mindis) {
      ++u;
      continue;
    }
    len = u - t - 1;
    ivclear (ttt + t + 1, 1, len);
    t = u;
    u = t + decim;
  }
  for (k = 0; k < n; ++k) {
    if (ttt[k] == 1)
      cbuff[k]->ignore = YES;
  }
// debug 
  if (verbose) {
    for (k = 0; k < n; ++k) {
      printf ("zz %6d %20s %20d  %3d\n", k, cbuff[k]->ID,
	      (int) cbuff[k]->physpos, ttt[k]);
    }
  }

  free (ttt);

}


/* ---------------------------------------------------------------------------------------------------- */
int
killhir2 (SNP ** snpm, int numsnps, int numind, double physlim, double genlim,
	  double rhothresh)
{
  // physlim = genlim = 0 => kill monomorphs 
  double *badbuff;
  int *xbadbuff;
  SNP *cupt, *cupt1, *cupt2;

#define BADBUFFSIZE 100000 ;

  int badbuffsize = BADBUFFSIZE;
  int i, j, k, nbad, kmax, kmin, t, j1, j2, lo, hi;
  int *gtypes;
  double *x1, *x2, mean, dis, *p1;
  int nkill = 0, tj;
  double y1, y2, y, rho, smax;
  double **xx1, *yy1;
  SNP **snpxl;

  if (physlim < 0)
    return 0;
  if (genlim < 0)
    return 0;

  // step 1 give score to each SNP 
  for (i = 0; i < numsnps; i++) {
    cupt = snpm[i];
    if (cupt->ignore)
      continue;
    cupt->score = numvalidgtypes (cupt);
    cupt->score += DRAND ();	// jitter
  }

  ZALLOC (badbuff, badbuffsize, double);
  ZALLOC (xbadbuff, badbuffsize, int);
  ZALLOC (x1, numind, double);
  ZALLOC (x2, numind, double);
  ZALLOC (gtypes, numind, int);

  xx1 = initarray_2Ddouble (10000, numind, 0.0);
  ZALLOC (yy1, 10000, double);
  ZALLOC (snpxl, 10000, SNP *);

  for (i = 0; i < numsnps; i += 5000) {

    lo = i;
    hi = i + 10000 - 1;
    hi = MIN (hi, numsnps - 1);

    for (j = lo; j <= hi; ++j) {
      p1 = xx1[j - lo];
      cupt = snpm[j];
      snpxl[j - lo] = cupt;
      grabgtypes (gtypes, cupt, numind);
      floatit (p1, gtypes, numind);
      vvadjust (p1, numind, NULL);
      y1 = asum2 (p1, numind);
      yy1[j - lo] = y1;
      if (y1 < 0.01) {
	++nkill;
	cupt->ignore = YES;
      }
    }
    for (j1 = 0; j1 < 5000; ++j1) {
      if (j1 > (hi - lo))
	break;
      cupt1 = snpxl[j1];
      if (cupt1->ignore)
	continue;
      nbad = 0;
      tj = 0;
      for (j2 = j1 + 1; j2 <= hi - lo; ++j2) {
	cupt2 = snpxl[j2];
	if (cupt2->ignore)
	  continue;
	if (cupt2->chrom != cupt1->chrom)
	  break;

	dis = cupt2->genpos - cupt1->genpos;
	if (dis > genlim)
	  break;

	dis = cupt2->physpos - cupt1->physpos;
	if (dis > physlim)
	  break;
	++tj;

	y1 = yy1[j1];
	y2 = yy1[j2];

	y = vdot (xx1[j1], xx1[j2], numind) / sqrt (y1 * y2);	// compute correlation
	rho = y * y;
	if (rho < rhothresh)
	  continue;
	badbuff[nbad] = cupt2->score;
	xbadbuff[nbad] = j2 + lo;
	++nbad;

      }
      t = (j1 + lo) % 100;
      if (nbad == 0)
	continue;
      vlmaxmin (badbuff, nbad, &kmax, &kmin);
      smax = snpm[kmax]->score;
      if (smax > cupt1->score) {
	cupt1->ignore = YES;
	++nkill;
	continue;
      }
      for (k = 0; k < nbad; ++k) {
	j = xbadbuff[k];
	snpm[j]->ignore = YES;
	++nkill;
      }
    }
  }


  free2D (&xx1, 10000);
  free (yy1);
  free (snpxl);
  free (gtypes);
  free (badbuff);
  free (xbadbuff);
  free (x1);
  free (x2);

  // re-initialize scores
  for (i = 0; i < numsnps; i++) {
    cupt = snpm[i];
    cupt->score = 0.0;
  }

  printf ("killr2 complete\n");
  return nkill;
}


/* ---------------------------------------------------------------------------------------------------- */
int
vvadjust (double *cc, int n, double *pmean)
{
  // take off mean  force missing to zero 
  // simpler version of vadjust 

  double ynum, ysum, y, ymean;
  int i, nmiss = 0;

  ynum = ysum = 0.0;
  for (i = 0; i < n; i++) {
    y = cc[i];
    if (y < 0.0) {
      ++nmiss;
      continue;
    }
    ++ynum;
    ysum += y;
  }
  if (ynum <= 1.5) {
    // no data or monomorphic 
    vzero (cc, n);
    if (pmean != NULL)
      *pmean = ysum / (ynum + 1.0e-8);
    return nmiss;
  }
  ymean = ysum / ynum;
  for (i = 0; i < n; i++) {
    y = cc[i];
    if (y < 0.0)
      cc[i] = 0.0;
    else
      cc[i] -= ymean;
  }
  if (pmean != NULL)
    *pmean = ymean;
  return nmiss;

}




/* ---------------------------------------------------------------------------------------------------- */
static int
setskipit (char *sx)
{
  int skipit = NO;
  if (sx[0] == '#')
    skipit = YES;
  if (strcmp (sx, "SNP_ID") == 0)
    skipit = YES;
  if (strcmp (sx, "Indiv_ID") == 0)
    skipit = YES;
  if (strcmp (sx, "Chr") == 0)
    skipit = YES;
  return skipit;
}



/* ---------------------------------------------------------------------------------------------------- */
int
inpack2 (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps, int numind)
{
  // load up packed genotype file for merge.

  char **arrx, junk[10];
  int n, num, ihash, shash, i, g, j, k, t, g1, g2;
  int xihash, xshash, xnsnp, xnind;
  int nind, nsnp, irec;
  Indiv *indx;
  SNP *cupt, *cupt2;
  SNP xsnp;
  double y;
  unsigned char *buff, *tbuff;
  int fdes, ret;
  char *packit, *pbuff;
  int nbad = 0;
  n = numind;

  ZALLOC (arrx, n, char *);

  // compute hashes to compare with file
  num = 0;
  for (i = 0; i < n; i++) {
    indx = indiv[i];
    arrx[num] = strdup (indx->ID);
    ++num;
  }
  ihash = hasharr (arrx, num);
  nind = num;

  freeup (arrx, num);
  free (arrx);

  n = numsnps;
  ZALLOC (arrx, n, char *);
  num = 0;
  for (i = 0; i < n; i++) {
    cupt = snpm[i];
    arrx[num] = strdup (cupt->ID);
    ++num;
  }
  shash = hasharr (arrx, num);
  nsnp = num;
  freeup (arrx, num);
  free (arrx);

  // rlen  is number of bytes each SNP's data requires in packed format
  y = (double) (nind * 2) / (8 * (double) sizeof (char));
  rlen = nnint (ceil (y));
  rlen = MAX (rlen, 48);
  ZALLOC (buff, rlen, unsigned char);
  ZALLOC (tbuff, rlen, unsigned char);

  // openfile and check readability
  fdes = open (gname, O_RDONLY);
  if (fdes < 0) {
    perror ("open failure");
    fatalx ("(inpack2) bad open %s\n", gname);
  }
  t = read (fdes, buff, rlen);
  if (t < 0) {
    perror ("read failure");
    fatalx ("(inpack2) bad read");
  }
  sscanf ((char *) buff, "GENO %d %d %x %x", &xnind, &xnsnp, &xihash,
	  &xshash);

  if (xnind != nind)
    fatalx ("(inpack2) nind mismatch %d %d \n", nind, xnind);
  if (xnsnp != nsnp)
    fatalx ("(inpack2) nsnp mismatch\n");
  if (xihash != ihash)
    fatalx ("(inpack2) ihash mismatch\n");
  if (xshash != shash)
    fatalx ("(inpack2) shash mismatch\n");


  // now copy genotypes 
  for (i = 0; i < n; i++) {
    t = read (fdes, tbuff, rlen);
    if (t != rlen) {
      perror ("read failure");
      fatalx ("(inpack2) bad data read");
    }
    cupt = snpm[i];
    if (cupt->isfake)
      continue;
    xsnp = *cupt;
    cupt2 = &xsnp;
    cupt2->pbuff = (char *) tbuff;
    for (k = 0; k < numind; ++k) {
      g2 = getgtypes (cupt2, k);	// store in temporary buffer
      if (g2 < 0)
	continue;
      g1 = getgtypes (cupt, k);
      if ((g1 >= 0) && (g1 != g2))
	++nbad;			// inconsistent data
      putgtypes (cupt, k, g2);
    }

    // now check xhets
    for (k = 0; k < numind; ++k) {
      if (cupt->chrom != (numchrom + 1))
	break;
      indx = indiv[k];
      g = getgtypes (cupt, k);
      if (checkxval (cupt, indx, g) == NO) {
	putgtypes (cupt, k, -1);
      }
    }
  }

  free (buff);
  free (tbuff);
  close (fdes);
  return nbad;
}

/* ---------------------------------------------------------------------------------------------------- */
void
getgenos_list (char *genotypelist, SNP ** snpmarkers, Indiv ** indivmarkers,
	       int numsnps, int numindivs, int nignore)
{

  char **fnames, *fn;
  int n;
  int k, nbad, isok;

  dofreeped = NO;
  n = numlines (genotypelist);
  ZALLOC (fnames, n, char *);

  // Read in list of genotype files
  n = getlist (genotypelist, fnames);

  // Load first one the ordinary way
  getgenos (fnames[0], snpmarkers, indivmarkers, numsnps, numindivs, nignore);

  // Load all others 
  for (k = 1; k < n; ++k) {
    fn = fnames[k];
    isok = NO;
    if (ispack (fn)) {
      nbad = inpack2 (fn, snpmarkers, indivmarkers, numsnps, numindivs);
      isok = YES;
    }
    if (iseigenstrat (fn)) {
      nbad = ineigenstrat (fn, snpmarkers, indivmarkers, numsnps, numindivs);
      isok = YES;
    }
    if (nbad > 0)
      printf ("%s genotypes mismatches: %d\n", fn, nbad);
    if (isok == NO)
      fatalx ("file %s must be packed or eigenstrat format\n");
  }

  dofreeped = YES;
  freeped ();

  free (fnames);
}



/* ---------------------------------------------------------------------------------------------------- */
int
setsdpos (SNPDATA * sdpt, int pos)
{
  int t;
  char ss[10], *sx;

  sdpt->ppos = pos;
  strcpy (ss, sdpt->cchrom);
  mkupper (ss);

  sdpt->chimpfudge = chimpmode;

  sx = strstr (ss, "CHR");
  if (sx != NULL)
    sx = ss + 3;
  else
    sx = ss;

  t = strcmp (sx, "2B");
  if (t == 0) {
    sdpt->ppos += 200000000;
    sdpt->chimpfudge = YES;
  }
  t = strcmp (sx, "2A");
  if (t == 0) {
    sdpt->chimpfudge = YES;
  }
  return sdpt->chimpfudge;
}

int
str2chrom (char *sss)
{
  char ss[6];
  if (strlen (sss) > 5)
    fatalx ("bad chrom: %s\n", sss);
  if (strstr (sss, "chr") != NULL) {
    strcpy (ss, sss + 3);
    setchr (YES);
  }
  else
    (strcpy (ss, sss));
  mkupper (ss);
  if (strcmp (ss, "X") == 0)
    return (numchrom + 1);
  if (strcmp (ss, "Y") == 0)
    return (numchrom + 2);
  if (strcmp (ss, "MT") == 0)
    return MTCHROM;
  if (strcmp (ss, "XY") == 0)
    return XYCHROM;
  if (strcmp (ss, "2A") == 0)
    return 2;
  if (strcmp (ss, "2B") == 0)
    return 2;
  if (!isnumword (ss))
    return -1;
  return atoi (ss);
}


/* ---------------------------------------------------------------------------------------------------- */
int
checksize (int numindivs, int numsnps, enum outputmodetype outputmode)
{
  // -1 try packed format

  double y;
  long z;

  if (sizeof (z) == 8)
    checksizemode = NO;
  if (checksizemode == NO)
    return 1;

  y = (double) numindivs;
  y *= (double) numsnps;

  if (y > 8.0e9)
    return -2;

  switch (outputmode) {

  case ANCESTRYMAP:
    if (y > 5.0e7)
      return -1;
    break;
  case EIGENSTRAT:
    if (y > 2.0e9)
      return -1;
    break;
  case PED:
    if (y > 4.0e8)
      return -1;
    break;
  case PACKEDPED:
    break;
  case PACKEDANCESTRYMAP:
    break;
  default:
    fatalx ("unknown outputmode\n");
  }
  return 1;
}

/* ---------------------------------------------------------------------------------------------------- */
int
snprawindex (SNPDATA ** snpraw, int nreal, char *sname)
{
  int k;
  char **ss;

  freesnpindex ();

  // if hash table is not set up, do it now
  if (snprawtab == NO) {
    snprawtab = YES;
    ZALLOC (ss, nreal, char *);
    for (k = 0; k < nreal; k++) {
      ss[k] = strdup (snpraw[k]->ID);
    }

    // hash SNP data (key=SNP name, data=index in snpraw)
    xloadsearch (ss, nreal);
    freeup (ss, nreal);
    free (ss);
  }

  // return index in snpraw    
  k = xfindit (sname);
  return k;
}



/* ---------------------------------------------------------------------------------------------------- */
void
freesnprawindex ()
{
  if (snprawtab == NO)
    return;
  snprawtab = NO;
  xdestroy ();
}


/* ---------------------------------------------------------------------------------------------------- */
void
cntpops (int *count, Indiv ** indm, int numindivs, char **eglist, int numeg)
{
  // count number of samples for each pop
  Indiv *indx;
  int t, k;

  ivzero (count, numeg);
  for (k = 0; k < numindivs; ++k) {
    indx = indm[k];
    if (indx->ignore)
      continue;
    t = indxindex (eglist, numeg, indx->egroup);
    if (t < 0)
      continue;
    ++count[t];
  }
}

/* ---------------------------------------------------------------------------------------------------- */
char *
getpackgenos ()
{
  return packgenos;
}


/* ---------------------------------------------------------------------------------------------------- */
void
clearpackgenos ()
{
  packgenos = NULL;
}


/* ---------------------------------------------------------------------------------------------------- */
void
genocloseit (genofile * gfile)
{

  genofile *gpt;
  SNP *cupt;
  int i;
  gpt = gfile;

  free (gpt->buff);
  for (i = 0; i < gpt->numsnps; i++) {
    cupt = gpt->snpm[i];
    freecupt (&cupt);
  }
  free (gpt->snpm);

  for (i = 0; i < gpt->numindivs; i++) {
    free (gpt->indivm[i]);
  }
  free (gpt->indivm);

  close (gpt->fdes);

}

/* ---------------------------------------------------------------------------------------------------- */
int
genoopenit (genofile ** gfile, char *geno2name, SNP ** snp2m,
	    Indiv ** indiv2m, int numsnp2, int numindiv2, int nignore)
{
  // only one gfile can be open 
  static genofile xfile;
  genofile *gpt;
  double y;
  int rlen, fdes, t;
  static unsigned char *buff;
  int xihash, xshash, xnsnp, xnind;
  int ihash, shash;
  char *gname;
  int nsnp, nind;


  if (geno2name == NULL)
    fatalx ("(genoopenit) null name\n");
  if (!ispack (geno2name))
    fatalx ("(genoopenit) not packed ancestrymap format\n");
  gpt = *gfile = &xfile;
  strcpy (gpt->gname, geno2name);
  gpt->snpm = snp2m;
  gpt->indivm = indiv2m;
  gpt->numsnps = numsnp2;
  gpt->numindivs = numindiv2;

  y = (double) (numindiv2 * 2) / (8 * (double) sizeof (char));
  rlen = nnint (ceil (y));

  gpt->rlen = rlen;
  rlen = MAX (rlen, 48);
  ZALLOC (buff, rlen, unsigned char);
  gpt->buff = buff;

  fdes = open (geno2name, O_RDONLY);
  if (fdes < 0)
    return fdes;
  gpt->fdes = fdes;
  gpt->snpindex = -1;

  t = read (fdes, buff, rlen);
  if (t < 0)
    fatalx ("(genoopenit) bad initial read\n");

  nsnp = numsnp2;
  nind = numindiv2;
  gname = geno2name;

  calcishash (snp2m, indiv2m, nsnp, nind, &ihash, &shash);
  if (hashcheck) {
    sscanf ((char *) buff, "GENO %d %d %x %x", &xnind, &xnsnp, &xihash,
	    &xshash);
    if (xnind != nind)
      fatalx ("OOPS number of individuals %d != %d in input files\n", nind,
	      xnind);
    if (xnsnp != nsnp)
      fatalx ("OOPS number of SNPs %d != %d in input file: %s\n", nsnp, xnsnp,
	      gname);
    if (xihash != ihash)
      fatalx
	("OOPS indiv file has changed since genotype file was created\n");
    if (xshash != shash)
      fatalx ("OOPS snp file has changed since genotype file was created\n");
  }

  return 0;


/* (Real definition is in admutils.h) 

typedef struct {  
 char gname[IDSIZE] ;  
 SNP **snpm ;
 Indiv **indivm ;  
 int numsnps; 
 int numindivs ; 
 int rlen ;
 int fdes ; 
 unsigned char *buff ;
 int snpindex ;
} genofile ;
*/

}

/* ---------------------------------------------------------------------------------------------------- */
int
genoreadit (genofile * gfile, SNP ** pcupt)
{
/*  
 return code 
 < 0 bad read  
 0 EOF 
 rlen good read 
*/
  genofile *gpt;
  SNP *cupt;
  int t, rlen, snum;
  int k;

  cupt = *pcupt = NULL;
  gpt = gfile;
  rlen = gpt->rlen;
  t = read (gpt->fdes, gpt->buff, rlen);
  if (t < 0)
    fatalx ("(genoreadit) bad read \n");
  if (t == 0)
    return 0;
  if (t < gpt->rlen)
    fatalx ("(genoopenit) premature EOF\n");
  ++gpt->snpindex;
  snum = gpt->snpindex;
  cupt = *pcupt = gpt->snpm[snum];
  cupt->tagnumber = snum;
  cupt->pbuff = (char *) gpt->buff;
  cupt->ngtypes = gpt->numindivs;
  if (cupt->gtypes == NULL)
    ZALLOC (cupt->gtypes, 1, int);
  return rlen;
}


/* ---------------------------------------------------------------------------------------------------- */
void
putped (int num)
{
  int *pp;
  int t;

  pp = snporda[num];
  if (pp != NULL)
    free (pp);
  pp = NULL;
  t = numsnporda[num] = numsnpord;
  if (t == 0)
    return;
  ZALLOC (snporda[num], t, int);
  pp = snporda[num];
  copyiarr (snpord, pp, t);
}

/* ---------------------------------------------------------------------------------------------------- */
void
getped (int num)
{
  int *pp;
  int t;

  pp = snpord;
  if (pp != NULL)
    free (pp);
  pp = NULL;
  t = numsnpord = numsnporda[num];
  if (t == 0)
    return;
  ZALLOC (snpord, t, int);
  pp = snpord;
  copyiarr (snporda[num], pp, t);
}

/* ---------------------------------------------------------------------------------------------------- */
void
setbadpedignore ()
{
  badpedignore = YES;
}

/* ---------------------------------------------------------------------------------------------------- */
void
logdeletedsnp (char *snpname, char *cmnt, char *deletesnpoutname)
{
  if (deletesnpoutname != NULL) {
    FILE *fid = fopen (deletesnpoutname, "a");
    fprintf (fid, "%-40s  %-40s\n", snpname, cmnt);
    fclose (fid);
  }
}


void
sortsnps (SNP ** snpa, SNP ** snpb, int n)
{
  SNP **tsnp, *cupt;
  int **snppos, *snpindx;
  int i, k;

  snppos = initarray_2Dint (n, 3, 0);
  ZALLOC (snpindx, n, int);
  ZALLOC (tsnp, n, SNP *);

  for (i = 0; i < n; i++) {
    cupt = snpa[i];
    snppos[i][0] = cupt->chrom;
    snppos[i][1] = nnint ((cupt->genpos) * GDISMUL);
    snppos[i][2] = nnint (cupt->physpos);
  }

  ZALLOC (snpindx, n, int);
  ipsortit (snppos, snpindx, n, 3);

  for (i = 0; i < n; ++i) {
    k = snpindx[i];
    tsnp[i] = snpa[k];
  }

  for (i = 0; i < n; ++i) {
    snpb[i] = tsnp[i];
  }

  free (snpindx);
  free2Dint (&snppos, n);
  free (tsnp);

}


int
setstatuslist (Indiv ** indm, int numindivs, char **smatchlist, int slen)
// return number set 
{
  int i, n, k;
  Indiv *indx;
  char *sx;

  n = 0;

  for (i = 0; i < numindivs; i++) {
    indx = indm[i];
    if (indx->ignore)
      continue;
    sx = indx->egroup;
    if (sx == NULL)
      continue;
    k = indxindex (smatchlist, slen, sx);
    if (k < 0)
      continue;
    indx->affstatus = k + 1;
    ++n;
  }
  return n ;
}










/* doxygen documentation */

/*! \fn int getsnps(char *snpfname, SNP ***snpmarkpt, double spacing,
                    char *badsnpname, int *numignore, int numrisks) 

    \brief Read SNP data from file
    \param snpfname   File name (.snp or .map)
    \param snpmarkpt  Pointer to array of type SNP * to store data in
    \param spacing 
    \param badsnpname Name of file with list of SNPs to ignore (or NULL for none)
    \param numignore  ???
    \param numrisks   ???

    Returns number of SNPs loaded
 */


/*!  \fn  int readsnpdata(SNPDATA **snpraw, char *fname)   
     \brief Read SNP file 
     \param snpraw  Array of (pointers to) type SNPDATA in which to temporarily store data
     \param fname   Name of SNP file

     For each SNP read in, stores data in one element (SNPDATA *) of snpraw.  
     Fills these elements of SNPDATA struct : inputrow (own index), chrom, gpos, ppos, alleles
     Also sets maxgpos[chrom] to highest genetic position in chromosome
 */


/*!  \fn  int readsnpmapdata(SNPDATA **snpraw, char *fname)   
     \brief Read PLINK format SNP file 
     \param snpraw  Array of (pointers to) type SNPDATA in which to temporarily store data
     \param fname   Name of SNP file

     For each SNP read in, stores data in one element (SNPDATA *) of snpraw.  
     Fills these elements of SNPDATA struct : inputrow (own index), chrom, gpos, ppos, alleles
     Also sets maxgpos[chrom] to highest genetic position in chromosome
 */


/*! \fn getsizex(char *fname)
    \brief Count number of non-comment lines in file
    This is the number of SNPs in a .map or .snp file
 */

/*! \fn int ismapfile(char *fname)
    \brief Look at file name to determine whether this is a PLINK .map file
    File is assumed to be PLINK if file extension is .map, .bim or .pedsnp
 */

/*! \fn int ispedfile(char *fname)
    \brief Look at file name to determine whether this is a PLINK .ped file.
    File is assumed to be PLINK if file extension is .ped or .fam
 */

/*! \fn int isbedfile(char *fname)
    \brief Look at file name to determine whether this is a PLINK .bed file.
    File is assumed to be PLINK if file extension is .ped or .fam
 */

/*! \fn static int setskipit(char *sx)   
    \brief Determine whether an input line from the SNP file should be skipped
    \param sx is the first token on the input line
    Skip if this is a comment or a line of column headers.
*/

/*! \fn int numfakes(SNPDATA **snpraw, int *snpindx, int nreal, double spacing)    
    \brief Return (approximate) number of fake SNPs that will be inserted

    Note:  for EIGENSOFT programs, spacing is always set to 0.0 (presumably not for 
    ANCESTRYMAP) which results in a return value of 0.  The fake SNPs are inserted
    so that the genetic distance between adjacent SNPs is not greater than spacing.
 */

/*! \fn  double nextmesh(double val, double spacing)  
    \brief Return least multiple of spacing greater than or equal to val
    (Used by numfakes and loadsnps to count number of fake SNPs.)
 */

/*! \fn double interp (double l, double r, double x, double al, double ar)   
    \brief Return linear interpolant a fractional x between the points (l,al) and (r, ar)
 */


/*! \fn int getindivs(char *indivfname, Indiv ***indmarkpt)    
    \brief Read individual data from file
    \param indivfname  File name
    \param indmarkpt   Pointer to array of type Indiv * in which data is to be stored.
 */

/*! \fn int readindpeddata(Indiv **indivmarkers, char *fname)   {
    \brief Read individual data from file
    \param indivfname  File name
    \param indmarkpt   Pointer to array of type Indiv * in which data is to be stored.
 */

/*! \fn void pedname(char *cbuff, char *sx0, char *sx1)    
    \brief Enforce name length requirements and prepend family names if desired.
 */


/*! \fn int readtldata(Indiv **indivmarkers, int numindivs, char *inddataname)    
    Not used in EIGENSOFT
 */

/*! \fn int readfreqdata(SNP **snpm, int numsnps, char *inddataname)     
    Not used in EIGENSOFT
 */

/*!  \fn int setstatus(Indiv **indm, int numindivs, char *smatch)    
     \brief Call setstatusv with value YES
 */

/*! \fn int setstatusv(Indiv **indm, int numindivs, char *smatch, int val)    
    \brief Set affstatus of all individuals with egroup equal to smatch to value val
    \param indm  Array in which individuals' data is stored
    \param numindivs  Number of individuals in the array
    \param  smatch  String in individual's field egroup to match
    \param  val     Value to set affstatus to
 */

/*! \fn int checksize(int numindivs, int numsnps, enum outputmodetype outputmode) 
    \brief Enforce size limits on genotype data file
  */

/*! \fn int ispack(char *gname)    
    \brief Open file and look for GENO at top.  If it's there, the file is packed (binary)
 */

/*! \fn int iseigenstrat(char *gname)    
    \brief If every line in the file is one "word" (i.e., no white space), the file is 
    assumed to be EIGENSTRAT format
 */

/*! \fn long getgenos(char *genoname, SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, int nignore)    
    \brief Read genotype data from file
    \param genoname      Name of genotype data file
    \param snpmarkers    Array in which SNP data is stored
    \param indivmarkers  Array in which individual data is stored
    \param numsnps       Number of SNPs in snpmarkers
    \param numindivs     Number of individuals in indivmarkers
    \param nignore       ???

    Returns number of genotypes read 

    Note:  Instantiates, uses and destroys the hash table.
 */

/*! \fn void genopedcnt(char *gname, int **gcounts, int nsnp)    
    \brief   Count number of alleles of each type in each SNP

    Return in gcounts[k][n] is number of "n" alleles in SNP k

    (This is used to discover and designate ref/var alleles) 

 */

/*! \fn void setgref(int **gcounts, int nsnp, int *gvar, int *gref)    
    \brief  Designate reference and variant alleles by looking at allele counts

    (This is for use with PED files)

 */

/*! \fn long getgenos(char *genoname, SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, int nignore)    
    \brief Read genotype data from PLINK .ped format file

    \param gname         Name of genotype data file
    \param snpmarkers    Array in which SNP data is stored
    \param indivmarkers  Array in which individual data is stored
    \param numsnps       Number of SNPs in snpmarkers
    \param numindivs     Number of individuals in indivmarkers
    \param nignore       ???

    Returns number of genotypes read 

    Note:  Instantiates, uses and destroys the hash table.
 */

/*! \fn int getbedgenos(char *gname, SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, int nignore)    
     \brief Read genotype data from PLINK .bed format file

    \param gname         Name of genotype data file
    \param snpmarkers    Array in which SNP data is stored
    \param indivmarkers  Array in which individual data is stored
    \param numsnps       Number of SNPs in snpmarkers
    \param numindivs     Number of individuals in indivmarkers
    \param nignore       ???

    Returns number of genotypes read 

    Note:  Instantiates, uses and destroys the hash table.
 */

 /*! \fn int inpack(char *gname, SNP **snpm, Indiv **indiv, int numsnps, int numind)    
    \brief Read genotype data from packed ANCESTRYMAP format file

    \param gname         Name of genotype data file
    \param snpmarkers    Array in which SNP data is stored
    \param indivmarkers  Array in which individual data is stored
    \param numsnps       Number of SNPs in snpmarkers
    \param numindivs     Number of individuals in indivmarkers

    Returns number of genotypes read 

    Note:  Instantiates, uses and destroys the hash table.
  */

/*! \fn void cleargdata(SNP **snpmarkers, int numsnps, int numindivs)
    \brief Wipe out all genotype data
 */

/*! \fn rmindivs(SNP ** snpm, int numsnps, Indiv **indivmarkers, int numindivs)
    \brief squeeze out ignored individuals

    Return number of individuals retained (not ignored)
 */


/*! \fn void freecupt(SNP **cuppt) 
    \brief Free memory associated with SNP *
 */

/*! \fn void clearind(Indiv **indm, int numind)
    \brief  Re-initialize all individuals
 */

/*! \fn void cleartg(indiv **indm, int nind 
    \brief  Zero out totgamms and totscore fields for all individuals
 */

/*! \fn void dobadsnps(SNPDATA **snpraw, int nreal, char *badsnpname)
    \brief  Read badsnps file and set ignore flag on all bad SNPs
    \param snpraw  Array of initial SNP data structs
    \param nreal   Number of elements in snpraw
    \param badsnpname  Name of badsnp file
 */

/*!  \fn void printsnps(char *snpoutfilename, SNP **snpm, int num, Indiv **indm, int printfake, int printvalids)
     \brief Print SNP output in EIGENSTRAT format
 */

/*! \fn void printalleles(SNP *cupt, FILE *fff) 
    \brief print SNP's alleles 
 */

/*!  \fn void outfiles(char *snpname, char *indname, char *gname, SNP **snpm, Indiv **indiv, 
                int numsnps, int numindx, int packem, int ogmode)
     \brief Determine which output function to call based on user parameter outputmode
     \param snpname   SNP output file name
     \param indname   Individual output file name
     \param gname     Genotype output file name
     \param snpm      Array of SNP data
     \param indiv     Array of individual data
     \param numsnps   Number of elements in snpm
     \param numindx   Number of elements in indiv
     \param packem    not used (used as local variable)
     \param ogmode    flag for PED, print quantitative or group phenotype
 */

/*! \fn void outpack(char *genooutfilename, SNP **snpm, Indiv **nindiv, int numsnps, int numind)
    \brief Print out genotype data in packed ANCESTRYMAP format
    \param genooutfilename  Genotype output file name
    \param snpm  Array of SNP data
    \param indiv Array of individual data
    \param numsnps  Number of elements in snpm
    \param numind   Number of elements in indiv
*/

/*! \fn void outeigenstrat(char *snpname, char *indname, char *gname, SNP **snpm, Indiv **indiv,
  int numsnps, int numind)
    \brief  Print output in EIGENSTRAT format
    \param snpname  SNP output file name
    \param indname  Individual output name
    \param gname    Genotype output name
    \param snpm     Array of SNP data
    \param indiv    Array of individual data
    \param numsnps  Number of elements in snpm
    \param numind   Number of elements in indiv
 */


/*!  \fn void outped(char *snpname, char *indname, char *gname, SNP **snpm, Indiv **indiv, int numsnps, int numind, int ogmode)
     \brief Print output in (unpacked) PED format

     \param snpname  SNP output file name
     \param indname  Individual output file name
     \param gname    Genotype output file name
     \param snpm     Array with SNP data
     \param indiv    Array with individual data
     \param numsnps  Number of elements in snpm
     \param numind   Number of individuals in indiv
     \param ogmode   phenotype output mode (quantitative or discrete)
 */



/*!  \fn void gtox(int g, char *cvals, int *p1, int *p2)
     \brief Get alleles in PED format
     \param g  Diploid genotype (0,1,2 or -1 for "missing")
     \param cvals  Array with char ref and var alleles
     \param p1  Output for first allele
     \param p2  Output for second allele

     If cvals is NULL, return alleles "1" and "2" (i.e., "A" and "C")
     Otherwise, look up actual alleles.  If the diploid is het, the alleles will be printed in the
     order (ref,var) not (var,ref)
 */

/*!  \fn void outindped(char *indname, Indiv **indiv, int numind, int ogmode)
     \brief  Print out individual names in PEDIND format (i.e., first six or seven columns of PED)
     \param indname  Individual output file name
     \param indiv    Array with individual data
     \param numind   Number of elements in indiv
     \param ogmode   Flag for phenotype type (quantitative or discrete)
 */

/*!  \fn void outpackped(char *snpname, char *indname, char *gname, SNP **snpm, Indiv **indiv, 
  int numsnps, int numind, int ogmode)
     \brief Print data in packed PED format (.bed)
     \param snpname  Output SNP file name
     \param indname  Output individual file name
     \param gname    Output genotype file name
     \param snpm     Array with SNP data
     \param indiv    Array with individual data
     \param numsnps  Number of elements in snpm
     \param numind   Number of individuals in indiv
     \param ogmode   Flag for phenotype type (quantitative or discrete)
 */


/*!  \fn void setbedbuff(char *buff, int *gtypes, int numind)  
     \brief Fill buffer with diploid genotypes in BED format
 */


/*!  \fn void bedval(int g)
     \brief Encode diploid genotype into its packed BED equivalent
 */


/*!  \fn void atopchrom(char *ss, int chrom)
     \brief Encode integer chromosome number to its MAP file equivalent
     \param  ss     output chromosome symbol (0-22 or "X" or "Y")  CHANGED 23 24
     \param  chrom  input chromosome symbol (0-24)
 */

/*!  \fn int ptoachrom(char *ss)
     \brief Encode MAP chromosome symbol to its numerical equivalent
     \param  ss     input chromosome symbol (0-22 or "X" or "Y")

     Return chromosome number (0-24)
 */


/*!  \fn void printmap(char *snpname, SNP **snpm, int numsnps, Indiv **indiv)
     \brief Print out SNP data in PLINK .map format
     \param snpname  Output SNP file name
     \param snpm     Array with SNP data
     \param numsnps  Number of elements in snpm
     \param indiv    not used
 */


/*!  \fn int calcishash(SNP **snpm, Indiv **indiv, int numsnps, int numind, int *pihash, int *pshash)
     \brief Calculate hashes on individuals and SNPs (to compare with file values.)
     \param snpm  Array of SNP data
     \param indiv Array if individual data
     \param numsnps  Number of elements in snpm
     \param numind  Number of elements in indiv 
     \param pihash  Output parameter for indiv hash
     \param pshash  Output parameter for SNP hash

     Return number of SNPs plus number if individuals
 */

/*!  \fn void freeped(void)
     \brief destructor for array snpord
 */

/*!  \fn int readinddata(Indiv **indivmarkers, char *fname)
     \brief  Read individual data from input file
     \param indivmarkers  Array to store data in
     \param fname         Individual input file
 */

/*! \fn int readtldata(Indiv **indivmarkers, int numindivs, char *inddataname)
    \brief Read theta/lambda data (for ANCESTRYMAP, not used in EIGENSOFT)
 */

/*! \fn int readfreqata(SNP **snpm, int numsnps, char *inddataname)
    \brief Read allele frequency data (for ANCESTRYMAP, not used in EIGENSOFT)
 */

/*! \fn int checkxval(SNP *cupt, Indiv *indx, int val)
    \brief Check that male X marker is not (invalidly) heterozygous
 */

/*! \fn void clearsnp(SNP *cupt)
    \brief Reinitialize all fields in SNP data structure
 */


/*! \fn int rmindivs(SNP **snpm, int numsnps, Indiv **indivmarkers, int numindivs)
    \brief Squeeze out individuals with ignore flag set.
    \param snpm      Array of SNP data
    \param numsnps   Number of elements in snpm
    \param indivmarkers  Array of individual data
    \param numindivs     Number of elements in indivmarkers
 */


/*! \fn int rmsnps(SNP **snpm, int numsnps)
    \brief Squeeze out SNPs with ignore flag set
    \param snpm      Array of SNP data
    \param numsnps   Number of elements in snpm
 */

/*! \fn void freecupt(SNP **cuppt)
    \brief Free memory associated with SNP data structure
 */

/*! \fn void clearind(Indiv **indm, int numind)
    \brief Free memory associated with all Indiv data structs in array
 */

/*! \fn void cleartg(Indiv **indm, int nind)
    \brief Free memory in two fields of all Indiv data structs in array
 */

/*! \fn void setug(Indiv **indm, int numind, char gender)
    \brief  Set all unknown gender fields to value passed in
 */

/*! \fn void dobadsnps(SNPDATA **snpraw, int nreal, char *badsnpname)
    \brief Remove bad SNPs from array
    \param snpraw     Array of (preliminary) SNP data
    \param nreal      Number of elements in snpraw
    \param badsnpname  Bad SNP file name
*/

/*! \fn int checkfake(char **ss)
    \brief Returns YES if and only if string ss is "fake"
 */

/*! \fn void printsnps(char *snpoutfilename, SNP **snpm, int num, Indiv **indm, int printfake, int printvalids)
    \brief Print ANCESTRYMAP format SNP file.
    \param snpoutfilename    Name of SNP output file
    \param snpm              Array with SNP data
    \param num               Number of SNPs in array
    \param indm              Array with individual data
    \param printfakes        Flag to print fake SNPs
    \param printvalids       Flag to print alleles
 */

/*! \fn void printalleles(SNP *cupt, FILE *fff)
    \brief  Print SNP alleles to file
 */

/*! \fn void printdata(char *genooutfilename, char *indoutfilename, SNP **snpm, 
   Indiv **indiv, int numsnps, int numind, int packem)
    \brief  Print ANCESTRYMAP format genotype file
    \param genooutfilename    Genotype output file name
    \param indoutfilename     Individual output file name
    \param snpm               Array of SNP data
    \param indiv              Array of individual data
    \param numsnps            Number of elements in snpm
    \param numind             Number of elements in indiv
    \param packem             Flag - print in packed mode if set
*/

/*! \fn void outpack(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind)
    \brief  Print packed ANCESTRYMAP format genotype file
    \param genooutfilename    Genotype output file name
    \param snpm               Array of SNP data
    \param indiv              Array of individual data
    \param numsnps            Number of elements in snpm
    \param numind             Number of elements in indiv
*/

/*! \fn int ineigenstrat(char *gname, SNP **snpm, Indiv **indiv, int numsnps, int numind)
    \brief Read EIGENSTRAT genotype file
    \param gname    Genotype input file
    \param snpm     Array of SNP data
    \param indiv    Array of individual data
    \param numsnps            Number of elements in snpm
    \param numind             Number of elements in indiv

    Return number of errors encountered 

 */

/*! \fn void clearepath(char *packp)
    \brief Fill memory with 0xFF
 */


/*! \fn void getsnpsc(char *snpscname, SNP **snpm, int numsnps)
    \brief Read SNP score input file (not used in EIGENSOFT)
 */

/*! \fn void setepath(SNP **snpm, int nsnps)
    \brief Clear packed genotype memory (i.e., set to "missing") and point SNP buffers-pointers to the SNP's position in packed memory.
 */

/*! \fn int getpedgenos(char *gname, SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, int nignore)
    \brief  Read PLINK format genotype file
    \param  gname   Name of genotype input file
    \param  snpmarkers  Array of SNP data
    \param  indivmarkers  Array of individual data
    \param  numsnps       Number of SNPs in snpmarkers
    \param  numindivs     Number of individuals in indivmarkers
    \param  nignore       (not used)
 */


/*! \fn void setgenotypename(char **gname, char *iname)
    \brief Copy PED genotypename from iname to *gname, checking that iname is not "NULL."
 */


/*! \fn int maxlinelength(char *fname)
    \brief Find and return length of longest line in file
 */


/*! \fn char x2base(int x)
    \brief Encode digit to char-type allele (PLINK convention)
 */

/*! \fn int xpedval(char c)
    \brief Encode char-type allele to digit (PLINK convention) 
 */

/*! \fn int pedval(char *sx)  
    \brief Encode char-type allele to digit (PLINK convention)
 */

/*! \fn int ancval(int x)
    \brief Encode BED allele digit to ANCESTRYMAP equivalent
 */

/*! \fn void setomode(enum outputmodetype *outmode, char *omode)
    \brief Set output mode from user parameter omode (default is packed ANCESTRYMAP)
 */

/*! \fn void decimate(SNP **cbuff, int n, int decim, int mindis, int maxdis)
    \brief (Undocumented feature)  Prune SNPs 
*/

/*! \fn void snpdecimate(SNP **snpm, int nsnp, int decim, int mindis, int maxdis)
    \brief (Undocumented feature)  Prune SNPs 
*/

/*! \fn int killhir2(SNP **snpm, int numsnps, int numind, double physlim, double genlim, double rhothresh)
    \brief Remove one of each pair of SNPs with r-squared greater than rhothresh 
    \param snpm     Array of SNP data
    \param numsnps  Number of SNPs in snpm
    \param numind   Number of individuals in each SNP's genotype data
    \param physlim  Only consider SNP pairs closer than this
    \param genlim   Only consider SNP pairs closer than this
    \param rhothresh  Maximum permissible r-squared value
*/


/*! \fn int vvadjust(double *cc, int n, double *pmean)
    \brief Mean-adjust data in array and force missing data to zero
    \param cc  Array of values to mean-adjust
    \param n   Number of values in array
    \param pmean Output parameter for returning the mean
 */


/*! \fn int inpack2(char *gname, SNP **snpm, Indiv **indiv, int numsnps, int numind)
    \brief Load packed genotype file for merge of genotype files (used by getgenos_list)
    \param gname    Name of input genotype file
    \param snpm     Array of SNP data
    \param indiv    Array of individual data
    \param numsnps  Number of SNPs in snpm
    \param numind   Number of individuals in indiv
 */


/*! \fn void getgenos_list(char *genotypelist, SNP **snpmarkers, Indiv **indivmarkers, int numsnps,
  int numindivs, int nignore)
    \brief (Undocumented feature) Read in data from all genotype files in a list 
    \param genotypelist  File with names of genotype files in it
    \param snpmarkers    Array of SNP data
    \param indivmarkers  Array of individual data
    \param numsnps       Number of SNPs in snpmarkers
    \param numindivs     Number of individuals in indivmarkers
    \param nignore       (not used)
 */


/*! \fn int str2chrom(char *ss)
    \brief Encode string representation of chromosome to digit equivalent
 */


/*! \fn int snprawindex(SNPDATA **snpraw, int nreal, char *sname)
    \brief Return index of SNP with name sname in array snpraw
 */


/*! \fn void freesnprawindex()
    \brief Free hash table used to look up indices in snpraw
 */

/*! \fn void cntpops(int *count, Indiv **indm, int numindivs, char **eglist, int numeg)
    \brief Count number of samples in each population
    \param count Array in which to store counts
    \param indm  Array of individual data
    \param numindivs  Number of individuals in indm
    \param eglist     Array of population names
    \param numeg      Number of individuals in eglist 
 */

/*! \fn int genoopenit(genofile **gfile, char *geno2name, SNP **snp2m, Indiv **indiv2m, int numsnp2,
   int numindiv2, int nignore)
    \brief Not used in EIGENSOFT (obsolete?)
 */

/*! \fn int genoreadit(genofile *gfile, SNP **pcupt)
    \brief Not used in EIGENSOFT (obsolete?)
 */

/*! \fn int putped(int num)
    \brief Store array snpord in snporda
    \param num   Index in snporda in which to store copy of array
 */

/*! \fn void getped(int num)
    \brief Copy array snpord from snporda
    \param num   Index in snporda from which to copy array
 */

/*! \fn int getweights(char *fname, SNP **snpm, int numsnps)
    \brief Read SNP weights from input file
    \param fname  Weight file name
    \param snpm   Array of SNP data
    \param numsnps  Number of SNPs in snpm
    \return Number of weights set
 */


void
setchr (int mode)
{
  chrmode = mode;
}

void
setchimpmode (int mode)
{
  chimpmode = mode;
}

void
ckdup (char **eglist, int n)
{
  if (checkdup (eglist, n)) {
    printdups (eglist, n);
    fatalx ("dup population found!\n");
  }
}
back to top