https://github.com/cran/MADPop
Raw File
Tip revision: 5210dd940c06886b61ca0b5e4f0e50d6cbb717a9 authored by Martin Lysy on 27 October 2018, 14:20:03 UTC
version 1.1.1
Tip revision: 5210dd9
UM.suff.R
#' Sufficient statistics for the Unconstrained-Multinomial model
#'
#' Converts a matrix or data.frame of genotype data into the sufficient statistics required to fit a Dirichlet-Multinomial hierarchical model.
#'
#' @param X Genotype adata.  Either a \code{N x 4} matrix with \code{NA}'s indicating duplicates or a \code{N x 5} column data.frame with the first column being the \code{popId}.
#' @param popId grouping variable for \code{X}. Must be supplied if \code{X} has 4 columns.
#' @return A list with elements:
#' \itemize{
#' \item \code{A}: Vector of unique alleles names.  The allele numbers in the following quantities correspond to the indices of \code{A}.
#' \item \code{G}: 4-column matrix of unique genotype combinations.  The presence of 0's indicates that less than four alleles were amplified indicating that a given genotype either has less than 4 distinct alleles or that some alleles are duplicated.
#' \item \code{tab}: Observed data in a simplified numerical format.  This is a contingency table with rows given by the unique elements of \code{popId} and columns given by each row of \code{G}.
#' }
#' @examples
#' # sufficient statistics in 3 lakes
#'
#' X <- fish215[fish215$Lake %in% c("Hogan", "Manitou", "Simcoe"),]
#' suff <- UM.suff(X)
#'
#' suff$A # allele names
#' suff$G # unique genotypes
#' suff$tab # contingency table
#'@export
UM.suff <- function(X, popId) {
  # get popId
  if(ncol(X) == 5) {
    popId <- X[,1]
    X <- X[,-1]
  } else if(ncol(X) == 4) {
    if(missing(popId)) {
      stop("Need to specify popId, either explicitly or implicitly as first column of X.")
    }
  } else {
    stop("X must be a matrix or data.frame with 4 or 5 columns.")
  }
  # format gene data
  af <- geno.format(X, Y.only = FALSE)
  # create contingency table
  #if(debug) browser()
  # gen <- apply(af$Y, 1,
  #              function(x) paste0(sort(x), collapse = "."))
  gen <- apply(af$Y, 1,
               function(x) paste0(sort(x[x>0]), collapse = "."))
  tab <- cbind(popId = as.character(popId), genotype = gen)
  tab <- table(tab[,1], tab[,2])
  list(A = af$A, G = af$G, tab = tab)
}
back to top