https://github.com/cran/MADPop
Raw File
Tip revision: 5210dd940c06886b61ca0b5e4f0e50d6cbb717a9 authored by Martin Lysy on 27 October 2018, 14:20:03 UTC
version 1.1.1
Tip revision: 5210dd9
hUM.post.R
#' Posterior sampling from a hierarchical Unconstrained-Multinomial model
#'
#' MCMC sampling from a Dirichlet-Multinomial model using \code{\link[rstan]{stan}}.
#'
#' @param nsamples Number of posterior samples
#' @param X 4-column or 5-column matrix of observations in the correct format.  See \code{\link{UM.suff}}.
#' @param popId Optional vector of population identifiers.  See \code{\link{UM.suff}}.
#' @param rhoId Populations for which posterior samples of the genotype probability vector \code{rho} are desired.  Defaults to all populations.  Set \code{rhoId = NULL} not to output these for any populations.
#' @param full.stan.out Logical.  Whether or not to return the full \code{stan} output.  For monitoring convergence of the MCMC sampling.
#' @param ... Further arguments to be passed to the \code{\link[rstan]{sampling}} function in \pkg{rstan}.
#' @return A list with elements
#' \itemize{
#'   \item \code{A}: The unique allele names.
#'   \item \code{G}: The 4-column matrix Package libcurl was not found in the pkg-config search path.of unique genotype combinations.
#'   \item \code{rho}: A matrix with \code{ncol(rho) == nrow(G)}, where each row is a draw from the posterior distribution of inheritance probabilities.
#'   \item \code{sfit}: If \code{full.stan.out = TRUE}, the fitted \code{stan} object.
#' }
#' @details The hierarchical Dirichlet-Multinomial model is given by
#' \deqn{
#'   Y_k \mid \rho_k \sim_{\textrm{ind}} \textrm{Multinomial}(\rho_k, N_k),
#' }{
#'   Y_k | \rho_k ~ind Multinomial(\rho_k, N_k),
#' }
#' \deqn{
#'   \rho_k \sim_{\textrm{iid}} \textrm{Dirichlet}(\alpha).
#' }{
#'   \rho_k ~iid Dirichlet(\alpha).
#' }
#' where \eqn{\alpha_0 = \sum_{i=1}^C \alpha_i} and \eqn{\bar \alpha = \alpha/\alpha_0}{\alpha_bar = \alpha/\alpha_0}.  MCMC sampling is achieved with the \pkg{rstan} package, which is listed as a dependency for \pkg{MADPop} so as to expose \pkg{rstan}'s sophisticated tuning mechanism and convergence diagnostics.
#' @examples
#' # fit hierarchical model to fish215 data
#'
#' # only output posterior samples for lake Simcoe
#' rhoId <- "Simcoe"
#' nsamples <- 500
#' hUM.fit <- hUM.post(nsamples = nsamples, X = fish215,
#'                     rhoId = rhoId,
#'                     chains = 1) # number of MCMC chains
#'
#' # plot first 20 posterior probabilities in lake Simcoe
#' rho.post <- hUM.fit$rho[,1,]
#' boxplot(rho.post[,1:20], las = 2,
#'         xlab = "Genotype", ylab = "Posterior Probability",
#'         pch = ".", col = "grey")
#' @export
hUM.post <- function(nsamples, X, popId, rhoId,
                     full.stan.out = FALSE, ...) {
  suff <- UM.suff(X, popId)
  Xobs <- suff$tab
  if(missing(rhoId)) {
    rhoId <- rownames(Xobs)
  }
  if(is.null(rhoId)) {
    iLrho <- numeric(0)
  } else {
    iLrho <- sapply(rhoId, function(ri) {
      if(ri %in% rownames(Xobs)) {
        ans <- which(ri == rownames(Xobs))
      } else {
        ans <- 0
      }
      ans
    })
    if(any(iLrho == 0)) stop("rhoId must be a subset of popId.")
  }
  nLrho <- length(iLrho)
  sfit <- sampling(object = stanmodels$DirichletMultinomial,
                   data = list(nG = ncol(Xobs), nL = nrow(Xobs),
                     nLrho = nLrho, iLrho = array(iLrho, dim = nLrho),
                     X = Xobs),
                   iter = nsamples, ...)
  #alpha <- extract(sfit, permuted = TRUE)$alpha
  #colnames(alpha) <- colnames(Xobs)
  ans <- list(A = suff$A, G = suff$G)
  if(nLrho > 0) {
    rho <- extract(sfit, permuted = TRUE)$rho
    dimnames(rho)[[2]] <- rownames(Xobs)[iLrho]
    dimnames(rho)[[3]] <- colnames(Xobs)
    ans <- c(ans, list(rho = rho))
  }
  if(full.stan.out) ans <- c(ans, list(sfit = sfit))
  ans
}
back to top