https://github.com/cran/scModels
Tip revision: 52f2e41e364dbd60836bc82b8e39535e3713f98d authored by Lisa Amrhein on 13 June 2019, 15:10:03 UTC
version 1.0.0
version 1.0.0
Tip revision: 52f2e41
par_est_fns.R
#' Functions to estimate parameters of probability distributions by fitting the distributions using optim()
#'
#' @param x Vector containing the discrete observations
#' @param type Keyword for the probability distribution the data is to be fitted
#' against. Possible values are ("pois", "nb", "pb", "pois2", "nb2",
#' "pb2", "zipois", "zinb", "zipb", "zipois2", "zinb2", "zipb2")
#' @param optim_control List of options to override presets in
#' the optim function; Set to list(maxit = 1000) by default.
#' For more details, please refer to the 'control' parameter in the
#' standard 'optim' function in package 'stats'.
#' @keywords parameter estimation
#' @name fit_params
#' @importFrom stats kmeans optim runif
#' @export
#' @examples
#' x1 <- rnbinom(100, size = 13, mu = 9)
#' p1 <- fit_params(x1, "nb")
#' s <- sample(x = c(0,1), size = 100, replace = TRUE, prob = c(0.3,0.7))
#' x2 <- s*x1 + (1-s)*rnbinom(100, size = 15, mu = 53)
#' p2 <- fit_params(x2, "nb2")
fit_params <- function(x, type, optim_control = list(maxit = 1000)) {
max_iter <- 20
x_mean = mean(x)
################################ base models ######################################
if (type == "pois") {
t <- system.time(o <- optim(par = x_mean, fn = nlogL_pois, data = x, method = "Brent", lower = x_mean-100, upper = x_mean+100, control = optim_control))
}
else if (type == "nb") {
p <- c(1, 1)
t <- system.time(o <- optim(par = p, fn = nlogL_nb, data = x, control = optim_control))
}
else if (type == "pb") {
par <- estimate_pb_optim_init_restarts(x)
t <- system.time(o <- optim(par = par, fn = nlogL_pb, data = x, control = optim_control))
}
################################ Zero-inflated models ###############################
######################################################
else if (type == "zipois") {
optim_restarts <- list()
optim_times <- list()
for(i in 1:max_iter) {
t <- system.time(o <- optim(par = runif(2), fn = nlogL_zipois, data = x, control = optim_control))
optim_restarts[[i]] <- o
optim_times[[i]] <- t
}
t <- system.time(o <- optim(par = c(0, x_mean), fn = nlogL_zipois, data = x, control = optim_control))
optim_restarts[[i+1]] <- o
optim_times[[i+1]] <- t
best_optim <- which.min(unlist(lapply(optim_restarts, function(x) x$value)))
o <- optim_restarts[[best_optim]]
t <- optim_times[[best_optim]]
}
######################################################
else if (type == "zinb") {
optim_restarts <- list()
optim_times <- list()
for(i in 1:max_iter) {
t <- system.time(o <- optim(par = runif(3), fn = nlogL_zinb, data = x, control = optim_control))
optim_restarts[[i]] <- o
optim_times[[i]] <- t
}
p <- c(0, fit_params(x, "nb")$par)
t <- system.time(o <- optim(par = p, fn = nlogL_zinb, data = x, control = optim_control))
optim_restarts[[i+1]] <- o
optim_times[[i+1]] <- t
best_optim <- which.min(unlist(lapply(optim_restarts, function(x) x$value)))
o <- optim_restarts[[best_optim]]
t <- optim_times[[best_optim]]
}
######################################################
else if (type == "zipb") {
p1 <- c(0, estimate_pb_optim_init_restarts(x))
p2 <- c(get_0inf_parameter(x), estimate_pb_optim_init_restarts(x))
nl1 <- nlogL_zipb(x, p1)
nl2 <- nlogL_zipb(x, p2)
par <- if (nl1 < nl2) p1 else p2
t <- system.time(o <- optim(par = par, fn = nlogL_zipb, data = x, control = optim_control))
}
######################################################
################################ 2pop ###############################################
######################################################
else if (type == "pois2") {
optim_restarts <- list()
optim_times <- list()
for(i in 1:max_iter) {
t <- system.time(o <- optim(par = runif(3), fn = nlogL_pois2, data = x, control = optim_control))
optim_restarts[[i]] <- o
optim_times[[i]] <- t
}
t <- system.time(o <- optim(par = c(1, x_mean, x_mean), fn = nlogL_pois2, data = x, control = optim_control))
optim_restarts[[i+1]] <- o
optim_times[[i+1]] <- t
best_optim <- which.min(unlist(lapply(optim_restarts, function(x) x$value)))
o <- optim_restarts[[best_optim]]
t <- optim_times[[best_optim]]
}
######################################################
else if (type == "nb2") {
optim_restarts <- list()
optim_times <- list()
for(i in 1:max_iter) {
t <- system.time(o <- optim(par = runif(5), fn = nlogL_nb2, data = x, control = optim_control))
optim_restarts[[i]] <- o
optim_times[[i]] <- t
}
t1 <- fit_params(x, "nb")$par
t <- system.time(o <- optim(par = c(1, t1, t1), fn = nlogL_nb2, data = x, control = optim_control))
optim_restarts[[i+1]] <- o
optim_times[[i+1]] <- t
best_optim <- which.min(unlist(lapply(optim_restarts, function(x) x$value)))
o <- optim_restarts[[best_optim]]
t <- optim_times[[best_optim]]
}
######################################################
else if (type == "pb2") {
t1<-estimate_pb_optim_init_restarts(x)
p1 <- c(1, t1, t1)
nl1 <- nlogL_pb2(x, p1)
p2 <- estimate_pb2_optim_init_kmeans(x)
nl2 <- nlogL_pb2(x, p2)
if(is.na(nl2)) {
par <- p1
} else {
par <- if (nl1 < nl2) p1 else p2
}
t <- system.time(o <- optim(par = par, fn = nlogL_pb2, data = x, control = optim_control))
}
################################ zi2 ###############################################
######################################################
else if (type == "zipois2"){
optim_restarts <- list()
optim_times <- list()
for(i in 1:max_iter) {
t <- system.time(o <- optim(par = c(runif(2)/2, runif(2)), fn = nlogL_zipois2, data = x, control = optim_control))
optim_restarts[[i]] <- o
optim_times[[i]] <- t
}
t <- system.time(o <- optim(par = c(0, 1, x_mean, x_mean), fn = nlogL_zipois2, data = x, control = optim_control))
optim_restarts[[i+1]] <- o
optim_times[[i+1]] <- t
best_optim <- which.min(unlist(lapply(optim_restarts, function(x) x$value)))
o <- optim_restarts[[best_optim]]
t <- optim_times[[best_optim]]
}
######################################################
else if (type == "zinb2") {
optim_restarts <- list()
optim_times <- list()
for(i in 1:max_iter) {
t <- system.time(o <- optim(par = c(runif(2)/2, runif(4)), fn = nlogL_zinb2, data = x, control = optim_control))
optim_restarts[[i]] <- o
optim_times[[i]] <- t
}
t1 <- fit_params(x, "nb")$par
t <- system.time(o <- optim(par = c(0, 1, t1, t1), fn = nlogL_zinb2, data = x, control = optim_control))
optim_restarts[[i+1]] <- o
optim_times[[i+1]] <- t
best_optim <- which.min(unlist(lapply(optim_restarts, function(x) x$value)))
o <- optim_restarts[[best_optim]]
t <- optim_times[[best_optim]]
}
######################################################
else if (type == "zipb2") {
t1<- estimate_pb_optim_init_restarts(x)
p1 <- c(0, 1, t1, t1)
nl1 <- nlogL_zipb2(x, p1)
p2 <- estimate_zipb2_optim_init_kmeans(x)
nl2 <- nlogL_zipb2(x, p2)
if(is.na(nl2)) {
par <- p1
} else {
par <- if (nl1 < nl2) p1 else p2
}
t <- system.time(o <- optim(par = par, fn = nlogL_zipb2, data = x, control = optim_control))
}
######################################################
else {
warning("Invalid distribution type.")
return(NULL)
}
fit_param <- o
fit_param$time <- t
fit_param$AIC <- 2 * length(o$par) + 2 * o$value
fit_param$BIC <- log(length(x)) * length(o$par) + 2 * o$value
return(fit_param)
}
estimate_pb_optim_init <- function(x, iter = 200) {
sampled_params <- c()
n <- length(x)
for (i in 1:iter) {
d1 <- sample(x, n, replace = TRUE)
e1 <- mean(d1)
e2 <- mean(d1 * ( d1 - 1 ))
e3 <- mean(d1 * (d1 - 1) * (d1 - 2))
r1 <- e1
r2 <- e2 / e1
r3 <- e3 / e2
x1 <- r1 * r2 - 2 * r1 * r3 + r2 * r3
x2 <- r1 - 2 * r2 + r3
alpha <- 2 * r1 * (r3 - r2) / x1
if(alpha < 0 || is.infinite(alpha) || is.na(alpha))
alpha <- runif(1)
cm <- c(alpha, 0, max(d1))
cm[2] <- (function(a, c, m) a * c / m - a)(cm[1], cm[3], mean(d1))
sampled_params <- rbind(sampled_params, cm)
}
return(colMeans(sampled_params))
}
estimate_pb2_optim_init_kmeans <- function(x) {
k <- kmeans(x = x, centers = 2)
c1 <- x[which(k$cluster == 1)]
c2 <- x[which(k$cluster == 2)]
t1 <- tryCatch(
estimate_pb_optim_init(c1),
error = function(err) {
return(runif(3, 1, 100))
}
)
t2 <- tryCatch(
estimate_pb_optim_init(c2),
error = function(err) {
return(runif(3, 1, 100))
}
)
p <- length(c1)/length(x)
par <- c(p,t1, t2)
return(par)
}
estimate_zipb2_optim_init_kmeans <- function(x) {
k <- kmeans(x = x, centers = 2)
c1 <- x[which(k$cluster == 1)]
c2 <- x[which(k$cluster == 2)]
t1 <- tryCatch(
estimate_pb_optim_init(c1),
error = function(err) {
return(runif(3, 1, 100))
}
)
t2 <- tryCatch(
estimate_pb_optim_init(c2),
error = function(err) {
return(runif(3, 1, 100))
}
)
p <- length(c1)/length(x)
par <- c(get_0inf_parameter(x), p,t1, t2)
return(par)
}
estimate_pb_optim_init_restarts <- function(x, n = 10) {
p <- estimate_pb_optim_init(x)
val <- nlogL_pb(x, p)
for (i in 1:n) {
p_temp <- estimate_pb_optim_init(x)
val_temp <- nlogL_pb(x, p_temp)
if(val_temp < val) {
p <- p_temp
val <- val_temp
}
}
return(p)
}
get_0inf_parameter <- function(x) length(c(which(x == 0))) / length(x)