#' Negative log Likelihood functions for Poisson, negative binomial and #' Poisson-beta distributions #' #' The negative log Likelihood functions for Poisson, negative binomial #' and Poisson-beta distributions. Mixing two distributions of the same #' kind and/or adding zero-inflation allows to take characteristics #' of real data into account. #' Additionally, one population and two population mixtures - with and #' without zero-inflations - allow distribution fittings of the Poisson, #' negative binomial and the Poisson-beta distribution. #' #' #' @details #' Functions nlogL_pois, nlogL_nb, nlogL_pb compute the negative #' log-likelihood of Poisson, negative binomial and the Poisson-beta #' distributions given the data. #' Functions nlogL_pois2, nlogL_nb2 and nlogL_pb2 compute the negative #' log-likelihood values for a two population mixture of distributions whereas #' nlogL_zipois, nlogL_zinb, nlogL_zipb compute the same for the #' zero-inflated distributions. Furthermore, nlogL_zipois2, nlogL_zinb2 #' and nlogL_zipb2 are for two population mixtures with zero-inflation. #' @param data Vector containing the discrete observations #' @param par.pois Scalar containing the lambda parameter #' of the Poisson distribution #' @param par.nb Vector of length 2, containing the size and the mu #' parameter of the negative binomial distribution #' @param par.pb Vector of length 3, containing the alpha, beta #' and c parameter of the Poisson-beta distribution #' @param par.pois2,par.nb2,par.pb2 Vector containing the parameters #' of the two mixing distributions. First entry represents the #' fraction of the first distribution, followed by all parameters #' of the first, then all of the second distribution. #' @param par.zipois,par.zinb,par.zipb Vector containing the respective #' zero-inflated distribution parameters. The additional first #' entry is the inflation parameter for all cases. #' @param par.zipois2,par.zinb2,par.zipb2 Parameters for the zero-inflated #' two population model. #' #' @keywords likelihood negative binomial Poisson-beta #' #' @name nlogL #' @importFrom stats dpois dnbinom rnorm #' @export #' @examples #' x <- rpois(100, 11) #' nl1 <- nlogL_pois(x, 11) #' nl2 <- nlogL_pois(x, 13) nlogL_pois <- function(data, par.pois) { if (par.pois <= 0) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else { nl <- -sum(dpois(x = data, lambda = par.pois, log = TRUE)) if (is.infinite(nl)) return (nl_inf + (rnorm(1, 10000, 20) ^ 2)) else return(nl) } } #' @rdname nlogL #' @export #' @examples #' x <- rnbinom(100, size = 13, mu = 9) #' nl <- nlogL_nb(x, c(13, 9)) nlogL_nb <- function(data, par.nb) { if (par.nb[1] <= 0 || par.nb[2] < 0) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else { nl <- -sum(dnbinom(x = data, size = par.nb[1], mu = par.nb[2], log = TRUE)) if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else return(nl) } } #' @rdname nlogL #' @export #' @examples #' x <- rpb(n = 1000, alpha=5, beta= 3, c=20) #' nl <- nlogL_pb(x, c(5, 3, 20)) nlogL_pb <- function(data, par.pb) { if (par.pb[1] < 0 || par.pb[2] < 0 || par.pb[3] <= 0) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else { nl <- -sum(dpb(x = data, alpha = par.pb[1], beta = par.pb[2], c = par.pb[3], log = TRUE)) if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else return(nl) } } sum_2pop_terms <- function(t1, t2) { b1 <- (t1)/log(10) -300 b3 <- (t2)/log(10) -300 b13 <- pmax(b1,b3) b2 <- (t1)/log(10) +300 b4 <- (t2)/log(10) +300 b24 <- pmin(b2,b4) b <- rowMeans(matrix(c(b1*is.finite(b1),b2*is.finite(b2),b3*is.finite(b3),b4*is.finite(b4)),ncol = 4, byrow= FALSE), na.rm = TRUE ) t1_b <- (t1 / log(10)-b) t2_b <- (t2 / log(10)-b) t_b_check_1<- (t2_b - t1_b > 600) t_b_check_2<- (t1_b - t2_b > 600) nl <- sum( b[!t_b_check_1 & !t_b_check_2]*log(10) + log( 10 ^(t1[!t_b_check_1 & ! t_b_check_2] / log(10)-b[!t_b_check_1 & !t_b_check_2]) + 10 ^(t2[ !t_b_check_1 & !t_b_check_2] / log(10)-b[!t_b_check_1 & !t_b_check_2])))+ sum( (t2[t_b_check_1] )) + sum( (t1[t_b_check_2] )) return(-nl) } #' @rdname nlogL #' @export #' @examples #' s <- sample(x = c(0,1), size = 100, replace = TRUE, prob = c(0.3,0.7)) #' x <- s*rpois(100, 7) + (1-s)*rpois(100, 13) #' nl1 <- nlogL_pois2(x, c(0.7, 13, 7)) #' nl2 <- nlogL_pois2(x, c(0.3, 7, 13)) #' ## both values should be same: 296.9517 nlogL_pois2 <- function(data, par.pois2) { if (par.pois2[2] <= 0 || par.pois2[3] <= 0 || par.pois2[1] < 0 || par.pois2[1] > 1) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else if (par.pois2[1] == 0) { new.par.pois2 <- c(1, par.pois2[c(3, 2)]) return(nlogL_pois2(data, new.par.pois2)) } else { t1 <- log(par.pois2[1])+dpois(x = data, lambda = par.pois2[2], log = TRUE) t2 <- log(1-par.pois2[1])+dpois(x = data, lambda = par.pois2[3], log = TRUE) nl <- sum_2pop_terms(t1, t2) if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } #' @rdname nlogL #' @export #' @examples #' s <- sample(x = c(0,1), size = 100, replace = TRUE, prob = c(0.3,0.7)) #' x <-s*rnbinom(100, size = 13, mu = 9) + (1-s)*rnbinom(100, size = 17, mu = 29) #' nl <- nlogL_nb2(x, c(0.7, 17, 29, 13, 9)) nlogL_nb2 <- function(data, par.nb2) { if (par.nb2[2] <= 0 || par.nb2[3] < 0 || par.nb2[4] <= 0 || par.nb2[5] < 0 || par.nb2[1] < 0 || par.nb2[1] > 1) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else if (par.nb2[1] == 0) { new.par.nb2 <- c(1, par.nb2[c(4, 5, 2, 3)]) return(nlogL_nb2(data, new.par.nb2)) } else { t1 <- log(par.nb2[1]) + dnbinom(x = data,size = par.nb2[2],mu = par.nb2[3],log = TRUE) t2 <- log(1-par.nb2[1]) + dnbinom(x = data,size = par.nb2[4],mu = par.nb2[5],log = TRUE) nl <- sum_2pop_terms(t1, t2) if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else return(nl) } } #' @rdname nlogL #' @export #' @examples #' s <- sample(x = c(0,1), size = 100, replace = TRUE, prob = c(0.3,0.7)) #' x <- s*rpb(100, 5, 3, 20) + (1-s)*rpb(100, 7, 13, 53) #' nl <- nlogL_pb2(x, c(0.7, 7, 13, 53, 5, 3, 20)) nlogL_pb2 <- function(data, par.pb2) { if (par.pb2[2] < 0 || par.pb2[3] < 0 || par.pb2[4] <= 0 || par.pb2[5] < 0 || par.pb2[6] < 0 || par.pb2[7] <= 0 || par.pb2[1] < 0 || par.pb2[1] > 1) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else if (par.pb2[1] == 0) { new.par.pb2 <- c(1, par.pb2[c(5:7, 2:4)]) return(nlogL_pb2(data, new.par.pb2)) } else { t1 <- log(par.pb2[1]) + dpb(x = data, alpha = par.pb2[2], beta = par.pb2[3], c = par.pb2[4], log = TRUE) t2 <- log(1-par.pb2[1]) + dpb(x = data, alpha = par.pb2[5], beta = par.pb2[6], c = par.pb2[7], log = TRUE) nl <- sum_2pop_terms(t1, t2) if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } #' @rdname nlogL #' @export #' @examples #' x <- c(rep(0, 10), rpois(90, 7)) #' nl <- nlogL_zipois(x, c(0.1, 7)) nlogL_zipois <- function(data, par.zipois) { if (par.zipois[2] <= 0 || par.zipois[1] < 0 || par.zipois[1] > 1) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else { n <- length(data) n0 <- length(which(data == 0)) non_zero <- data[which(data != 0)] if(n0 == 0) { t1 = 0 } else { l <- log(par.zipois[1] + (1 - par.zipois[1]) * exp(-par.zipois[2])) # This can happen when the inflation parameter is 0 and the probability # of a 0 is very low. For an explanation, see this: #>>>> > exp(-1259) #>>>> [1] 0 if(is.nan(l)){ t1 = -par.zipois[2] } else { t1 <- l } } nl <- n0 * t1 + (n - n0) * log(1 - par.zipois[1]) + sum(dpois(x = non_zero, lambda = par.zipois[2], log = TRUE)) nl <- -nl if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } #' @rdname nlogL #' @export #' @examples #' x <- c(rep(0,10), rnbinom(90, size = 13, mu = 9)) #' nl <- nlogL_zinb(x, c(0.1, 13, 9)) nlogL_zinb <- function(data, par.zinb) { if (par.zinb[2] <= 0 || par.zinb[3] < 0 || par.zinb[1] < 0 || par.zinb[1] > 1) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else { n <- length(data) n0 <- length(which(data == 0)) non_zero <- data[which(data != 0)] if(n0 == 0) { t1 = 0 } else { l <- log(par.zinb[1] + (1 - par.zinb[1])*dnbinom(0, size = par.zinb[2], mu = par.zinb[3])) # Motivated by conclusions from zipois, but the same scenario # occurs in less cases here. The nestorowa dataset had convergent fits # for all cases without this condition. if(is.nan(l)){ t1 = dnbinom(0, size = par.zinb[2], mu = par.zinb[3], log = TRUE) } else { t1 <- l } } nl <- n0 * t1 + (n-n0)*log(1-par.zinb[1])+sum(dnbinom(x = non_zero, size = par.zinb[2], mu = par.zinb[3], log = TRUE)) nl <- -nl if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } #' @rdname nlogL #' @export #' @examples #' x <- c(rep(0, 10), rpb(n = 90, alpha=5, beta= 3, c=20)) #' nl <- nlogL_zipb(x, c(0.1, 5, 3, 20)) nlogL_zipb <- function(data, par.zipb) { if (par.zipb[2] < 0 || par.zipb[3] < 0 || par.zipb[4] <= 0 || par.zipb[1] < 0 || par.zipb[1] > 1) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else { n <- length(data) n0 <- length(which(data == 0)) non_zero <- data[which(data != 0)] if(n0 == 0) { t1 = 0 } else{ l <- log(par.zipb[1] + (1 - par.zipb[1])*dpb(0, par.zipb[2], par.zipb[3], par.zipb[4])) if(is.nan(l)){ # Going by the change from zipois to zinb [more parameters => better fit?], # maybe even lesser cases happen here. Inference not tested. t1 = dpb(0, par.zipb[2], par.zipb[3], par.zipb[4], log = TRUE) } else { t1 <- l } } nl <- n0 * t1 + (n-n0)*log(1-par.zipb[1])+sum(dpb(x = non_zero, par.zipb[2], par.zipb[3], par.zipb[4], log = TRUE)) nl <- -nl if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } #' @rdname nlogL #' @export #' @examples #' s <- sample(x = c(0,1), size = 90, replace = TRUE, prob = c(0.3,0.7)) #' x <- c(rep(0, 10), s*rpois(90, 7) + (1-s)*rpois(90, 13)) #' nl1 <- nlogL_zipois2(x, c(0.1, 0.63, 13, 7)) nlogL_zipois2 <- function(data, par.zipois2) { if (par.zipois2[1] < 0 || par.zipois2[1] > 1 || par.zipois2[2] < 0 || par.zipois2[2] > 1 || par.zipois2[1] + par.zipois2[2] > 1 || par.zipois2[3] <= 0 || par.zipois2[4] <= 0) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else if (par.zipois2[2] == 0) { new.par.zipois2 <- c(par.zipois2[1], 1- par.zipois2[1], par.zipois2[c(4, 3)]) return(nlogL_zipois2(data, new.par.zipois2)) } else { n <- length(data) n0 <- length(which(data == 0)) non_zero <- data[which(data != 0)] t1 <- log(par.zipois2[2])+dpois(x = 0, lambda = par.zipois2[3], log = TRUE) t2 <- log(1-(par.zipois2[1]+par.zipois2[2]))+dpois(x = 0, lambda = par.zipois2[4], log = TRUE) nl_zero <- -sum_2pop_terms(t1, t2) t1 <- log(par.zipois2[2])+dpois(x = non_zero, lambda = par.zipois2[3], log = TRUE) t2 <- log(1-(par.zipois2[1]+par.zipois2[2]))+dpois(x = non_zero, lambda = par.zipois2[4], log = TRUE) nl_non_zero <- -sum_2pop_terms(t1, t2) # reduce expression when no zero-inflation if(par.zipois2[1] == 0) nl <- n0 * nl_zero + nl_non_zero else nl <- n0 * log(par.zipois2[1] + exp(nl_zero ) ) + nl_non_zero nl <- -nl if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } #' @rdname nlogL #' @export #' @examples #' s <- sample(x = c(0,1), size = 90, replace = TRUE, prob = c(0.3,0.7)) #' x <- c(rep(0, 10), s*rnbinom(90, size = 13, mu = 9) + (1-s)*rnbinom(90, size = 17, mu = 29)) #' nl <- nlogL_zinb2(x, c(0.1, 0.63, 17, 29, 13, 9)) nlogL_zinb2 <- function(data, par.zinb2) { if (par.zinb2[1] < 0 || par.zinb2[1] > 1 || par.zinb2[2] < 0 || par.zinb2[2] > 1 || par.zinb2[1] + par.zinb2[2] > 1 || par.zinb2[3] <= 0 || par.zinb2[4] < 0 || par.zinb2[5] <= 0 || par.zinb2[6] < 0) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else if (par.zinb2[2] == 0) { new.par.zinb2 <- c(par.zinb2[1], 1 - par.zinb2[1], par.zinb2[c(5, 6, 3, 4)]) return(nlogL_zinb2(data, new.par.zinb2)) } else { n <- length(data) n0 <- length(which(data == 0)) non_zero <- data[which(data != 0)] t1 <- log(par.zinb2[2]) + dnbinom(x = 0, size = par.zinb2[3], mu = par.zinb2[4], log = TRUE) t2 <- log(1 - (par.zinb2[1] + par.zinb2[2])) + dnbinom(x = 0, size = par.zinb2[5], mu = par.zinb2[6], log = TRUE) nl_zero <- -sum_2pop_terms(t1, t2) t1 <- log(par.zinb2[2]) + dnbinom(x = non_zero, size = par.zinb2[3], mu = par.zinb2[4], log = TRUE) t2 <- log(1 - (par.zinb2[1] + par.zinb2[2])) + dnbinom(x = non_zero, size = par.zinb2[5], mu = par.zinb2[6], log = TRUE) nl_non_zero <- -sum_2pop_terms(t1, t2) if(par.zinb2[1] == 0) nl <- n0 * nl_zero + nl_non_zero else nl <- n0 * log(par.zinb2[1] + exp(nl_zero ) ) + nl_non_zero nl <- -nl if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } #' @rdname nlogL #' @export #' @examples #' s <- sample(x = c(0,1), size = 90, replace = TRUE, prob = c(0.3,0.7)) #' x <- c(rep(0,10), s*rpb(90, 5, 3, 20) + (1-s)*rpb(90, 7, 13, 53)) #' nl <- nlogL_zipb2(x, c(0.1, 0.63, 7, 13, 53, 5, 3, 20)) nlogL_zipb2 <- function(data, par.zipb2) { if (par.zipb2[1] < 0 || par.zipb2[1] > 1 || par.zipb2[2] < 0 || par.zipb2[2] > 1 || par.zipb2[1] + par.zipb2[2] > 1 || par.zipb2[3] < 0 || par.zipb2[4] < 0 || par.zipb2[5] <= 0 || par.zipb2[6] < 0 || par.zipb2[7] < 0 || par.zipb2[8] <= 0) { return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) } else if (par.zipb2[2] == 0) { new.par.zipb2 <- c(par.zipb2[1], 1 - par.zipb2[1], par.zipb2[c(6:8, 3:5)]) return(nlogL_zipb2(data, new.par.zipb2)) } else { n <- length(data) n0 <- length(which(data == 0)) non_zero <- data[which(data != 0)] t1 <- log(par.zipb2[2]) + dpb(x = 0, alpha = par.zipb2[3], beta = par.zipb2[4], c = par.zipb2[5], log = TRUE) t2 <- log(1 - (par.zipb2[1] + par.zipb2[2])) + dpb(x = 0, alpha = par.zipb2[6], beta = par.zipb2[7], c = par.zipb2[8], log = TRUE) nl_zero <- -sum_2pop_terms(t1, t2) t1 <- log(par.zipb2[2]) + dpb(x = non_zero, alpha = par.zipb2[3], beta = par.zipb2[4], c = par.zipb2[5], log = TRUE) t2 <- log(1 - (par.zipb2[1] + par.zipb2[2])) + dpb(x = non_zero, alpha = par.zipb2[6], beta = par.zipb2[7], c = par.zipb2[8], log = TRUE) nl_non_zero <- -sum_2pop_terms(t1, t2) if(par.zipb2[1] == 0) nl <- n0 * nl_zero + nl_non_zero else nl <- n0 * log(par.zipb2[1] + exp(nl_zero ) ) + nl_non_zero nl <- -nl if (is.infinite(nl)) return(nl_inf + (rnorm(1, 10000, 20) ^ 2)) else{ return(nl) } } } nl_inf <- 1e+100