Raw File
#' Kernel estimation of vine copula densities
#'
#' The function estimates a vine copula density using kernel estimators for the
#' pair copulas (based on the \link{kdecopula} package).
#'
#' @param data (\eqn{n x d}) matrix of copula data (have to lie in \eqn{[0,1^d]}).
#' @param matrix R-Vine matrix (\eqn{n x d}) specifying the structure of the vine;
#'  if \code{NA} (default) the structure selection heuristic of Dissman et al.
#'  (2013) is applied.
#' @param method see \code{\link[kdecopula:kdecop]{kdecop}}.
#' @param renorm.iter see \code{\link[kdecopula:kdecop]{kdecop}}.
#' @param mult see \code{\link[kdecopula:kdecop]{kdecop}}.
#' @param test.level significance level for independence test. If you provide a
#' number in \eqn{[0, 1]}, an independence test
#' (\code{\link[VineCopula:BiCopIndTest]{BiCopIndTest}}) will be performed for
#' each pair; if the null hypothesis of independence cannot be rejected, the
#' independence copula will be set for this pair. If \code{test.level = NA}
#' (default), no independence test will be performed.
#' @param trunc.level integer; the truncation level. All pair copulas in trees
#' above the truncation level will be set to independence.
#' @param treecrit criterion for structure selection; defaults to \code{"tau"}.
#' @param cores integer; if \code{cores > 1}, estimation will be parallized
#' within each tree (using \code{\link[foreach]{foreach}}).
#' @param info logical; if \code{TRUE}, additional information about the
#' estimate will be gathered (see \code{\link[kdecopula:kdecop]{kdecop}}).
#'
#' @return An object of class \code{kdevinecop}. That is, a list containing
#' \item{T1, T2, ...}{lists of the estimted pair copulas in each tree,}
#' \item{matrix}{the structure matrix of the vine,}
#' \item{info}{additional information about the fit (if \code{info = TRUE}).}
#'
#' @references
#' Nagler, T., Czado, C. (2016) \cr Evading the curse of
#' dimensionality in nonparametric density estimation with simplified vine
#' copulas. \cr \emph{Journal of Multivariate Analysis 151, 69-89
#' (doi:10.1016/j.jmva.2016.07.003)}
#'
#' Nagler, T., Schellhase, C. and Czado, C. (2017) \cr Nonparametric
#' estimation of simplified vine copula models: comparison of methods
#' arXiv:1701.00845
#'
#' Dissmann, J., Brechmann, E. C., Czado, C., and Kurowicka, D. (2013). \cr
#' Selecting and estimating regular vine copulae and application to financial
#' returns. \cr
#' Computational Statistics & Data Analysis, 59(0):52--69.
#'
#' @seealso
#' \code{\link{dkdevinecop}},
#' \code{\link[kdecopula:kdecop]{kdecop}},
#' \code{\link[VineCopula:BiCopIndTest]{BiCopIndTest}},
#' \code{\link[foreach]{foreach}}
#'
#' @examples
#' data(wdbc, package = "kdecopula")
#' # rank-transform to copula data (margins are uniform)
#' u <- VineCopula::pobs(wdbc[, 5:7], ties = "average")
#' \dontshow{u <- u[1:30, ]}
#' fit <- kdevinecop(u)                   # estimate density
#' dkdevinecop(c(0.1, 0.1, 0.1), fit)     # evaluate density estimate
#' contour(fit)                           # contour matrix (Gaussian scale)
#' pairs(rkdevinecop(500, fit))           # plot simulated data
#'
#' @importFrom kdecopula kdecop hkdecop
#' @importFrom VineCopula BiCopIndTest RVineMatrix TauMatrix
#' @importFrom foreach foreach %dopar%
#' @importFrom parallel detectCores makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @export
kdevinecop <- function(data, matrix = NA, method = "TLL2", renorm.iter = 3L,
                       mult = 1, test.level = NA, trunc.level = NA,
                       treecrit = "tau", cores = 1, info = FALSE) {
    ## adjust input
    if (is.null(info))
        info <- FALSE
    if (is.null(matrix))
        matrix <- NA
    if (is.null(method))
        method <- "TLL2"
    if (is.null(mult))
        mult <- 1
    if (is.null(test.level))
        test.level <- 1
    if (is.na(test.level))
        test.level <- 1
    if (is.null(trunc.level))
        trunc.level <- ncol(data)
    if (is.na(trunc.level))
        trunc.level <- ncol(data)
    if (is.null(treecrit))
        treecrit <- "tau"
    if (is.na(treecrit))
        treecrit <- "tau"
    if (is.null(cores))
        cores <- 1

    data <- as.matrix(data)
    data <- pobs(data, ties.method = "first")
    matrix <- as.matrix(matrix)

    ## sanity checks
    d <- ncol(data)
    n <- nrow(data)
    if (any(data >= 1) || any(data < 0))
        stop("Data has be in the interval [0,1].")
    if (n < 2)
        stop("Number of observations has to be at least 2.")
    if (d < 2)
        stop("Dimension has to be at least 2.")
    if (!(treecrit %in% c("tau", "AIC", "cAIC", "hoeffd")))
        stop("'treecrit' not available; please choose either 'tau', 'AIC', 'cAIC', or 'hoeffd'")

    ## call structure selection routine if no matrix given
    if (any(is.na(matrix)) & d > 2) {
        return(structure_select2(data,
                                 type = 0,
                                 method = method,
                                 mult = mult,
                                 info = info,
                                 struct.crit = treecrit,
                                 test.level = test.level,
                                 trunc.level = trunc.level,
                                 renorm.iter = renorm.iter,
                                 cores = cores))
    } else if (any(is.na(matrix)) & d == 2) {
        # set standard matrix for d = 2 (there is only on possible structure)
        matrix <- matrix(c(2, 1, 0, 1), 2, 2)
    } else if (nrow(matrix) == 1 && matrix == 1) {  # select C-Vine
        return(structure_select2(data,
                                 type = 1,
                                 method = method,
                                 mult = mult,
                                 info = info,
                                 struct.crit = treecrit,
                                 test.level = test.level,
                                 trunc.level = trunc.level,
                                 renorm.iter = renorm.iter,
                                 cores = cores))
    }

    ## check matrix if provided
    if (nrow(matrix) != ncol(matrix))
        stop("Structure matrix has to be quadratic.")
    if (ncol(data) != ncol(matrix))
        stop("Dimensions of data and matrix don't match.")
    if (max(matrix) > nrow(matrix))
        stop("Error in the structure matrix.")

    ## register parallel backend
    if (cores != 1 | is.na(cores)) {
        if (is.na(cores))
            cores <- max(1, detectCores() - 1)
        if (cores > 1) {
            cl <- makeCluster(cores)
            registerDoParallel(cl)
            on.exit(try(stopCluster(), silent = TRUE))
        }
    }

    ##
    M <- ToLowerTri(matrix)
    Mold <- M
    o <- diag(M)
    M <- reorderRVineMatrix(M)
    data <- data[, o[length(o):1]]
    MaxMat <- createMaxMat(M)
    CondDistr <- neededCondDistr(M)

    ## initialize objects
    res <- as.list(numeric(d - 1))
    for (i in 1:(d - 1))
        res[[i]] <- as.list(numeric(d - i))
    llikv <- array(0, dim = c(d, d, n))
    llik <- matrix(0, d, d)
    effp <- matrix(0, d, d)
    AIC <- matrix(0, d, d)
    cAIC <- matrix(0, d, d)
    BIC <- matrix(0, d, d)
    nms <- matrix("", d - 1, d - 1)
    V <- list()
    V$direct <- array(NA, dim = c(d, d, n))
    V$indirect <- array(NA, dim = c(d, d, n))
    V$direct[d, , ] <- t(data[, d:1])

    ## For independence pair-copulas
    indepinfo <- list(effp = 0,
                      likvalues = rep(1, n),
                      loglik = 0,
                      effp = 0,
                      AIC = 0,
                      cAIC = 0,
                      BIC = 0)

    for (k in d:2) {
        doEst <- function(i) {
            if (k > i) {
                m <- MaxMat[k, i]
                zr1 <- V$direct[k, i, ]

                zr2 <- if (m == M[k, i]) {
                    V$direct[k, (d - m + 1), ]
                } else {
                    V$indirect[k, (d - m + 1), ]
                }
                samples <- cbind(zr2, zr1)

                indep <- ifelse(test.level < 1,
                                BiCopIndTest(zr2, zr1)$p.value >= test.level,
                                FALSE)
                if (trunc.level <= (d-k))
                    indep <- TRUE

                if (indep) {
                    cfit <- list()
                    if (info)
                        cfit$info <- indepinfo
                    class(cfit) <- c("kdecopula", "indep.copula")
                } else {
                    cfit <- kdecop(samples,
                                   mult = mult,
                                   method = method,
                                   renorm.iter = renorm.iter,
                                   info = info)
                }

                hfit <- list()
                direct <- indirect <- NULL
                if (CondDistr$direct[k - 1, i]) {
                    direct <- hkdecop(samples,
                                      obj = cfit,
                                      cond.var = 1L)
                }
                if (CondDistr$indirect[k - 1, i]) {
                    indirect <- hkdecop(samples,
                                        obj = cfit,
                                        cond.var = 2L)
                }
                names <- naming(Mold[c(i, k:d), i])
                res.ki <- list(c = cfit, name = names)
                return(list(direct = direct,
                            indirect = indirect,
                            res.ki = res.ki))
            } else {
                return(NULL)
            }
        }
        res.k <- if (cores > 1) {
            foreach(i = 1:(k-1),
                    .export = c("naming"),
                    .packages = c("kdevine", "kdecopula")) %dopar% doEst(i)
        } else lapply(1:(k-1), doEst)

        ## clean up and finalize
        for (i in 1:(d-1)) {
            nums <- Mold[c(i, k:d), i]
            #nums[1:2] <- nums[2:1]
            name <- naming(nums)
            if (any(extract_nums(res.k) == name)) {
                ind <- which(extract_nums(res.k) == name)
                res.ki <- res.k[[ind]]
                res[[d + 1 - k]][[i]] <- res.ki$res.ki
                if (!is.null(res.ki$direct)) {
                    V$direct[k - 1, i, ] <- res.ki$direct
                }
                if (!is.null(res.ki$indirect)) {
                    V$indirect[k - 1, i, ] <- res.ki$indirect
                }

                if (info) {
                    cfit <- res.ki$res.ki$c
                    llikv[k, i, ] <- log(cfit$info$likvalues)
                    llik[k, i] <- cfit$info$loglik
                    effp[k, i] <- cfit$info$effp
                    AIC[k, i] <- -2 * cfit$info$loglik + 2 * effp[k, i]
                    cAIC[k, i] <-
                        AIC[k, i] + (2 * effp[k, i] * (effp[k, i] + 1)) /
                        (n - effp[k, i] - 1)
                    BIC[k, i] <- -2 * cfit$info$loglik + log(n) * effp[k, i]
                }
            }
        } # end i = 1:(d-1)
    } # end k = d:2

    ## finalize results
    res[[d]] <- data
    res[[d + 1]] <- Mold
    res[[d + 2]] <- list(llikv       = apply(llikv, 3, sum),
                         loglik      = sum(llik),
                         pair.loglik = llik,
                         effp        = sum(effp),
                         pair.effp   = effp,
                         AIC         = sum(AIC),
                         pair.AIC    = AIC,
                         cAIC        = sum(AIC) +
                             (2 * sum(effp) * (sum(effp) + 1)) /
                             (n - sum(effp) - 1),
                         pair.cAIC   = cAIC,
                         BIC         = sum(BIC),
                         pair.BIC    = BIC)
    names(res) <- vapply(1:(d - 1), function(x) paste("T", x, sep = ""), "")
    names(res)[d:(d + 2)] <- c("data", "matrix", "info")

    if (!info)
        res[[d + 2]] <- NULL
    ## return results
    class(res) <- "kdevinecop"
    res
}




## required functions from VineCopula package ----------------------------------

normalizeRVineMatrix <- function(RVM) {

    oldOrder <- diag(RVM$Matrix)
    Matrix <- reorderRVineMatrix(RVM$Matrix)

    return(RVineMatrix(Matrix,
                       RVM$family,
                       RVM$par,
                       RVM$par2,
                       names = rev(RVM$names[oldOrder])))
}

reorderRVineMatrix <- function(Matrix) {
    oldOrder <- diag(Matrix)

    O <- apply(t(1:nrow(Matrix)), 2, "==", Matrix)

    for (i in 1:nrow(Matrix)) {
        Matrix[O[, oldOrder[i]]] <- nrow(Matrix) - i + 1
    }

    return(Matrix)
}


createMaxMat <- function(Matrix) {
    if (dim(Matrix)[1] != dim(Matrix)[2])
        stop("Structure matrix has to be quadratic.")

    MaxMat <- reorderRVineMatrix(Matrix)
    n <- nrow(MaxMat)

    for (j in 1:(n - 1)) {
        for (i in (n - 1):j) {
            MaxMat[i, j] <- max(MaxMat[i:(i + 1), j])
        }
    }

    tMaxMat <- MaxMat
    tMaxMat[is.na(tMaxMat)] <- 0
    oldSort <- diag(Matrix)
    oldSort <- oldSort[n:1]

    for (i in 1:n) {
        MaxMat[tMaxMat == i] <- oldSort[i]
    }

    return(MaxMat)
}

neededCondDistr <- function(Vine) {
    if (dim(Vine)[1] != dim(Vine)[2])
        stop("Structure matrix has to be quadratic.")

    Vine <- reorderRVineMatrix(Vine)
    MaxMat <- createMaxMat(Vine)
    d <- nrow(Vine)

    M <- list()
    M$direct <- matrix(FALSE, d, d)
    M$indirect <- matrix(FALSE, d, d)
    M$direct[2:d, 1] <- TRUE

    for (i in 2:(d - 1)) {
        v <- d - i + 1
        bw <- as.matrix(MaxMat[i:d, 1:(i - 1)]) == v
        direct <- Vine[i:d, 1:(i - 1)] == v

        M$indirect[i:d, i] <- apply(as.matrix(bw & (!direct)), 1, any)
        M$direct[i:d, i] <- TRUE
        M$direct[i, i] <- any(as.matrix(bw)[1, ] & as.matrix(direct)[1, ])
    }

    return(M)
}

ToLowerTri <- function(x) {
    ## only change matrix if not already lower triagonal
    if(all(x[lower.tri(x)] == 0)) {
        x[nrow(x):1, ncol(x):1]
    } else {
        x
    }
}
back to top