## Copyright (C) 2010 Marius Hofert and Martin Maechler ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . require(nacopula) set.seed(1) n <- 2000 N <- 100 # number of variates used for the Kolmogorov-Smirnov test ## maximal deviation for deciding if sample versions of Kendall's tau are ## "close enough" to population versions; NB: depends on 'n' eps.tau <- 0.06 doPlots <- (Sys.getenv("USER") == "maechler") ## ==== 3d check functions ===================================================== ##' correlation check function and run time measuring ##' @param n number of variates to be drawn ##' @param th0 theta0 ##' @param th1 theta1 ##' @param cop copula ##' @return a list containing run times for V0 and V01 and Kendall's taus ##' @author Marius Hofert, Martin Maechler corCheck <- function(n, th0,th1, cop) { mat <- matrix(0,nrow = n,ncol = 3) V0time <- system.time(V0 <- cop@V0(n,th0)) V01time <- system.time(V01 <- cop@V01(V0,th0,th1)) mat <- cbind(runif(n), exp(-V0*cop@psiInv(cop@psi(rexp(n)/V01,th1),th0)), exp(-V0*cop@psiInv(cop@psi(rexp(n)/V01,th1),th0))) mat[,] <- cop@psi(-log(mat[,])/V0,th0) list(V0time,V01time, name = cop@name, cor = cor(mat, method = "kendall")) } ##' create output prt.tau.diff <- function(c1, c2){ stopifnot(is.matrix(c1)) delta.c <- 1000 * abs(c1 - c2) cat(sprintf("Max & Mean distance * 1000 to true pairwise Kendall's taus: %7.1f %7.1f\n", max(delta.c), mean(delta.c))) invisible() } ##' create output corCheckout <- function(x, trCorr, famName = x$name) { cat(sprintf("Time [ms] V0 for '%s': %7.1f\n", famName, 1000*x[[1]][1])) cat(sprintf("Time [ms] V01 for '%s': %7.1f\n", famName, 1000*x[[2]][1])) prt.tau.diff(x[["cor"]], trCorr) ; cat("\n") } ##' Function implementing the chi^2 test ##' @param n [integer] sample size ##' @param N [integer] number of replications ##' @param cop nacopula to generate from ##' @param nInt positive integer: the number of intervals used for each grid ##' dimension ##' @return an "chiSqChk_cop" object; just a list(...) and a print method ##' @author Marius Hofert, Martin Maechler chiSq_check_cop <- function(n,N,cop,nInt, verbose = interactive()){ copName <- deparse(substitute(cop)) # copula name d <- dim(cop) # copula dimension stopifnot(is.numeric(d), d >= 1, is.numeric(nInt), nInt >= 1) pts <- (1:nInt)/nInt # (upper) division points [lower = upper - h # = {0, ..., 1-h}; h=1/nInt] mygrid <- do.call("expand.grid", rep.int(list(pts), d)) # build grid m <- nInt^d # == grid length == nrow(mygrid) v.cube <- nInt^(0:(d-1)) ## build a function that returns the number of the cube in which each row ## of U falls cube <- function(U, pts) { di <- dim(U) intervals <- array(cut(U, breaks = pts, include.lowest = TRUE, labels = FALSE), # find "interval number" for # each component of U; these numbers # are in {NA,1,2,3,...,nInt-1} dim = di) intervals[is.na(intervals)] <- 0 # NAs correspond to smallest interval as.vector(intervals %*% v.cube) + 1 } ## determine the expected number of observations in each cube prob_up <- function(u) { ## probability mass in cube with upper corner 'u' mesh <- 1/nInt l <- u - mesh prob(cop, l, u) } masscube <- apply(mygrid, 1, prob_up) E_nobs <- n * masscube # expected number of observations in each cube ## now simulate data, count observations in each cube, and compute test ## statistic k <- 0 CPU <- system.time({ T <- replicate(N, { if(verbose) cat(sprintf("%2d%1s",{k <<- k+1}, if(k %% 20)"" else "\n")) U <- rnacopula(n,cop) # generate data cubenumbers <- cube(U,pts) # for each row vector of U, find the # number of the cube in which the # vector falls nobs <- tabulate(cubenumbers, nbins = m) # number of observations # in each cube sum((nobs - E_nobs)^2 / E_nobs) # chi^2 test statistic }); if(verbose) cat("done\n") })[1] structure(class = "chiSqChk_cop", list( ## compute the result of the Kolmogorov-Smirnov test based ## on the N realizations of the chi^2 test statistics: ks = ks.test(T, "pchisq", df = m-1), T = T, CPU = CPU, n=n, N=N, copName = copName, m = m, ## percentage of cubes that fulfill the rule of thumb: percentrot = (sum(E_nobs >= 5)/m)*100 )) } ##' a print method for this class: print.chiSqChk_cop <- function(x, ...) { stopifnot(is.list(x), all(c("ks","T","CPU","n","N") %in% names(x)), is.numeric(pv <- x$ks[[2]])) cat(sprintf("%s (3d)NAcopula (n=%d):\n -- %s (N=%d): %s\n -- ", x$copName, x$n, "P-value of the chi-square test", x$N, format.pval(pv)), sprintf("Percentage fulfilling chi^2 rule of thumb: %4.1f\n -- ", x$percentrot), sprintf("CPU (user) time needed = c(N,n; cop) = %8.1f [ms]\n", 1000 * x$CPU), sep="") if(pv < 0.05) { if(pv < 0.01) cat("\n*************** P-value < 0.01 <<<<<<<<<<<<<<<<<<<<<<<<\n", "\n*************** ============== <<<<<<<<<<<<<<<<<<<<<<<<\n\n") else cat("\n*** > > > P-value < 0.05 <<<<<<<<<<<<<<<<<<<<<<<<\n\n") stopifnot(pv > 0.001) } invisible(x) } ##' compute the probability to fall in a cube with lower point l and upper point ##' u for d=3 probin3dcube <- function(cop,l,u) { pnacopula(cop,u)+ - pnacopula(cop,c(l[1],u[2],u[3]))+ - pnacopula(cop,c(u[1],l[2],u[3]))+ - pnacopula(cop,c(u[1],u[2],l[3]))+ + pnacopula(cop,c(l[1],l[2],u[3]))+ + pnacopula(cop,c(l[1],u[2],l[3]))+ + pnacopula(cop,c(u[1],l[2],l[3]))+ - pnacopula(cop,l) } ## ==== 3d examples ============================================================ ## ==== AMH ==================================================================== theta0 <- 0.7135 # tau_{12}=tau_{13}=0.2, tau_{23}=0.3 theta1 <- 0.9430 ## check 1 corCheckAMH <- corCheck(n,theta0,theta1,copAMH) trCorr <- rbind(c(1,0.2,0.2), c(0.2,1,0.3), c(0.2,0.3,1)) corCheckout(corCheckAMH,trCorr) stopifnot(max(abs(corCheckAMH[["cor"]]-trCorr)) < eps.tau) ## check 2 AMH3d <- new("outer_nacopula", copula = setTheta(copAMH, theta0), comp = as.integer( 1 ), childCops = list(new("nacopula", copula = setTheta(copAMH, theta1), comp = as.integer(c(2,3)))) # no childCops ) ## constructor forms of the above: rr <- onacopula("A", C(0.7135, 1, list(C(0.943, 2:3, NULL)))) r0 <- onacopula("A", C(0.7135, 1, C(0.943, 2:3, NULL))) r1 <- onacopula("A", C(0.7135, 1, C(0.943, 2:3, ))) r2 <- onacopula("AMH", C(0.7135, 1, C(0.943, 2:3 ))) stopifnot(identical(AMH3d, rr), identical(rr, r0), identical(r0, r1), identical(r1, r2)) ## check (chkAMH <- chiSq_check_cop(n,N,AMH3d,5)) ## check probability l <- c(.1, .05, .3) u <- c(.4, .7, .6) stopifnot(all.equal(print( prob(AMH3d,l,u)), probin3dcube(AMH3d,l,u), tol=1e-14)) ## ==== Clayton ================================================================ theta0 <- 0.5 # tau_{12}=tau_{13}=0.2, tau_{23}=0.5 theta1 <- 2 ## check 1 corCheckClayton <- corCheck(n,theta0,theta1,copClayton) trCorr <- rbind(c(1,0.2,0.2), c(0.2,1,0.5), c(0.2,0.5,1)) corCheckout(corCheckClayton,trCorr) stopifnot(max(abs(corCheckClayton[["cor"]]-trCorr)) < eps.tau) ## check 2 Clayton3d <- onacopula("Clayton", C(theta0, 1, C(theta1, 2:3))) (chkClayton <- chiSq_check_cop(512,100,Clayton3d,5)) ## check probability stopifnot(all.equal(print( prob(Clayton3d,l,u)), probin3dcube(Clayton3d,l,u), tol=1e-14)) ## ==== Frank ================================================================== theta0 <- 1.8609#tau_{12}=tau_{13}=0.2, tau_{23}=0.5 theta1 <- 5.7363 ## check 1 corCheckFrank <- corCheck(n,theta0,theta1,copFrank) corCheckout(corCheckFrank,trCorr) stopifnot(max(abs(corCheckFrank[["cor"]]-trCorr)) < eps.tau) ## check 2 Frank3d <- onacopula("F", C(theta0, 1, C(theta1, 2:3))) (chkFrank <- chiSq_check_cop(n,N,Frank3d,5)) ## check probability stopifnot(all.equal(print( prob(Frank3d,l,u)), probin3dcube(Frank3d,l,u), tol=1e-14)) ## ==== Gumbel ================================================================= theta0 <- 1.25 theta1 <- 2 #--> tau_{12}=tau_{13}=0.2, tau_{23}=0.5 trCorr <- rbind(c(1,0.2,0.2), c(0.2,1,0.5), c(0.2,0.5,1)) ## check 1 corCheckGumbel <- corCheck(n,theta0,theta1,copGumbel) corCheckout(corCheckGumbel,trCorr) stopifnot(max(abs(corCheckGumbel[["cor"]]-trCorr)) < eps.tau) ## check 2 Gumbel3d <- onacopula("Gumbel", C(theta0, 1, C(theta1, 2:3))) (chkGumbel <- chiSq_check_cop(n,N,Gumbel3d,5)) ## check probability stopifnot(all.equal(print( prob(Gumbel3d,l,u)), probin3dcube(Gumbel3d,l,u), tol=1e-14)) ## ==== Joe ==================================================================== theta0 <- 1.4438#tau_{12}=tau_{13}=0.2, tau_{23}=0.5 theta1 <- 2.8562 ## check 1 corCheckJoe <- corCheck(n,theta0,theta1,copJoe) corCheckout(corCheckJoe,trCorr) stopifnot(max(abs(corCheckJoe[["cor"]]-trCorr)) < eps.tau) ## check 2 Joe3d <- onacopula("J", C(theta0, 1, C(theta1, 2:3))) (chkJoe <- chiSq_check_cop(n,N,Joe3d,5)) ## check probability stopifnot(all.equal(print( prob(Joe3d,l,u)), probin3dcube(Joe3d,l,u), tol=1e-14)) ## ==== Examples that check pnacopula() and rnacopula() =================================== ## generate output for the examples prt.stats <- function(c1,c2, rt) { cat("Run time [ms] for generating", n, "vectors of variates: ", round(1000*rt[1],1), "\n") prt.tau.diff(c1, c2) ; cat("\n") } ## ==== 3d Ali-Mikhail-Haq copula example ====================================== c3 <- onacopula("A", C(0.7135, 1, list(C(0.943, 2:3)))) ## basic check d <- dim(c3) stopifnot(d == 3, allComp(c3) == 1:3, allComp(c3@childCops[[1]]) == 2:3) ## test pnacopula() u <- c(.3, .4, .5) ## with function: v <- pnacopula(c3, u) ## by hand psi <- function(t,theta) { (1-theta)/(exp(t)-theta) } psiInv <- function(t,theta) { log((1-theta*(1-t))/t) } th0 <- 0.7135 th1 <- 0.9430 level1 <- psi(psiInv(u[2],th1) + psiInv(u[3],th1), th1) level0 <- psi(psiInv(u[1],th0) + psiInv(level1, th0), th0) stopifnot(all.equal(v, level0, tol = 1e-14)) ## test rnacopula() rt <- system.time(rC3 <- rnacopula(n,c3)) C3 <- cor(rC3,method = "kendall") trCorr <- rbind(c(1,0.2,0.2), c(0.2,1,0.3), c(0.2,0.3,1)) # tau_{12}=tau_{13}=0.2, tau_{23}=0.3 stopifnot(is.numeric(rC3), is.matrix(rC3), dim(rC3) == c(n, 3),max(abs(C3-trCorr)) < eps.tau) prt.stats(C3,trCorr,rt) if(doPlots) pairs(rC3, panel = function(...) { par(new = TRUE); smoothScatter(...) }) ## ==== 2d Clayton copula example ============================================== c2 <- onacopula("Clayton", C(0.5, c(1,2))) # no childCops ## or simply c2 <- onacopula("Clayton", C(0.5, 1:2)) ## basic check d <- dim(c2) stopifnot(d == 2, allComp(c2) == 1:2) ## test pnacopula() v <- pnacopula(c2, u = c(.3, .4)) stopifnot(all.equal(v, local( { u1 <- .3; u2 <- .4 (u1^(-1/2)+u2^(-1/2)-1)^(-2) }), tol = 1e-14)) ## test rnacopula() rt <- system.time(rC2 <- rnacopula(n,c2)) C2 <- cor(rC2,method = "kendall") trCorr <- rbind(c(1,0.2), c(0.2,1)) # tau_{12}=0.2 stopifnot(is.numeric(rC2), is.matrix(rC2), dim(rC2) == c(n, 2), max(abs(C2-trCorr)) < eps.tau) prt.stats(C2,trCorr,rt) if(doPlots) smoothScatter(rC2) ## ==== 3d Clayton copula example ============================================== c3 <- onacopula("C", C(0.5, 1, C(2., c(2,3)))) ## basic check d <- dim(c3) stopifnot(d == 3, allComp(c3) == 1:3, allComp(c3@childCops[[1]]) == 2:3) ## test pnacopula() v <- pnacopula(c3, u = c(.3, .4, .5)) stopifnot(all.equal(v, local( { u1 <- .3; u2 <- .4; u3 <- .5 1/((1/u2^2 +1/u3^2 -1)^(1/4) -1 +1/sqrt(u1))^2 }), tol = 1e-14)) ## test rnacopula() rt <- system.time(rC3 <- rnacopula(n,c3)) C3 <- cor(rC3,method = "kendall") trCorr <- matrix(c(1,0.2,0.2,0.2,1,0.5,0.2,0.5,1),nrow = 3,byrow = TRUE) # tau_{12}=tau_{13}=0.2, tau_{23}=0.5 stopifnot(is.numeric(rC3), is.matrix(rC3), dim(rC3) == c(n, 3),max(abs(C3-trCorr)) < eps.tau) prt.stats(C3,trCorr,rt) if(doPlots) pairs(rC3, panel = function(...) { par(new = TRUE); smoothScatter(...) }) ## ==== 9d Clayton copula example ============================================== c9 <- onacopula("Clayton", C(0.5, c(3,6,1), C(2., c(9,2,7,5), C(3., c(8,4))))) c9Lis <- list(0.5, c(3,6,1), list(list(2., c(9,2,7,5), list(list(3., c(8,4)))))) ## consistency onacopula() <-> onacopulaL() : stopifnot(identical(c9, onacopulaL("Clayton", c9Lis))) ## basic check d <- dim(c9) stopifnot(d == 9, allComp(c9) == c(3,6,1,9,2,7,5,8,4), allComp(c9@childCops[[1]]) == c(9,2,7,5,8,4), allComp(c9@childCops[[1]]@childCops[[1]]) == c(8,4)) ## test pnacopula() u <- seq(0.1,0.9,by = 0.1) ## with pnacopula function: v <- pnacopula(c9, u) ## by hand psi <- function(t,theta) { (1+t)^(-1/theta) } psiInv <- function(t,theta) { t^(-theta) - 1 } th0 <- 0.5 th1 <- 2 th2 <- 3 level2 <- psi(psiInv(u[8],th2) + psiInv(u[4],th2), th2) level1 <- psi(psiInv(u[9],th1)+ psiInv(u[2],th1)+ psiInv(u[7],th1)+ psiInv(u[5],th1) + psiInv(level2, th1), th1) level0 <- psi(psiInv(u[3],th0)+ psiInv(u[6],th0)+ psiInv(u[1],th0)+ psiInv(level1, th0), th0) stopifnot(all.equal(v, level0, tol = 1e-14)) ## test rnacopula() rt <- system.time(rC9 <- rnacopula(n,c9)) C9 <- cor(rC9,method = "kendall") ## Theoretical values: ## (11,12,13,14,15,16,17,18,19)=(1,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2) ## (22,23,24,25,26,27,28,29)=(1,0.2,0.5,0.5,0.2,0.5,0.5,0.5) ## (33,34,35,36,37,38,39)=(1,0.2,0.2,0.2,0.2,0.2,0.2) ## (44,45,46,47,48,49)=(1,0.5,0.2,0.5,0.6,0.5) ## (55,56,57,58,59)=(1,0.2,0.5,0.5,0.5) ## (66,67,68,69)=(1,0.2,0.2,0.2) ## (77,78,79)=(1,0.5,0.5) ## (88,89)=(1,0.5) C9.true <- rbind(c(1. ,rep(0.2,8)), c(0.2,1. ,0.2,0.5,0.5,0.2, rep(0.5,3)), c(0.2,0.2,1. , rep(0.2,6)), c(0.2,0.5,0.2,1. ,0.5,0.2,0.5,0.6,0.5), c(0.2,0.5,0.2,0.5,1. ,0.2, rep(0.5,3)), c(rep(0.2,5), 1. , rep(0.2,3)), c(0.2,0.5,0.2,0.5,0.5,0.2,1. ,0.5,0.5), c(0.2,0.5,0.2,0.6,0.5,0.2,0.5,1. ,0.5), c(0.2,0.5,0.2,0.5,0.5,0.2,0.5,0.5,1. )) stopifnot(dim(rC9) == c(n, 9), max(abs(C9-C9.true)) < eps.tau) prt.stats(C9,C9.true,rt) if(doPlots && dev.interactive()) # "large" pairs(rC9, gap = .1, pch = 20, cex = 0.2, col = rgb(.2,.1,.7, alpha = .5), main = paste(n," vectors of a ",d, "-dimensional nested Clayton copula",sep = "")) ## ==== 125d Clayton copula example ============================================ c125 <- onacopula("Clayton", C(0.5, , # no direct components list(C(2, 1:10), C(3, 11:40), C(2, 41:60), C(2, 61:85), C(3, 86:105), C(2,106:125)))) c125Lis <- list(0.5, integer(0), # <- could use NULL list(list(2, 1:10), list(3, 11:40), list(2, 41:60), list(2, 61:85), list(3, 86:105), list(2,106:125))) ## consistency onacopula() <-> onacopulaL() : stopifnot(identical(c125, onacopulaL("C", c125Lis))) ## basic check d <- dim(c125) stopifnot(d == 125, allComp(c125) == 1:125, allComp(c125@childCops[[1]]) == 1:10, allComp(c125@childCops[[2]]) == 11:40, allComp(c125@childCops[[3]]) == 41:60, allComp(c125@childCops[[4]]) == 61:85, allComp(c125@childCops[[5]]) == 86:105, allComp(c125@childCops[[6]]) == 106:125 ) ## test rnacopula() rt <- system.time(rC125 <- rnacopula(n,c125)) stopifnot(is.numeric(rC125), is.matrix(rC125), dim(rC125) == c(n, 125)) cat("Time elapsed for generating ",n," vectors of variates:\n",sep = "") rt cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons''