https://github.com/cran/nacopula
Revision 161411bb86f97e5a8bd89091cd61d03a33c2761a authored by Martin Maechler on 06 February 2012, 00:00 UTC, committed by Gabor Csardi on 06 February 2012, 00:00 UTC
1 parent 5bc804b
Raw File
Tip revision: 161411bb86f97e5a8bd89091cd61d03a33c2761a authored by Martin Maechler on 06 February 2012, 00:00 UTC
version 0.8-0
Tip revision: 161411b
logL-vis.R
## Copyright (C) 2010 Marius Hofert and Martin Maechler
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

require(nacopula)

##' @title [m]inus Log Likelihood for Archimedean copula "fast version"
##' @param theta parameter (length 1 for our current families)
##' @param acop Archimedean copula (of class "acopula")
##' @param u data matrix n x d
##' @param n.MC if > 0 MC is applied with sample size equal to n.MC; otherwise,
##'        the exact formula is used
##' @param ... potential further arguments, passed to <acop> @dacopula()
##' @return
##' @author Martin Maechler (Marius originally)
mLogL <- function(theta, acop, u, n.MC=0, ...) { # -(log-likelihood)
    -sum(acop@dacopula(u, theta, n.MC=n.MC, log=TRUE, ...))
}

##' @title
##' @param cop an outer_nacopula (currently with no children)
##' @param u n x d  data matrix
##' @param xlim x-range for curve() plotting
##' @param main title for curve()
##' @param XtrArgs a list of further arguments for mLogL()
##' @param ... further arguments for curve()
##' @return
##' @author Martin Maechler
curveLogL <- function(cop, u, xlim, main, XtrArgs=list(), ...) {
    unam <- deparse(substitute(u))
    stopifnot(is(cop, "outer_nacopula"), is.list(XtrArgs),
              (d <- ncol(u)) >= 2, d == dim(cop),
              length(cop@childCops) == 0# not yet *nested* A.copulas
              )
    acop <- cop@copula
    th. <- acop@theta # the true theta
    acop <- setTheta(acop, NA) # so it's clear, the true theta is not used below
    if(missing(main)) {
        tau. <- cop@copula@tau(th.)
        main <- substitute("Neg. Log Lik."~ -italic(l)(theta, UU) ~ TXT ~~
			   FUN(theta['*'] == Th) %=>% tau['*'] == Tau,
			   list(UU = unam,
				TXT= sprintf("; n=%d, d=%d;  A.cop",
					     nrow(u), d),
				FUN = acop@name,
				Th = format(th.,digits=3),
				Tau = format(tau., digits=3)))
    }
    r <- curve(do.call(Vectorize(mLogL, "theta"), c(list(x, acop, u), XtrArgs)),
               xlim=xlim, main=main,
               xlab = expression(theta),
               ylab = substitute(- log(L(theta, u, ~~ COP)), list(COP=acop@name)),
               ...)
    if(is.finite(th.))
        axis(1, at = th., labels=expression(theta["*"]),
             lwd=2, col="dark gray", tck = -1/30)
    else warning("non-finite cop@copula@theta = ", th.)
    axis(1, at = initOpt(acop@name),
         labels = FALSE, lwd = 2, col = 2, tck = 1/20)
    invisible(r)
}



###--------------------- "Joe" --- tau = 0.2 ---------------------------------

n <- 200
d <- 100
tau <- 0.2
(theta <- copJoe@tauInv(tau))# 1.44381
(cop <- onacopulaL("Joe",list(theta,1:d)))

set.seed(1)
U1 <- rnacopula(n,cop)
enacopula(U1, cop, "mle") # 1.432885 --  fine
(th4 <- 1 + (1:4)/4)
mL.tr <- c(-3558.5, -3734.4, -3299.5, -2505.)
mLt1 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log.poly")) # default
mLt2 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log1p"))
mLt3 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="poly"))
stopifnot(all.equal(mLt1, mL.tr, tol=5e-5),
          all.equal(mLt2, mL.tr, tol=5e-5),
          all.equal(mLt3, mL.tr, tol=5e-5))
##--> Funktion für Gesamtplot:
system.time(r1l  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log.poly")))
system.time(r1J  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="poly"),
                              add=TRUE, col=adjustcolor("red", .4)))
system.time(r1m  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log1p"),
                              add=TRUE, col=adjustcolor("blue",.5)))

U2 <- rnacopula(n,cop)
enacopula(U2, cop, "mle") # 1.4399  -- no warning any more
## the density for the *correct* parameter looks okay
summary(dnacopula(cop, U2))
## hmm:  max = 5.5e177
system.time(r2 <- curveLogL(cop, U2, c(1, 2.5)))

mLogL(1.8, cop@copula, U2)# -4070.148 (was -Inf)

U3 <- rnacopula(n,cop)
enacopula(U3, cop, "mle") # 1.44957
system.time(r3 <- curveLogL(cop, U3, c(1, 2.5)))

U4 <- rnacopula(n,cop)
enacopula(U4, cop, "mle") # 1.451929  was 2.351..  "completely wrong"
summary(dnacopula(cop, U4)) # ok (had one Inf)
system.time(r4 <- curveLogL(cop, U4, c(1, 2.5)))

mLogL(2.2351, cop@copula, U4)
mLogL(1.5,    cop@copula, U4)
mLogL(1.2,    cop@copula, U4)

system.time(r4. <- curveLogL(cop, U4, c(1, 1.01)))
system.time(r4. <- curveLogL(cop, U4, c(1, 1.0001)))
system.time(r4. <- curveLogL(cop, U4, c(1, 1.000001)))
##--> limit goes *VERY* steeply  up to .. probably 0

mLogL(1.2,    cop@copula, U4)
##--> theta 1.164 is about the boundary:
cop@copula@dacopula(U4[118,], theta=1.164, log = TRUE)
##  600.5926  (was Inf)
## now that we have  polyJ(...., log=TRUE)

##--------------------- "Joe" --- harder cases: d = 150, tau = 0.3 -------------
n <- 200
d <- 150
tau <- 0.3
(theta <- copJoe@tauInv(tau))# 1.772
(cop <- onacopulaL("Joe",list(theta,1:d)))
U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 1.776743
system.time(r. <- curveLogL(cop, U., c(1.1, 3)))
## still looks very good

##--------------------- "Joe" --- even harder: d = 180, tau = 0.4 -------------
d <- 180
tau <- 0.4
(theta <- copJoe@tauInv(tau))# 2.219
(cop <- onacopulaL("Joe",list(theta,1:d)))
U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 2.22666
system.time(r. <- curveLogL(cop, U., c(1.1, 4)))
## still looks very good


###--------------------- The same for  Gumbel ---------------------------------
n <- 200
d <- 100
tau <- 0.2
(theta <- copGumbel@tauInv(tau))# 1.25
(cop <- onacopulaL("Gumbel",list(theta,1:d)))

set.seed(1)
U1 <- rnacopula(n,cop)
U2 <- rnacopula(n,cop)
U3 <- rnacopula(n,cop)

enacopula(U1, cop, "mle") # 1.241927
##--> Plots with "many" likelihood evaluations
system.time(r1 <- curveLogL(cop, U1, c(1, 2.1)))
system.time(r2 <- curveLogL(cop, U2, c(1, 2.1)))
system.time(r3 <- curveLogL(cop, U3, c(1, 2.1)))

##--------------------- "Gumbel" ---  harder: d = 150, tau = 0.6 -------------
d <- 150
tau <- 0.6
(theta <- copGumbel@tauInv(tau))# 2.5
cG.5 <- onacopulaL("Gumbel",list(theta,1:d))

set.seed(17)
U4 <- rnacopula(n,cG.5)
U5 <- rnacopula(n,cG.5)
U6 <- rnacopula(n,cG.5)

enacopula(U4, cG.5, "mle") # 2.475672
enacopula(U5, cG.5, "mle") # 2.484243
enacopula(U6, cG.5, "mle") # 2.504111

##--> Plots with "many" likelihood evaluations
(th. <- seq(1, 3, by= 1/4))
## each of these take about 3.3 seconds [nb-mm3]
system.time(r4 <- sapply(th., mLogL, acop=cG.5@copula, u=U4))
system.time(r5 <- sapply(th., mLogL, acop=cG.5@copula, u=U5))
system.time(r6 <- sapply(th., mLogL, acop=cG.5@copula, u=U6))
r4. <- c(0, -18375.33, -21948.033, -24294.995, -25775.502,
         -26562.609, -26772.767, -26490.809, -25781.224)
stopifnot(all.equal(r4, r4., tol = 8e-8))

if(FALSE) ## for speed analysis, etc
    debug(nacopula:::polyG)
mLogL(1.65, cG.5@copula, U4)# -23472.96

dd <- cG.5@copula@dacopula(U4, 1.64, log = TRUE)
summary(dd) ## no NaN's anymore

###-------------------- "Frank" --- a case we found "hard": --------------------
n <- 64
d <- 5
tau <- 0.8
(theta <- copFrank@tauInv(tau))# 18.192
(cop <- onacopulaL("Frank",list(theta,1:d)))
set.seed(11) ## these seeds give no problem: 101, 41, 21
U. <- rnacopula(n,cop)
## forget the true theta:
cop@copula <- setTheta(cop@copula, NA)
system.time(f.ML <- emle(U., cop)); f.ML ## --> fine: theta = 18.033, Log-lik = 314.01
## with MC :
system.time(f.mlMC <- emle(U., cop, n.MC = 1e4))## takes a long time
## (7.5 sec on nb-mm3 2010)
stopifnot(
	  all.equal(unname(coef(f.ML)), 18.03331, tol= 1e-6)
	  ,
	  all.equal(f.ML@min, -314.0143, tol=1e-6)
	  ,
	  ## Simulate MLE (= SMLE) is "extra" random,  hmm...
	  all.equal(unname(coef(f.mlMC)), 17.8, tol= 0.01)
	  ##		   64-bit ubuntu: 17.817523
	  ##		 ? 64-bit Mac:	  17.741
	 )

cop@copula <- setTheta(cop@copula, theta)# for the plot:
r. <- curveLogL(cop, U., c(1, 200))
## now looks fine (well, not really ..) -- with finite values much longer..
## but still has -Inf: at end:
tail(as.data.frame(r.), 15)
stopifnot( is.finite( r.$y ) )

back to top