https://github.com/cran/nacopula
Tip revision: 69748f90a7dcf58ecb7667db0678e958a9fff7a6 authored by Martin Maechler on 04 March 2011, 00:00:00 UTC
version 0.4-4
retstable-ex.R
## Copyright (C) 2010 Marius Hofert and Martin Maechler
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.
#### Generating Exponentially Tilted Stable Random Vars (r ET stable)
#### ================================================================
#### --> Experiments with retstable*() versions
require(nacopula)
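## (Orientation -- a minimal call, as a sketch: retstableR() is the pure-R
##  version studied in Part I, retstable() the interface used in Part II;
##  this sketch assumes retstableR() accepts a vector V0 just like retstable():)
set.seed(17)
retstableR(0.5, V0 = c(1, 2, 5)) # one exponentially tilted stable variate per V0 entry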
### --- ====== --------------------------#--
### --- Part I --- Experiments with retstableR()
### --- ====== --------------------------#--
##' Zolotarev's function A(.) to the power 1-alpha,
##' i.e. sin(alpha*x)^alpha * sin((1-alpha)*x)^(1-alpha) / sin(x)
##' and other things
##' @param ialpha parameter 1-alpha
##' @param x sequence of points in (0,pi)
##' @param col color
##' @return a plot
##' @author Martin Maechler
p.A <- function(ialpha, x = seq(0, 3.1, length=100), col = "tomato") {
stopifnot(0 <= ialpha, ialpha <= 1, 0 <= x, x <= pi)
if(is.unsorted(x)) x <- sort(x)
tit <- substitute(list("Zolotarev's "* {A(x, alpha) - 1} ,
" "* 1-alpha == IA), list(IA = ialpha))
alpha <- 1 - ialpha
A1 <- A..Z(x, alpha, ialpha) - 1
A1d <- A..Z(x, alpha, 1-alpha) - 1
plot(x, A1, ylim = range(A1, A1d), ylab = expression(A(x,alpha) - 1),
col = col, main = tit, type ="o")
abline(h=0, lty=2)
## add the "dumb" version that just works with alpha
gray <- rgb(.4, .5, .6, alpha = .3) # semi-transparent color
lines(x, A1d, col = gray, lwd = 3)
}
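## For reference, the formula from the header comment written out in plain R
## (a sketch; A..Z(x, alpha, 1-alpha), used above, is assumed to compute the
##  same quantity, its third argument being an accurately computed 1-alpha):
A.naive <- function(x, alpha)
    sin(alpha*x)^alpha * sin((1-alpha)*x)^(1-alpha) / sin(x)
## e.g.:  all.equal(A.naive(1.5, 0.5), A..Z(1.5, 0.5, 0.5))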
if(!dev.interactive(orNone=TRUE))
pdf("retstable-ex.pdf")
## A(.) --> 1
par(mfrow = c(2,2), mar = .1+c(4,4,4,1), mgp = c(1.5, .6, 0))
p.A(1e-7)
p.A(1e-9)
p.A(1e-12)
p.A(1e-14) # still fine, visually
p.A( 1e-15) # wiggliness!
p.A(.8e-15) # more wiggliness; dumb version very slightly differs
p.A(.5e-15) # more -- dumb version differs clearly
p.A(.2e-15) # ditto
p.A(.1e-15) # even more -- but this is already < .Machine$double.eps:
.Machine$double.eps / .1e-15
p.A(.08e-15) # = 8e-17
p.A(.06e-15) # = 6e-17 still just wiggliness
p.A(.05e-15) # complete breakdown
## Conclusion: for the small range of 1-alpha in [0.6, 8] * 10^-17,
## ---------- using an 'accurate 1-alpha' instead of just computing "1 - alpha"
## *is* somewhat more accurate
## investigate the m(V0) = "optimal m" : ----------------------
V0 <- seq(0, 40, by = 1/256)
nm <- length(m <- sapply(V0, nacopula:::m.opt.retst))
iJmp <- which(diff(m) != 0)
stopifnot( all.equal(m[c(iJmp,nm)], 1:40) )
iKept <- c(1, rbind(iJmp, iJmp+1), nm)
V0. <- V0[iKept]
m. <- m[iKept]
V0[iJmp] - m[iJmp] ## 0.3828125 0.4296875 0.4492188 0.4609375 => conv. to 0.5
V0[iJmp+1] - m[iJmp+1] ## .... converging to -0.5
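## Quick visual check (a sketch): m(V0) is a step function which, given the
## jump locations above, should roughly follow round(V0) for larger V0:
plot(V0, m, type = "s", xlab = expression(V[0]), ylab = "m.opt.retst(V0)")
lines(V0, pmax(1, round(V0)), col = "tomato", lty = 2)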
## ------------ How many times is rstable1(1, ...) called ? ------------------
## --> we *really* should call rstable1(N, ....)
## and then use these values where N = N(V0):
saveFile <- "retstable_Nstat.rda"
## This needs roughly two and a half hours -- elapsed time !!
if(file.exists(saveFile)) {
load (saveFile)
} else {
.e <- new.env()
## our tracer function is just a "counter":
rstTR <- function() .e$N <- if(is.null(.e$N)) 0L else .e$N + 1L
trace(rstable1, rstTR, print=FALSE)
## For now, we use the R (reliable / correct) version:
nV <- length(V0s <- c(1:4, 5*(1:10), 10*(6:20)))
nAlph <- length(alphas <- (1:9)/10)
nSim <- 1024
Nstat <- array(dim = c(nSim, nAlph, nV))
for(iV0 in 1:nV) {
V0 <- V0s[iV0]
cat(sprintf("V0 = %5d :", V0))
for(ia in 1:nAlph) {
alpha <- alphas[ia]
Nstat[, ia, iV0] <- replicate(nSim,
{ .e$N <- 0L # start counting ...
RR <- retstableR(alphas[ia], V0 = V0)
.e$N }
)
cat(".")
}
cat("\n")
}
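## untrace() so that later rstable1() calls are no longer counted
## (presumably not needed once 'Nstat' has been filled):
untrace(rstable1)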
save(V0s,alphas, nSim, Nstat, file = saveFile)
}
## Visualize 'Nstat' -- can do this after loading the file
boxplot(Nstat[,1,], range=2)
## Log log scale
op <- if(require("sfsmisc"))
sfsmisc::mult.fig(length(alphas))$old.par else { ## less nice-looking fallback
m <- length(alphas); par(mfrow= c(3, ceiling(m/3))) }
for(i in seq_along(alphas)) {
N.st <- Nstat[,i,]
plot(NA,NA, xlim = range(V0s), ylim=range(N.st),
main = substitute(N == no({"calls to_ " * rstable1()}) * " for "
* { alpha == A },
list(A = alphas[i])),
xlab=expression(V[0]), ylab = "Nstat", type = "n", log="xy")
boxplot(N.st, at = V0s, range=2, add = TRUE)
lines(apply(N.st, 2, mean, trim=.05) ~ V0s, col = 2, lwd=2)
}
par(op)
str(Nst.mns <- apply(Nstat, 3:2, mean, trim=.05))
str(Nst.q95 <- apply(Nstat, 3:2, quantile, prob=.95))
str(Nst.max <- apply(Nstat, 3:2, max))
str(Nst.min <- apply(Nstat, 3:2, min))
## we seem to need about k * V0 calls -- but what is k?
## on average:
rowMeans(Nst.mns / V0s) ## ?== Euler's e == exp(1)
## maximally
rowMeans(Nst.max / V0s)
## is it 'e' ??
plot(V0s, rowMeans(Nst.mns / V0s), log="x"); rug(V0s); abline(h=exp(1), col=2)
L <- V0s >= 20
plot(V0s[L], rowMeans(Nst.mns[L,] / V0s[L]), log="x"); rug(V0s[L])
abline(h=exp(1), col=2)
## well, as a limit probably
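## A single pooled estimate of k over the larger V0 values (a rough sketch;
## if N ~= k * V0 with k = e, this should be close to exp(1) ~ 2.718):
mean(Nst.mns[L,] / V0s[L])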
mtit <- substitute(N == `# `*group("{", rstable1() * " calls", "}") *
" for " * alpha %in% group("{", list(A,B,...,Z), "}"),
list(A=alphas[1], B=alphas[2], Z=tail(alphas,1)))
## normal scale
matplot (V0s, Nst.max, ylim=range(Nstat), col = "gray20",
ylab="", main=mtit, type="l", lty=2); rug(V0s)
matlines(V0s, Nst.min, col = "gray20", lty=2)
abline(a=0,b=1, col = "tomato"); text(20,20, expression(y == x), col="tomato",
adj=-.2)
matlines(V0s, Nst.mns, type = "b", lty=1)
matlines(V0s, Nst.q95, col = "gray80", lty=3)
## log log scale
matplot (V0s, Nst.max, ylim=range(Nstat), col = "gray20",
ylab="", main=mtit, type="l", lty=2, log="xy"); rug(V0s)
matlines(V0s, Nst.min, col = "gray20", lty=2)
abline(a=0,b=1, col = "tomato"); text(20,20, expression(y == x), col="tomato",
adj=-.2)
matlines(V0s, Nst.mns, type = "b", lty=1)
matlines(V0s, Nst.q95, col = "gray80", lty=3)
## or just the "residuals"
matplot(V0s, Nst.mns / V0s, type = "b", lty=1, log = "x")
rug(V0s)
if(dev.interactive()) par(mfrow=c(1,1)) else { dev.off(); pdf("retstable-ex-2.pdf") }
### --- ======= --------------------------#-----------
### --- Part II --- Experiments with retstableC() methods
### --- ======= --------------------------#-----------
##' theoretical mean alpha * V0 * h^(alpha-1) of the exponentially tilted
##' stable variable (used as 'mu' in the t-tests below)
meanretstable <- function(alpha,h,V0) alpha*V0*h^(alpha-1)
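## (Quick sanity check of that formula -- a sketch with a moderate sample size,
##  using the "MH" method which the loop below also exercises:)
set.seed(7)
c(simulated = mean(retstable(0.5, V0 = rep(2, 10000), h = 1, method = "MH")),
  formula   = meanretstable(0.5, h = 1, V0 = 2)) # both should be ~ 1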
nalpha <- length(alpha <- c(0.05, (1:9)/10, 0.95))
nV0 <- length( V0 <- c(0.2,0.5,1,2:5,10))
nh <- length( h <- c(0.5, 0.75, 1, 1.5, 2, 5, 10))
meth <- c("MH", "LD")
nsim <- 100000 # a lot
hist.breaks <- 200 # could be made a function of nsim and the parameters
saveFile2 <- "retstable_st2.rda"
saveFile3 <- "retstable_CPU2.rda"
if(file.exists(saveFile2) && file.exists(saveFile3)) {
## we have precomputed it ...
load (saveFile2)
load (saveFile3)
} else {
set.seed(47)
dnS <- list(alpha = paste("alpha", alpha, sep="="),
V0 = paste("V0", formatC(V0), sep="="),
h = paste("h", formatC(h), sep="="),
meth = meth)
dim.S <- c(nalpha, nV0, nh, length(meth))
St.c <- vector("list", prod(dim.S))
dim(St.c) <- dim.S
dimnames(St.c) <- dnS
St.. <- array(dim = c(nsim, length(meth)),
dimnames = list(NULL, meth))
CPU.c <- array(dim = dim(St.c), dimnames = dnS)
tt.pval <- array(dim = dim(St.c), dimnames = dnS)
KS.pval <- array(dim = dim(St.c)[1:3], dimnames = dnS[1:3])
count <- 1
for(ia in 1:nalpha) {
alph <- alpha[ia]
for(iV0 in 1:nV0) {
V0. <- V0[rep.int(iV0,nsim)]
cat("alpha=",alph,", V0=",V0[iV0],", h= ")
for(ih in 1:nh) {
cat(h[ih],"")
mu. <- meanretstable(alph, h = h[ih], V0 = V0.[1])
for(met in meth) {
## generate data, measure run time
CPU.c[ia, iV0, ih, met] <-
system.time(
St..[,met] <-
retstable(alph, V0= V0., h= h[ih], method = met)
)[1]
## (asymptotic) two-sided t-test
tt.pval[ia, iV0, ih, met] <- t.test(St..[,met], mu = mu.)$p.value
St.c[ia, iV0, ih, met] <-
list(hist(log(St..[,met]),
breaks = hist.breaks, plot = FALSE))
}
KS.pval[ia, iV0, ih] <- ks.test(St..[,"MH"],
St..[,"LD"])$p.value
}
cat("\nProc.time(): ",format(proc.time()[1]),"; ",
round(100*count/(nalpha*nV0)), "% done\n\n",sep="")
count <- count + 1
}
}
save(St.c, alpha, V0, h, meth, nsim, file = saveFile2)
save(CPU.c, KS.pval, tt.pval,
alpha, V0, h, meth, nsim, file = saveFile3)
}
## ---> Compare CPU.c for the two methods,
## --- statistically compare the two methods for St.c
if(getOption("width") < 100) options(width=100)
##' check the random variates via histogram plots
##' @param alphalab alpha label
##' @param V0lab V0 label
##' @param hlab h label
##' @param main title
##' @param nBreaks number of breaks
##' @param log log-scale
##' @return histogram
##' @author Martin Maechler
histSt <- function(alphalab, V0lab, hlab, main, nBreaks = 100, log = TRUE) {
stopifnot(## the "globals":
is.list(St.c), is.array(St.c), length(dnS <- dimnames(St.c)) == 4,
is.character(meth), is.character(alphalab), is.character(V0lab),
is.character(hlab), any(alphalab == dnS[[1]]),
any(V0lab == dnS[[2]]), any(hlab == dnS[[3]]))
if(missing(main) || is.null(main))
main <- paste(format(nsim, sci=FALSE),
" expo.tilted stable vars w/ ",
alphalab,", ",V0lab,", ",hlab,
if(log) " LOG scale", sep = "")
nmeth <- length(meth)
op <- if(require("sfsmisc"))
mult.fig(mfrow= c(nmeth,1), main = main)$old.par else { ## less nice-looking fallback
cex.m <- par("cex.main")
pp <- par(mfrow= c(nmeth,1), oma = c(0,0, 1+ 1.5*cex.m, 0),
mgp = c(1.5, 0.6, 0))
plot.new()
mtext(main, side = 3, outer = TRUE, line = cex.m - .5,
cex = cex.m, font = par("font.main"), col = par("col.main"))
par(new = TRUE)
pp
}
on.exit(par(op))
S. <- St.c[alphalab, V0lab, hlab, ]
##f if(log) {
##f S. <- log(S.)
xl <- expression( log( tilde(S) ) )
##f } else
##f xl <- expression( tilde(S) )
##f breaks <- pretty(range(S.), n = nBreaks)
for(me in meth) {
## hist(S.[me,], breaks = breaks, main = me, xlab = xl, freq=FALSE)
plot(S.[[me]], main = me, xlab = xl, freq=FALSE)
##f lines(density(S.[me]), col=2, lwd=2)
}
}
histSt("alpha=0.3", "V0=5", "h=1")
histSt("alpha=0.5", "V0=1", "h=1")
histSt("alpha=0.1", "V0=0.5", "h=1")
## For h != 1
## Hmm: MH and LD look different here : this is suspicious:
histSt("alpha=0.1", "V0=10", "h=2")
histSt("alpha=0.1", "V0=5", "h=5")
histSt("alpha=0.8", "V0=10", "h=2")
histSt("alpha=0.8", "V0=5", "h=10")
histSt("alpha=0.9", "V0=5", "h=10")
## no problem for h == 1 :
histSt("alpha=0.9", "V0=5", "h=1")
## Instead of "just" the plots, now also look at the P-values:
## For both tests, the P-values look like ~ U[0,1] as they should:
hist(c(KS.pval))
hist(c(tt.pval))
## More formally, they do look uniform on [0,1]:
ks.test(tt.pval, "punif")
ks.test(KS.pval, "punif")
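## Complementary one-number summaries (a sketch): at the 5% level the
## rejection rates should both be around 5% if the P-values are ~ U[0,1]:
mean(tt.pval < 0.05)
mean(KS.pval < 0.05)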
## Times for nsim replicates for given alphas, V0s, hs, and methods
## ratio of the two methods : decision at r == 1 <==> log(r) == 0
CPUr <- CPU.c[,,,"MH"] / CPU.c[,,,"LD"]
plot(density(log10(CPUr))); rug(log10(CPUr))
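## One-number summary (a sketch): the fraction of (alpha, V0, h) combinations
## for which "MH" was faster than "LD":
mean(CPUr < 1)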
if(FALSE) { # hmm, not sensible yet
plot(density(log10(CPUr)), log="x", xaxt = "n")
rug(log10(CPUr))
}
## x-range in log-scale and back-transformed
x.r <- 10^(xLr <- par("usr")[1:2])
(x. <- floor(xLr[1]):ceiling(xLr[2]))
x. <- sort(outer(10^x., c(1,2,5))); x. <- x.[(10^xLr[1] <= x.) &
(x. <= 10^xLr[2])]
x.
axis(1, at = log10(x.), label = x.)
abline(v=0, col = "skyblue", lty=2)
signif(CPUr[,, "h=1"], 3)
## --> the boundary is simply between V0 = 2 and V0 = 5 (!)
signif(CPUr[,, "h=5"], 2) # slightly different picture
## Workaround for now:
(V0 <- as.numeric(sub("^V0=","", dimnames(CPUr)[[2]])))
(h <- as.numeric(sub("^h=","", dimnames(CPUr)[[3]])))
## or view it via filled contours (a lattice levelplot would be the equivalent):
filled.contour(alpha, V0, log10(CPUr[,, "h=1"]))
filled.contour(alpha, V0, log10(CPUr[,, "h=5"]))
## Theory: expect boundary h^alpha * V0 <= c
## ------------------
str(dCPU <- cbind(expand.grid(alpha=alpha, V0=V0, h=h), CPUr = c(CPUr)))
rm(CPUr)# (so we know that dCPU is used below)
## original scale
plot (CPUr ~ I(h^alpha * V0), data = dCPU); abline(h=1, col="tomato")
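## (Sketch of how such a boundary could be turned into a method-selection
##  rule; the cutoff c0 is purely illustrative, *not* the one used inside
##  retstable() -- it only assumes, as the plot suggests, that "MH" wins
##  for small h^alpha * V0:)
c0 <- 4
chooseMeth <- function(alpha, V0, h) if(h^alpha * V0 <= c0) "MH" else "LD"
chooseMeth(0.5, V0 = 2, h = 1)  # -> "MH"
chooseMeth(0.5, V0 = 10, h = 5) # -> "LD"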
##' sophisticated plot of CPU times
##' @param log which axes to log-scale ("", "x", "y", or "xy")
##' @param do.h.eq.1 logical: additionally highlight the points with h == 1
##' @param main title
##' @param data a data frame "like 'dCPU'" (typically a subset of it)
##' @param colT variable ("alpha", "V0", or "h") whose levels determine the colors
##' @param pchT variable ("alpha", "V0", or "h") whose levels determine the plot symbols
##' @param pal color palette used when 'colT' is specified
##' @param ... further arguments passed to plot()
##' @return plot of CPU times
##' @author Martin Maechler
p.cpu <- function(log = "", do.h.eq.1 = TRUE,
main = expression(t[CPU](MH) / t[CPU](LD) *
" as " * f(h, alpha, V[0])),
data = dCPU, colT = NULL, pchT = NULL,
pal = { if(require(RColorBrewer))
brewer.pal(nalpha, "RdYlGn") # "Spectral"
else c("#A50026", "#D73027", "#F46D43", "#FDAE61",
"#FEE08B", "#FFFFBF", "#D9EF8B", "#A6D96A",
"#66BD63", "#1A9850", "#006837")
},
... )
{
doCol <- !is.null(colT) ; if(!doCol) col <- par("col")
doPch <- !is.null(pchT) ; if(!doPch) pch <- par("pch")
if(doCol || doPch) {
stopifnot(colT %in% c("V0","alpha","h"))
stopifnot(pchT %in% c("V0","alpha","h"))
a.c <- as.numeric(a.f <- as.factor(data[,"alpha"]))
V.c <- as.numeric(V.f <- as.factor(data[,"V0"]))
h.c <- as.numeric(h.f <- as.factor(data[,"h"]))
a.f <- paste("alpha", levels(a.f), sep="=")
V.f <- paste("V0", levels(V.f), sep="=")
h.f <- paste("h", levels(h.f), sep="=")
getC <- function(typ)
switch(typ, "V0" = V.c, "alpha" = a.c, "h" = h.c)
getL <- function(typ)
switch(typ, "V0" = V.f, "alpha" = a.f, "h" = h.f)
if(doPch) {
pch <- getC(pchT)
pLabs <- getL(pchT)
}
if(doCol) {
col <- 1 + getC(colT)
cLabs <- getL(colT)
## also set palette()!
oPal <- palette(pal)
## on exit, revert to previous color palette:
on.exit(palette(oPal))
}
}
plot(CPUr ~ I(h^alpha * V0), main=main, xlab =
expression(h ^ alpha %*% V[0]), data=data, log=log,
col=col, pch=pch, ...)
abline(h = 1, lty=2, col="tomato") # ratio == 1 <==> "MH" as fast as "LD"
if(do.h.eq.1) {
points(CPUr ~ I(h^alpha * V0), data=data, pch = 2, col = "red",
subset= h == 1)
legend("bottom", "h = 1", pch = 2, col = "red", inset=.01)
}
if(doPch)
legend("bottomright", legend = pLabs,
col = par("col"), pch = unique(pch), inset=.01)
if(doCol)
legend("topleft", legend = cLabs,
pch = par("pch"), col = unique(col), inset=.01)
}
p.cpu()
p.cpu(log="xy")
## cool! -- the formula seems correct!
p.cpu(log="xy", colT = "alpha", pchT = "V0")
## Zoom in:
p.cpu(log="xy", data = subset(dCPU, subset = 3 <= V0 & V0 <= 5),
do.h.eq.1=FALSE, colT = "alpha", pchT = "V0")# still a bit wide x-range
mtext("3 <= V0 <= 5")
## zoom in more, no longer log-scaled:
p.cpu(data = subset(dCPU, subset = 3 <= V0 & V0 <= 5),
xlim = c(2,5), ylim=c(0.5,2), # restricting range
do.h.eq.1=FALSE, colT = "alpha", pchT = "V0")
mtext("3 <= V0 <= 5")
summary(mod1 <- lm(log(CPUr) ~ I(alpha*log(h)) + log(V0), data = dCPU))
plot(mod1)
summary(mod2 <- lm(log(CPUr) ~ alpha*log(h) + log(V0), data = dCPU))
plot(mod2)
anova(mod1,mod2) # -> second model is not better
plot(residuals(mod1) ~ alpha, data=dCPU)
plot(residuals(mod1) ~ h, data=dCPU)
plot(residuals(mod1) ~ I(alpha*log(h)), data=dCPU)
## Only look "around" CPUr == 1, as that is where the method choice matters
dim(s.CPU <- with(dCPU, dCPU[1 <= V0 & V0 <= 5,])) # only the observations with V0 in [1, 5]
summary(mod.1 <- lm(log(CPUr) ~ I(alpha*log(h)) + I((alpha*log(h))^2) + log(V0),
data = s.CPU))
## coefficient of log(V0) is ~= 1 ---> set it == 1 :
summary(mod.2 <- lm(log(CPUr / V0) ~ I(alpha*log(h)) + I((alpha*log(h))^2),
data = s.CPU))
## or even better
summary(mod.3 <- lm(log(CPUr / V0) ~ I(alpha*log(h)^2), data = s.CPU))
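## (Sketch: use mod.3 to predict the CPU-time ratio on a small grid -- purely
##  exploratory, based on whatever coefficients the run above produced:)
nd <- expand.grid(alpha = c(0.2, 0.5, 0.8), h = c(1, 2, 5))
cbind(nd, CPUr.hat = exp(predict(mod.3, newdata = nd)) * 3) # predicted ratio at V0 = 3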
plot(mod.1)
cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons''