https://github.com/cran/sn
Tip revision: 4d028ba2211e27c98228c62d20a35ff854d1c128 authored by Adelchi Azzalini on 07 April 2005, 00:00:00 UTC
version 0.33
version 0.33
Tip revision: 4d028ba
sn.R
# R package for the Skew-Normal (SN) and the skew-t (ST) distributions
# (the ST distribution since version 0.30).
#
# Author: A.Azzalini
# Home-page: http://azzalini.stat.unipd.it/SN
# major updates: 29/8/1997, 10/12/1997, 1/10/1998, 12/10/1998, 01/04/1999,
# 15/06/2002.
# It requires R 1.0.1, plus library mvtnorm for a few functions
#
#-------
dsn <- function(x, location=0, scale=1, shape=0, log=FALSE)
{
z <- (x-location)/scale
if(log)
y <- (-0.9189385332046727-logb(scale)-z^2/2+zeta(0,shape*z))
else
y <- 2*dnorm(z)*pnorm(z*shape)/scale
replace(y, scale<= 0, NaN)
}
psn <- function(x, location=0, scale=1, shape=0, ...)
{
z <- (x-location)/scale
p <- pmin(1, pmax(0, pnorm(z) - 2*T.Owen(z, shape,...)))
replace(p, scale<= 0, NaN)
}
rsn <- function(n=1, location=0, scale=1, shape=0){
u1 <- rnorm(n)
u2 <- rnorm(n)
id <- (u2 > shape*u1)
u1[id] <- (-u1[id])
y <- location+scale*u1
attr(y,"parameters") <- c(location,scale,shape)
return(y)
}
qsn <- function (p, location = 0, scale = 1, shape = 0, tol = 1e-08, ...)
{
max.q <- sqrt(qchisq(p,1))
min.q <- -sqrt(qchisq(1-p,1))
if(shape == Inf) return(location + scale * max.q)
if(shape == -Inf) return(location + scale * min.q)
na <- is.na(p) | (p < 0) | (p > 1)
zero <- (p == 0)
one <- (p == 1)
p <- replace(p, (na | zero | one), 0.5)
cum <- as.vector(sn.cumulants(shape, 4))
g1 <- cum[3]/cum[2]^(3/2)
g2 <- cum[4]/cum[2]^2
x <- qnorm(p)
x <- (x + (x^2 - 1) * g1/6 + x * (x^2 - 3) * g2/24 -
x * (2 * x^2 - 5) * g1^2/36)
x <- cum[1] + sqrt(cum[2]) * x
max.err <- 1
while (max.err > tol) {
x1 <- x - (psn(x, 0, 1, shape, ...) - p)/dsn(x, 0, 1, shape)
x1 <- pmin(x1,max.q)
x1 <- pmax(x1,min.q)
max.err <- max(abs(x1 - x)/(1 + abs(x)))
x <- x1
}
x <- replace(x, na, NA)
x <- replace(x, zero, -Inf)
x <- replace(x, one, Inf)
return(location + scale * x)
}
sn.cumulants <- function(shape=0, n=4)
{
cumulants.half.norm <- function(n=4){
n <- max(n,2)
n <- as.integer(2*ceiling(n/2))
half.n <- as.integer(n/2)
m <- 0:(half.n-1)
a <- sqrt(2/pi)/(gamma(m+1)*2^m*(2*m+1))
signs <- rep(c(1,-1),half.n)[1:half.n]
a <- as.vector(rbind(signs*a,rep(0,half.n)))
coeff <- rep(a[1],n)
for (k in 2:n) {
ind <- 1:(k-1)
coeff[k] <- a[k]-sum(ind*coeff[ind]*a[rev(ind)]/k)
}
kappa <- coeff*gamma((1:n)+1)
kappa[2] <- 1+kappa[2]
return(kappa)
}
# delta<-delta.of(shape)
delta <- shape/sqrt(1+shape^2)
kv <- cumulants.half.norm(n)
if(length(kv)>n) kv<-kv[-(n+1)]
kv[2] <- kv[2]-1
kappa <- outer(delta,1:n,"^")*matrix(rep(kv,length(shape)),ncol=n,byrow=TRUE)
kappa[,2] <- kappa[,2]+1
kappa
}
# lambda.of <- function(delta) delta/sqrt(1-delta^2)
# delta.of <- function(lambda) {
# inf <- (abs(lambda)==Inf)
# delta <-lambda/sqrt(1+lambda^2)
# delta[inf] <- sign(lambda[inf])
# delta
#}
T.Owen <- function(h, a, jmax=50, cut.point=6)
{
T.int <-function(h,a,jmax,cut.point)
{
fui<- function(h,i) (h^(2*i))/((2^i)*gamma(i+1))
seriesL <- seriesH <- NULL
i <- 0:jmax
low<- (h<=cut.point)
hL <- h[low]
hH <- h[!low]
L <- length(hL)
if (L>0) {
b <- outer(hL,i,fui)
cumb <- apply(b,1,cumsum)
b1 <- exp(-0.5*hL^2)*t(cumb)
matr <- matrix(1,jmax+1,L)-t(b1)
jk <- rep(c(1,-1),jmax)[1:(jmax+1)]/(2*i+1)
matr <- t(matr*jk) %*% a^(2*i+1)
seriesL <- (atan(a)-as.vector(matr))/(2*pi)
}
if (length(hH) >0)
seriesH <- atan(a)*exp(-0.5*(hH^2)*a/atan(a))*
(1+0.00868*(hH^4)*a^4)/(2*pi)
series <- c(seriesL,seriesH)
id <- c((1:length(h))[low],(1:length(h))[!low])
series[id] <- series # re-sets in original order
series
}
if(!is.vector(a) | length(a)>1) stop("a must be a vector of length 1")
if(!is.vector(h)) stop("h must be a vector")
aa <- abs(a)
ah <- abs(h)
if(aa==Inf) return(0.5*pnorm(-ah))
if(aa==0) return(rep(0,length(h)))
na <- is.na(h)
inf <- (ah==Inf)
ah <- replace(ah,(na|inf),0)
if(aa<=1)
owen <- T.int(ah,aa,jmax,cut.point)
else
owen<-0.5*pnorm(ah)+pnorm(aa*ah)*(0.5-pnorm(ah))-
T.int(aa*ah,(1/aa),jmax,cut.point)
owen <- replace(owen,na,NA)
owen <- replace(owen,inf,0)
return(owen*sign(a))
}
pnorm2 <- function(x,y,rho){
if(length(c(x,y,rho))>3) stop("non-scalar arguments")
if(x==0 & y==0) return(0.25+asin(rho)/(2*pi))
p <- 0.5*(pnorm(x)+pnorm(y))
if(x==0) p <- p-0.25*sign(y)
else p <- p-T.Owen(x,(y-rho*x)/(x*sqrt(1-rho^2)))
if(y==0) p <- p-0.25*sign(x)
else p <- p-T.Owen(y,(x-rho*y)/(y*sqrt(1-rho^2)))
if((x*y<0) | ((x*y==0) & (x+y)<0)) p <- p-0.5
return(p)
}
cp.to.dp <- function(param){
# converts centred parameters cp=(mu,sigma,gamma1)
# to direct parameters dp=(xi,omega,lambda)
# Note: mu can be m-dimensional, the other must be scalars
b <- sqrt(2/pi)
m <- length(param)-2
gamma1 <- param[m+2]
if(abs(gamma1)> 0.995271746431) stop("abs(gamma1)> 0.995271746431")
A <- sign(gamma1)*(abs(2*gamma1/(4-pi)))^(1/3)
delta <- A/(b*sqrt(1+A^2))
lambda <- delta/sqrt(1-delta^2)
E.Z <- b*delta
sd.Z <- sqrt(1-E.Z^2)
location <- param[1:m]
location[1] <- param[1]-param[m+1]*E.Z/sd.Z
scale <- param[m+1]/sd.Z
dp <- c(location,scale,lambda)
names(dp)[(m+1):(m+2)] <- c("scale","shape")
if(m==1) names(dp)[1] <- "location"
dp
}
dp.to.cp <- function(param){
# converts 'direct' dp=(xi,omega,lambda) to 'centred' cp=(mu,sigma,gamma1)
m <- length(param)-2
omega <-param[m+1]
lambda<-param[m+2]
mu.Z <- lambda*sqrt(2/(pi*(1+lambda^2)))
s.Z <- sqrt(1-mu.Z^2)
gamma1<- 0.5*(4-pi)*(mu.Z/s.Z)^3
sigma <- omega*s.Z
mu <- param[1:m]
mu[1] <- param[1]+sigma*mu.Z/s.Z
cp <- c(mu,sigma,gamma1)
names(cp)[(m+1):(m+2)]<-c("s.d.","skewness")
if(m==1) names(cp)[1] <- "mean"
cp
}
zeta <- function(k,x){# k integer in (0,4)
if(k<0 | k>4) return(NULL)
if(k != as.integer(k)) warning("k must be an integer")
k <- as.integer(k)
na<- is.na(x)
x <- replace(x,na,0)
z <- switch(k+1,
pnorm(x, log.p=TRUE)+ log(2),
ifelse(x>(-20), dnorm(x)/pnorm(x),
ifelse(x>(-200), exp(-x^2/2-0.5*log(2*pi) - pnorm(x,log.p=TRUE)),
-x*(1+1/x^2-2/x^4))),
(-zeta(1,x)*(x+zeta(1,x))),
(-zeta(2,x)*(x+zeta(1,x))-zeta(1,x)*(1+zeta(2,x))),
(-zeta(3,x)*(x+2*zeta(1,x))-2*zeta(2,x)*(1+zeta(2,x))),
NULL)
neg.inf<- (x == -Inf)
if(any(neg.inf))
z <- switch(k+1,
z,
replace(z, neg.inf, Inf),
replace(z, neg.inf, 1),
replace(z, neg.inf, 0),
replace(z, neg.inf, 0),
NULL)
if(k>1) z<- replace(z, x==Inf, 0)
replace(z,na,NA)
}
sn.em <-function(X, y, fixed, p.eps=1e-4, l.eps=1.e-2, trace=FALSE, data=FALSE)
{
#
# 1/10/1998 (elaborando dal em.lm.sn del 2-12-97)
#
# EM per caso con uno/due/tre parametri ignoti, parametrizzando in modo
# "diretta" con (xi, omega, lambda); internamente usa peraltro 'delta'.
# Le componenti ignote sono i termini NA di fixed, ma per semplicita'
# assumiamo che un NA implica che le componenti alla sua sx sono NA
# (e quindi il primo elemento di fixed e` sempre NA).
#
n<-length(y)
if(missing(X)) X<-matrix(rep(1,n),n,1)
nc<-ncol(X)
if(missing(fixed)) fixed <- rep(NA,3)
if(all(!is.na(fixed))) stop("all parameter are fixed")
if(is.na(fixed[3])) iter<-(1-log(l.eps,10)) else iter<-1
qrX<-qr(X)
beta<-qr.coef(qrX,y)
xi <- m <-qr.fitted(qrX,y)
omega <- fixed[2]
lambda <- fixed[3]
# delta <- delta.of(lambda)
delta <- lambda/sqrt(1+lambda^2)
s<-sqrt(sum((y-xi)^2)/n)
if(is.na(fixed[3])) {
gamma1 <- sum((y-m)^3)/(n*s^3)
a <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
delta<-sqrt(pi/2)*a/sqrt(1+a^2)
if(abs(delta)>=1) delta<-sign(delta)/(1+1/n)
# lambda<-lambda.of(delta)
lambda<-delta/sqrt(1-delta^2)
}
mean.Z <- sqrt(2/pi)*delta
sd.Z <- sqrt(1-mean.Z^2)
if(is.na(fixed[2])) omega <- s/sd.Z
if(is.na(fixed[1])) xi <- m-s*mean.Z/sd.Z
old.par <- c(beta,omega,lambda)
diverge <- 1
incr.logL <- Inf
logL <- -Inf
while(diverge>p.eps | incr.logL>l.eps){
# E-step
v <- (y-xi)/omega
p <- zeta(1,lambda*v)
u1 <- omega*(delta*v+p*sqrt(1-delta^2))
u2<-omega^2*((delta*v)^2+(1-delta^2)+p*v*delta*sqrt(1-delta^2))
# M-step
for(i in 1:iter){
beta<-qr.coef(qrX,y-delta*u1)
xi <- qr.fitted(qrX,y-delta*u1)
d <- y-xi
Q <- sum(d^2-2*delta*d*u1+u2)
if(is.na(fixed[2])) omega <-sqrt(Q/(2*n*(1-delta^2)))
r <- 2*sum(d*u1)/Q
if(is.na(fixed[3])) delta<-(sqrt((2*r)^2+1)-1)/(2*r)
}
# convergence? # lambda<-lambda.of(delta)
lambda<-delta/sqrt(1-delta^2)
param<-c(beta,omega,lambda)
names(param)[(nc+1):(nc+2)]<-c("scale","shape")
diverge<-sum(abs(param-old.par)/(1+abs(old.par)))/(nc+2)
old.par<-param
a<-sum(log(dsn(y,xi,omega,lambda)))
incr.logL<- a-logL
logL<-a
if(trace) print(c(param,logL),digits=5)
}
cp <- dp.to.cp(param)
result<-list(dp=param, cp=cp, logL=logL)
if(data) result$data <- list(X=X, y=y, residuals=d/omega)
result
}
#-------------------
gamma1.to.lambda<- function(gamma1){
max.gamma1 <- 0.5*(4-pi)*(2/(pi-2))^1.5
na <- (abs(gamma1)>max.gamma1)
if(any(na)) warning("NAs generated")
gamma1<-replace(gamma1,na,NA)
a <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
delta<- sqrt(pi/2)*a/sqrt(1+a^2)
lambda<-delta/sqrt(1-delta^2)
as.vector(lambda)
}
# Examples:
# a<-sn.2logL.profile(y=otis)
# a<-sn.2logL.profile(y=otis, use.cp=FALSE)
# a<-sn.2logL.profile(X=cbind(1,lbm), y=bmi, npts=50)
# a<-sn.2logL.profile(y=frontier,param.range=c(0.8,1.6,10,30), use.cp=FALSE, npts=11)
sn.2logL.profile<-function(X=matrix(rep(1,n)), y,
param.range=c(sqrt(var(y))*c(2/3, 3/2), -0.95, 0.95),
use.cp=TRUE, npts= 31 %/% d, plot.it=TRUE, ...)
{# plot 1D or 2D profile deviance (=-2logL) using either parameters
# if(plot.it & !exists(.Device)) stop("Device not active")
n<-length(y)
d<- round(length(param.range)/2)
if((d!=1)&(d!=2)) stop(" length(param.range) must be either 2 or 4")
if(d==1){
param1 <- seq(param.range[1],param.range[2],length=npts)
llik <- param2 <- rep(NA,npts)}
else{
param1 <- seq(param.range[1],param.range[2],length=npts)
param2 <- seq(param.range[3],param.range[4],length=npts)
llik <- matrix(NA,npts,npts)}
if(use.cp){
if(d==1){
gamma1<-param1
sigma <-param2
xlab <- "gamma1"
ylab <- ""}
else {
sigma <-param1
gamma1<-param2
xlab <- "sigma"
ylab <- "gamma1"
}
if(max(abs(gamma1))>0.9952717) stop("abs(gamma1)>0.9952717")
lambda <- gamma1.to.lambda(gamma1)
sc<-sqrt(1-(2/pi)*lambda^2/(1+lambda^2))
}
else{ # use dp
if(d==1) {
lambda<-param1
omega<-param2
xlab <- "lambda"
ylab <- ""}
else {
omega<-param1
sc <- rep(1,npts)
lambda<-param2
xlab <- "omega"
ylab <- "lambda"
}
}
cat(c("Running until",npts,":"))
for(i in 1:npts){
cat(" ");cat(i)
if(d==1) {
a<-sn.em(X, y, fixed=c(NA,NA,lambda[i]), ...)
llik[i]<-a$logL
# print(c(i,lambda[i],a$logL))
}
else{
for(j in 1:npts){
a<-sn.em(X, y, fixed=c(NA,param1[i]/sc[j],lambda[j]), ...)
llik[i,j] <- a$logL
# print( c(i,j, param1[i]/sc[j], lambda[j], a$logL))
}}
}
cat("\n")
#if(plot)
f<-2*(llik-max(llik))
if(plot.it){
if(d==1) plot(param1, f, type="l",
xlab=xlab, ylab="profile relative 2(logL)")
else contour(param1, param2, f, labcex=0.75,
xlab=xlab, ylab=ylab,
levels=-c(0.575, 1.386, 2.773, 4.605, 5.991, 9.210))
# qchisq(c(0.25,0.5,0.75,0.90,0.95,0.99),2)
title(main=paste("dataset:", deparse(substitute(y)),
"\nProfile relative 2(logLikelihood)", sep= " "))
}
invisible( list(param1=param1, param2=param2,
param.names=c(xlab,ylab), two.logL=f, maximum=2*max(llik)))
}
sn.mle <- function(X, y, cp, plot.it=TRUE, trace=FALSE, method="L-BFGS-B",
control=list(iter.max=100, abs.tol=1e-5))
{
xlab<-deparse(substitute(y))
if(!is.null(dim(y))) {
if(min(dim(y))==1) y<-as.vector(y)
else stop("y must be a vector")
}
n<-length(y)
if(missing(X)) {
X <-as.matrix(rep(1,n))
cp.names <- "mean"
}
else{
if(is.null(colnames(X)))
cp.names<- outer(deparse(substitute(X)),as.character(1:ncol(X)),
paste, sep=".")
else cp.names<- colnames(X)
}
cp.names<- c(cp.names,"s.d.","skewness")
m<-ncol(X)
if(missing(cp)) {
qrX <- qr(X)
s <- sqrt(sum(qr.resid(qrX, y)^2)/n)
gamma1 <- sum(qr.resid(qrX, y)^3)/(n*s^3)
if(abs(gamma1) > 0.99527) gamma1<- sign(gamma1)*0.95
cp <- c(qr.coef(qrX,y), s, gamma1)
}
else{
if(length(cp)!= (m+2)) stop("ncol(X)+2 != length(cp)")}
opt<- optim(cp, fn=sn.dev, gr=sn.dev.gh, method=method,
lower=c(-rep(Inf,m), 10*.Machine$double.eps, -0.99527),
upper=c(rep(Inf,m), Inf, 0.99527),
control=control, X=X, y=y, trace=trace, hessian=FALSE)
cp <- opt$par
if(trace) {
cat(paste("Message from optimization routine:", opt$message,"\n"))
cat("estimates (cp): ", cp, "\n")
}
if(abs(cp[m+2])> 0.9952717){
if(trace) cat("optim searched outside admissible range - restarted\n")
cp[m+2]<- sign(cp[m+2])*runif(1)
mle <- sn.mle(X, y, cp, plot.it, trace, method, control)
cp <- mle$cp
}
logL <- (-opt$value)/2
info <- attr(sn.dev.gh(cp, X, y, trace=FALSE, hessian=TRUE),"hessian")/2
# se <- sqrt(diag(solve(info)))
if(all(is.finite(info)))
{
qr.info <- qr(info)
info.ok <- (qr.info$rank == length(cp))
}
else info.ok <- FALSE
if(info.ok) {
se2 <- diag(solve(qr.info))
se <- sqrt(ifelse(se2 >= 0, se2, NA))
}
else
se <- rep(NA, length(cp))
if(plot.it) {
dp0<-cp.to.dp(cp)
if(all(X==rep(1,n)))
y0<-y
else {
y0<- as.vector(y - X %*% dp0[1:m])
dp0<-c(0,dp0[m+1],dp0[m+2])
xlab<-"residuals"
}
x<-seq(min(pretty(y0,10)),max(pretty(y0,10)),length=100)
pdf.sn <- dsn(x,dp0[1],dp0[2],dp0[3])
if(exists("sm.density",mode="function"))
{
a<-sm.density(x=y0, eval.points=x, h=hnorm(y0)/1.5, display="none")
a<-sm.density(x=y0, eval.points=x, h=hnorm(y0)/1.5, xlab=xlab,
lty=3, ylim=c(0,max(a$estimate,pdf.sn)))
}
else
{
h <- hist(y0, prob=TRUE, breaks="FD", plot=FALSE)
hist(y0, prob=TRUE, breaks="FD", xlim=c(min(x),max(x)),
xlab=xlab, main=xlab, ylim=c(0, max(pdf.sn, h$density)))
}
if(n<101) points(y0,rep(0,n),pch=1)
# title(deparse(substitute(y)))
curve(dsn(x, dp0[1], dp0[2], dp0[3]), add=TRUE, col=2)
}
names(cp)<- names(se)<- cp.names
list(call=match.call(), cp=cp, se=se, info=info, logL=logL, optim=opt)
}
sn.dev <- function(cp, X, y, trace=FALSE)
{ # -2*logL for centred parameters
m <- ncol(X)
if(abs(cp[m+2])> 0.9952717){
warning("optim search in abs(cp[m+2])> 0.9952717, value adjusted")
cp[m+2] <- 0.9952717*sign(cp[m+2])
}
dp <- as.vector(cp.to.dp(cp))
location <- as.vector(X %*% as.matrix(dp[1:m]))
logL <- sum(dsn(y, location, dp[m+1], dp[m+2], log=TRUE))
if(trace) {cat("sn.dev: (cp,dev)="); print(c(cp,-2*logL))}
return(-2*logL)
}
sn.dev.gh <- function(cp, X, y, trace=FALSE, hessian=FALSE)
{
# computes gradient and hessian of dev=-2*logL for centred parameters
# (and observed information matrix);
m <- ncol(X)
n <- nrow(X)
np <- m+2
if(abs(cp[m+2])> 0.9952717){
warning("optim search in abs(cp[m+2])> 0.9952717, value adjusted")
cp[m+2] <- 0.9952717*sign(cp[m+2])
}
score <- rep(NA,np)
info <- matrix(NA,np,np)
beta <- cp[1:m]
sigma <- cp[m+1]
gamma1 <- cp[m+2]
lambda <- gamma1.to.lambda(gamma1)
# dp<-cp.to.dp(c(beta,sigma,gamma1))
# info.dp <- sn.info(dp,y)$info.dp
mu <- as.vector(X %*% as.matrix(beta))
d <- y-mu
r <- d/sigma
E.Z<- lambda*sqrt(2/(pi*(1+lambda^2)))
s.Z<- sqrt(1-E.Z^2)
z <- E.Z+s.Z*r
p1 <- as.vector(zeta(1,lambda*z))
p2 <- as.vector(zeta(2,lambda*z))
omega<- sigma/s.Z
w <- lambda*p1-E.Z
DE.Z <- sqrt(2/pi)/(1+lambda^2)^1.5
Ds.Z <- (-E.Z/s.Z)*DE.Z
Dz <- DE.Z + r*Ds.Z
DDE.Z<- (-3)*E.Z/(1+lambda^2)^2
DDs.Z<- -((DE.Z*s.Z-E.Z*Ds.Z)*DE.Z/s.Z^2+E.Z*DDE.Z/s.Z)
DDz <- DDE.Z + r*DDs.Z
score[1:m] <- omega^(-2)*t(X) %*% as.matrix(y-mu-omega*w)
score[m+1] <- (-n)/sigma+s.Z*sum(d*(z-p1*lambda))/sigma^2
score[m+2] <- score.l <- n*Ds.Z/s.Z-sum(z*Dz)+sum(p1*(z+lambda*Dz))
Dg.Dl <-1.5*(4-pi)*E.Z^2*(DE.Z*s.Z-E.Z*Ds.Z)/s.Z^4
R <- E.Z/s.Z
T <- sqrt(2/pi-(1-2/pi)*R^2)
Dl.Dg <- 2*(T/(T*R)^2+(1-2/pi)/T^3)/(3*(4-pi))
R. <- 2/(3*R^2 * (4-pi))
T. <- (-R)*R.*(1-2/pi)/T
DDl.Dg <- (-2/(3*(4-pi))) * (T./(R*T)^2+2*R./(T*R^3)+3*(1-2/pi)*T./T^4)
score[m+2] <- score[m+2]/Dg.Dl # convert deriv wrt lamda to gamma1
gradient <- (-2)*score
if(hessian){
info[1:m,1:m] <- omega^(-2) * t(X) %*% ((1-lambda^2*p2)*X)
info[1:m,m+1] <- info[m+1,1:m] <-
s.Z* t(X) %*% as.matrix((z-lambda*p1)+d*(1-lambda^2*p2)*
s.Z/sigma)/sigma^2
info[m+1,m+1] <- (-n)/sigma^2+2*s.Z*sum(d*(z-lambda*p1))/sigma^3 +
s.Z^2*sum(d*(1-lambda^2*p2)*d)/sigma^4
info[1:m,m+2] <- info[m+2,1:m] <-
t(X)%*%(-2*Ds.Z*d/omega+Ds.Z*w+s.Z*(p1+lambda*p2*(z+lambda*Dz)
-DE.Z))/sigma
info[m+1,m+2] <- info[m+2,m+1] <-
-sum(d*(Ds.Z*(z-lambda*p1)+s.Z*(Dz-p1-p2*lambda*(z+lambda*Dz))
))/sigma^2
info[m+2,m+2] <- (n*(-DDs.Z*s.Z+Ds.Z^2)/s.Z^2+sum(Dz^2+z*DDz)-
sum(p2*(z+lambda*Dz)^2)- sum(p1*(2*Dz+lambda*DDz)))
info[np,] <- info[np,]/Dg.Dl # convert info wrt lamda to gamma1
info[,np] <- info[,np]*Dl.Dg # an equivalent form of the above
info[np,np] <- info[np,np]-score.l*DDl.Dg
attr(gradient,"hessian") <- 2*info
}
if(trace) {cat("sn.dev.gh: gradient="); print(-2*score)}
return(gradient)
}
#
dmsn <- function(x, xi=rep(0,d), Omega, alpha)
{# Density of Multivariate SN rv with parameters (xi, Omega, alpha)
# evaluated at x, which is either a d-vector or (n x d) matrix
scale <- sqrt(diag(Omega))
if(is.vector(x)) {n <-1 ; d <- length(x)}
else {n <-dim(x)[1] ; d <- dim(x)[2]}
X <- t(matrix(x, nrow=n,ncol=d))- xi
z <- X/scale
Q <- apply((solve(Omega)%*% X)* X,2,sum) # diag of (x Omega^(-1) x^T)
# d <- diag(qr(Omega)[[1]])
Det <- prod(abs( diag(qr(Omega)[[1]]) ))
pdf <- 2*exp(-0.5*Q) * pnorm(t(z)%*%as.matrix(alpha))/sqrt((2*pi)^d * Det)
as.vector(pdf)
}
rmsn <- function(n=1, xi=rep(0,d), Omega, alpha){
# generates SN_d(xi,Omega,alpha) variates using transformation method
d <- ncol(Omega)
Z <- msn.quantities(xi,Omega,alpha)
y <- matrix(rnorm(n*d),n,d) %*% chol(Z$Psi)
# each row of y is N_d(0,Psi)
abs.y0 <- abs(rnorm(n))
abs.y0 <- matrix(rep(abs.y0,d), ncol=d)
delta <- Z$delta
z <- delta * t(abs.y0) + sqrt(1-delta^2) * t(y)
y <- t(xi+Z$omega*z)
return(y)
}
dsn2.plot <- function(x, y, xi, Omega, alpha, ...)
{# plot bivariate density SN_2(xi,Omega,alpha) computed at (x,y) grid
if(any(dim(Omega)!=c(2,2))) stop("dim(Omega) != c(2,2)")
nx <- length(x)
ny <- length(y)
xoy <- cbind(rep(x,ny), as.vector(matrix(y,nx,ny,byrow=TRUE)))
X <- matrix(xoy, nx*ny, 2, byrow=FALSE)
pdf<-dmsn(X, xi, Omega, alpha)
pdf<-matrix(pdf, nx, ny)
contour(x, y, pdf, ...)
invisible(list(x=x, y=y, density=pdf, xi=xi, Omega=Omega, alpha=alpha))
}
msn.quantities <- function(xi,Omega,alpha){
# 21-12/1997; computes various quantities related to SN_k(xi,Omega,alpha)
Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
k <- length(alpha)
if(length(xi)!=k | any(dim(Omega)!=c(k,k)))
stop("dimensions of arguments do not match")
omega <- sqrt(diag(Omega))
O.cor <- Diag(1/omega) %*% Omega %*% Diag(1/omega)
tmp <- as.vector(sqrt(1 + t(as.matrix(alpha))%*%O.cor%*%alpha))
delta<- as.vector(O.cor %*%alpha)/tmp
lambda<- delta/sqrt(1-delta^2)
D <-diag(sqrt(1+lambda^2))
Psi <- D %*% (O.cor-outer(delta,delta)) %*% D
Psi <- (Psi+t(Psi))/2
O.inv <- solve(Omega)
oi<- sqrt(diag(O.inv))
O.pcor <- Diag(1/oi)%*% (-O.inv) %*% Diag(1/oi)
diag(O.pcor) <- rep(1,k)
muZ <- delta*sqrt(2/pi)
muY <- xi+omega*muZ
Sigma <- Diag(omega) %*% (O.cor-outer(muZ,muZ)) %*% Diag(omega)
Sigma <- (Sigma+t(Sigma))/2
cv <- muZ/sqrt(1-muZ^2)
gamma1 <- 0.5*(4-pi)*cv^3
list(xi=xi, Omega=Omega, alpha=alpha, omega=omega, mean=muY, variance=Sigma,
Omega.conc=O.inv, Omega.cor=O.cor, Omega.pcor=O.pcor,
lambda=lambda, Psi=Psi, delta=delta, skewness=gamma1)
}
msn.conditional <- function(xi, Omega, alpha, fixed.comp, fixed.values)
{
# conditional Multivariate SN (6/11/1997).
# Given a rv Y ~ SN_k(xi,Omega,alpha), this function computes cumulants of
# conditrional distribution, given that the fixed.com take on fixed.values;
# then it finds MSN with matching cumlants.
Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
msqrt <- function(A) Diag(sqrt(diag(as.matrix(A))))
imsqrt<- function(A) Diag(1/sqrt(diag(as.matrix(A))))
k<-length(alpha)
h<-length(fixed.comp)
if(any(dim(Omega)!=c(k,k)) | length(xi)!=k | h!=length(fixed.values))
stop("dimensions of parameters do not match")
fc <- fixed.comp
O <- as.matrix(Omega)
O11<- O[fc,fc, drop=FALSE]
O12<- O[fc,-fc, drop=FALSE]
O21<- O[-fc,fc, drop=FALSE]
O22<- O[-fc,-fc, drop=FALSE]
o22<- sqrt(diag(O22))
inv.O11 <- solve(O11)
xi1 <- xi[fc, drop=FALSE]
xi2 <- xi[-fc, drop=FALSE]
alpha1 <- matrix(alpha[fc])
alpha2 <- matrix(alpha[-fc])
O22.1 <- O22 - O21 %*% inv.O11 %*% O12
O22.b <- imsqrt(O22) %*% O22.1 %*% imsqrt(O22)
xi.c <- xi2 + as.vector(O21 %*% inv.O11 %*% matrix(fixed.values-xi1))
a <- sqrt(1+as.vector(t(alpha2) %*% O22.b %*% alpha2))
alpha.b <- (alpha1 + msqrt(O11) %*% inv.O11 %*% O12 %*% (alpha2/o22))/a
d2 <- as.vector(O22.b %*% alpha2)/a
x0 <- sum(alpha.b * (fixed.values-xi1)/sqrt(diag(O11)))
E.c <- xi.c + zeta(1,x0)*o22*d2
var.c <- O22.1+zeta(2,x0)*outer(o22*d2,o22*d2)
gamma1<- zeta(3,x0)*d2^3/diag(O22.b+zeta(2,x0)*d2^2)^1.5
cum <- list(as.vector(E.c),var.c,gamma1)
# cumulants are computed; now choose SN distn to fit them
a <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
E.z <- a/sqrt(1+a^2)
delta <- E.z*sqrt(pi/2)
omega <- sqrt(diag(var.c)/(1-E.z^2))
O.new <- var.c+outer(omega*E.z,omega*E.z)
xi.new<- E.c-omega*E.z
B <- Diag(1/omega)
m <- as.vector(solve(B %*% O.new %*% B) %*% as.matrix(delta))
a <- m/sqrt(1-sum(delta*m))
# cum2<- msn.cumulants(xi.new,O.new,a)
list(cumulants=cum, fit=list(xi=xi.new, Omega=O.new, alpha=a, delta=delta))
}
msn.marginal <- function(xi=NULL, Omega=NULL, alpha=NULL, comp=1:d, dp=NULL)
{# calcola parametri della marginale associata a comp di un SN_d
Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
if(!is.null(xi) && !is.null(dp)) stop("You cannot set both xi and dp")
if(!is.null(xi) && is.list(xi) && is.null(dp)) dp <- xi
if(!is.null(dp)){
xi <- dp$xi
Omega <- dp$Omega
alpha <- dp$alpha
}
xi <- as.vector(xi)
comp <- as.integer(comp)
alpha <- as.vector(alpha)
d <- length(alpha)
if(length(comp)<d){
if(any(comp>d | comp<1)) stop("comp makes no sense")
scale<- sqrt(diag(Omega))
O <- Diag(1/scale) %*% Omega %*% Diag(1/scale)
O11 <- O[comp,comp, drop=FALSE]
O12 <- O[comp,-comp, drop=FALSE]
O21 <- O[-comp,comp, drop=FALSE]
O22 <- O[-comp,-comp, drop=FALSE]
alpha1<- as.matrix(alpha[comp, drop=FALSE])
alpha2<- as.matrix(alpha[-comp, drop=FALSE])
O22.1 <- O22 - O21 %*% solve(O11) %*% O12
a.sum <- as.vector(t(alpha2) %*% O22.1 %*% alpha2)
a.new <- as.vector(alpha1+solve(O11) %*% O12 %*% alpha2)/sqrt(1+a.sum)
O.new <- Diag(scale[comp]) %*% O11 %*% Diag(scale[comp])
result<- list(xi=xi[comp], Omega=O.new, alpha=a.new)
}
else {
if(any(sort(comp)!=(1:d))) stop("comp makes no sense")
result <-
list(xi=xi[comp], Omega=as.matrix(Omega[comp,comp, drop=FALSE]), alpha=alpha[comp])
}
result
}
msn.cond.plot <- function(xi, Omega, alpha, fixed.comp, fixed.values, n=35)
{# fa il grafico di Y_2|Y_1; assumiamo che dim(Y_2)= 2
msn.pdf2.aux <- function(x,y,xi,Omega,alpha,fc,fv)
{
nx <- length(x)
ny <- length(y)
FV <- matrix(rep(fv,nx*ny), nx*ny, length(fv), byrow=TRUE)
X <- matrix(NA, nx*ny, length(alpha))
X[,fc] <- FV
xoy <- cbind(rep(x,ny), as.vector(matrix(y,nx,ny,byrow=TRUE)))
X[,-fc] <- matrix(xoy, nx*ny, 2, byrow=FALSE)
pdf<-dmsn(X,xi,Omega,alpha)
matrix(pdf,nx,ny)
}
dsn2 <- function(x,y,d1,d2,omega)
{
u <- (x*(d1-omega*d2)+y*(d2-omega*d1))/
sqrt((1-omega^2-d1^2-d2^2+2*omega*d1*d2)*(1-omega^2))
pdfn2 <- exp(-0.5*(x^2-2*omega*x*y+y^2)/(1-omega^2))/
(2*pi*sqrt(1-omega^2))
2*pdfn2*pnorm(u)
}
fc <- fixed.comp
fv <- fixed.values
cond <- msn.conditional(xi,Omega,alpha,fc,fv)
xi.c <- cond$fit$xi
O.c <- cond$fit$Omega
a.c <- cond$fit$alpha
if(any(dim(O.c)!=c(2,2))) stop("length(alpha)-length(fixed.com)!=2")
scale1<-sqrt(as.vector(O.c[1,1]))
scale2<-sqrt(as.vector(O.c[2,2]))
delta <- cond$fit$delta
omega <-as.vector(O.c[1,2])/(scale1*scale2)
x<-seq(xi.c[1]-3*scale1, xi.c[1]+3*scale1, length=n)
y<-seq(xi.c[2]-3*scale2, xi.c[2]+3*scale2, length=n)
plot(x,y,type="n", main="Conditional multivariate SN pdf")
z1<-(x-xi.c[1])/scale1
z2<-(y-xi.c[2])/scale2
pdf.fit<-outer(z1,z2,dsn2,d1=delta[1],d2=delta[2],omega=omega)/
(scale1*scale2)
cond$pdf<-list(x=x,y=y,f.fitted=pdf.fit)
levels<-pretty(pdf.fit,5)
contour(x,y,pdf.fit,levels=levels,add=TRUE,col=2)
# fino a qui per il calcolo della densitŕ approx;
# ora otteniamo quella esatta
numer <- msn.pdf2.aux(x, y, xi, Omega, alpha, fc, fv)
marg <- msn.marginal(xi, Omega, alpha, fc)
denom <- dmsn(fv, marg$xi, marg$Omega, marg$alpha)
pdf.exact<- numer/as.vector(denom)
contour(x, y, pdf.exact, add=TRUE, levels=levels, col=1, lty=4, labcex=0)
legend(x[1],y[length(y)],c("approx","exact"), lty=c(1,4),col=c(2,1))
cond$pdf$f.exact<-pdf.exact
cond$rel.error<-summary((pdf.fit-pdf.exact)/pdf.exact)
cond$abs.error<-summary(abs(pdf.fit-pdf.exact))
invisible(cond)
}
msn.moment.fit <- function(y){
# 31-12-1997: simple fit of MSN distribution usign moments
Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
y <- as.matrix(y)
k <- ncol(y)
m.y <- apply(y,2,mean)
var.y <- var(y)
y0 <- (t(y)-m.y)/sqrt(diag(var.y))
gamma1<- apply(y0^3,1,mean)
out <- (abs(gamma1)>0.99527)
gamma1[out] <- sign(gamma1[out])*0.995
a <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
delta <- sqrt(pi/2)*a/sqrt(1+a^2)
m.z <- delta*sqrt(2/pi)
omega <- sqrt(diag(var.y)/(1-m.z^2))
Omega <- var.y+outer(omega*m.z,omega*m.z)
xi <- m.y-omega*m.z
O.cor <- Diag(1/omega) %*% Omega %*% Diag(1/omega)
O.cor <- (t(O.cor)+O.cor)/2
O.inv <- solve(O.cor)
tmp <- as.vector(1-t(delta) %*% O.inv %*% delta)
if(tmp<=0) {tmp <- 0.0001; admissible <- FALSE}
else admissible<-TRUE
alpha <-as.vector(O.inv%*%delta)/sqrt(tmp)
list(xi=xi, Omega=Omega, alpha=alpha, Omega.cor=O.cor, omega=omega,
delta=delta, skewness=gamma1, admissible=admissible)
}
msn.fit <- function(X, y, freq, plot.it=TRUE, trace=FALSE, ... )
{
y.name <- deparse(substitute(y))
y.names<- dimnames(y)[[2]]
y <- as.matrix(y)
colnames(y)<-y.names
k <- ncol(y)
if(missing(freq)) freq<-rep(1,nrow(y))
n <- sum(freq)
if(missing(X)) {
X <- rep(1,nrow(y))
missing.X <- TRUE }
else
missing.X <- FALSE
X <- as.matrix(X)
m <- ncol(X)
if(length(dimnames(y)[[2]])==0) {
dimnames(y) <- list(NULL, outer("V",as.character(1:k),paste,sep=""))
y.names<- as.vector(dimnames(y)[[2]])
}
qrX <- qr(X)
mle<- msn.mle(X=X, y=y, freq=freq, trace=trace, ...)
mle$call <- match.call()
# print(mle$nlminb$message)
beta <- mle$dp$beta
Omega <- mle$dp$Omega
alpha <- mle$dp$alpha
omega <- sqrt(diag(Omega))
xi <- X %*% beta
if(plot.it & all(freq==rep(1,length(y)))) {
if(missing.X) {
y0 <-y
xi0 <- apply(xi,2,mean)}
else {
y0 <- y-xi
xi0 <- rep(0,k)
}
if(k>1) {
opt<-options()
options(warn=-1)
pairs(y0, labels=y.names,
panel=function(x, y, Y, y.names, xi, Omega, alpha)
{
for(i in 1:length(alpha)){
# if(y.names[i]==deparse(substitute(x))) Ix<-i
# if(y.names[i]==deparse(substitute(y))) Iy<-i
if(all(Y[,i]==x)) Ix<-i
if(all(Y[,i]==y)) Iy<-i
}
points(x,y)
marg<-msn.marginal(xi,Omega,alpha,c(Ix,Iy))
xi.marg<-marg$xi
Omega.marg<-marg$Omega
alpha.marg<-marg$alpha
x1 <- seq(min(x), max(x), length=30)
x2 <- seq(min(y), max(y), length=30)
dsn2.plot(x1, x2, xi.marg, Omega.marg, alpha.marg, add=TRUE, col=2)},
# end "panel" function
Y=y0, y.names=y.names, xi=xi0, Omega=Omega, alpha=alpha)
options(opt)
}
else{ # plot for case k=1
y0<-as.vector(y0)
x<-seq(min(pretty(y0,10)),max(pretty(y0,10)),length=100)
if(missing.X){
dp0<-c(xi0,omega,alpha)
xlab<-y.name}
else {
dp0<-c(0,omega,alpha)
xlab <- "residuals"}
hist(y0, prob=TRUE, breaks="FD", xlab=xlab, ylab="density")
lines(x, dsn(x,dp0[1],dp0[2],dp0[3]))
if(length(y)<101) points(y0, rep(0,n), pch=1)
title(y.name)
}
cat("Press <Enter> to continue..."); readline()
par(mfrow=c(1,2))
pp <- qchisq((1:n)/(n+1),k)
Xb <- qr.fitted(qrX,y)
res<- qr.resid(qrX,y)
rad.n <- apply((y-Xb) * ((y-Xb) %*% solve(var(res))), 1, sum)
rad.sn <- apply((y-xi) * ((y-xi) %*% solve(Omega)), 1, sum)
plot(pp, sort(rad.n), pch=1, ylim=c(0,max(rad.n,rad.sn)),
xlab="Percentiles of chi-square distribution",
ylab="Mahalanobis distances")
abline(0,1,lty=3)
title(main="QQ-plot for normal distribution", sub=y.name)
plot(pp, sort(rad.sn), pch=1, ylim=c(0,max(rad.n,rad.sn)),
xlab="Percentiles of chi-square distribution",
ylab="Mahalanobis distances")
abline(0,1,lty=3)
title(main="QQ-plot for skew-normal distribution", sub=y.name)
cat("Press <Enter> to continue..."); readline()
plot((1:n)/(n+1), sort(pchisq(rad.n,k)), xlab="", ylab="")
abline(0,1,lty=3)
title(main="PP-plot for normal distribution", sub=y.name)
plot((1:n)/(n+1), sort(pchisq(rad.sn,k)), xlab="", ylab="")
abline(0,1,lty=3)
title(main="PP-plot for skew-normal distribution", sub=y.name)
par(mfrow=c(1,1))
} # end ploting
dev.norm<- msn.dev(c(qr.coef(qrX,y),rep(0,k)), X, y, freq)
test <- dev.norm + 2*mle$logL
p.value <- 1-pchisq(test,k)
if(trace) {
cat("LRT for normality (test-function, p-value): ")
print(c(test,p.value))
}
mle$test.normality <- list(LRT=test, p.value=p.value)
invisible(mle)
}
msn.mle <-function(X, y, freq, start, trace=FALSE, method="BFGS",
control=list(iter.max=150) )
{
y <- as.matrix(y)
if(missing(X)) X <- rep(1,nrow(y))
else {if(!is.numeric(X)) stop("X must be numeric")}
if(missing(freq)) freq <- rep(1,nrow(y))
X <- as.matrix(X)
k <- ncol(y)
n <- sum(freq)
m <- ncol(X)
y.names<-dimnames(y)[[2]]
x.names<-dimnames(X)[[2]]
if(missing(start)) {
fit0 <- lm.fit(X, y, method="qr")
beta <- as.matrix(coef(fit0))
res <- resid(fit0)
a <- msn.moment.fit(res)
Omega <- a$Omega
omega <- a$omega
alpha <- a$alpha
if(!a$admissible) alpha<-alpha/(1+max(abs(alpha)))
beta[1,] <- beta[1,]-omega*a$delta*sqrt(2/pi)
}
else{
beta <- start$beta
Omega <- start$Omega
alpha <- start$alpha
omega <- sqrt(diag(Omega))
}
al.om <-alpha/omega
if(trace){
cat("Initial parameters:\n")
print(cbind(t(beta),al.om,Omega))
}
param<- c(beta,al.om)
dev <- msn.dev(param,X,y,freq)
opt <- optim(param, fn=msn.dev, gr=msn.dev.grad, method=method,
control=control, X=X, y=y, freq=freq, trace=trace)
if(trace)
cat(paste("Message from optimization routine:", opt$message,"\n"))
logL <- (-opt$value)/2
beta <- matrix(opt$par[1:(m*k)],m,k)
dimnames(beta)[2] <- list(y.names)
dimnames(beta)[1] <- list(x.names)
al.om <- opt$par[(m*k+1):(m*k+k)]
xi <- X %*% beta
Omega <- t(y-xi) %*% (freq*(y-xi))/n
omega <- sqrt(diag(Omega))
alpha <- al.om*omega
param <- cbind(omega,alpha)
dimnames(Omega) <- list(y.names,y.names)
dimnames(param)[1] <- list(y.names)
info <- num.deriv(opt$par, FUN="msn.dev.grad", X=X, y=y, freq=freq)/2
se <- sqrt(diag(solve(info)))
se.beta <- matrix(se[1:(m*k)],m,k)
se.alpha<- se[(m*k+1):(m*k+k)]*omega
dimnames(se.beta)[2]<-list(y.names)
dimnames(se.beta)[1]<-list(x.names)
se <- list(beta=se.beta, alpha=se.alpha, info=info)
dp <- list(beta=beta, Omega=Omega, alpha=alpha)
list(call=match.call(), dp=dp, logL=logL, se=se, optim=opt)
}
msn.dev<-function(param, X, y, freq, trace=FALSE){
k <- ncol(y)
# if(missing(freq)) freq<-rep(1,nrow(y))
n <- sum(freq)
m <- ncol(X)
beta<-matrix(param[1:(m*k)],m,k)
al.om<-param[(m*k+1):(m*k+k)]
y0 <- y-X %*% beta
Omega <- (t(y0) %*% (y0*freq))/n
d <- diag(qr(2*pi*Omega)[[1]])
logDet <- sum(log(abs(d)))
dev <- n*logDet-2*sum(zeta(0,y0 %*% al.om)*freq)+n*k
if(trace) {
cat("\nmsn.dev:",dev,"\n","parameters:");
print(rbind(beta,al.om))
}
dev
}
msn.dev.grad <- function(param, X, y, freq, trace=FALSE){
k <- ncol(y)
# if(missing(freq)) freq<-rep(1,nrow(y))
n <- sum(freq)
m <- ncol(X)
beta<-matrix(param[1:(m*k)],m,k)
al.om<-param[(m*k+1):(m*k+k)]
y0 <- y-X %*% beta
Omega <- (t(y0) %*% (freq*y0))/n
p1 <- zeta(1,as.vector(y0 %*% al.om))
Dbeta <- t(X)%*% (y0*freq) %*%solve(Omega) -
outer(as.vector(t(X*freq)%*%p1), al.om)
Dal.om <- as.vector(t(y0*freq) %*% p1)
if(trace){
cat("gradient:\n")
print(rbind(Dbeta,Dal.om))}
-2*c(Dbeta,Dal.om)
}
num.deriv <- function(coefficients, FUN, ...)
{# da rm.fit: derivate seconde numeriche, se FUN da` il gradiente
FUN <- get(FUN, inherit = TRUE)
values <- FUN(coefficients, ...)
p <- length(values)
H <- matrix(0, p, p)
h <- rep(0, p)
delta <- cbind((abs(coefficients) + 1e-10) * 1e-5, rep(1e-06, p))
delta <- apply(delta, 1, max)
for(i in 1:p) {
h[i] <- delta[i]
new.values <- FUN(coefficients + h, ...)
H[, i] <- (new.values - values)/delta[i]
h[i] <- 0
}
(H+t(H))/2
}
#---
# skew-t portion
dst <- function (x, location = 0, scale = 1, shape = 0, df=Inf)
{
if(df==Inf) return(dsn(x,location,scale,shape))
z <- (x - location)/scale
pdf <- dt(z, df=df)/scale
cdf <- pt(shape*z*sqrt((df+1)/(z^2+df)), df=df+1)
2 * pdf * cdf
}
rst <- function (n=1, location = 0, scale = 1, shape = 0, df=Inf)
{
z <- rsn(n,location=0,scale,shape)
if(df==Inf) return(z+location)
v <- rchisq(n,df)/df
y <- z/sqrt(v)+location
attr(y,"parameters")<- c(location,scale,shape,df)
return(y)
}
pst <- function (x, location = 0, scale = 1, shape = 0, df = Inf, ...)
{
if (df == Inf)
p <- psn(x, location, scale, shape)
else {
if(df<= 0) stop("df must be non-negative")
z <- (x-location)/scale
p <- numeric(length(z))
fp <- function(v, shape, df, t.value) psn(sqrt(v) * t.value, 0, 1,
shape) * dchisq(v * df, df = df) * df
for (i in 1:length(z)) {
if(z[i] < 10+50/df)
p[i]<- integrate(dst, -Inf, z[i], shape = shape, df = df,
...)$value
else
p[i] <- integrate(fp, 0, Inf, shape = shape, df = df,
t.value = z[i], ...)$value
}
}
pmax(0,pmin(1,p))
}
qst <- function (p, location = 0, scale = 1, shape = 0, df=Inf,
tol = 1e-08, ...)
{
if (df == Inf)
return(qsn(p, location, scale, shape))
max.q <- sqrt(qf(p, 1, df))
min.q <- -sqrt(qf(1 - p, 1, df))
if (shape == Inf)
return(location + scale * max.q)
if (shape == -Inf)
return(location + scale * min.q)
na <- is.na(p) | (p < 0) | (p > 1)
zero <- (p == 0)
one <- (p == 1)
p <- replace(p, (na | zero | one), 0.5)
cum <- st.cumulants(shape, max(df,5), n=4)
g1 <- cum[3]/cum[2]^(3/2)
g2 <- cum[4]/cum[2]^2
x <- qnorm(p)
x <- (x + (x^2 - 1) * g1/6 + x * (x^2 - 3) * g2/24 -
x * (2 * x^2 - 5) * g1^2/36)
x <- cum[1] + sqrt(cum[2]) * x
max.err <- 1
while (max.err > tol) {
x1 <- x - (pst(x, 0, 1, shape, df, ...) - p)/dst(x, 0, 1, shape, df)
x1 <- pmin(x1, max.q)
x1 <- pmax(x1, min.q)
max.err <- max(abs(x1 - x)/(1 + abs(x)))
x <- x1
}
x <- replace(x, na, NA)
x <- replace(x, zero, -Inf)
x <- replace(x, one, Inf)
return(location + scale * x)
}
st.cumulants <- function(shape=0, df=Inf, n=4)
{
if(df == Inf) return(sn.cumulants(shape, n=n))
n<- as.integer(n)
if(df <= n) stop("need df>n")
if(length(shape)>1) warning("only shape[1] will be used")
delta <- shape[1]/sqrt(1 + shape[1]^2)
cum<- numeric(min(n,4))
if(n<1) return(cum)
mu <- delta*sqrt(df/pi)*exp(lgamma((df-1)/2)-lgamma(df/2))
cum[1]<- mu
if(n>1) cum[2] <- df/(df-2) - mu^2
if(n>2) cum[3] <- mu*(df*(3-delta^2)/(df-3) - 3*df/(df-2)+2*mu^2)
if(n>3) cum[4] <- (3*df^2/((df-2)*(df-4)) - 4*mu^2*df*(3-delta^2)/(df-3)
+ 6*mu^2*df/(df-2)-3*mu^4)- 3*cum[2]^2
cum
}
dmst <- function(x, xi=rep(0,d), Omega, alpha, df=Inf)
{
# Density of multivariate ST rv with parameters (xi, Omega, alpha, df)
# evaluated at x, which is either a d-vector or (n,d) matrix
#
if(df==Inf) return(dmsn(x,xi,Omega,alpha))
if(is.vector(x)) {
n <- 1
d <- length(x)
}
else {
n <- dim(x)[1]
d <- dim(x)[2]
}
omega <- sqrt(diag(Omega))
X <- t(matrix(x, nrow = n, ncol = d)) - xi
z <- X/omega
Q <- apply((solve(Omega) %*% X) * X, 2, sum) # diag of (x Omega^(-1) x^T)
# Det <- as.numeric(det.Hermitian(as.Matrix(Omega), logarithm = F)$modulus)
Det <- abs(prod(diag(qr(Omega)$qr)))
L <- as.vector(t(z) %*% as.matrix(alpha))
pdf.mt <- (gamma((df+d)/2)/
(sqrt((pi*df)^d*Det) * gamma(df/2) * (1+Q/df)^((df+d)/2)))
cdf.T <- pt(L*sqrt((df+d)/(Q+df)), df+d)
2 * pdf.mt * cdf.T
}
rmst <- function(n=1, xi=rep(0,d), Omega, alpha, df=Inf)
{
d <- ncol(Omega)
if(df==Inf) x<-1
else x <- rchisq(n,df)/df
z <- rmsn(n,rep(0,d),Omega,alpha)
y <- t(xi+t(sqrt(x)*z))
return(y)
}
pmst <- function(x, xi=rep(0,d), Omega=1, alpha=rep(0,d), df=Inf, ...)
{
Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
d <- ifelse(is.matrix(Omega),nrow(Omega),1)
Omega<- matrix(Omega,d,d)
omega<- sqrt(diag(Omega))
Ocor <- Diag(1/omega) %*% Omega %*% Diag(1/omega)
# t(Omega /omega)/omega
O.alpha <- as.vector(Ocor %*% alpha)
delta <- O.alpha/sqrt(1+sum(alpha*O.alpha))
A <- matrix(rbind(c(1,-delta),cbind(-delta,Ocor)),d+1,d+1)
if(df==Inf)
2 * pmvnorm(upper=c(0,(x-xi)/omega), corr=A, ...)
else
2 * pmvt(upper=c(0,(x-xi)/omega), corr=A, df=df, ...)
}
pmsn <- function(x, xi=rep(0,d), Omega, alpha, ...)
{ d<- length(alpha)
pmst(x, xi, Omega, alpha, df=Inf, ...)
}
dst2.plot <- function(x, y, xi, Omega, alpha, df, ...)
{
# plot bivariate density ST_2(xi,Omega,alpha,df) computed at (x,y) grid
if(any(dim(Omega) != c(2, 2))) stop("dim(Omega) != c(2,2)")
nx <- length(x)
ny <- length(y)
xoy <- cbind(rep(x, ny), as.vector(matrix(y, nx, ny, byrow = TRUE)))
X <- matrix(xoy, nx * ny, 2, byrow = FALSE)
pdf <- dmst(X, xi, Omega, alpha, df)
pdf <- matrix(pdf, nx, ny)
contour(x, y, pdf, ...)
invisible(list(x = x, y = y, density = pdf, xi = xi, Omega = Omega,
alpha = alpha, df=df))
}
mst.fit <- function(X, y, freq, start, fixed.df=NA, plot.it=TRUE,
trace=FALSE, ... )
{
y.name <- deparse(substitute(y))
y.names<- dimnames(y)[[2]]
y <- as.matrix(y)
d <- ncol(y)
if(is.null(d)) d<- 1
if(d>1){
if(length(y.names)==0){
dimnames(y) <-
list(dimnames(y)[[1]], outer("V",as.character(1:d),paste,sep=""))
y.names<- as.vector(dimnames(y)[[2]])
}}
else
colnames(y)<-y.name
if(missing(freq)) freq <- rep(1,nrow(y))
n <- sum(freq)
if(missing(X)) {
X <- rep(1,nrow(y))
missing.X <- TRUE }
else
missing.X <- FALSE
X <- as.matrix(X)
qrX <- qr(X)
m <- ncol(X)
mle <- mst.mle(X=X, y=y, freq=freq, fixed.df=fixed.df, start=start,
trace=trace,...)
mle$call <- match.call()
beta <- mle$dp$beta
Omega <- mle$dp$Omega
alpha <- mle$dp$alpha
omega <- sqrt(diag(Omega))
df <- mle$dp$df
xi <- X %*% beta
if(plot.it & all(freq==rep(1,length(y)))) {
if(missing.X) {
y0 <-y
xi0 <- apply(xi,2,mean)}
else {
y0 <- y-xi
xi0 <- rep(0,d)
}
if(d>1) {
opt<-options()
options(warn=-1)
pairs(y0, labels=y.names,
panel=function(x, y, Y, y.names, xi, Omega, alpha)
{
for(i in 1:length(alpha)){
if(all(Y[,i]==x)) Ix<-i
if(all(Y[,i]==y)) Iy<-i
}
points(x,y)
marg <- msn.marginal(xi, Omega ,alpha, c(Ix,Iy))
xi.marg <- marg$xi
Omega.marg <- marg$Omega
alpha.marg <- marg$alpha
x1 <- seq(min(x), max(x), length=30)
x2 <- seq(min(y), max(y), length=30)
dst2.plot(x1, x2, xi.marg, Omega.marg, alpha.marg, df,
add=TRUE, col=2)
}, # end "panel" function
Y=y0, y.names=y.names, xi=xi0, Omega=Omega, alpha=alpha)
options(opt)
}
else{ # plot for case d=1
y0<-as.vector(y0)
x<-seq(min(pretty(y0,10)),max(pretty(y0,10)),length=100)
if(missing.X){
dp0<-c(xi0,omega,alpha,df)
xlab<-y.name}
else {
dp0<-c(0,omega,alpha,df)
xlab <- "residuals"}
hist(y0, prob=TRUE, breaks="FD", xlab=xlab, ylab="density", main="")
lines(x, dst(x,dp0[1],dp0[2],dp0[3],dp0[4]), col=2)
if(length(y)<101) points(y0, rep(0,n), pch=1)
title(y.name)
}
cat("Press <Enter> to continue..."); readline()
par(mfrow=c(1,2))
pp <- d*qf((1:n)/(n+1),d,df)
Xb <- qr.fitted(qrX,y)
res <- qr.resid(qrX,y)
rad.n <- apply(res * (res %*% solve(var(res))), 1, sum)
rad.sn <- apply((y-xi) * ((y-xi) %*% solve(Omega)), 1, sum)
plot(pp, sort(rad.n), pch=1, ylim=c(0,max(rad.n,rad.sn)),
xlab="Percentiles of chi-square distribution",
ylab="Mahalanobis distances")
abline(0,1,lty=3)
title(main="QQ-plot for normal distribution", sub=y.name)
plot(pp, sort(rad.sn), pch=1, ylim=c(0,max(rad.n,rad.sn)),
xlab="Percentiles of chi-square distribution",
ylab="Mahalanobis distances")
abline(0,1,lty=3)
title(main="QQ-plot for skew-t distribution", sub=y.name)
cat("Press <Enter> to continue..."); readline()
plot((1:n)/(n+1), sort(pchisq(rad.n,d)), xlab="", ylab="")
abline(0,1,lty=3)
title(main="PP-plot for normal distribution", sub=y.name)
plot((1:n)/(n+1), sort(pf(rad.sn/d,d,df)), xlab="", ylab="")
abline(0,1,lty=3)
title(main="PP-plot for skew-t distribution", sub=y.name)
par(mfrow=c(1,1))
} # end ploting
dev.norm<- msn.dev(c(qr.coef(qrX,y),rep(0,d)), as.matrix(X), y, freq)
test <- dev.norm + 2*mle$logL
p.value <- 1-pchisq(test,d+1)
if(trace) {
cat("LRT for normality (test-function, p-value): ")
print(c(test,p.value))
}
mle$test.normality <- list(LRT=test, df=d+1, p.value=p.value,
normal.logL=dev.norm/(-2))
invisible(mle)
}
#
st.mle <- function(X, y, freq, start, fixed.df=NA, trace=FALSE,
method="BFGS", control=list(iter.max=150))
{
y.name <- deparse(substitute(y))
y <- as.matrix(y)
dimnames(y)[[2]] <- list(y.name)
fit <- mst.mle(X, y, freq, start=start, fixed.df=fixed.df, trace=trace,
method=method, control=control)
mle <- list()
mle$call<- match.call()
dp <- fit$dp
se <- fit$se
p <- length(dp$beta)
dp.names<- c(if(p==1) "location" else dimnames(dp$beta)[[1]],
"scale","shape","df")
mle$dp <- c(dp$beta, sqrt(as.vector(dp$Omega)), dp$alpha, dp$df)
names(mle$dp) <- dp.names
mle$se <- c(se$beta, mle$dp[p+1] * se$internal[p+1], se$alpha,
dp$df * se$internal[p+3])
if(length(mle$se) == length(dp.names)) names(mle$se) <- dp.names
mle
}
mst.mle <- function(X, y, freq, start, fixed.df=NA, trace=FALSE,
method="BFGS", control=list(iter.max=150))
{
Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
y.name <- deparse(substitute(y))
y.names <- dimnames(y)[[2]]
y <- as.matrix(y)
if(missing(X)) X <- matrix(rep(1,nrow(y)),ncol=1)
else {if(!is.numeric(X)) stop("X must be numeric")}
if(missing(freq)) freq <- rep(1,nrow(y))
x.names<-dimnames(X)[[2]]
X <- as.matrix(X)
d <- ncol(y)
n <- sum(freq)
m <- ncol(X)
if(missing(start)){
qrX <- qr(X)
beta <- as.matrix(qr.coef(qrX,y))
Omega <- matrix(var(qr.resid(qrX,y)),d,d)
omega <- sqrt(diag(Omega))
alpha <- rep(0,d)
df <- ifelse(is.na(fixed.df), 10, fixed.df)
if(trace) {
cat("mst.mle: dp=","\n")
print(c(beta,Omega,alpha))
cat("df:", df,"\n")
}
}
else {
if(all(names(start)==c("beta", "Omega", "alpha", "df")))
{
beta <- start$beta
Omega <- start$Omega
alpha <- start$alpha
df <- start$df
}
else{
beta<- matrix(start[1:m],m,1)
Omega<- matrix(start[m+1]^2,1,1)
alpha<-start[m+2]
df<- start[m+3]
}
}
eta <- alpha/sqrt(diag(Omega))
Oinv<-solve(Omega)
Oinv<-(Oinv+t(Oinv))/2
upper <- chol(Oinv)
D <- diag(upper)
A<- upper/D
D <- D^2
if(d>1) param <- c(beta, -0.5*log(D), A[!lower.tri(A,diag=TRUE)], eta)
else param <- c(beta, -0.5*log(D), eta)
if(is.na(fixed.df)) param <- c(param,log(df))
opt <- optim(param, fn=mst.dev, gr=mst.dev.grad,
method=method, control=control, hessian=TRUE,
X=X, y=y, freq=freq, trace=trace, fixed.df=fixed.df)
dev <- opt$value
param<- opt$par
if(trace){
cat("Message from optimization routine:", opt$message,"\n")
cat("deviance:", dev,"\n")
}
beta <- matrix(param[1:(m*d)],m,d)
D <- exp(-2*param[(m*d+1):(m*d+d)])
if(d>1) {
A <- diag(d)
A[!lower.tri(A,diag=TRUE)] <- param[(m*d+d+1):(m*d+d+d*(d-1)/2)]
i0 <- m*d+d+d*(d-1)/2
}
else{
i0 <- m+1
A <- as.matrix(1)
}
eta <- param[(i0+1):(i0+d)]
if(is.na(fixed.df)) df <- exp(param[i0+d+1])
else df <- fixed.df
# Omega <- solve(t(A) %*% diag(D) %*% A)
Ainv <- backsolve(A,diag(d))
Omega<- Ainv %*% Diag(1/D) %*% t(Ainv)
omega<-sqrt(diag(Omega))
alpha<- eta*omega
dimnames(beta) <- list(x.names,y.names)
dimnames(Omega) <- list(y.names, y.names)
if(length(y.names)>0) names(alpha) <- y.names
info <- opt$hessian/2
if(all(is.finite(info)))
{
qr.info <- qr(info)
info.ok <- (qr.info$rank == length(param))
}
else info.ok <- FALSE
if(info.ok) {
se2 <- diag(solve(qr.info))
if(min(se2) < 0 )
se<- NA
else{
se <- sqrt(se2)
se.beta <- matrix(se[1:(m*d)],m,d)
se.alpha<- se[(i0+1):(i0+d)] * omega
dimnames(se.beta)[2] <- list(y.names)
dimnames(se.beta)[1] <- list(x.names)
names(se.alpha) <- y.names
se.df <- df*se[i0+d+1]
se <- list(beta=se.beta, alpha=se.alpha, df=se.df,
internal=se, info=info)
}}
else
se <- NA
dp <- list(beta=beta, Omega=Omega, alpha=alpha, df=df)
list(call=match.call(), logL=-0.5*dev, deviance=dev, dp=dp, se=se, optim=opt)
}
mst.dev <- function(param, X, y, freq=rep(1,nrow(X)), fixed.df=NA, trace=FALSE)
{
Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
d <- ncol(y)
# if(missing(freq)) freq<-rep(1,nrow(y))
n <- sum(freq)
m <- ncol(X)
beta<-matrix(param[1:(m*d)],m,d)
D <- exp(-2*param[(m*d+1):(m*d+d)])
if(d>1) {
A <- diag(d)
A[!lower.tri(A,diag=TRUE)] <- param[(m*d+d+1):(m*d+d+d*(d-1)/2)]
i0 <- m*d+d+d*(d-1)/2
}
else {
i0<- m+1
A <- as.matrix(1)
}
eta <- param[(i0+1):(i0+d)]
if(is.na(fixed.df)) df <- exp(param[i0+d+1])
else df <- fixed.df
Oinv <- t(A) %*% Diag(D) %*% A
# Omega <- solve(Oinv)
u <- y - X %*% beta
Q <- apply((u %*% Oinv)*u,1,sum)
L <- as.vector(u %*% eta)
logDet<- sum(log(df*pi/D))
dev <- (n*(2*lgamma(df/2) + logDet - 2*lgamma((df+d)/2))
+ (df+d) * sum(freq * log(1+Q/df))
-2*sum(freq * log(2*pt( L * sqrt((df+d)/(Q+df)),df+d))))
if(trace) cat("mst.dev: ",dev, "\n")
dev
}
mst.dev.grad <- function(param, X, y, freq=rep(1,nrow(X)), fixed.df=NA,
trace=FALSE)
{
Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
d <- ncol(y)
# if(missing(freq)) freq<-rep(1,nrow(y))
n <- sum(freq)
m <- ncol(X)
beta<- matrix(param[1:(m*d)],m,d)
D <- exp(-2*param[(m*d+1):(m*d+d)])
if(d>1) {
A <- diag(d)
A[!lower.tri(A,diag=TRUE)] <- param[(m*d+d+1):(m*d+d+d*(d-1)/2)]
i0 <- m*d+d+d*(d-1)/2
}
else{
i0 <- m*d+d
A <- as.matrix(1)
}
eta <- param[(i0+1):(i0+d)]
if(is.na(fixed.df)) df <- exp(param[i0+d+1])
else df <- fixed.df
tA <- t(A)
Oinv <- tA %*% Diag(D) %*% A
u <- y-X %*% beta
Q <- as.vector(apply((u %*% Oinv)*u,1,sum))
L <- as.vector(u %*% eta)
t. <- L*sqrt((df+d)/(Q+df))
dlogft<- -(df+d)/(2*df*(1+Q/df))
dt.dL <- sqrt((df+d)/(Q+df))
dt.dQ <- (-0.5)*L*sqrt(df+d)/(Q+df)^1.5
T. <- pt(t., df+d)
dlogT.<- dt(t., df+d)/T.
u.freq <- u*freq
Dbeta <- (-2* t(X) %*% (u.freq*dlogft) %*% Oinv
- outer(as.vector(t(X) %*% (dlogT. * dt.dL* freq)), eta)
- 2* t(X) %*% (dlogT.* dt.dQ * u.freq) %*% Oinv )
Deta <- apply(dlogT.*sqrt((df+d)/(Q+df))*u.freq, 2, sum)
if(d>1){
M <- 2*( Diag(D) %*% A %*% t(u * dlogft
+ u * dlogT. * dt.dQ) %*% u.freq)
DA <- M[!lower.tri(M,diag=TRUE)]
}
else DA<- NULL
M <- ( A %*% t(u*dlogft + u*dlogT.*dt.dQ) %*% u.freq %*% tA)
if(d>1) DD <- diag(M) + 0.5*n/D
else DD <- as.vector(M + 0.5*n/D)
grad <- (-2)*c(Dbeta,DD*(-2*D),DA,Deta)
# browser()
if(is.na(fixed.df)) {
dlogft.ddf <- 0.5 * (digamma((df+d)/2) - digamma(df/2) - d/df
+ (df+d)*Q/((1+Q/df)*df^2) - log(1+Q/df))
eps <- 1.0e-4
T.eps <- pt(L*sqrt((df+eps+d)/(Q+df+eps)), df+eps+d)
dlogT.ddf <- (log(T.eps)-log(T.))/eps
Ddf <- sum((dlogft.ddf + dlogT.ddf)*freq)
grad <- c(grad, -2*Ddf*df)
}
if(trace) cat("mst.dev.grad: norm is ",sqrt(sum(grad^2)),"\n")
return(grad)
}
#-------------
st.2logL.profile<-function(X=matrix(rep(1,n)), y, freq, trace=FALSE,
fixed.comp = c(ncol(X)+2, ncol(X)+3),
fixed.values = cbind(c(-4,4), log(c(1,25))),
npts=30/length(fixed.comp), plot.it=TRUE, ...)
{# plot2D profile deviance (=2(max.logL-logL)) using either parameters
# if(plot.it & !exists(.Device)) stop("Device not active")
#
if(missing(freq)) freq <- rep(1,length(y))
n <- sum(freq)
if(length(fixed.comp) == 1){
param1 <- seq(fixed.values[1], fixed.values[2], length=npts)
logL <- param2 <- rep(NA,npts)}
else{
param1 <- seq(fixed.values[1,1], fixed.values[2,1], length=npts)
param2 <- seq(fixed.values[1,2], fixed.values[2,2], length=npts)
logL <- matrix(NA,npts,npts)}
ls<-lm.fit(X,y)
omega<-sqrt(var(resid(ls)))
param <- c(coef(ls), log(omega), 0, log(20))[-fixed.comp]
# mle<- mst.mle(X=X, y=y)
# param <- mle$optim$par[-fixed.comp]
max.logL <- (-Inf)
cat(c("Running up to",npts,":"))
for(i in 1:npts){
cat(" ",i)
if(length(fixed.comp) == 1) {
opt <- optim(param, fn=st.dev.fixed, method="Nelder-Mead",
X=X, y=y, freq=freq, trace=trace,
fixed.comp=fixed.comp, fixed.values=param1[i])
logL[i] <- opt$value/(-2)
param <- opt$par
if(logL[i] > max.logL) {
max.logL<- logL[i]
best <- list(fixed.comp1=param1[i], fixed.comp2=NA, opt=opt)
}}
else{
for(j in 1:npts){
opt <- optim(param, fn=st.dev.fixed, method="Nelder-Mead",
X=X, y=y, freq=freq, trace=trace,
fixed.comp=fixed.comp,
fixed.values=c(param1[i], param2[j] ))
logL[i,j] <- opt$value/(-2)
if(j==1) param0 <- opt$par
if(j<npts)
param <- opt$par
else
param <- param0
if(logL[i,j] > max.logL) {
max.logL<- logL[i,j]
best <- list(fixed.comp1=param1[i], fixed.comp2=param2[j], opt=opt)
}
}}
}
cat("\n")
dev <- 2 * (max(logL) - logL)
if(plot.it){
if(length(fixed.comp) == 1){
plot(param1, dev, type="l", ...)
points(x=best$fixed.comp1, y=0, pch=1)
}
else{
contour(param1, param2, dev, labcex=0.75,
levels=c(0.57, 1.37, 2.77, 4.6, 5.99, 9.2),
labels=c(0.25, 0.5, 0.75, 0.90,0.95, 0.99),
...)
points(x=best$fixed.comp1, y=best$fixed.comp2, pch=1)
}
}
title(main=paste("dataset:", deparse(substitute(y)),
"\nProfile deviance", sep= " "))
invisible(list(call=match.call(), param1=param1, param2=param2,
deviance=dev, max.logL=max.logL, best=best))
}
st.dev.fixed <- function(free.param, X, y, freq, trace=FALSE,
fixed.comp=NA, fixed.values=NA)
{# param components: beta, log(omega), alpha, log(df)
n <- sum(freq)
m <- ncol(X)
param <- numeric(length(free.param)+length(fixed.comp))
param[fixed.comp] <- fixed.values
param[-fixed.comp] <- free.param
beta <- param[1:m]
omega <- exp(param[m+1])
eta <- param[m+2]/omega
df <- exp(param[m+3])
u <- y - X %*% beta
Q <- freq*(u/omega)^2
L <- u*eta
logDet <- log(df*pi*omega^2)
dev <- (n*(2*lgamma(df/2) + logDet - 2*lgamma((df+1)/2))
+ (df+1) * sum(log(1+Q/df))
-2*sum(log(2*pt(L * sqrt((df+1)/(Q+df)),df+1))))
if(trace) cat("st.dev.fixed (param, dev): ", param, dev,"\n")
dev
}
#----
sn.SFscore<-function(shape, data, trace=FALSE)
{# Sartori-Firth's modified score function
a42<- integrate(function(x) dsn(x,0,1,shape) * x^4 * zeta(1,shape*x)^2,
-Inf, Inf)$value
a22<- integrate(function(x) dsn(x,0,1,shape) * x^2 * zeta(1,shape*x)^2,
-Inf, Inf)$value
score<- sum(zeta(1,shape*data)*data)-0.5*shape*a42/a22
if(trace) cat("sn.SFscore:", shape, score,"\n")
score
}
sn.mmle <- function(X, y, plot.it=TRUE, trace=FALSE,...)
{
n <- length(y)
if (missing(X)){
X <- as.matrix(rep(1, n))
colnames(X) <- "mean"
}
m <- ncol(X)
dp <- cp.to.dp(sn.mle(X=X, y=y, plot.it=plot.it, trace=trace,...)$cp)
z <- as.vector(y- X %*% dp[1:m])/dp[m+1]
a0 <- 0
f0 <- sn.SFscore(a0, z, trace=trace)
a1 <- sign(dp[m+2])
f1 <- sn.SFscore(a1, z, trace=trace)
while(f0*f1 > 0){
a0 <- a1
f0 <- f1
a1 <- a1*2
f1 <- sn.SFscore(a1, z, trace=trace)
}
if(trace)cat("a0, a1: ",a0, a1,"\n")
a<- uniroot(sn.SFscore, interval=c(a0, a1), data=z, trace=trace)
dp[m+2]<- a$root
if (plot.it) {
dp0 <- dp
if (all(X == rep(1, n)))
y0 <- y
else {
y0 <- as.vector(y - X %*% dp0[1:m])
dp0 <- c(0, dp0[m + 1], dp0[m + 2])
xlab <- "residuals"
}
curve(dsn(x, dp0[1], dp0[2], dp[m+2]), add=TRUE, lty=2, col=3)
}
names(dp)[m+2] <- "shape"
info <- sn.Einfo(dp=dp,x=X)
se <- info$se.dp
names(se)<- names(dp)
list(call=match.call(), dp=dp, se=se, Einfo=info$info.dp)
}
sn.Einfo <- function(dp=NULL, cp=NULL, n=1, x=NULL)
{# computes Expected Fisher information matrix for SN variates
if(is.null(dp) & is.null(cp)) stop("either dp or cp must be set")
if(!is.null(dp) & !is.null(cp)) stop("either dp or cp must be set")
if(is.null(cp)) cp<- dp.to.cp(dp)
if(is.null(dp)) dp<- cp.to.dp(cp)
if(is.null(x))
{
x <-matrix(rep(1,n),nrow=n,ncol=1)
xx<- n
sum.x<-n
p <- 1
}
else
{ if(n!=1) warning("You have set both n and x, I am setting n<-nrow(x)")
n <- nrow(x)
p <- ncol(x)
xx <- t(x) %*% x
sum.x <- matrix(apply(x,2,sum))
}
if(length(cp) != (p+2)| length(dp) != (p+2))
stop("length(dp) | length(cp) must be equal to ncol(x)+2")
omega <- dp[p+1]
alpha <- dp[p+2]
E.z <- sqrt(2/pi)*alpha/sqrt(1+alpha^2)
s.z <- sqrt(1-E.z^2)
I.dp <- matrix(NA,p+2,p+2)
if(abs(alpha)< 200){
a0 <- integrate(function(x) dsn(x,0,1,alpha) * zeta(1,alpha*x)^2,
-Inf, Inf)$value
a1 <- integrate(function(x) dsn(x,0,1,alpha) *x * zeta(1,alpha*x)^2,
-Inf, Inf)$value
a2 <- integrate(function(x) dsn(x,0,1,alpha) *x^2 * zeta(1,alpha*x)^2,
-Inf, Inf)$value
}
else
{a0 <- sign(alpha)*0.7206/abs(alpha)
a1 <- -sign(alpha)*(0.6797/alpha)^2
a2 <- 0.005897/alpha^2 + 30.611/alpha^4
}
I.dp[1:p,1:p] <- xx* (1+alpha^2*a0)/omega^2
I.dp[p+1,p+1] <- n * (2+alpha^2*a2)/omega^2
I.dp[p+2,p+2] <- n * a2
I.dp[1:p,p+1] <- sum.x * (E.z*(1+E.z^2*pi/2)+alpha^2*a1)/omega^2
I.dp[p+1,1:p] <- t(I.dp[1:p,p+1])
I.dp[1:p,p+2] <- sum.x * (sqrt(2/pi)/(1+alpha^2)^1.5-alpha*a1)/omega
I.dp[p+2,1:p] <- t(I.dp[1:p,p+2])
I.dp[p+1,p+2] <- I.dp[p+2,p+1] <- n*(-alpha*a2)/omega
# cp <- dp.to.cp(dp)
sigma <-cp[p+1]
gamma1<-cp[p+2]
D <- diag(p+2)
R <- E.z/s.z
T <- sqrt(2/pi-(1-2/pi)*R^2)
Da.Dg <- 2*(T/(T*R)^2+(1-2/pi)/T^3)/(3*(4-pi))
DE.z <- sqrt(2/pi)/(1+alpha^2)^1.5
Ds.z <- (-E.z/s.z)*DE.z
D[1,p+1] <- (-R)
D[1,p+2] <- (-sigma*R)/(3*gamma1)
D[p+1,p+1] <- 1/s.z
D[p+1,p+2] <- (-sigma)* Ds.z* Da.Dg/s.z^2
D[p+2,p+2] <- Da.Dg
I.cp <- t(D) %*% I.dp %*% D
I.cp <- (I.cp + t(I.cp))/2
se.dp <- sqrt(diag(solve(I.dp)))
se.cp <- sqrt(diag(solve(I.cp)))
dimnames(I.cp)<- list(names(cp), names(cp))
dimnames(I.dp)<- list(names(dp), names(dp))
list(dp=dp, cp=cp, info.dp=I.dp, info.cp=I.cp, se.dp=se.dp, se.cp=se.cp, D=D)
}
#----
sn.logL.grouped <- function(param, breaks, freq, trace=FALSE)
{
cdf <- pmax(psn(breaks, param[1],exp(param[2]), param[3]), 0)
p <- diff(cdf)
logL <- sum(freq*log(p))
if(trace) print(c(param, logL))
logL
}
sn.mle.grouped <- function(breaks, freq, trace=FALSE, start=NA)
{
if(any(is.na(start))){
b <- breaks
d <- diff(b)
if(b[1]== -Inf) b[1]<- b[2]-d[2]
if(b[length(b)]==Inf) b[length(b)] <- b[length(b)-1]+d[length(d)-1]
mid<- (b[-1]+b[-length(b)])/2
dp <- msn.mle(y=mid, freq=freq, trace=trace)$dp
start <- c(dp[[1]], log(sqrt(dp[[2]])), dp[[3]])
}
opt <- optim(start, sn.logL.grouped,
control=list(fnscale=-1),
breaks=breaks, freq=freq, trace=trace)
param <- opt$par
dp <- c(param[1], exp(param[2]), param[3])
invisible(list(call=match.call(), dp=dp, logL=opt$value, end=param, opt=opt))
}
st.logL.grouped <- function(param, breaks, freq, trace=FALSE)
{
if(param[4] > 5.5214609)
cdf<- psn(breaks, param[1], exp(param[2]), param[3])
else
cdf<- pst(breaks, param[1], exp(param[2]), param[3], exp(param[4]))
p <- pmax(diff(cdf), 1.0e-10)
logL <- sum(freq*log(p))
if(trace) print(c(param, logL))
logL
}
st.mle.grouped <- function(breaks, freq, trace=FALSE, start=NA)
{
if(any(is.na(start))){
a <- sn.mle.grouped(breaks, freq)
start <- c(a$end, log(15))
if(trace) cat("Initial parameters set to:", format(start),"\n")
}
opt <- optim(start, st.logL.grouped,
control=list(fnscale=-1),
breaks=breaks, freq=freq, trace=trace)
param<-opt$par
dp <- c(param[1],exp(param[2]),param[3], exp(param[4]))
logL <- opt$value
invisible(list(call=match.call(), dp=dp, logL=logL, end=param, opt=opt))
}
msn.affine <- function(dp, A, a=0, drop=TRUE)
{
# computes distribution of affine transformation of MSN/MST variate, T=a+AY,
# using formuale in Appendix A.2 of Capitanio et al.(2003)
#
Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
if(is.null(dp$xi)) xi <- dp$beta else xi <- dp$xi
xi.T <- as.vector(A %*% matrix(xi,ncol=1)+a)
Omega <- dp$Omega
O.T <- as.matrix(A %*% Omega %*% t(A))
oi <- Diag(1/sqrt(diag(Omega)))
B <- oi %*% Omega %*% t(A)
tmp <- (oi %*% Omega %*% oi - B %*% solve(O.T) %*% t(B)) %*% dp$alpha
den <- sqrt(1+sum(dp$alpha*as.vector(tmp)))
num <- Diag(sqrt(diag(O.T))) %*% solve(O.T) %*% t(B) %*% dp$alpha
# cat("termine sotto radice -1: ", sum(dp$alpha*as.vector(tmp)),"\n")
alpha <- as.vector(num/den)
if(all(dim(O.T)==c(1,1)) & drop)
dp.T<- list(location=xi.T, scale=sqrt(as.vector(O.T)), shape=alpha)
else
dp.T <- list(xi=xi.T, Omega=O.T, alpha=alpha)
if(!is.null(tau=dp$tau)) dp.T$tau <- dp$tau
if(!is.null(tau=dp$df)) dp.T$df <- dp$df
return(dp.T)
}
mst.affine <- function(dp, A, a=0, drop=TRUE) msn.affine(dp, A, a, drop)
#---
.First.lib <- function(library, pkg)
{
Rv <- R.Version()
if(Rv$major == 0 |(Rv$major == 1 && Rv$minor < 0.1))
stop("This package requires R 1.0.1 or later")
assign(".sn.home", file.path(library, pkg),
pos=match("package:sn", search()))
sn.version <- "0.33 (2005-04-07)"
assign(".sn.version", sn.version, pos=match("package:sn", search()))
if(interactive())
{
cat("Library 'sn', version ", sn.version, ", © 1998-2005 A.Azzalini\n")
cat("type 'help(SN)' for summary information\n")
}
invisible()
}