https://github.com/cran/sn
Raw File
Tip revision: 4d028ba2211e27c98228c62d20a35ff854d1c128 authored by Adelchi Azzalini on 07 April 2005, 00:00:00 UTC
version 0.33
Tip revision: 4d028ba
sn.R
# R package for the Skew-Normal (SN)  and the skew-t (ST) distributions
# (the ST distribution since version 0.30).
#
# Author: A.Azzalini 
# Home-page: http://azzalini.stat.unipd.it/SN
# major updates: 29/8/1997, 10/12/1997, 1/10/1998, 12/10/1998, 01/04/1999, 
# 15/06/2002. 
# It requires R 1.0.1, plus library mvtnorm for a few functions
#
#------- 

dsn <- function(x, location=0, scale=1, shape=0, log=FALSE)
 {
   z <- (x-location)/scale
   if(log)
     y <- (-0.9189385332046727-logb(scale)-z^2/2+zeta(0,shape*z))
   else
     y <- 2*dnorm(z)*pnorm(z*shape)/scale
   replace(y, scale<= 0, NaN)
 }

psn <- function(x, location=0, scale=1, shape=0, ...)
 {
   z <- (x-location)/scale
   p <- pmin(1, pmax(0, pnorm(z) - 2*T.Owen(z, shape,...)))
   replace(p, scale<= 0, NaN)
 }
 
rsn <- function(n=1, location=0, scale=1, shape=0){
  u1 <- rnorm(n)
  u2 <- rnorm(n)
  id <- (u2 > shape*u1)
  u1[id] <- (-u1[id])
  y <- location+scale*u1
  attr(y,"parameters") <- c(location,scale,shape)
  return(y)
  }

qsn <- function (p, location = 0, scale = 1, shape = 0, tol = 1e-08, ...) 
{
    max.q <- sqrt(qchisq(p,1))
    min.q <- -sqrt(qchisq(1-p,1))
    if(shape == Inf) return(location + scale * max.q)
    if(shape == -Inf) return(location + scale * min.q)
    na <- is.na(p) | (p < 0) | (p > 1)
    zero <- (p == 0)
    one <- (p == 1)
    p <- replace(p, (na | zero | one), 0.5)
    cum <- as.vector(sn.cumulants(shape, 4))
    g1 <- cum[3]/cum[2]^(3/2)
    g2 <- cum[4]/cum[2]^2
    x <- qnorm(p)
    x <- (x + (x^2 - 1) * g1/6 + x * (x^2 - 3) * g2/24 -
          x * (2 * x^2 - 5) * g1^2/36)
    x <- cum[1] + sqrt(cum[2]) * x
    max.err <- 1
    while (max.err > tol) {
        x1 <- x - (psn(x, 0, 1, shape, ...) - p)/dsn(x, 0, 1, shape)
        x1 <- pmin(x1,max.q)
        x1 <- pmax(x1,min.q)
        max.err <- max(abs(x1 - x)/(1 + abs(x)))
        x <- x1
    }
    x <- replace(x, na, NA)
    x <- replace(x, zero, -Inf)
    x <- replace(x, one, Inf)
    return(location + scale * x)
}

sn.cumulants <- function(shape=0, n=4)
 {
   cumulants.half.norm <- function(n=4){
     n <- max(n,2)
     n <- as.integer(2*ceiling(n/2))
     half.n  <-  as.integer(n/2)
     m <- 0:(half.n-1)
     a <- sqrt(2/pi)/(gamma(m+1)*2^m*(2*m+1))
     signs <- rep(c(1,-1),half.n)[1:half.n]
     a <- as.vector(rbind(signs*a,rep(0,half.n)))
     coeff <- rep(a[1],n)
     for (k in 2:n) {
        ind <- 1:(k-1)
        coeff[k] <- a[k]-sum(ind*coeff[ind]*a[rev(ind)]/k)
        }
     kappa <- coeff*gamma((1:n)+1)
     kappa[2] <- 1+kappa[2]
     return(kappa)
    }
  # delta<-delta.of(shape)
  delta <- shape/sqrt(1+shape^2)
  kv <- cumulants.half.norm(n)
  if(length(kv)>n) kv<-kv[-(n+1)]
  kv[2] <- kv[2]-1
  kappa <- outer(delta,1:n,"^")*matrix(rep(kv,length(shape)),ncol=n,byrow=TRUE)
  kappa[,2] <- kappa[,2]+1
  kappa
}

# lambda.of <- function(delta) delta/sqrt(1-delta^2)

# delta.of <- function(lambda) {
#   inf <- (abs(lambda)==Inf)
#   delta <-lambda/sqrt(1+lambda^2)
#   delta[inf] <- sign(lambda[inf])
#   delta
#}

T.Owen <- function(h, a, jmax=50, cut.point=6)
{
 T.int <-function(h,a,jmax,cut.point)
   {
     fui<- function(h,i) (h^(2*i))/((2^i)*gamma(i+1)) 
     seriesL <- seriesH <- NULL
     i  <- 0:jmax
     low<- (h<=cut.point)
     hL <- h[low]
     hH <- h[!low]
     L  <- length(hL)
     if (L>0) {
       b    <- outer(hL,i,fui)
       cumb <- apply(b,1,cumsum)
       b1   <- exp(-0.5*hL^2)*t(cumb)
       matr <- matrix(1,jmax+1,L)-t(b1)
       jk   <- rep(c(1,-1),jmax)[1:(jmax+1)]/(2*i+1)
       matr <- t(matr*jk) %*%  a^(2*i+1)
       seriesL  <- (atan(a)-as.vector(matr))/(2*pi)
     }
     if (length(hH) >0) 
       seriesH <- atan(a)*exp(-0.5*(hH^2)*a/atan(a))*
                  (1+0.00868*(hH^4)*a^4)/(2*pi)
     series <- c(seriesL,seriesH)
     id <- c((1:length(h))[low],(1:length(h))[!low]) 
     series[id] <- series  # re-sets in original order
     series
  }
  if(!is.vector(a) | length(a)>1) stop("a must be a vector of length 1")
  if(!is.vector(h)) stop("h must be a vector")
  aa <- abs(a)    
  ah <- abs(h)
  if(aa==Inf) return(0.5*pnorm(-ah))
  if(aa==0)   return(rep(0,length(h)))
  na  <- is.na(h)
  inf <- (ah==Inf)
  ah  <- replace(ah,(na|inf),0)
  if(aa<=1)
    owen <- T.int(ah,aa,jmax,cut.point)
  else
    owen<-0.5*pnorm(ah)+pnorm(aa*ah)*(0.5-pnorm(ah))- 
               T.int(aa*ah,(1/aa),jmax,cut.point)
  owen <- replace(owen,na,NA)
  owen <- replace(owen,inf,0)
  return(owen*sign(a))
}


pnorm2 <- function(x,y,rho){
  if(length(c(x,y,rho))>3) stop("non-scalar arguments")
  if(x==0 & y==0) return(0.25+asin(rho)/(2*pi))
  p <- 0.5*(pnorm(x)+pnorm(y))
  if(x==0) p <- p-0.25*sign(y)
      else p <- p-T.Owen(x,(y-rho*x)/(x*sqrt(1-rho^2)))
  if(y==0) p <- p-0.25*sign(x)
      else p <- p-T.Owen(y,(x-rho*y)/(y*sqrt(1-rho^2)))
  if((x*y<0) | ((x*y==0) & (x+y)<0)) p <- p-0.5
  return(p)
  }  

cp.to.dp <- function(param){
  # converts centred parameters cp=(mu,sigma,gamma1)
  # to direct parameters dp=(xi,omega,lambda)
  # Note:  mu can be m-dimensional, the other must be scalars
  b <- sqrt(2/pi)
  m <- length(param)-2
  gamma1 <- param[m+2]
  if(abs(gamma1)> 0.995271746431) stop("abs(gamma1)> 0.995271746431")
  A <- sign(gamma1)*(abs(2*gamma1/(4-pi)))^(1/3)
  delta <- A/(b*sqrt(1+A^2))
  lambda <- delta/sqrt(1-delta^2)
  E.Z  <- b*delta
  sd.Z <- sqrt(1-E.Z^2)
  location    <- param[1:m]
  location[1] <- param[1]-param[m+1]*E.Z/sd.Z
  scale <- param[m+1]/sd.Z
  dp    <- c(location,scale,lambda)
  names(dp)[(m+1):(m+2)] <- c("scale","shape")
  if(m==1)  names(dp)[1] <- "location"
  dp
  }

dp.to.cp <- function(param){
# converts 'direct' dp=(xi,omega,lambda) to 'centred' cp=(mu,sigma,gamma1)
  m <- length(param)-2
  omega <-param[m+1]
  lambda<-param[m+2]
  mu.Z  <- lambda*sqrt(2/(pi*(1+lambda^2)))
  s.Z   <- sqrt(1-mu.Z^2)
  gamma1<- 0.5*(4-pi)*(mu.Z/s.Z)^3
  sigma <- omega*s.Z
  mu    <- param[1:m]
  mu[1] <- param[1]+sigma*mu.Z/s.Z
  cp    <- c(mu,sigma,gamma1)
  names(cp)[(m+1):(m+2)]<-c("s.d.","skewness")
  if(m==1) names(cp)[1] <- "mean"
  cp
}

zeta <- function(k,x){# k integer in (0,4)
  if(k<0 | k>4) return(NULL)
  if(k != as.integer(k)) warning("k must be an integer")
  k <- as.integer(k)
  na<- is.na(x)
  x <- replace(x,na,0)
  z <- switch(k+1,
            pnorm(x, log.p=TRUE)+ log(2),
            ifelse(x>(-20), dnorm(x)/pnorm(x), 
              ifelse(x>(-200), exp(-x^2/2-0.5*log(2*pi) - pnorm(x,log.p=TRUE)),
                               -x*(1+1/x^2-2/x^4))),              
            (-zeta(1,x)*(x+zeta(1,x))),
            (-zeta(2,x)*(x+zeta(1,x))-zeta(1,x)*(1+zeta(2,x))),
            (-zeta(3,x)*(x+2*zeta(1,x))-2*zeta(2,x)*(1+zeta(2,x))),
            NULL)
  neg.inf<- (x == -Inf)
  if(any(neg.inf))
    z <- switch(k+1,
                z,
                replace(z, neg.inf, Inf),
                replace(z, neg.inf, 1),
                replace(z, neg.inf, 0),
                replace(z, neg.inf, 0),
                NULL)
  if(k>1) z<- replace(z, x==Inf, 0)
  replace(z,na,NA)
}


sn.em <-function(X, y, fixed, p.eps=1e-4, l.eps=1.e-2, trace=FALSE, data=FALSE)
{
#
#  1/10/1998 (elaborando dal em.lm.sn del 2-12-97)
#
#  EM per caso con uno/due/tre parametri ignoti, parametrizzando in modo 
#  "diretta" con (xi, omega, lambda); internamente usa peraltro 'delta'.
#  Le componenti ignote sono i termini NA di fixed,  ma per semplicita' 
#  assumiamo che un NA implica che le componenti alla sua sx sono NA
#  (e quindi il primo elemento di fixed e` sempre NA).
#
  n<-length(y)
  if(missing(X)) X<-matrix(rep(1,n),n,1)
  nc<-ncol(X)
  if(missing(fixed)) fixed <- rep(NA,3)
  if(all(!is.na(fixed))) stop("all parameter are fixed") 
  if(is.na(fixed[3])) iter<-(1-log(l.eps,10)) else iter<-1 
  qrX<-qr(X)
  beta<-qr.coef(qrX,y)
  xi  <- m <-qr.fitted(qrX,y)
  omega  <- fixed[2] 
  lambda <- fixed[3]
  # delta  <- delta.of(lambda)
  delta <- lambda/sqrt(1+lambda^2)
  s<-sqrt(sum((y-xi)^2)/n)
  if(is.na(fixed[3])) {
    gamma1 <- sum((y-m)^3)/(n*s^3)
    a  <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
    delta<-sqrt(pi/2)*a/sqrt(1+a^2)
    if(abs(delta)>=1) delta<-sign(delta)/(1+1/n)
    # lambda<-lambda.of(delta)
    lambda<-delta/sqrt(1-delta^2)
    }
  mean.Z <- sqrt(2/pi)*delta
  sd.Z <- sqrt(1-mean.Z^2)
  if(is.na(fixed[2])) omega  <- s/sd.Z
  if(is.na(fixed[1])) xi     <- m-s*mean.Z/sd.Z
  old.par   <- c(beta,omega,lambda)
  diverge   <- 1
  incr.logL <- Inf
  logL      <- -Inf
  while(diverge>p.eps | incr.logL>l.eps){
    # E-step
    v  <- (y-xi)/omega
    p  <- zeta(1,lambda*v)
    u1 <- omega*(delta*v+p*sqrt(1-delta^2))
    u2<-omega^2*((delta*v)^2+(1-delta^2)+p*v*delta*sqrt(1-delta^2))
    # M-step
    for(i in 1:iter){
      beta<-qr.coef(qrX,y-delta*u1)
      xi <- qr.fitted(qrX,y-delta*u1)
      d  <- y-xi
      Q  <- sum(d^2-2*delta*d*u1+u2)
      if(is.na(fixed[2])) omega <-sqrt(Q/(2*n*(1-delta^2)))
      r  <- 2*sum(d*u1)/Q
      if(is.na(fixed[3])) delta<-(sqrt((2*r)^2+1)-1)/(2*r)
      }
    # convergence?    # lambda<-lambda.of(delta)
    lambda<-delta/sqrt(1-delta^2)
    param<-c(beta,omega,lambda)
    names(param)[(nc+1):(nc+2)]<-c("scale","shape")
    diverge<-sum(abs(param-old.par)/(1+abs(old.par)))/(nc+2)
    old.par<-param
    a<-sum(log(dsn(y,xi,omega,lambda)))
    incr.logL<- a-logL
    logL<-a
    if(trace) print(c(param,logL),digits=5)
  }
  cp <- dp.to.cp(param)
  result<-list(dp=param, cp=cp, logL=logL)
  if(data) result$data <- list(X=X, y=y, residuals=d/omega)
  result
}

#-------------------

gamma1.to.lambda<- function(gamma1){
  max.gamma1 <- 0.5*(4-pi)*(2/(pi-2))^1.5
  na <- (abs(gamma1)>max.gamma1)
  if(any(na)) warning("NAs generated") 
  gamma1<-replace(gamma1,na,NA)
  a    <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
  delta<- sqrt(pi/2)*a/sqrt(1+a^2)
  lambda<-delta/sqrt(1-delta^2)
  as.vector(lambda)
}

# Examples:
#  a<-sn.2logL.profile(y=otis)
#  a<-sn.2logL.profile(y=otis, use.cp=FALSE)
#  a<-sn.2logL.profile(X=cbind(1,lbm), y=bmi, npts=50)
#  a<-sn.2logL.profile(y=frontier,param.range=c(0.8,1.6,10,30), use.cp=FALSE, npts=11)

sn.2logL.profile<-function(X=matrix(rep(1,n)), y, 
      param.range=c(sqrt(var(y))*c(2/3, 3/2), -0.95, 0.95),
      use.cp=TRUE, npts= 31 %/% d, plot.it=TRUE, ...)
{# plot 1D or 2D profile deviance (=-2logL) using either parameters
   # if(plot.it & !exists(.Device)) stop("Device not active")
   n<-length(y)
   d<- round(length(param.range)/2)
   if((d!=1)&(d!=2)) stop(" length(param.range) must be either 2 or 4")
   if(d==1){
      param1 <- seq(param.range[1],param.range[2],length=npts)
      llik <- param2 <- rep(NA,npts)}
   else{ 
      param1 <- seq(param.range[1],param.range[2],length=npts)
      param2 <- seq(param.range[3],param.range[4],length=npts)
      llik   <- matrix(NA,npts,npts)}
   if(use.cp){
      if(d==1){
        gamma1<-param1
        sigma <-param2 
        xlab <- "gamma1"
        ylab <- ""}
      else {
        sigma <-param1
        gamma1<-param2
        xlab <- "sigma"
        ylab <- "gamma1"
        }
      if(max(abs(gamma1))>0.9952717) stop("abs(gamma1)>0.9952717")
      lambda <- gamma1.to.lambda(gamma1)
      sc<-sqrt(1-(2/pi)*lambda^2/(1+lambda^2))      
      }
   else{ # use dp 
      if(d==1) {
        lambda<-param1
        omega<-param2
        xlab <- "lambda"
        ylab <- ""}
      else {
         omega<-param1 
	 sc <- rep(1,npts)
         lambda<-param2
         xlab <- "omega"
         ylab <- "lambda"
         }
      }
   cat(c("Running until",npts,":"))
   for(i in 1:npts){
     cat(" ");cat(i)
     if(d==1) {
       a<-sn.em(X, y, fixed=c(NA,NA,lambda[i]), ...)       
       llik[i]<-a$logL
       # print(c(i,lambda[i],a$logL))
       }
     else{
     for(j in 1:npts){     
       a<-sn.em(X, y, fixed=c(NA,param1[i]/sc[j],lambda[j]), ...)
       llik[i,j] <- a$logL
       # print( c(i,j, param1[i]/sc[j], lambda[j], a$logL))
     }}
   }
  cat("\n")
  #if(plot)
  f<-2*(llik-max(llik))	
  if(plot.it){
    if(d==1) plot(param1, f, type="l", 
            xlab=xlab, ylab="profile relative 2(logL)")
    else contour(param1, param2, f, labcex=0.75, 
            xlab=xlab, ylab=ylab,
            levels=-c(0.575, 1.386, 2.773, 4.605, 5.991, 9.210))
            # qchisq(c(0.25,0.5,0.75,0.90,0.95,0.99),2)
    title(main=paste("dataset:", deparse(substitute(y)),
        "\nProfile relative 2(logLikelihood)", sep= " "))	
  }
  invisible( list(param1=param1, param2=param2,
      param.names=c(xlab,ylab), two.logL=f, maximum=2*max(llik)))
}

  
sn.mle <- function(X, y, cp, plot.it=TRUE, trace=FALSE, method="L-BFGS-B",
               control=list(iter.max=100, abs.tol=1e-5)) 
{
  xlab<-deparse(substitute(y))
  if(!is.null(dim(y))) {
    if(min(dim(y))==1) y<-as.vector(y)
    else stop("y must be a vector")
    }
  n<-length(y)
  if(missing(X)) {
     X <-as.matrix(rep(1,n))
     cp.names <- "mean"
   }
  else{
    if(is.null(colnames(X)))
       cp.names<-  outer(deparse(substitute(X)),as.character(1:ncol(X)),
                         paste, sep=".")
    else  cp.names<- colnames(X)
     }
  cp.names<- c(cp.names,"s.d.","skewness")
  m<-ncol(X)
  if(missing(cp)) {
    qrX <- qr(X)
    s <- sqrt(sum(qr.resid(qrX, y)^2)/n)
    gamma1 <- sum(qr.resid(qrX, y)^3)/(n*s^3)
    if(abs(gamma1) > 0.99527) gamma1<- sign(gamma1)*0.95
    cp <- c(qr.coef(qrX,y), s, gamma1)
    }
  else{ 
    if(length(cp)!= (m+2)) stop("ncol(X)+2 != length(cp)")}
  opt<- optim(cp, fn=sn.dev, gr=sn.dev.gh, method=method,
          lower=c(-rep(Inf,m), 10*.Machine$double.eps, -0.99527), 
          upper=c(rep(Inf,m), Inf, 0.99527), 
          control=control, X=X, y=y, trace=trace, hessian=FALSE)
  cp <- opt$par
  if(trace) {
     cat(paste("Message from optimization routine:", opt$message,"\n"))
     cat("estimates (cp): ", cp, "\n")
     }
  if(abs(cp[m+2])> 0.9952717){
     if(trace) cat("optim searched outside admissible range - restarted\n")
      cp[m+2]<- sign(cp[m+2])*runif(1)
      mle <- sn.mle(X, y, cp, plot.it, trace, method, control)
      cp  <- mle$cp
      }
  logL <- (-opt$value)/2
  info <- attr(sn.dev.gh(cp, X, y, trace=FALSE, hessian=TRUE),"hessian")/2
  # se <- sqrt(diag(solve(info)))
  if(all(is.finite(info))) 
    {
      qr.info <- qr(info)
      info.ok <- (qr.info$rank == length(cp))
     }
  else info.ok <- FALSE
  if(info.ok) {
    se2 <- diag(solve(qr.info))
    se <- sqrt(ifelse(se2 >= 0, se2, NA))
    }
  else
    se <- rep(NA, length(cp))
  if(plot.it) {
    dp0<-cp.to.dp(cp)
    if(all(X==rep(1,n))) 
      y0<-y        
    else {
      y0<- as.vector(y - X %*% dp0[1:m])
      dp0<-c(0,dp0[m+1],dp0[m+2])
      xlab<-"residuals"
      }
    x<-seq(min(pretty(y0,10)),max(pretty(y0,10)),length=100)
    pdf.sn <- dsn(x,dp0[1],dp0[2],dp0[3])
    if(exists("sm.density",mode="function"))
      {
      a<-sm.density(x=y0, eval.points=x, h=hnorm(y0)/1.5, display="none")
      a<-sm.density(x=y0, eval.points=x, h=hnorm(y0)/1.5, xlab=xlab, 
                    lty=3, ylim=c(0,max(a$estimate,pdf.sn)))
      }
    else 
      {
      h <- hist(y0, prob=TRUE, breaks="FD", plot=FALSE)
      hist(y0, prob=TRUE, breaks="FD", xlim=c(min(x),max(x)),
           xlab=xlab, main=xlab, ylim=c(0, max(pdf.sn, h$density)))
      }
    if(n<101) points(y0,rep(0,n),pch=1)
    # title(deparse(substitute(y)))
    curve(dsn(x, dp0[1], dp0[2], dp0[3]), add=TRUE, col=2)
  }
  names(cp)<-  names(se)<- cp.names
  list(call=match.call(), cp=cp,  se=se, info=info, logL=logL, optim=opt)
}


sn.dev <- function(cp, X, y, trace=FALSE)
{ # -2*logL for centred parameters  
  m <- ncol(X)
  if(abs(cp[m+2])> 0.9952717){
    warning("optim search in abs(cp[m+2])> 0.9952717, value adjusted")
    cp[m+2] <- 0.9952717*sign(cp[m+2])
  }
  dp <- as.vector(cp.to.dp(cp))
  location <- as.vector(X %*% as.matrix(dp[1:m]))
  logL <- sum(dsn(y, location, dp[m+1], dp[m+2], log=TRUE))
  if(trace) {cat("sn.dev: (cp,dev)="); print(c(cp,-2*logL))}
  return(-2*logL)
}

sn.dev.gh <- function(cp, X, y, trace=FALSE, hessian=FALSE)
{
  # computes gradient and hessian of dev=-2*logL for centred parameters 
  # (and observed information matrix);
  m  <- ncol(X)
  n  <- nrow(X)
  np <- m+2
  if(abs(cp[m+2])> 0.9952717){
    warning("optim search in abs(cp[m+2])> 0.9952717, value adjusted")
    cp[m+2] <- 0.9952717*sign(cp[m+2])
  }
  score <- rep(NA,np)
  info  <- matrix(NA,np,np)
  beta  <- cp[1:m]
  sigma <- cp[m+1]
  gamma1 <- cp[m+2]
  lambda <- gamma1.to.lambda(gamma1)
  # dp<-cp.to.dp(c(beta,sigma,gamma1))
  # info.dp <- sn.info(dp,y)$info.dp
  mu <- as.vector(X %*% as.matrix(beta))
  d  <- y-mu
  r  <- d/sigma
  E.Z<- lambda*sqrt(2/(pi*(1+lambda^2)))
  s.Z<- sqrt(1-E.Z^2)
  z  <- E.Z+s.Z*r
  p1 <- as.vector(zeta(1,lambda*z))
  p2 <- as.vector(zeta(2,lambda*z))
  omega<- sigma/s.Z
  w    <- lambda*p1-E.Z
  DE.Z <- sqrt(2/pi)/(1+lambda^2)^1.5
  Ds.Z <- (-E.Z/s.Z)*DE.Z
  Dz   <- DE.Z + r*Ds.Z
  DDE.Z<- (-3)*E.Z/(1+lambda^2)^2
  DDs.Z<- -((DE.Z*s.Z-E.Z*Ds.Z)*DE.Z/s.Z^2+E.Z*DDE.Z/s.Z)
  DDz  <- DDE.Z + r*DDs.Z
  score[1:m] <- omega^(-2)*t(X) %*% as.matrix(y-mu-omega*w) 
  score[m+1] <- (-n)/sigma+s.Z*sum(d*(z-p1*lambda))/sigma^2
  score[m+2] <- score.l <- n*Ds.Z/s.Z-sum(z*Dz)+sum(p1*(z+lambda*Dz))
  Dg.Dl <-1.5*(4-pi)*E.Z^2*(DE.Z*s.Z-E.Z*Ds.Z)/s.Z^4
  R <- E.Z/s.Z
  T <- sqrt(2/pi-(1-2/pi)*R^2)
  Dl.Dg <- 2*(T/(T*R)^2+(1-2/pi)/T^3)/(3*(4-pi))
  R. <- 2/(3*R^2 * (4-pi))
  T. <- (-R)*R.*(1-2/pi)/T
  DDl.Dg <- (-2/(3*(4-pi))) * (T./(R*T)^2+2*R./(T*R^3)+3*(1-2/pi)*T./T^4)
  score[m+2] <- score[m+2]/Dg.Dl  # convert deriv wrt lamda to gamma1 
  gradient <- (-2)*score
  if(hessian){
     info[1:m,1:m] <- omega^(-2) * t(X) %*% ((1-lambda^2*p2)*X)
     info[1:m,m+1] <- info[m+1,1:m] <- 
            s.Z* t(X) %*% as.matrix((z-lambda*p1)+d*(1-lambda^2*p2)*
            s.Z/sigma)/sigma^2
     info[m+1,m+1] <- (-n)/sigma^2+2*s.Z*sum(d*(z-lambda*p1))/sigma^3 +
            s.Z^2*sum(d*(1-lambda^2*p2)*d)/sigma^4
     info[1:m,m+2] <- info[m+2,1:m] <- 
            t(X)%*%(-2*Ds.Z*d/omega+Ds.Z*w+s.Z*(p1+lambda*p2*(z+lambda*Dz)
            -DE.Z))/sigma 
     info[m+1,m+2] <- info[m+2,m+1] <- 
            -sum(d*(Ds.Z*(z-lambda*p1)+s.Z*(Dz-p1-p2*lambda*(z+lambda*Dz))
             ))/sigma^2
     info[m+2,m+2] <- (n*(-DDs.Z*s.Z+Ds.Z^2)/s.Z^2+sum(Dz^2+z*DDz)-
            sum(p2*(z+lambda*Dz)^2)- sum(p1*(2*Dz+lambda*DDz)))
     info[np,] <- info[np,]/Dg.Dl # convert info wrt lamda to gamma1 
     info[,np] <- info[,np]*Dl.Dg # an equivalent form of the above
     info[np,np] <- info[np,np]-score.l*DDl.Dg
     attr(gradient,"hessian") <- 2*info
     }
  if(trace) {cat("sn.dev.gh: gradient="); print(-2*score)}
  return(gradient)
}

# 

dmsn <- function(x, xi=rep(0,d), Omega, alpha)
{# Density of Multivariate SN rv with parameters (xi, Omega, alpha) 
 # evaluated at x, which is either a d-vector or (n x d) matrix
  scale <- sqrt(diag(Omega))
  if(is.vector(x)) {n <-1 ;         d <- length(x)} 
        else       {n <-dim(x)[1] ; d <- dim(x)[2]}
  X     <- t(matrix(x, nrow=n,ncol=d))- xi
  z     <- X/scale
  Q     <- apply((solve(Omega)%*% X)* X,2,sum) # diag of (x Omega^(-1) x^T)
  # d <- diag(qr(Omega)[[1]])
  Det <- prod(abs( diag(qr(Omega)[[1]]) ))
  pdf   <- 2*exp(-0.5*Q) * pnorm(t(z)%*%as.matrix(alpha))/sqrt((2*pi)^d * Det)
  as.vector(pdf)
}


rmsn <- function(n=1, xi=rep(0,d), Omega, alpha){
# generates SN_d(xi,Omega,alpha) variates using transformation method
  d <- ncol(Omega)
  Z <- msn.quantities(xi,Omega,alpha)
  y <- matrix(rnorm(n*d),n,d) %*% chol(Z$Psi)
  # each row of y is N_d(0,Psi)
  abs.y0 <- abs(rnorm(n))  
  abs.y0 <- matrix(rep(abs.y0,d), ncol=d)
  delta  <- Z$delta
  z <- delta * t(abs.y0) +  sqrt(1-delta^2) * t(y)
  y <- t(xi+Z$omega*z)
  return(y)
  }


dsn2.plot <- function(x, y, xi, Omega, alpha, ...)
{# plot bivariate density SN_2(xi,Omega,alpha) computed at (x,y) grid
  if(any(dim(Omega)!=c(2,2))) stop("dim(Omega) != c(2,2)")
  nx <- length(x)
  ny <- length(y)
  xoy <- cbind(rep(x,ny), as.vector(matrix(y,nx,ny,byrow=TRUE)))
  X <- matrix(xoy, nx*ny, 2, byrow=FALSE)
  pdf<-dmsn(X, xi, Omega, alpha)
  pdf<-matrix(pdf, nx, ny)
  contour(x, y, pdf, ...)
  invisible(list(x=x, y=y, density=pdf, xi=xi, Omega=Omega, alpha=alpha))
}

msn.quantities <- function(xi,Omega,alpha){
# 21-12/1997; computes various quantities related to SN_k(xi,Omega,alpha)
  Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
  k <- length(alpha)
  if(length(xi)!=k | any(dim(Omega)!=c(k,k))) 
       stop("dimensions of arguments do not match")
  omega <- sqrt(diag(Omega))
  O.cor <- Diag(1/omega) %*% Omega %*% Diag(1/omega)
  tmp <- as.vector(sqrt(1 + t(as.matrix(alpha))%*%O.cor%*%alpha)) 
  delta<- as.vector(O.cor %*%alpha)/tmp
  lambda<- delta/sqrt(1-delta^2)
  D <-diag(sqrt(1+lambda^2))
  Psi <- D %*% (O.cor-outer(delta,delta)) %*% D
  Psi <- (Psi+t(Psi))/2
  O.inv <- solve(Omega)
  oi<- sqrt(diag(O.inv))
  O.pcor <- Diag(1/oi)%*% (-O.inv) %*% Diag(1/oi)
  diag(O.pcor) <- rep(1,k)
  muZ <- delta*sqrt(2/pi)
  muY <- xi+omega*muZ
  Sigma <- Diag(omega) %*% (O.cor-outer(muZ,muZ)) %*% Diag(omega) 
  Sigma <- (Sigma+t(Sigma))/2
  cv <- muZ/sqrt(1-muZ^2)
  gamma1 <- 0.5*(4-pi)*cv^3
  list(xi=xi, Omega=Omega, alpha=alpha, omega=omega,  mean=muY, variance=Sigma,
       Omega.conc=O.inv, Omega.cor=O.cor, Omega.pcor=O.pcor,
       lambda=lambda, Psi=Psi,  delta=delta, skewness=gamma1)
}

msn.conditional <- function(xi, Omega, alpha, fixed.comp, fixed.values)
{
# conditional Multivariate SN  (6/11/1997).
# Given a rv Y ~ SN_k(xi,Omega,alpha), this function computes cumulants of
# conditrional distribution, given that the fixed.com take on fixed.values;
# then it finds MSN with matching cumlants.  
  Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
  msqrt <- function(A) Diag(sqrt(diag(as.matrix(A))))
  imsqrt<- function(A) Diag(1/sqrt(diag(as.matrix(A))))
  k<-length(alpha) 
  h<-length(fixed.comp)
  if(any(dim(Omega)!=c(k,k)) | length(xi)!=k | h!=length(fixed.values))
        stop("dimensions of parameters do not match")
  fc <- fixed.comp
  O  <- as.matrix(Omega)
  O11<- O[fc,fc, drop=FALSE]
  O12<- O[fc,-fc, drop=FALSE]
  O21<- O[-fc,fc, drop=FALSE]
  O22<- O[-fc,-fc, drop=FALSE]
  o22<- sqrt(diag(O22))
  inv.O11 <- solve(O11)
  xi1 <- xi[fc, drop=FALSE]
  xi2 <- xi[-fc, drop=FALSE]
  alpha1 <- matrix(alpha[fc])
  alpha2 <- matrix(alpha[-fc]) 
  O22.1 <- O22 - O21 %*% inv.O11 %*% O12
  O22.b <- imsqrt(O22) %*% O22.1 %*% imsqrt(O22)
  xi.c  <- xi2 + as.vector(O21 %*% inv.O11 %*% matrix(fixed.values-xi1))
  a     <- sqrt(1+as.vector(t(alpha2) %*% O22.b %*% alpha2))
  alpha.b <- (alpha1 + msqrt(O11) %*% inv.O11 %*% O12 %*% (alpha2/o22))/a  
  d2    <- as.vector(O22.b %*% alpha2)/a
  x0    <- sum(alpha.b * (fixed.values-xi1)/sqrt(diag(O11)))
  E.c   <- xi.c + zeta(1,x0)*o22*d2
  var.c <- O22.1+zeta(2,x0)*outer(o22*d2,o22*d2)
  gamma1<- zeta(3,x0)*d2^3/diag(O22.b+zeta(2,x0)*d2^2)^1.5
  cum   <- list(as.vector(E.c),var.c,gamma1)
  # cumulants are computed; now choose SN distn to fit them
  a     <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
  E.z   <- a/sqrt(1+a^2)
  delta <- E.z*sqrt(pi/2) 
  omega <- sqrt(diag(var.c)/(1-E.z^2))
  O.new <- var.c+outer(omega*E.z,omega*E.z) 
  xi.new<- E.c-omega*E.z
  B   <- Diag(1/omega)
  m   <- as.vector(solve(B %*% O.new %*% B) %*% as.matrix(delta))
  a   <- m/sqrt(1-sum(delta*m))
  # cum2<- msn.cumulants(xi.new,O.new,a)
  list(cumulants=cum, fit=list(xi=xi.new, Omega=O.new, alpha=a, delta=delta))
}


msn.marginal <- function(xi=NULL, Omega=NULL, alpha=NULL, comp=1:d, dp=NULL)
{# calcola parametri della marginale associata a comp di un SN_d 
  Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
  if(!is.null(xi) && !is.null(dp)) stop("You cannot set both xi and dp")
  if(!is.null(xi) && is.list(xi) && is.null(dp)) dp <- xi
  if(!is.null(dp)){
    xi    <- dp$xi
    Omega <- dp$Omega
    alpha <- dp$alpha
    }
  xi <- as.vector(xi)
  comp <- as.integer(comp)
  alpha <- as.vector(alpha)
  d <- length(alpha)
  if(length(comp)<d){
    if(any(comp>d | comp<1)) stop("comp makes no sense")
    scale<- sqrt(diag(Omega))
    O   <- Diag(1/scale) %*% Omega %*% Diag(1/scale)
    O11 <- O[comp,comp, drop=FALSE]
    O12 <- O[comp,-comp, drop=FALSE]
    O21 <- O[-comp,comp, drop=FALSE]
    O22 <- O[-comp,-comp, drop=FALSE]
    alpha1<- as.matrix(alpha[comp, drop=FALSE])
    alpha2<- as.matrix(alpha[-comp, drop=FALSE])
    O22.1 <- O22 - O21 %*% solve(O11) %*% O12
    a.sum <- as.vector(t(alpha2) %*% O22.1 %*% alpha2)
    a.new <- as.vector(alpha1+solve(O11) %*% O12 %*% alpha2)/sqrt(1+a.sum)
    O.new <- Diag(scale[comp]) %*% O11 %*% Diag(scale[comp])
    result<- list(xi=xi[comp], Omega=O.new, alpha=a.new)
  }
  else {
   if(any(sort(comp)!=(1:d))) stop("comp makes no sense")
   result <-
    list(xi=xi[comp], Omega=as.matrix(Omega[comp,comp, drop=FALSE]), alpha=alpha[comp]) 
   }
  result
}

msn.cond.plot <- function(xi, Omega, alpha, fixed.comp, fixed.values, n=35)
{# fa il grafico di Y_2|Y_1; assumiamo che dim(Y_2)= 2 
  msn.pdf2.aux <- function(x,y,xi,Omega,alpha,fc,fv)
   {
     nx <- length(x)
     ny <- length(y)
     FV <- matrix(rep(fv,nx*ny), nx*ny, length(fv), byrow=TRUE)
     X <- matrix(NA, nx*ny, length(alpha))
     X[,fc] <- FV
     xoy <- cbind(rep(x,ny), as.vector(matrix(y,nx,ny,byrow=TRUE)))
     X[,-fc] <- matrix(xoy, nx*ny, 2, byrow=FALSE)
     pdf<-dmsn(X,xi,Omega,alpha)
     matrix(pdf,nx,ny)
   }
  dsn2 <- function(x,y,d1,d2,omega)
    {
     u <- (x*(d1-omega*d2)+y*(d2-omega*d1))/
          sqrt((1-omega^2-d1^2-d2^2+2*omega*d1*d2)*(1-omega^2))
     pdfn2 <- exp(-0.5*(x^2-2*omega*x*y+y^2)/(1-omega^2))/
              (2*pi*sqrt(1-omega^2))
     2*pdfn2*pnorm(u)
    }
  fc <- fixed.comp
  fv <- fixed.values
  cond <- msn.conditional(xi,Omega,alpha,fc,fv)
  xi.c <- cond$fit$xi
  O.c  <- cond$fit$Omega
  a.c  <- cond$fit$alpha
  if(any(dim(O.c)!=c(2,2))) stop("length(alpha)-length(fixed.com)!=2")
  scale1<-sqrt(as.vector(O.c[1,1]))
  scale2<-sqrt(as.vector(O.c[2,2]))
  delta <- cond$fit$delta
  omega <-as.vector(O.c[1,2])/(scale1*scale2)
  x<-seq(xi.c[1]-3*scale1, xi.c[1]+3*scale1, length=n)
  y<-seq(xi.c[2]-3*scale2, xi.c[2]+3*scale2, length=n)
  plot(x,y,type="n", main="Conditional multivariate SN pdf")
  z1<-(x-xi.c[1])/scale1
  z2<-(y-xi.c[2])/scale2
  pdf.fit<-outer(z1,z2,dsn2,d1=delta[1],d2=delta[2],omega=omega)/
                (scale1*scale2)
  cond$pdf<-list(x=x,y=y,f.fitted=pdf.fit)
  levels<-pretty(pdf.fit,5)
  contour(x,y,pdf.fit,levels=levels,add=TRUE,col=2)
  # fino a qui per il calcolo della densitŕ approx;
  # ora otteniamo quella esatta
  numer <- msn.pdf2.aux(x, y, xi, Omega, alpha, fc, fv)
  marg  <- msn.marginal(xi, Omega, alpha, fc)
  denom <- dmsn(fv, marg$xi, marg$Omega, marg$alpha)
  pdf.exact<- numer/as.vector(denom)
  contour(x, y, pdf.exact, add=TRUE, levels=levels, col=1, lty=4, labcex=0)
  legend(x[1],y[length(y)],c("approx","exact"), lty=c(1,4),col=c(2,1))
  cond$pdf$f.exact<-pdf.exact
  cond$rel.error<-summary((pdf.fit-pdf.exact)/pdf.exact)
  cond$abs.error<-summary(abs(pdf.fit-pdf.exact))
  invisible(cond)
}


msn.moment.fit <- function(y){
# 31-12-1997: simple fit of MSN distribution usign moments
  Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
  y     <- as.matrix(y)
  k     <- ncol(y)
  m.y   <- apply(y,2,mean)
  var.y <- var(y)
  y0    <- (t(y)-m.y)/sqrt(diag(var.y))
  gamma1<- apply(y0^3,1,mean)
  out   <- (abs(gamma1)>0.99527)
  gamma1[out] <- sign(gamma1[out])*0.995
  a     <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333
  delta <- sqrt(pi/2)*a/sqrt(1+a^2)
  m.z   <- delta*sqrt(2/pi) 
  omega <- sqrt(diag(var.y)/(1-m.z^2))
  Omega <- var.y+outer(omega*m.z,omega*m.z) 
  xi    <- m.y-omega*m.z
  O.cor <- Diag(1/omega) %*% Omega %*% Diag(1/omega)
  O.cor <- (t(O.cor)+O.cor)/2
  O.inv <- solve(O.cor)
  tmp   <- as.vector(1-t(delta) %*% O.inv %*% delta)
  if(tmp<=0) {tmp <- 0.0001; admissible <- FALSE} 
        else admissible<-TRUE
  alpha <-as.vector(O.inv%*%delta)/sqrt(tmp)
  list(xi=xi, Omega=Omega, alpha=alpha, Omega.cor=O.cor, omega=omega, 
       delta=delta, skewness=gamma1, admissible=admissible) 
}

msn.fit <- function(X, y, freq, plot.it=TRUE, trace=FALSE, ... )
{
  y.name <- deparse(substitute(y))
  y.names<- dimnames(y)[[2]] 
  y <- as.matrix(y)
  colnames(y)<-y.names
  k <- ncol(y)
  if(missing(freq)) freq<-rep(1,nrow(y))  
  n <- sum(freq)
  if(missing(X)) {
    X <- rep(1,nrow(y))
    missing.X <- TRUE }
  else
    missing.X <- FALSE
  X <- as.matrix(X)
  m <- ncol(X)
  if(length(dimnames(y)[[2]])==0) {
      dimnames(y) <- list(NULL, outer("V",as.character(1:k),paste,sep=""))
      y.names<- as.vector(dimnames(y)[[2]])
      }
  qrX <- qr(X)
  mle<- msn.mle(X=X, y=y, freq=freq, trace=trace, ...)
  mle$call <- match.call()
  # print(mle$nlminb$message)
  beta  <- mle$dp$beta
  Omega <- mle$dp$Omega
  alpha <- mle$dp$alpha
  omega <- sqrt(diag(Omega))
  xi    <- X %*% beta
  if(plot.it & all(freq==rep(1,length(y)))) {
    if(missing.X) { 
      y0  <-y 
      xi0 <- apply(xi,2,mean)} 
    else  {
      y0  <- y-xi 
      xi0 <- rep(0,k)
      }
    if(k>1) {
       opt<-options()
       options(warn=-1)
       pairs(y0, labels=y.names,
        panel=function(x, y, Y, y.names, xi, Omega, alpha)
         {
          for(i in 1:length(alpha)){
            # if(y.names[i]==deparse(substitute(x))) Ix<-i
            # if(y.names[i]==deparse(substitute(y))) Iy<-i
            if(all(Y[,i]==x)) Ix<-i
            if(all(Y[,i]==y)) Iy<-i
            }
          points(x,y)
          marg<-msn.marginal(xi,Omega,alpha,c(Ix,Iy))
          xi.marg<-marg$xi
          Omega.marg<-marg$Omega
          alpha.marg<-marg$alpha     
          x1 <- seq(min(x), max(x), length=30)
          x2 <- seq(min(y), max(y), length=30)
          dsn2.plot(x1, x2, xi.marg, Omega.marg, alpha.marg, add=TRUE, col=2)},  
          # end "panel" function
      Y=y0, y.names=y.names, xi=xi0, Omega=Omega, alpha=alpha)
      options(opt) 
      }
    else{ # plot for case k=1
      y0<-as.vector(y0)
      x<-seq(min(pretty(y0,10)),max(pretty(y0,10)),length=100)
      if(missing.X){
         dp0<-c(xi0,omega,alpha)
         xlab<-y.name}
      else {
         dp0<-c(0,omega,alpha)
         xlab <- "residuals"}
      hist(y0, prob=TRUE, breaks="FD", xlab=xlab, ylab="density")
      lines(x, dsn(x,dp0[1],dp0[2],dp0[3]))
      if(length(y)<101) points(y0, rep(0,n), pch=1)
      title(y.name)
      }
    cat("Press <Enter> to continue..."); readline()
    par(mfrow=c(1,2))
    pp <- qchisq((1:n)/(n+1),k)
    Xb <- qr.fitted(qrX,y)
    res<- qr.resid(qrX,y)
    rad.n  <- apply((y-Xb) * ((y-Xb) %*% solve(var(res))), 1, sum)
    rad.sn <- apply((y-xi) * ((y-xi) %*% solve(Omega)), 1, sum)
    plot(pp, sort(rad.n), pch=1, ylim=c(0,max(rad.n,rad.sn)),
        xlab="Percentiles of chi-square distribution", 
        ylab="Mahalanobis distances")
    abline(0,1,lty=3)
    title(main="QQ-plot for normal distribution", sub=y.name)
    plot(pp, sort(rad.sn), pch=1, ylim=c(0,max(rad.n,rad.sn)),
        xlab="Percentiles of chi-square distribution", 
        ylab="Mahalanobis distances")
    abline(0,1,lty=3)
    title(main="QQ-plot for skew-normal distribution", sub=y.name)
    cat("Press <Enter> to continue..."); readline()
    plot((1:n)/(n+1), sort(pchisq(rad.n,k)), xlab="", ylab="")
    abline(0,1,lty=3)
    title(main="PP-plot for normal distribution", sub=y.name)
    plot((1:n)/(n+1), sort(pchisq(rad.sn,k)), xlab="", ylab="")
    abline(0,1,lty=3)
    title(main="PP-plot for skew-normal distribution", sub=y.name)
    par(mfrow=c(1,1))
    } # end ploting
  dev.norm<- msn.dev(c(qr.coef(qrX,y),rep(0,k)), X, y, freq)
  test <- dev.norm + 2*mle$logL
  p.value <-  1-pchisq(test,k)
  if(trace) {
    cat("LRT for normality (test-function, p-value): ")
    print(c(test,p.value))
    }
  mle$test.normality <- list(LRT=test, p.value=p.value)
  invisible(mle)
}


msn.mle <-function(X, y, freq, start, trace=FALSE, method="BFGS",
                control=list(iter.max=150) )
{
  y <- as.matrix(y)
  if(missing(X)) X <- rep(1,nrow(y))
    else {if(!is.numeric(X)) stop("X must be numeric")}
  if(missing(freq)) freq <- rep(1,nrow(y))
  X <- as.matrix(X) 
  k <- ncol(y)  
  n <- sum(freq)
  m <- ncol(X)
  y.names<-dimnames(y)[[2]] 
  x.names<-dimnames(X)[[2]]
  if(missing(start)) {
      fit0  <- lm.fit(X, y, method="qr")
      beta  <- as.matrix(coef(fit0))
      res   <- resid(fit0)
      a     <- msn.moment.fit(res)
      Omega <- a$Omega
      omega <- a$omega
      alpha <- a$alpha
      if(!a$admissible) alpha<-alpha/(1+max(abs(alpha)))
      beta[1,] <- beta[1,]-omega*a$delta*sqrt(2/pi)  
     }
  else{
    beta  <- start$beta
    Omega <- start$Omega
    alpha <- start$alpha
    omega <- sqrt(diag(Omega)) 
    }
  al.om <-alpha/omega
  if(trace){ 
    cat("Initial parameters:\n")
    print(cbind(t(beta),al.om,Omega))
    }
  param<- c(beta,al.om)
  dev <- msn.dev(param,X,y,freq) 
  opt <- optim(param, fn=msn.dev, gr=msn.dev.grad, method=method,
               control=control, X=X, y=y, freq=freq, trace=trace)      
  if(trace) 
    cat(paste("Message from optimization routine:", opt$message,"\n"))
  logL <- (-opt$value)/2
  beta <- matrix(opt$par[1:(m*k)],m,k)
  dimnames(beta)[2] <- list(y.names)
  dimnames(beta)[1] <- list(x.names)
  al.om  <- opt$par[(m*k+1):(m*k+k)]
  xi    <- X %*% beta
  Omega <- t(y-xi) %*% (freq*(y-xi))/n
  omega <- sqrt(diag(Omega))
  alpha <- al.om*omega
  param <- cbind(omega,alpha)
  dimnames(Omega) <- list(y.names,y.names)
  dimnames(param)[1] <- list(y.names)
  info <- num.deriv(opt$par, FUN="msn.dev.grad", X=X, y=y, freq=freq)/2
  se      <- sqrt(diag(solve(info)))
  se.beta <- matrix(se[1:(m*k)],m,k)
  se.alpha<- se[(m*k+1):(m*k+k)]*omega
  dimnames(se.beta)[2]<-list(y.names)
  dimnames(se.beta)[1]<-list(x.names)
  se  <- list(beta=se.beta, alpha=se.alpha, info=info)
  dp  <- list(beta=beta, Omega=Omega, alpha=alpha)
  list(call=match.call(), dp=dp, logL=logL, se=se, optim=opt)
}
 
msn.dev<-function(param, X, y, freq, trace=FALSE){
  k <- ncol(y)
  # if(missing(freq)) freq<-rep(1,nrow(y))
  n <- sum(freq)
  m <- ncol(X)
  beta<-matrix(param[1:(m*k)],m,k)
  al.om<-param[(m*k+1):(m*k+k)]
  y0 <- y-X %*% beta
  Omega <- (t(y0) %*% (y0*freq))/n  
  d <- diag(qr(2*pi*Omega)[[1]])
  logDet <- sum(log(abs(d)))
  dev <- n*logDet-2*sum(zeta(0,y0 %*% al.om)*freq)+n*k
  if(trace) { 
    cat("\nmsn.dev:",dev,"\n","parameters:"); 
    print(rbind(beta,al.om))
    }
  dev
}

msn.dev.grad <- function(param, X, y, freq, trace=FALSE){
  k <- ncol(y)
  # if(missing(freq)) freq<-rep(1,nrow(y))
  n <- sum(freq)
  m <- ncol(X)
  beta<-matrix(param[1:(m*k)],m,k)
  al.om<-param[(m*k+1):(m*k+k)]
  y0 <- y-X %*% beta
  Omega <- (t(y0) %*% (freq*y0))/n
  p1 <- zeta(1,as.vector(y0 %*% al.om))
  Dbeta <- t(X)%*% (y0*freq) %*%solve(Omega) - 
            outer(as.vector(t(X*freq)%*%p1), al.om)
  Dal.om <- as.vector(t(y0*freq) %*% p1)
  if(trace){
    cat("gradient:\n")
    print(rbind(Dbeta,Dal.om))}
  -2*c(Dbeta,Dal.om)
}

num.deriv <- function(coefficients, FUN, ...)
{# da rm.fit: derivate seconde numeriche, se FUN da` il gradiente
   FUN <- get(FUN, inherit = TRUE)
   values <- FUN(coefficients, ...)
   p <- length(values)
   H <- matrix(0, p, p)
   h <- rep(0, p)
   delta <- cbind((abs(coefficients) + 1e-10) * 1e-5, rep(1e-06, p))
   delta <- apply(delta, 1, max)
   for(i in 1:p) {
   	h[i] <- delta[i]
   	new.values <- FUN(coefficients + h, ...)
   	H[, i] <- (new.values - values)/delta[i]
   	h[i] <- 0
   }
   (H+t(H))/2
}

#---
# skew-t portion

dst <-  function (x, location = 0, scale = 1, shape = 0, df=Inf)
{ 
  if(df==Inf) return(dsn(x,location,scale,shape))
  z   <- (x - location)/scale
  pdf <- dt(z, df=df)/scale
  cdf <- pt(shape*z*sqrt((df+1)/(z^2+df)), df=df+1)
  2 * pdf * cdf
}


rst <- function (n=1, location = 0, scale = 1, shape = 0, df=Inf)
{ 
  z <- rsn(n,location=0,scale,shape)
  if(df==Inf) return(z+location)
  v <- rchisq(n,df)/df
  y <- z/sqrt(v)+location
  attr(y,"parameters")<- c(location,scale,shape,df)
  return(y)
}



pst <- function (x, location = 0, scale = 1, shape = 0, df = Inf, ...) 
{
    if (df == Inf) 
        p <- psn(x, location, scale, shape)
    else {
        if(df<= 0) stop("df must be non-negative")
        z <- (x-location)/scale
        p <- numeric(length(z))
        fp <- function(v, shape, df, t.value) psn(sqrt(v) * t.value, 0, 1, 
                    shape) * dchisq(v * df, df = df) * df
        for (i in 1:length(z)) {          
          if(z[i] < 10+50/df)  
            p[i]<- integrate(dst, -Inf, z[i], shape = shape, df = df,
                             ...)$value
          else           
            p[i] <- integrate(fp, 0, Inf, shape = shape, df = df,
                              t.value = z[i], ...)$value
          }
      }
  pmax(0,pmin(1,p))
}

qst <- function (p, location = 0, scale = 1, shape = 0, df=Inf,  
                 tol = 1e-08, ...)
{ 
    if (df == Inf) 
        return(qsn(p, location, scale, shape))
    max.q <- sqrt(qf(p, 1, df))
    min.q <- -sqrt(qf(1 - p, 1, df))
    if (shape == Inf) 
        return(location + scale * max.q)
    if (shape == -Inf) 
        return(location + scale * min.q)
    na <- is.na(p) | (p < 0) | (p > 1)
    zero <- (p == 0)
    one <- (p == 1)
    p <- replace(p, (na | zero | one), 0.5)
    cum <- st.cumulants(shape, max(df,5), n=4)
    g1 <- cum[3]/cum[2]^(3/2)
    g2 <- cum[4]/cum[2]^2
    x <- qnorm(p)
    x <- (x + (x^2 - 1) * g1/6 + x * (x^2 - 3) * g2/24 -
           x * (2 *  x^2 - 5) * g1^2/36)
    x <- cum[1] + sqrt(cum[2]) * x
    max.err <- 1
    while (max.err > tol) {
        x1 <- x - (pst(x, 0, 1, shape, df, ...) - p)/dst(x, 0, 1, shape, df)
        x1 <- pmin(x1, max.q)
        x1 <- pmax(x1, min.q)
        max.err <- max(abs(x1 - x)/(1 + abs(x)))
        x <- x1
    }
    x <- replace(x, na, NA)
    x <- replace(x, zero, -Inf)
    x <- replace(x, one, Inf)
    return(location + scale * x)
}

st.cumulants <- function(shape=0, df=Inf, n=4)
{
  if(df == Inf) return(sn.cumulants(shape, n=n))
  n<- as.integer(n)
  if(df <= n) stop("need df>n")
  if(length(shape)>1) warning("only shape[1] will be used")
  delta <- shape[1]/sqrt(1 + shape[1]^2)
  cum<- numeric(min(n,4))
  if(n<1) return(cum)
  mu <- delta*sqrt(df/pi)*exp(lgamma((df-1)/2)-lgamma(df/2))
  cum[1]<- mu
  if(n>1) cum[2] <- df/(df-2) - mu^2
  if(n>2) cum[3] <- mu*(df*(3-delta^2)/(df-3) - 3*df/(df-2)+2*mu^2)
  if(n>3) cum[4] <- (3*df^2/((df-2)*(df-4)) - 4*mu^2*df*(3-delta^2)/(df-3)
                    + 6*mu^2*df/(df-2)-3*mu^4)- 3*cum[2]^2
  cum
}

dmst <- function(x, xi=rep(0,d), Omega, alpha, df=Inf)
{
# Density of multivariate ST rv with parameters (xi, Omega, alpha, df) 
# evaluated at x, which is either a d-vector or (n,d) matrix
#
  if(df==Inf) return(dmsn(x,xi,Omega,alpha))
  if(is.vector(x)) {
    n <- 1
    d <- length(x)
  }
  else {
    n <- dim(x)[1]
    d <- dim(x)[2]
  }
  omega <- sqrt(diag(Omega))
  X <- t(matrix(x, nrow = n, ncol = d)) - xi
  z <- X/omega
  Q <- apply((solve(Omega) %*% X) * X, 2, sum) # diag of (x Omega^(-1) x^T)
  # Det <- as.numeric(det.Hermitian(as.Matrix(Omega), logarithm = F)$modulus)
  Det <- abs(prod(diag(qr(Omega)$qr)))
  L <- as.vector(t(z) %*% as.matrix(alpha))
  pdf.mt <- (gamma((df+d)/2)/
            (sqrt((pi*df)^d*Det) * gamma(df/2) * (1+Q/df)^((df+d)/2)))
  cdf.T <- pt(L*sqrt((df+d)/(Q+df)), df+d)
  2 * pdf.mt * cdf.T
}

rmst <- function(n=1, xi=rep(0,d), Omega, alpha, df=Inf)
{ 
  d <- ncol(Omega)
  if(df==Inf) x<-1 
    else x <- rchisq(n,df)/df
  z <- rmsn(n,rep(0,d),Omega,alpha)
  y <- t(xi+t(sqrt(x)*z))
  return(y)
}

pmst <- function(x, xi=rep(0,d), Omega=1, alpha=rep(0,d), df=Inf, ...)
{
  Diag <- function(x) diag(x,nrow=length(x),ncol=length(x))
  d <- ifelse(is.matrix(Omega),nrow(Omega),1)
  Omega<- matrix(Omega,d,d) 
  omega<- sqrt(diag(Omega))
  Ocor <- Diag(1/omega) %*% Omega %*% Diag(1/omega)
  #  t(Omega /omega)/omega
  O.alpha <- as.vector(Ocor %*% alpha)
  delta <- O.alpha/sqrt(1+sum(alpha*O.alpha))
  A <- matrix(rbind(c(1,-delta),cbind(-delta,Ocor)),d+1,d+1)
  if(df==Inf) 
    2 * pmvnorm(upper=c(0,(x-xi)/omega), corr=A, ...)
  else 
    2 * pmvt(upper=c(0,(x-xi)/omega), corr=A, df=df, ...)
}

pmsn <- function(x, xi=rep(0,d), Omega, alpha, ...)
  { d<- length(alpha)
    pmst(x, xi, Omega, alpha, df=Inf, ...)  
  }


dst2.plot <- function(x, y, xi, Omega, alpha, df, ...)
{
# plot bivariate density ST_2(xi,Omega,alpha,df) computed at (x,y) grid
    if(any(dim(Omega) != c(2, 2))) stop("dim(Omega) != c(2,2)")
    nx <- length(x)
    ny <- length(y)
    xoy <- cbind(rep(x, ny), as.vector(matrix(y, nx, ny, byrow = TRUE)))
    X <- matrix(xoy, nx * ny, 2, byrow = FALSE)
    pdf <- dmst(X, xi, Omega, alpha, df)
    pdf <- matrix(pdf, nx, ny)
    contour(x, y, pdf, ...)
    invisible(list(x = x, y = y, density = pdf, xi = xi, Omega = Omega,
        alpha = alpha, df=df))
}

mst.fit <- function(X, y, freq, start, fixed.df=NA, plot.it=TRUE, 
                 trace=FALSE,  ... )
{
  y.name <- deparse(substitute(y))
  y.names<- dimnames(y)[[2]] 
  y <- as.matrix(y)
  d <- ncol(y)
  if(is.null(d)) d<- 1
  if(d>1){
    if(length(y.names)==0){
      dimnames(y) <-
         list(dimnames(y)[[1]], outer("V",as.character(1:d),paste,sep=""))
      y.names<- as.vector(dimnames(y)[[2]])
      }}
  else 
    colnames(y)<-y.name
  if(missing(freq)) freq <- rep(1,nrow(y))  
  n <- sum(freq)
  if(missing(X)) {
     X <- rep(1,nrow(y)) 
     missing.X <- TRUE }
  else 
     missing.X <- FALSE
  X   <- as.matrix(X)
  qrX <- qr(X)
  m   <- ncol(X)
  mle <- mst.mle(X=X, y=y, freq=freq,  fixed.df=fixed.df, start=start, 
            trace=trace,...)
  mle$call <- match.call()
  beta  <- mle$dp$beta
  Omega <- mle$dp$Omega
  alpha <- mle$dp$alpha
  omega <- sqrt(diag(Omega))
  df    <- mle$dp$df
  xi    <- X %*% beta
  if(plot.it & all(freq==rep(1,length(y)))) {
    if(missing.X) { 
      y0  <-y 
      xi0 <- apply(xi,2,mean)} 
    else  {
      y0  <- y-xi 
      xi0 <- rep(0,d)
      }
    if(d>1) {
       opt<-options()
       options(warn=-1)
       pairs(y0, labels=y.names,
        panel=function(x, y, Y, y.names, xi, Omega, alpha)
          {
          for(i in 1:length(alpha)){
            if(all(Y[,i]==x)) Ix<-i
            if(all(Y[,i]==y)) Iy<-i
            }
          points(x,y)
          marg <- msn.marginal(xi, Omega ,alpha, c(Ix,Iy))
          xi.marg <- marg$xi
          Omega.marg <- marg$Omega
          alpha.marg <- marg$alpha     
          x1 <- seq(min(x), max(x), length=30)
          x2 <- seq(min(y), max(y), length=30)
          dst2.plot(x1, x2, xi.marg, Omega.marg, alpha.marg, df, 
                    add=TRUE, col=2)
         },  # end "panel" function
      Y=y0, y.names=y.names, xi=xi0, Omega=Omega, alpha=alpha)
      options(opt)
      }
    else{ # plot for case d=1
      y0<-as.vector(y0)
      x<-seq(min(pretty(y0,10)),max(pretty(y0,10)),length=100)
      if(missing.X){
         dp0<-c(xi0,omega,alpha,df)
         xlab<-y.name}
      else {
         dp0<-c(0,omega,alpha,df)
         xlab <- "residuals"}
      hist(y0, prob=TRUE,  breaks="FD", xlab=xlab, ylab="density", main="")
      lines(x, dst(x,dp0[1],dp0[2],dp0[3],dp0[4]),  col=2)
      if(length(y)<101) points(y0, rep(0,n), pch=1)
      title(y.name)
      }
    cat("Press <Enter> to continue..."); readline()
    par(mfrow=c(1,2))
    pp  <- d*qf((1:n)/(n+1),d,df)
    Xb  <- qr.fitted(qrX,y)
    res <- qr.resid(qrX,y)
    rad.n  <- apply(res    * (res %*% solve(var(res))), 1, sum)
    rad.sn <- apply((y-xi) * ((y-xi) %*% solve(Omega)), 1, sum)
    plot(pp, sort(rad.n), pch=1, ylim=c(0,max(rad.n,rad.sn)),
        xlab="Percentiles of chi-square distribution", 
        ylab="Mahalanobis distances")
    abline(0,1,lty=3)
    title(main="QQ-plot for normal distribution", sub=y.name)
    plot(pp, sort(rad.sn), pch=1, ylim=c(0,max(rad.n,rad.sn)),
        xlab="Percentiles of chi-square distribution", 
        ylab="Mahalanobis distances")
    abline(0,1,lty=3)
    title(main="QQ-plot for skew-t distribution", sub=y.name)
    cat("Press <Enter> to continue..."); readline()
    plot((1:n)/(n+1), sort(pchisq(rad.n,d)), xlab="", ylab="")
    abline(0,1,lty=3)
    title(main="PP-plot for normal distribution", sub=y.name)
    plot((1:n)/(n+1), sort(pf(rad.sn/d,d,df)), xlab="", ylab="")
    abline(0,1,lty=3)
    title(main="PP-plot for skew-t distribution", sub=y.name)
    par(mfrow=c(1,1))
    } # end ploting
  dev.norm<- msn.dev(c(qr.coef(qrX,y),rep(0,d)), as.matrix(X), y, freq)
  test <- dev.norm + 2*mle$logL
  p.value <-  1-pchisq(test,d+1)
  if(trace) {
    cat("LRT for normality (test-function, p-value): ")
    print(c(test,p.value))
    }
  mle$test.normality <- list(LRT=test, df=d+1, p.value=p.value, 
                             normal.logL=dev.norm/(-2))
  invisible(mle)
}


#


st.mle <- function(X, y, freq,  start, fixed.df=NA, trace=FALSE, 
              method="BFGS", control=list(iter.max=150))
{ 
  y.name  <- deparse(substitute(y))
  y <- as.matrix(y)
  dimnames(y)[[2]] <- list(y.name) 
  fit <- mst.mle(X, y, freq, start=start, fixed.df=fixed.df, trace=trace, 
              method=method, control=control)
  mle <- list()
  mle$call<- match.call()
  dp <- fit$dp
  se <- fit$se
  p  <- length(dp$beta)
  dp.names<- c(if(p==1) "location" else dimnames(dp$beta)[[1]],
                "scale","shape","df")
  mle$dp  <- c(dp$beta, sqrt(as.vector(dp$Omega)), dp$alpha, dp$df)
  names(mle$dp) <- dp.names
  mle$se  <- c(se$beta, mle$dp[p+1] * se$internal[p+1], se$alpha,
          dp$df *  se$internal[p+3])
  if(length(mle$se) == length(dp.names)) names(mle$se) <- dp.names
  mle
}


mst.mle <- function(X, y, freq,  start, fixed.df=NA, trace=FALSE, 
         method="BFGS", control=list(iter.max=150))
{  
  Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
  y.name  <- deparse(substitute(y))
  y.names <- dimnames(y)[[2]] 
  y <- as.matrix(y)
  if(missing(X))    X <- matrix(rep(1,nrow(y)),ncol=1)
    else {if(!is.numeric(X)) stop("X must be numeric")}
  if(missing(freq)) freq <- rep(1,nrow(y))
  x.names<-dimnames(X)[[2]]
  X <- as.matrix(X) 
  d <- ncol(y)  
  n <- sum(freq)
  m <- ncol(X)
  if(missing(start)){
    qrX   <- qr(X)
    beta  <- as.matrix(qr.coef(qrX,y))
    Omega <- matrix(var(qr.resid(qrX,y)),d,d)
    omega <- sqrt(diag(Omega))
    alpha <- rep(0,d)
    df <- ifelse(is.na(fixed.df), 10, fixed.df)
    if(trace) {
      cat("mst.mle: dp=","\n")
      print(c(beta,Omega,alpha))
      cat("df:", df,"\n")
      }
    }
  else {
    if(all(names(start)==c("beta", "Omega", "alpha", "df")))
      {
      beta  <- start$beta
      Omega <- start$Omega
      alpha <- start$alpha
      df    <- start$df
      }
    else{
      beta<- matrix(start[1:m],m,1)
      Omega<- matrix(start[m+1]^2,1,1)
      alpha<-start[m+2]
      df<- start[m+3]
    }
  }
  eta <- alpha/sqrt(diag(Omega))
  Oinv<-solve(Omega)
  Oinv<-(Oinv+t(Oinv))/2
  upper <- chol(Oinv)
  D <- diag(upper)
  A<- upper/D
  D <- D^2
  if(d>1) param <- c(beta, -0.5*log(D), A[!lower.tri(A,diag=TRUE)], eta) 
     else param <- c(beta, -0.5*log(D), eta)
  if(is.na(fixed.df)) param <- c(param,log(df))
  opt  <- optim(param, fn=mst.dev,  gr=mst.dev.grad, 
              method=method,  control=control, hessian=TRUE, 
              X=X, y=y, freq=freq, trace=trace, fixed.df=fixed.df)
  dev  <- opt$value
  param<- opt$par
  if(trace){
    cat("Message from optimization routine:", opt$message,"\n")
    cat("deviance:", dev,"\n")
    }
  beta <- matrix(param[1:(m*d)],m,d)
  D    <- exp(-2*param[(m*d+1):(m*d+d)])
  if(d>1) {
    A <- diag(d)
    A[!lower.tri(A,diag=TRUE)] <- param[(m*d+d+1):(m*d+d+d*(d-1)/2)]
    i0 <- m*d+d+d*(d-1)/2
    }
  else{
    i0 <- m+1
    A  <- as.matrix(1)
    }
  eta  <- param[(i0+1):(i0+d)]
  if(is.na(fixed.df)) df <- exp(param[i0+d+1])
    else df <- fixed.df
  # Omega <- solve(t(A) %*% diag(D) %*% A)
  Ainv <- backsolve(A,diag(d))
  Omega<- Ainv %*% Diag(1/D) %*% t(Ainv)
  omega<-sqrt(diag(Omega))
  alpha<- eta*omega
  dimnames(beta) <- list(x.names,y.names)
  dimnames(Omega) <- list(y.names, y.names)
  if(length(y.names)>0) names(alpha) <- y.names
  info <- opt$hessian/2
  if(all(is.finite(info))) 
    {
      qr.info <- qr(info)
      info.ok <- (qr.info$rank == length(param))
     }
  else info.ok <- FALSE
  if(info.ok) {
    se2     <- diag(solve(qr.info))
    if(min(se2) < 0 )
      se<- NA
    else{
      se      <- sqrt(se2)
      se.beta <- matrix(se[1:(m*d)],m,d)
      se.alpha<- se[(i0+1):(i0+d)] * omega
      dimnames(se.beta)[2] <- list(y.names)
      dimnames(se.beta)[1] <- list(x.names)
      names(se.alpha) <- y.names
      se.df   <- df*se[i0+d+1]
      se      <- list(beta=se.beta, alpha=se.alpha, df=se.df,
                      internal=se, info=info)
    }}
  else
    se <- NA
  dp <- list(beta=beta, Omega=Omega,  alpha=alpha, df=df)
  list(call=match.call(), logL=-0.5*dev, deviance=dev, dp=dp, se=se, optim=opt)
}


mst.dev <- function(param, X, y, freq=rep(1,nrow(X)), fixed.df=NA, trace=FALSE)
{
  Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
  d <- ncol(y)
  # if(missing(freq)) freq<-rep(1,nrow(y))
  n <- sum(freq)
  m <- ncol(X)
  beta<-matrix(param[1:(m*d)],m,d)
  D <- exp(-2*param[(m*d+1):(m*d+d)])
  if(d>1) {
    A <- diag(d)
    A[!lower.tri(A,diag=TRUE)] <- param[(m*d+d+1):(m*d+d+d*(d-1)/2)]
    i0 <- m*d+d+d*(d-1)/2
    }
  else {
    i0<- m+1
    A <- as.matrix(1)
    }
  eta <- param[(i0+1):(i0+d)]
  if(is.na(fixed.df)) df <- exp(param[i0+d+1])
     else df <- fixed.df
  Oinv <- t(A) %*% Diag(D) %*% A
  # Omega <- solve(Oinv)
  u <-  y - X %*% beta
  Q <- apply((u %*% Oinv)*u,1,sum)
  L <- as.vector(u %*% eta) 
  logDet<- sum(log(df*pi/D))
  dev <- (n*(2*lgamma(df/2) + logDet - 2*lgamma((df+d)/2)) 
          + (df+d) * sum(freq * log(1+Q/df))
         -2*sum(freq * log(2*pt( L * sqrt((df+d)/(Q+df)),df+d))))
  if(trace) cat("mst.dev: ",dev, "\n")
  dev
}


mst.dev.grad <- function(param, X, y, freq=rep(1,nrow(X)), fixed.df=NA,
                         trace=FALSE)
{
  Diag <- function(x) diag(x, nrow=length(x), ncol=length(x))
  d <- ncol(y)
  # if(missing(freq)) freq<-rep(1,nrow(y))
  n   <- sum(freq)
  m   <- ncol(X)
  beta<- matrix(param[1:(m*d)],m,d)
  D   <- exp(-2*param[(m*d+1):(m*d+d)])
  if(d>1) {
    A  <- diag(d)
    A[!lower.tri(A,diag=TRUE)] <- param[(m*d+d+1):(m*d+d+d*(d-1)/2)]
    i0 <- m*d+d+d*(d-1)/2
    }
  else{
    i0 <- m*d+d
    A  <- as.matrix(1)
    }
  eta   <- param[(i0+1):(i0+d)]
  if(is.na(fixed.df)) df <- exp(param[i0+d+1])
     else df <- fixed.df
  tA    <- t(A)
  Oinv  <- tA %*% Diag(D) %*% A
  u     <- y-X %*% beta
  Q     <- as.vector(apply((u %*% Oinv)*u,1,sum))
  L     <- as.vector(u %*% eta) 
  t.    <- L*sqrt((df+d)/(Q+df))
  dlogft<- -(df+d)/(2*df*(1+Q/df))
  dt.dL <- sqrt((df+d)/(Q+df))
  dt.dQ <- (-0.5)*L*sqrt(df+d)/(Q+df)^1.5
  T.    <- pt(t., df+d)
  dlogT.<- dt(t., df+d)/T.
  u.freq <- u*freq
  Dbeta <- (-2* t(X) %*% (u.freq*dlogft) %*% Oinv 
            - outer(as.vector(t(X) %*% (dlogT. * dt.dL* freq)), eta)
            - 2* t(X) %*% (dlogT.* dt.dQ * u.freq) %*% Oinv )
  Deta  <- apply(dlogT.*sqrt((df+d)/(Q+df))*u.freq, 2, sum)
  if(d>1){
     M  <- 2*( Diag(D) %*% A %*% t(u * dlogft
               + u * dlogT. * dt.dQ) %*% u.freq)
     DA <- M[!lower.tri(M,diag=TRUE)]
     }
  else DA<- NULL
  M     <- ( A %*% t(u*dlogft + u*dlogT.*dt.dQ) %*% u.freq %*% tA)
  if(d>1) DD <- diag(M) + 0.5*n/D
     else DD <- as.vector(M + 0.5*n/D) 
  grad <- (-2)*c(Dbeta,DD*(-2*D),DA,Deta)
# browser()  
  if(is.na(fixed.df)) {  
    dlogft.ddf <- 0.5 * (digamma((df+d)/2) - digamma(df/2) - d/df
                        + (df+d)*Q/((1+Q/df)*df^2) - log(1+Q/df))
    eps   <- 1.0e-4
    T.eps <- pt(L*sqrt((df+eps+d)/(Q+df+eps)), df+eps+d)
    dlogT.ddf <- (log(T.eps)-log(T.))/eps
    Ddf   <- sum((dlogft.ddf + dlogT.ddf)*freq)
    grad <- c(grad, -2*Ddf*df)
    }
  if(trace)  cat("mst.dev.grad: norm is ",sqrt(sum(grad^2)),"\n")  
 return(grad)
}
#-------------

st.2logL.profile<-function(X=matrix(rep(1,n)), y, freq, trace=FALSE,
          fixed.comp = c(ncol(X)+2, ncol(X)+3), 
          fixed.values = cbind(c(-4,4), log(c(1,25))),
          npts=30/length(fixed.comp), plot.it=TRUE, ...)
{# plot2D profile deviance (=2(max.logL-logL)) using either parameters
 # if(plot.it & !exists(.Device)) stop("Device not active")
 #
   if(missing(freq)) freq <- rep(1,length(y))
   n <- sum(freq)
   if(length(fixed.comp) == 1){
      param1 <- seq(fixed.values[1], fixed.values[2], length=npts)
      logL <- param2 <- rep(NA,npts)}
   else{ 
      param1 <- seq(fixed.values[1,1], fixed.values[2,1], length=npts)
      param2 <- seq(fixed.values[1,2], fixed.values[2,2], length=npts)
      logL   <- matrix(NA,npts,npts)}
   ls<-lm.fit(X,y)
   omega<-sqrt(var(resid(ls)))
   param <- c(coef(ls), log(omega), 0, log(20))[-fixed.comp]
   # mle<- mst.mle(X=X, y=y)
   # param <- mle$optim$par[-fixed.comp]
   max.logL <- (-Inf)
   cat(c("Running up to",npts,":"))
   for(i in 1:npts){
     cat(" ",i)
     if(length(fixed.comp) == 1) {
       opt  <- optim(param, fn=st.dev.fixed, method="Nelder-Mead",
                 X=X, y=y, freq=freq, trace=trace, 
                 fixed.comp=fixed.comp, fixed.values=param1[i])
       logL[i] <- opt$value/(-2)
       param <- opt$par
       if(logL[i] > max.logL) {
         max.logL<- logL[i]
         best <- list(fixed.comp1=param1[i], fixed.comp2=NA, opt=opt)
       }}
     else{
     for(j in 1:npts){     
       opt  <- optim(param, fn=st.dev.fixed, method="Nelder-Mead",
                 X=X, y=y, freq=freq, trace=trace, 
                 fixed.comp=fixed.comp,
                 fixed.values=c(param1[i], param2[j] ))
       logL[i,j] <- opt$value/(-2)
       if(j==1) param0 <- opt$par
       if(j<npts) 
         param <- opt$par
       else
         param <- param0
       if(logL[i,j] > max.logL) {
         max.logL<- logL[i,j]
         best <- list(fixed.comp1=param1[i], fixed.comp2=param2[j], opt=opt)
       }   
     }}
   }
  cat("\n") 
  dev <- 2 * (max(logL) - logL)
  if(plot.it){
    if(length(fixed.comp) == 1){ 
      plot(param1, dev, type="l", ...)
      points(x=best$fixed.comp1, y=0, pch=1)
      }
    else{
      contour(param1, param2, dev,   labcex=0.75, 
          levels=c(0.57, 1.37, 2.77, 4.6, 5.99, 9.2), 
          labels=c(0.25, 0.5,  0.75, 0.90,0.95, 0.99),
          ...)
    points(x=best$fixed.comp1, y=best$fixed.comp2, pch=1)  
    }
  }    
  title(main=paste("dataset:", deparse(substitute(y)),
        "\nProfile deviance", sep= " "))
  invisible(list(call=match.call(), param1=param1, param2=param2,
            deviance=dev, max.logL=max.logL, best=best))
}


st.dev.fixed <- function(free.param, X, y, freq, trace=FALSE, 
                fixed.comp=NA,  fixed.values=NA)
{# param components: beta, log(omega), alpha, log(df)
  n <- sum(freq)
  m <- ncol(X)
  param <- numeric(length(free.param)+length(fixed.comp))
  param[fixed.comp]  <- fixed.values
  param[-fixed.comp] <- free.param
  beta  <- param[1:m]
  omega <- exp(param[m+1])
  eta <- param[m+2]/omega
  df  <- exp(param[m+3])
  u <-  y - X %*% beta
  Q <- freq*(u/omega)^2
  L <- u*eta 
  logDet <- log(df*pi*omega^2)
  dev <- (n*(2*lgamma(df/2) + logDet - 2*lgamma((df+1)/2)) 
          + (df+1) * sum(log(1+Q/df))
         -2*sum(log(2*pt(L * sqrt((df+1)/(Q+df)),df+1))))
  if(trace) cat("st.dev.fixed (param, dev): ", param, dev,"\n")
  dev
}

#----

sn.SFscore<-function(shape, data, trace=FALSE)
{# Sartori-Firth's modified score function
  a42<- integrate(function(x) dsn(x,0,1,shape) * x^4 * zeta(1,shape*x)^2,
                   -Inf, Inf)$value
  a22<- integrate(function(x) dsn(x,0,1,shape) * x^2 * zeta(1,shape*x)^2,
                   -Inf, Inf)$value
  score<- sum(zeta(1,shape*data)*data)-0.5*shape*a42/a22
  if(trace) cat("sn.SFscore:", shape, score,"\n")
  score
}

sn.mmle <- function(X, y, plot.it=TRUE, trace=FALSE,...)
   {
     n <- length(y)
     if (missing(X)){
         X <- as.matrix(rep(1, n))
         colnames(X) <- "mean"
       }
     m <- ncol(X)
     dp <- cp.to.dp(sn.mle(X=X, y=y, plot.it=plot.it, trace=trace,...)$cp)
     z  <- as.vector(y- X %*% dp[1:m])/dp[m+1]
     a0 <- 0
     f0 <- sn.SFscore(a0, z, trace=trace)
     a1 <- sign(dp[m+2])
     f1 <- sn.SFscore(a1, z, trace=trace)
     while(f0*f1 > 0){
       a0 <- a1
       f0 <- f1
       a1 <- a1*2
       f1 <- sn.SFscore(a1, z, trace=trace)
       }
     if(trace)cat("a0, a1: ",a0, a1,"\n")
     a<- uniroot(sn.SFscore, interval=c(a0, a1), data=z, trace=trace)
     dp[m+2]<- a$root
     if (plot.it) {
       dp0 <- dp
       if (all(X == rep(1, n)))
         y0 <- y
       else {
         y0 <- as.vector(y - X %*% dp0[1:m])
         dp0 <- c(0, dp0[m + 1], dp0[m + 2])
         xlab <- "residuals"
         }
       curve(dsn(x, dp0[1], dp0[2], dp[m+2]), add=TRUE, lty=2, col=3)
       }
     names(dp)[m+2] <- "shape"
     info <- sn.Einfo(dp=dp,x=X)
     se <- info$se.dp
     names(se)<- names(dp)
     list(call=match.call(), dp=dp, se=se, Einfo=info$info.dp)
   }


sn.Einfo <- function(dp=NULL, cp=NULL, n=1, x=NULL)
{# computes Expected  Fisher information matrix for SN variates
  if(is.null(dp) & is.null(cp)) stop("either dp or cp must be set")
  if(!is.null(dp) & !is.null(cp)) stop("either dp or cp must be set")
  if(is.null(cp)) cp<- dp.to.cp(dp)
  if(is.null(dp)) dp<- cp.to.dp(cp)
  if(is.null(x))
     {
      x <-matrix(rep(1,n),nrow=n,ncol=1)
      xx<- n
      sum.x<-n
      p <- 1
     }
    else
    { if(n!=1) warning("You have set both n and x, I am setting n<-nrow(x)")
      n <- nrow(x)
      p <- ncol(x)
      xx <- t(x) %*% x
      sum.x <- matrix(apply(x,2,sum))
    }
  if(length(cp) != (p+2)| length(dp) != (p+2))
        stop("length(dp) | length(cp) must be equal to ncol(x)+2")
  omega <- dp[p+1]
  alpha <- dp[p+2]
  E.z   <- sqrt(2/pi)*alpha/sqrt(1+alpha^2)
  s.z   <- sqrt(1-E.z^2)
  I.dp  <- matrix(NA,p+2,p+2)
  if(abs(alpha)< 200){
    a0 <- integrate(function(x) dsn(x,0,1,alpha) * zeta(1,alpha*x)^2,
          -Inf, Inf)$value
    a1 <- integrate(function(x) dsn(x,0,1,alpha) *x * zeta(1,alpha*x)^2,
          -Inf, Inf)$value
    a2 <- integrate(function(x) dsn(x,0,1,alpha) *x^2 * zeta(1,alpha*x)^2,
          -Inf, Inf)$value
    }
  else
    {a0 <- sign(alpha)*0.7206/abs(alpha)
     a1 <- -sign(alpha)*(0.6797/alpha)^2
     a2 <- 0.005897/alpha^2 + 30.611/alpha^4
    }
  I.dp[1:p,1:p] <- xx* (1+alpha^2*a0)/omega^2  
  I.dp[p+1,p+1] <- n * (2+alpha^2*a2)/omega^2
  I.dp[p+2,p+2] <- n * a2
  I.dp[1:p,p+1] <- sum.x * (E.z*(1+E.z^2*pi/2)+alpha^2*a1)/omega^2
  I.dp[p+1,1:p] <- t(I.dp[1:p,p+1])
  I.dp[1:p,p+2] <- sum.x * (sqrt(2/pi)/(1+alpha^2)^1.5-alpha*a1)/omega
  I.dp[p+2,1:p] <- t(I.dp[1:p,p+2])
  I.dp[p+1,p+2] <- I.dp[p+2,p+1] <- n*(-alpha*a2)/omega
  # cp <- dp.to.cp(dp)
  sigma <-cp[p+1]
  gamma1<-cp[p+2]
  D  <- diag(p+2)
  R  <- E.z/s.z
  T  <- sqrt(2/pi-(1-2/pi)*R^2)
  Da.Dg <- 2*(T/(T*R)^2+(1-2/pi)/T^3)/(3*(4-pi))
  DE.z <- sqrt(2/pi)/(1+alpha^2)^1.5
  Ds.z <- (-E.z/s.z)*DE.z
  D[1,p+1] <- (-R)
  D[1,p+2] <- (-sigma*R)/(3*gamma1)
  D[p+1,p+1] <- 1/s.z
  D[p+1,p+2] <- (-sigma)* Ds.z* Da.Dg/s.z^2
  D[p+2,p+2] <- Da.Dg
  I.cp  <- t(D) %*% I.dp %*% D
  I.cp <- (I.cp + t(I.cp))/2
  se.dp <- sqrt(diag(solve(I.dp)))
  se.cp <- sqrt(diag(solve(I.cp)))
  dimnames(I.cp)<- list(names(cp), names(cp))
  dimnames(I.dp)<- list(names(dp), names(dp))
  list(dp=dp, cp=cp, info.dp=I.dp, info.cp=I.cp, se.dp=se.dp, se.cp=se.cp, D=D)
}

#----


sn.logL.grouped <- function(param, breaks, freq, trace=FALSE)
{
  cdf <- pmax(psn(breaks, param[1],exp(param[2]), param[3]), 0)
  p <- diff(cdf)
  logL <- sum(freq*log(p))
  if(trace) print(c(param, logL))
  logL
}

sn.mle.grouped <- function(breaks, freq, trace=FALSE, start=NA)
{
  if(any(is.na(start))){
    b <- breaks
    d <- diff(b)
    if(b[1]== -Inf) b[1]<- b[2]-d[2]
    if(b[length(b)]==Inf) b[length(b)] <- b[length(b)-1]+d[length(d)-1]
    mid<- (b[-1]+b[-length(b)])/2
    dp <- msn.mle(y=mid, freq=freq, trace=trace)$dp
    start <- c(dp[[1]], log(sqrt(dp[[2]])), dp[[3]])
    }
  opt <- optim(start,  sn.logL.grouped, 
              control=list(fnscale=-1),
              breaks=breaks, freq=freq, trace=trace)
  param <- opt$par
  dp <- c(param[1], exp(param[2]), param[3]) 
  invisible(list(call=match.call(), dp=dp, logL=opt$value, end=param, opt=opt))
}


st.logL.grouped <- function(param, breaks, freq, trace=FALSE)
{
  if(param[4] > 5.5214609)
      cdf<- psn(breaks, param[1], exp(param[2]), param[3])
    else
      cdf<- pst(breaks, param[1], exp(param[2]), param[3], exp(param[4]))
  p <- pmax(diff(cdf), 1.0e-10)
  logL <- sum(freq*log(p)) 
  if(trace) print(c(param, logL))
  logL
}

st.mle.grouped <- function(breaks, freq, trace=FALSE, start=NA)
{
  if(any(is.na(start))){
    a <- sn.mle.grouped(breaks, freq)
    start <- c(a$end, log(15))
    if(trace)  cat("Initial parameters set to:", format(start),"\n")
    }
  opt <- optim(start,  st.logL.grouped, 
            control=list(fnscale=-1),
            breaks=breaks, freq=freq, trace=trace)  
  param<-opt$par
  dp <- c(param[1],exp(param[2]),param[3], exp(param[4])) 
  logL <- opt$value 
  invisible(list(call=match.call(), dp=dp, logL=logL, end=param, opt=opt))
}

msn.affine <- function(dp, A, a=0, drop=TRUE)
{
# computes distribution of affine transformation of MSN/MST variate, T=a+AY,
# using formuale in Appendix A.2 of Capitanio et al.(2003)
#
  Diag  <- function(x) diag(x,nrow=length(x),ncol=length(x))
  if(is.null(dp$xi))  xi <- dp$beta  else  xi <- dp$xi
  xi.T  <- as.vector(A %*% matrix(xi,ncol=1)+a)
  Omega <- dp$Omega
  O.T   <- as.matrix(A %*% Omega %*% t(A)) 
  oi    <- Diag(1/sqrt(diag(Omega)))
  B     <- oi %*% Omega %*% t(A)
  tmp   <- (oi %*% Omega %*% oi - B %*% solve(O.T) %*% t(B)) %*% dp$alpha
  den   <- sqrt(1+sum(dp$alpha*as.vector(tmp)))
  num   <- Diag(sqrt(diag(O.T))) %*% solve(O.T) %*% t(B) %*% dp$alpha
  # cat("termine sotto radice -1: ", sum(dp$alpha*as.vector(tmp)),"\n")
  alpha <- as.vector(num/den)
  if(all(dim(O.T)==c(1,1)) & drop)
     dp.T<- list(location=xi.T, scale=sqrt(as.vector(O.T)), shape=alpha)
  else
     dp.T <- list(xi=xi.T, Omega=O.T, alpha=alpha)
  if(!is.null(tau=dp$tau)) dp.T$tau <- dp$tau
  if(!is.null(tau=dp$df)) dp.T$df <- dp$df
  return(dp.T)
}

mst.affine <- function(dp, A, a=0, drop=TRUE) msn.affine(dp, A, a, drop)

#---

.First.lib <- function(library, pkg)
{ 
    Rv <- R.Version()
    if(Rv$major == 0 |(Rv$major == 1 && Rv$minor < 0.1))
        stop("This package requires R 1.0.1 or later")
    assign(".sn.home", file.path(library, pkg),
           pos=match("package:sn", search()))
    sn.version <- "0.33 (2005-04-07)"
    assign(".sn.version", sn.version, pos=match("package:sn", search()))
    if(interactive())
    {
      cat("Library 'sn', version ", sn.version, ", © 1998-2005 A.Azzalini\n")
      cat("type 'help(SN)' for summary information\n")
    }
    invisible()
}

back to top