Revision a53065a09c3fce65a63e137deb5bccb6162e6cff authored by Matthias Templ on 18 November 2020, 20:10:02 UTC, committed by cran-robot on 18 November 2020, 20:10:02 UTC
1 parent 9ae1e67
Raw File
CompositionalSpline.R
#' Compositional spline
#' 
#' @rdname compositionalSpline
#' @name compositionalSpline
#' @author J. Machalova \email{jitka.machalova@upol.cz}, R. Talska \email{talskarenata@seznam.cz}
#' @description This code implements the compositional smoothing splines grounded on the theory of 
#' Bayes spaces.
#' @details The compositional splines enable to construct a spline basis in the centred logratio (clr) space of density 
#' functions (ZB-spline basis) and consequently also in the original space of densities (CB-spline basis).The resulting 
#' compositional splines in the clr space as well as the ZB-spline basis satisfy the zero integral constraint. 
#' This enables to work with compositional splines consistently in the framework of the Bayes space methodology.
#' @details Augmented knot sequence is obtained from the original knots by adding #(order-1) multiple endpoints.
#' @importFrom splines splineDesign
#' @importFrom graphics matplot matpoints
#' @param t class midpoints
#' @param clrf clr transformed values at class midpoints, i.e., fcenLR(f(t))
#' @param knots sequence of knots
#' @param w weights
#' @param order order of the spline (i.e., degree + 1)
#' @param der lth derivation
#' @param alpha smoothing parameter
#' @param spline.plot if TRUE, the resulting spline is plotted
#' @param basis.plot if TRUE, the ZB-spline basis system is plotted 
#' @return 
#' \item{\code{J}}{value of the functional J}
#' \item{\code{ZB_coef}}{ZB-spline basis coeffcients}
#' \item{\code{CV}}{score of cross-validation}
#' \item{\code{GCV}}{score of generalized cross-validation}
#' @references Machalova, J., Talska, R., Hron, K. Gaba, A. Compositional splines for representation of density functions. \emph{Comput Stat} (2020). https://doi.org/10.1007/s00180-020-01042-7
#' @export
#' @examples
#' # Example (Iris data):
#' SepalLengthCm <- iris$Sepal.Length
#' Species <- iris$Species
#' iris1 <- SepalLengthCm[iris$Species==levels(iris$Species)[1]]
#' h1 <- hist(iris1, plot = FALSE)
#' midx1 <- h1$mids
#' midy1 <- matrix(h1$density, nrow=1, ncol = length(h1$density), byrow=TRUE)
#' clrf  <- cenLR(rbind(midy1,midy1))$x.clr[1,]
#' knots <- seq(min(h1$breaks),max(h1$breaks),l=5)
#' order <- 4
#' der <- 2
#' alpha <- 0.99
#' \donttest{
#' sol1 <- compositionalSpline(t = midx1, clrf = clrf, knots = knots, 
#'   w = rep(1,length(midx1)), order = order, der = der, 
#'   alpha = alpha, spline.plot = T)
#' sol1$GCV
#' ZB_coef <- sol1$ZB_coef
#' t <- seq(min(knots),max(knots),l=500)
#' t_step <- diff(t[1:2])
#' ZB_base <- ZBsplineBasis(t=t,knots,order)$ZBsplineBasis
#' sol1.t <- ZB_base%*%ZB_coef
#' sol2.t <- fcenLRinv(t,t_step,sol1.t)
#' h2 = hist(iris1,prob=T,las=1)
#' points(midx1,midy1,pch=16)
#' lines(t,sol2.t,col="darkred",lwd=2)
#' # Example (normal distrubution):
#' # generate n values from normal distribution
#' set.seed(1)
#' n = 1000; mean = 0; sd = 1.5
#' raw_data = rnorm(n,mean,sd)
#'   
#' # number of classes according to Sturges rule
#' n.class = round(1+1.43*log(n),0)
#'   
#' # Interval midpoints
#' parnition = seq(-5,5,length=(n.class+1))
#' t.mid = c(); for (i in 1:n.class){t.mid[i]=(parnition[i+1]+parnition[i])/2}
#'   
#' counts = table(cut(raw_data,parnition))
#' prob = counts/sum(counts)                # probabilities
#' dens.raw = prob/diff(parnition)          # raw density data
#' clrf =  cenLR(rbind(dens.raw,dens.raw))$x.clr[1,]  # raw clr density data
#'   
#' # set the input parameters for smoothing 
#' knots = seq(min(parnition),max(parnition),l=5)
#' w = rep(1,length(clrf))
#' order = 4
#' der = 2
#' alpha = 0.5
#' spline = compositionalSpline(t = t.mid, clrf = clrf, knots = knots, 
#'   w = w, order = order, der = der, alpha = alpha, 
#'   spline.plot=TRUE, basis.plot=F)
#'   
#' # ZB-spline coefficients
#' ZB_coef = spline$ZB_coef
#'   
#' # ZB-spline basis evaluated on the grid "t.fine"
#' t.fine = seq(min(knots),max(knots),l=1000)
#' ZB_base = ZBsplineBasis(t=t.fine,knots,order)$ZBsplineBasis
#'   
#' # Compositional spline in the clr space (evaluated on the grid t.fine)
#' comp.spline.clr = ZB_base%*%ZB_coef
#'   
#' # Compositional spline in the Bayes space (evaluated on the grid t.fine)
#' comp.spline = fcenLRinv(t.fine,diff(t.fine)[1:2],comp.spline.clr)
#'   
#' # Unit-integral representation of truncated true normal density function 
#' dens.true = dnorm(t.fine, mean, sd)/trapzc(diff(t.fine)[1:2],dnorm(t.fine, mean, sd))
#'   
#' # Plot of compositional spline together with raw density data
#' matplot(t.fine,comp.spline,type="l",
#'     lty=1, las=1, col="darkblue", xlab="t", 
#'     ylab="density",lwd=2,cex.axis=1.2,cex.lab=1.2,ylim=c(0,0.28))
#' matpoints(t.mid,dens.raw,pch = 8, col="darkblue", cex=1.3)
#'   
#' # Add true normal density function
#' matlines(t.fine,dens.true,col="darkred",lwd=2)
compositionalSpline = function(t,clrf,knots,w,order,der,alpha,spline.plot = FALSE,basis.plot=FALSE){
  ## Comments from Matthias:
  # 1. Why not giving as first argument some data, and do the fcenLR(f(t)) within compositionalSpline()?
  # This would be more userfriendly.
  # 2. Please provide executable examples for the users.
  # 3. I put all helper functions within compositionalSpline, but can 
  # include it as usual documented functions in robCompositions as well. The disadvantage 
  # to do so is that more and more documented functions are available, but these functions are only used internally.
  # 4. I will add a comment like plot = FALSE, so that the plot is only done when the user decides for it.
  # Alternatively I also can implement a plot method.
  # 5. Which parameter values can be set to a sensible default value? E.g. order = 2

  ## Comments from Renata
  # ad 1. The order of arguments was changed. We prefer to let the input data as clr.
  # ad 2. The example based on iris1 data was expanded and one related to normal distribution was considered.
  # ad 3. To run the code, only the function trapzc() is needed. 
  #       Functions trapzc(), fcenLR(), fcenLRp(), fcenLRu(), fcenLRinv() are to be included in robCompositions.
  # ad 4. Two more additional input paramaters were added to the function - spline.plot, basis.plot.
  # ad 5. It is not necessary.
    
  # Verification for parameter alpha
  if (alpha<=0 | alpha>1) stop ("parameter alpha must be from interval (0,1]")
  
  k = order
  r = length(knots) 

  Celkova_Delka = 2*(k-1) + r 
  lambda = c() 
  for (i in 1:(Celkova_Delka)){
    if (i <= k-1){lambda[i] = knots[1]}
    if ((i > k-1) && (i <= r + k-1)){lambda[i] = knots[i-(k-1)]}
    if (i > r+ k-1){lambda[i] = knots[r]}
  }
  
  # Collocation matrix B
  B = splineDesign(lambda, t, k, outer.ok = TRUE)
  
  # Diag matrix with weights
  W = diag(w)
  
  # Collocation matrix B (:=BB) evaluated on finner grid
  parnition = seq(min(lambda), max(lambda), length = 1000)    
  lambda_index = c(0:(r-1)) 
  g = lambda_index[length(lambda_index) - 1]
  # Dimension(space of splines)
  N = g+(k-1)+1
  BB = array(0, c(length(parnition),N))
  l = c()
  for(i in (1:N)){
    for (j in 1:(k+1)){
      l[j] = lambda[i+j-1]
    }
    BB[ ,i] = splineDesign(l, parnition, k, outer.ok = TRUE) 
  }
  # Verification of full column rank of collocation matrix K
  if (length(t) <= N) stop ('length(t) must be higher then Dimension(space of splines)')
  if (qr(B)$rank != N) stop ('Collocaton matrix does not have full column rank.')
  
   # Matrix S
  S = array(0)
  if (der == 0){
    S = diag(1, c(N,N))
  }
  if (der >= (k-1)) {stop ('Error. The lth derivation is not from the set {1,2,...,order-2}.')
  } else {
    S_pom = diag(1,N,N)
    for (j in 1:der)
    {
      D_mat = array(0)
      rozdil = lambda[(1+k):(N+k-j)] - lambda[(1+j):(N)]
      D_mat = (k-j)*diag(1/rozdil)
      L_mat = array(0, c(N-j,N-j+1))
      for (J in (1:(N-j))){
        L_mat[J,J] = (-1)
        L_mat[J,J+1] = 1
      }
      S_pom = D_mat%*%L_mat%*%S_pom
    }
    S = S_pom
  }
  
  # Matrix M: order of spline = k-der
  kk = k-der 
  
  # Matrix M: augmented knot sequence
  celkova_delka = 2*(kk-1) + r                                         
  Lambda = c()
  for (i in 1:celkova_delka){
    if (i <= (kk-1)){Lambda[i] = knots[1]}
    if ((i > kk-1) && (i <= r + kk-1)){Lambda[i] = knots[i-(kk-1)]}
    if (i > (r+(kk-1))){Lambda[i] = knots[r]}
  } 
  Parnition = seq(min(Lambda), max(Lambda),  length = 10000)    
  Lambda_index = c(0:(r-1)) 
  G = Lambda_index[length(Lambda_index) - 1]
  
  # Matrix M: spline space dimension
  NN = G+(kk-1)+1
  
  # Matrix M: collocation matrix BBB
  BBB = splineDesign(Lambda, Parnition, kk, outer.ok=TRUE)
  
  # Matrix M: function for computing integral
  step=diff(Parnition[1:2])
  
  # Matrix M
  M=array(0, c(NN,NN))
  for (i in 1:NN){
    for (j in 1:NN){
      nenulove = c()
      soucin = BBB[,i]*BBB[,j]
      for (m in 1:length(Parnition)){
        if (soucin[m] != 0) {nenulove[m] = soucin[m]}
      }
      M[i,j]=trapzc(step, soucin)
    }
  }
  
  # Matrix D
  difference = lambda[(1+k):(r+2*(k-1))] - lambda[(1:(r+k-2))]
  D = (k)*diag(1/difference)
  
  # Matrix K
  K = array(0, c(N,N-1))
  K[1,1]=1
  K[N,N-1]=-1
  
  for (j in (2:(N-1))){
    K[j,j-1] = (-1)
    K[j,j] = 1
  }

  # Matrix U
  U = D%*%K

  # Matrix G, vector g
  GG = t(U)%*%((1-alpha)*t(S)%*%M%*%S + alpha*t(B)%*%W%*%B)%*%U
  gg = alpha*t(K)%*%t(D)%*%t(B)%*%W%*%clrf
  
  # vector of B-spline coefficients := z
  z = solve(GG)%*%gg
  
  # ZB-spline basis
  Zbasis = B%*%D%*%K
  
  # ZB-spline basis (evaluated on a grid finner grid)
  Zbasis_finner = BB%*%D%*%K
  
  if (basis.plot == TRUE){
    matplot(parnition,Zbasis_finner, type="l",lty=1,las=1, xlab="t", ylab="fcenLR(density)",col = rainbow(dim(Zbasis_finner)[2])
            ,main="ZB-spline basis")
    abline(v=knots, col="gray",lty=2)
  }
  
  # Resulting compositional spline in L20
  spline0 = Zbasis_finner%*%z
  
  if (spline.plot==TRUE) {
    matplot(parnition,spline0, type="l",las=1,xlab="t",ylab="fcenLR(density)",col="darkblue",lwd=2,cex.lab=1.2,cex.axis=1.2,
            ylim = c(min(c(min(clrf),min(spline0))),max(c(max(clrf),max(spline0)))),
            main = paste("Compositional spline of degree k =",k-1))
    matpoints(t,clrf, pch = 8, col="darkblue", cex=1.3)
    abline(h=0,col="red",lty=2,lwd=1)
    abline(v=knots,col="gray",lty=2,lwd=1)
  }
  
  # CV
  clrf = as.matrix(clrf)
  Hmat = (B%*%D%*%K)%*%solve(GG)%*%(alpha*t(K)%*%t(D)%*%t(B)%*%W)
  clrfhat = (B%*%D%*%K)%*%z
  reziduals = (clrf-clrfhat)
  Hmat_diag = c()
  for (i in 1:length(clrf))  Hmat_diag[i] = Hmat[i,i]
  
  Hmat_diag_mean = (1/length(clrf))*sum(Hmat_diag)
  
  CV = (1/length(clrf))*sum((reziduals/(rep(1,length(Hmat_diag))-Hmat_diag))^2)
  GCV = (1/length(clrf))*(sum((reziduals)^2))/((1-Hmat_diag_mean)^2)
  
  J = (1-alpha)*t(z)%*%t(U)%*%t(S)%*%M%*%S%*%U%*%z + alpha*t(clrf-B%*%D%*%K%*%z)%*%W%*%(clrf-B%*%D%*%K%*%z)
  return(list(J=J, ZB_coef=z, CV=CV, GCV=GCV))
}







back to top