https://github.com/cran/RandomFields
Revision 84ac7774b2385ab164ebf06b957ce2e61978c2e3 authored by Martin Schlather on 02 February 2015, 00:00:00 UTC, committed by Gabor Csardi on 02 February 2015, 00:00:00 UTC
1 parent bc905e7
Raw File
Tip revision: 84ac7774b2385ab164ebf06b957ce2e61978c2e3 authored by Martin Schlather on 02 February 2015, 00:00:00 UTC
version 3.0.62
Tip revision: 84ac777
basic.fctns.R
### functions used in several situations, but do not really
### belong to getNset.R
### in particular internal getNset functions staying on the R level


rfSplitTrendAndCov <-
  function(model, spatialdim, xdimOZ, Time,  forcedplus=FALSE,
           cathegories=list(trend=DetTrendEffect : SpVarEffect),
           remaining.name="cov") {
  ## splittet additives modell in trend-Teil und cov-Teil auf

    info <- .Call("SetAndGetModelInfo",
                 MODEL_SPLIT, list("Dummy", model),
                 as.integer(spatialdim),
                 FALSE,
                 TRUE, ## in case it is a non-domain model
                 as.logical(Time), 
                 as.integer(xdimOZ), 
                 as.integer(254), FALSE, TRUE,
                 PACKAGE="RandomFields")

    
  effect <- info$effect
  
  submodels <- list()
  for (j in 1:(length(cathegories)+1)) submodels[[j]]<-list()
  names(submodels) <- c(names(cathegories), remaining.name)

  if ( !(model[[1]] %in% ZF_PLUS)) model<- list(ZF_SYMBOLS_PLUS, model)        

  for (i in 2:length(model)) {
    j<-1
    while(j<=length(cathegories) && !(effect[i-1] %in% cathegories[[j]])) {
      j <- j+1
    }
    submodels[[j]][[length(submodels[[j]])+1]] <- model[[i]]
  }

  for (j in 1:(length(cathegories)+1)) {
    if (length(submodels[[j]]) > 1 ||
        (forcedplus && length(submodels[[j]])>0)) { # geaendert 2.11.11
      submodels[[j]] <- c(ZF_SYMBOLS_PLUS,  submodels[[j]])
    } else if (length(submodels[[j]])==1) {
      submodels[[j]] <- submodels[[j]][[1]]
    }
  }
  submodels[sapply(submodels, length)==0] <- NULL
  return(submodels)
}

StandardSimpleModel <- function(model, tsdim, aniso=TRUE, addnugget=TRUE, ...) {    ## bei mixed model ist eine zweite Ebene von '+' vorhanden
  ## einfache mixed model koennen aber wie Simple behandelt werden
  ## die zweite '+'-Ebene wird hier aufgeloest:
  ##
  ## Nun koennten noch Namen fuer die Untermodelle vergeben worden sein.
  ## Wenn dann "unlist" aufgerufen wird, werden die namen der Ober- und
  ## Untermodelle zu einem gemeinsamen Namen verbunden, der dann nicht
  ## mehr als als submodel-Name erkannt wird. Also hilft nur alle
  ## (potentiellen) Namen zu loeschen:
  if (model[[1]] %in% ZF_PLUS) {
    submodels <- sapply(model[-1], function(x) x[[1]])
    plus <- submodels %in% ZF_PLUS
    if (any(plus)) {
      plus <- which(plus) + 1
      subs <- lapply(model[plus], function(x) x[-1])
      names(subs) <- NULL
      subs <- unlist(subs, recursive=FALSE)
      names(subs) <- NULL
      trend <- model[-plus]
      names(trend) <- NULL      
      model <- c(trend, subs)
      ## Print(model,trend,subs); xxx
    }
  } 

 # Print("SSM X", model)

  model <- PrepareModel2(model, ...)
  storage.mode(tsdim) <- "integer"
 # userdefined <- GetParameterModelUser(model)
  
  vdim <- InitModel(MODEL_USER, list("Dummy", model), tsdim, NAOK=TRUE) # ok
  if (!is.numeric(vdim) || any(vdim > 1)) return("model not univariate")
  model <- GetModel(register=MODEL_USER, GETMODEL_DEL_NATSC) # , do.notreturnparam=TRUE)
  .C("DeleteKey", MODEL_USER)

  s <- rfSplitTrendAndCov(model=model, spatialdim=tsdim, xdimOZ=tsdim,
                          Time=FALSE, forcedplus=TRUE,
                          cathegories=list(simple=c(PrimitiveModel:SimpleModel),
                            trend=DetTrendEffect:FixedTrendEffect),
                          remaining.name="cov")
    
  if (!is.null(s$cov) || length(s$simple)>3)
    return("model is too complex to be simple")
  if (length(s$simple) <= 1) return("model is too primitive")
  s <- s$simple

  for (i in 2:length(s)) {
    if (! (s[[i]][[1]] %in% DOLLAR)) s[[i]] <- list(DOLLAR[1], var=1, s[[i]])
  }
  if (length(s)==2 && !(s[[2]][[length(s[[2]])]][[1]] %in% ZF_NUGGET)
              && addnugget){
    ## assuming nugget is missing
    s[[3]] <- list(DOLLAR[1], var=0, list(ZF_NUGGET[2]))
  }

  if (length(s) == 3) {  ## [[1]] is '+'
    if (s[[2]][[length(s[[2]])]][[1]] %in% ZF_NUGGET) {
      s[2:3] <- s[3:2] ## [[1]] is '+'
    }
    if (s[[2]][[length(s[[2]])]][[1]] %in% ZF_NUGGET)
      return("nugget model given twice")
    if (!(s[[3]][[length(s[[3]])]][[1]] %in% ZF_NUGGET))
      return("simple model must the sum of some domain and isotropic model and a nugget effect")
     if (length(c(s[[3]]$scale, s[[3]]$Aniso, s[[3]]$aniso, s[[3]]$proj)) > 0)
       return("nugget contains scale and/or anisotropic elements")
  }
  if (length(c(if (!aniso) s[[2]]$aniso, s[[2]]$Aniso, s[[2]]$proj)) > 0)
    return("model is a complicated scale structure")
       
  return(s)
}



earth_coordinate_names<- function(names) {
  ## Earth coordinates + possibly radius
  n <- substr(tolower(names), 1, 6)
  nc <- nchar(n)
  lon <- lat <- integer(length(n))
  for (i in 1:length(n)) {
    lon[i] <- substr("longit", 1, nc[i]) == n[i]
    lat[i] <- substr("latitu", 1, nc[i]) == n[i]
  }
  lonORlat <- lon | lat  
  earth <- all(nc[lonORlat] >= 2) && sum(lon==1) && sum(lat == 1)
  return(if (length(names)==2 | !earth) earth else         
         if ( (lo <- which(lon)) < (la <- which(lat)))
         which(lonORlat[]))
}

cartesian_coordinate_names <- function(names) {
  n <- substr(tolower(names), 1, 1)
  coords <-  c("T", "x", "y", "z")
  Txyz <- outer(n, coords, "==")
  cs <- colSums(Txyz)
  if (any(cs > 1) || sum(cs[1:2]) == 0 || any(diff(cs[-1]) > 0))
    return (integer(0))
  Txyz <- Txyz[, c(2:4, 1), drop=FALSE]
  ord <- apply(Txyz, 2, function(x) which(x > 0))
  ord <- order(unlist(ord))
  rs <- which(rowSums(Txyz) > 0)
  return(rs[ord])
}



data.columns <- function(data, xdim=0, force=FALSE, halt=TRUE) {
  #  Print("data.col", data)
  if (xdim>0 && xdim >= ncol(data)) stop("not enough columns in 'data'.")
  RFopt <- RFoptions()
  info <- RFoptions()$coords
  cn <- colnames(data)

  if (all(is.na(info$varnames))) {
    if (all(is.na(info$coordnames))) {
      if (is.null(cn)) {
        if (force) return(list(data=(xdim+1):ncol(data), x=1:xdim))
        if (halt)
          stop('colnames of data argument must contain "data" or "variable"')
        else return(NULL);
      }
      is.data <- (tolower(substr(cn, 1, 4)) == "data" |
                  tolower(substr(cn, 1, 4)) == "value" |
                  tolower(substr(cn, 1, 8)) == "variable")
      if (!any(is.data)) {
        if (force) return(list(data=(xdim+1):ncol(data), x=1:xdim))
        if (halt) stop('no colname starts with "data" or "variable"')
        else return(NULL);
      }
      is.data <- which(is.data)
      if (is.data[1] > 1) is.data <- is.data[1] : ncol(data)# coord am Anfang
      ##     dann wird Rest alles als data angenommen, egal welcher Name
    }
  } else {
    if (is.numeric(info$varnames)) {
      is.data <- rep(info$varnames, length.out=2)
      if (is.na(is.data[1])) is.data[1] <- 1
      if (is.na(is.data[2])) is.data[2] <- ncol(data)
      is.data <- is.data[1] : is.data[2]
    } else {
      if (RFopt$general$vdim_close_together)
        stop("'vdim_close_together' must be FALSE")
      l <- list()
      vdim <- length(info$varnames)
      for (v in 1:vdim)
        l[[v]] <-
          substring(cn, 1, nchar(info$varnames[v])) == info$varnames[v]
      repet <- sapply(l, sum)
      if (repet[1] == 0) stop("data names could not be detected") 
      if (any(repet != repet[1]))
        stop("detected repetitions are not equal for all components")
      m <- matrix(unlist(l), ncol=vdim)
      if (any(rowSums(m) > 1))
        stop("names of multivariate components not unique")
      is.data <- as.vector(t(apply(m, 2, which)))
    }
  }

  if (all(is.na(info$coordnames))) {
    is.x <- (1:ncol(data))[-is.data]
    if (xdim > 0) {
      if (length(is.x) < xdim)
        stop("not enough columns for coordinates found ")
      if (xdim < length(is.x) &&
          RFopt$general$printLevel >= PL_SUBIMPORTANT)
        message("column(s) '", paste(is.x[-1:-xdim], collapse=","),
                "' not used.\n")
      is.x <- is.x[1:xdim]
    }
  } else {
    if (is.numeric(info$coordnames)) {
      is.x <- rep(info$coordnames, length.out=2)
      if (is.na(is.x[1])) is.x[1] <- 1
      if (is.na(is.x[2])) is.x[2] <- ncol(data)
      is.x <- is.x[1] : is.x[2]
    } else {
      l <- list()
      len <- length(info$coordnames)
      for (i in 1:len)
        l[[v]] <-
          substring(cn, 1, nchar(info$coordnames[v])) == info$coordnames[v]
      is.x <- unlist(l)
      if (xdim > 0 && xdim != length(l))
        stop("expected dimension of coordinates does not match the found coordinates")
    }
     
    if (all(is.na(info$varnames))) {
      is.data <-  (1:ncol(data))[-is.x]
      if (length(is.data) == 0) stop("no columns for data found")
    } else {
     if (any(is.x %in% is.data))
       stop("column names and data names overlap.")
     if (length(is.x) + length(is.data) < ncol(data) &&
         RFopt$general$printLevel >= PL_SUBIMPORTANT)
       message("column(s) '",
               paste(1:ncol[c(-is.x, -is.data)], collapse=","),
               "' not used.\n")
    }
  }
  return(list(data=is.data, x=is.x) )
}

    
GetDataNames <- function(model, coords=NULL, locinfo, data=NULL) {
  if (length(data) > 0) {
    cd <- try(CheckData(model=model, given=coords, data=data))      
    if (class(cd) != "try-error")
      return(list(coord.names=cd$coord.names, variab.names=cd$variab.names))
  }

  variab.names <- extractVarNames(model)
  coord.names <-
    if (!is.null(coords) && is.matrix(coords)) colnames(coords) else NULL
                       # gets response part of model, if model is a formula

  Zeit <- locinfo$Zeit
  ts.xdim <- locinfo$spatialdim + locinfo$Zeit
  if (is.null(coord.names))
    coord.names <- paste("coords.x", 1:ts.xdim, sep="")
  if (Zeit) coord.names[ts.xdim] <- "coords.T1"

  return(list(coord.names=coord.names, variab.names=variab.names))
}



search.model.name <- function(cov, name, level) {
  if (length(name) == 0 || length(cov) ==0) return(cov);
  if (!is.na(pmatch(name[1], cov))) return(search.model.name(cov, name[-1], 1))

  for (i in 1:length(cov$submodels)) {
    found <- search.model.name(cov$submodels[[i]], name, 1)
    if (!is.null(found)) return(found)      
  }
  found <- search.model.name(cov$internal, name, 1)
  if (!is.null(found)) return(found)
  if (level == 0) stop("model name not found")
  return(NULL)
}




## to do: grid
GetNeighbourhoods <- function(model.nr, all,
                              splitfactor, maxn, split_vec,
                              shared=FALSE)  {
  ## model.nr < 0 : standard scales
  ## MODEL_USER 0  /* for user call of Covariance etc. */
  ## MODEL_SIMU 1  /* for GaussRF etc */ 
  ## MODEL_INTERN 2 /* for kriging, etc; internal call of covariance */
  ## MODEL_SPLIT 3  /* split  covariance model */
  ## MODEL_GUI 4   /* RFgui */
  ## MODEL_MLE  5  /* mle covariance model */
  ## MODEL_MLESPLIT 6 /* ="= */
  ## MODEL_MLETREND 7  /* mle trend model !! */
  ## MODEL_BOUNDS 8 /* MLE, lower, upper */
  
  locfactor <- as.integer(splitfactor * 2 + 1) ## total number of
  ##    neighbouring boxes in each direction, including the current box itself
  maxn <- as.integer(maxn)        ## max number of points, including neighbours
  splitn <- as.integer(split_vec[1]) ## number of location when split up
  minimum <- as.integer(split_vec[2])## min. number of points in a neighbourhood
  maximum <- as.integer(split_vec[3])## maximum number of points when still
  ##                                    neighbours of neighbours are included.
  ##                         Note that, mostly, an additional box is included.

  d <- dim(all$given$x)
  ts.xdim <- as.integer(d[1])
  n <- as.integer(d[2])

  if (model.nr >= 0) {
    natsc <- .C("MultiDimRange", as.integer(model.nr), natsc = double(ts.xdim),
                PACKAGE="RandomFields")$natsc
  } else natsc <- rep(1, ts.xdim)

 
  u <- numeric(ts.xdim)
  for (i in 1:ts.xdim) {
    u[i] <- length(unique(all$given$x[i,]))
  }

  if (all$given$grid) {
    stop("not programmed yet")
  } else {
    Range <- apply(all$given$x, 1, range)
    rd <- apply(Range, 2, diff) 
    len <- pmax(1e-10 * max(rd), rd * (1 + 1e-10))
    units <- pmax(1, len * natsc)
    nDsplitn <- n / splitn

    ## * "gerechte" Aufteilung in alle Richtungen waere nDsplitn
    ## * die Richtung in die viele units sind, soll eher aufgespalten werden
    ## * ebenso : wo viele Werte sind eher aufspalten
    idx <- (nDsplitn / prod(units * u))^{1/ts.xdim} * units * u > 0.5 
    reddim <- sum(idx)
    units <- units[idx]
    zaehler <- 1
    parts <- rep(1, ts.xdim)
    OK <- integer(1)
    
    repeat {
      parts[idx] <- (nDsplitn / prod(units))^{1/reddim} *
        locfactor * zaehler * units * all$vdim
      parts <- as.integer(ceiling(parts))
      
      ## zuordnung der coordinaten_Werte zu den jeweiligen "parts"
      ## given ist liegend
      coord.idx <- floor((all$given$x - Range[1,]) / (len / parts))
      
      cumparts <- cumprod(parts)
      totparts <- as.integer(cumparts[length(cumparts)])
      Ccumparts <- as.integer(c(1, cumparts))
      cumparts <- Ccumparts[-length(Ccumparts)]
      
      ## elms.in.boxes <- integer(totparts)  
      ## neighbours <- integer(totparts)
      cumidx <- as.integer(colSums(coord.idx * cumparts))
      
      elms.in.boxes <- .Call("countelements", cumidx, n, totparts,
                             PACKAGE="RandomFields")
      
      neighbours <- .Call("countneighbours", ts.xdim, parts, locfactor, Ccumparts,
                     elms.in.boxes, PACKAGE="RandomFields")
      
      ## if there too many points within all the neighbours, then split
      ## into smaller boxes
      zaehler <- zaehler * 2
      
      ## image(neighbours, zlim=c(0:(prod(parts)-1)))
      if (!is.null(neighbours)) break;
    }
    
    l <- list()
    l[[1]] <- .Call("getelements", cumidx, ts.xdim, n, Ccumparts,
                    elms.in.boxes, PACKAGE="RandomFields")
    l1len <- sapply(l[[1]], length)
  }

  

  if (length(all$x) > 0) {
    if (all$grid) {
     stop("not programmed yet")
    } else {
      ## now calculate the boxes for the locations where we will interpolate
      i <- pmax(0, pmin(parts-1, floor((t(all$x) - Range[1,]) / (len / parts))))
      dim(i) <- rev(dim(all$x))
      i <- as.integer(colSums(i * cumparts))
      
      res.in.boxes <- .C("countelements", i, nrow(all$x), totparts,
                         PACKAGE="RandomFields")
      
      notzeros <- res.in.boxes > 0
      l[[3]] <- .Call("getelements", i, ts.xdim, as.integer(nrow(all$x)),
                      Ccumparts, res.in.boxes, PACKAGE="RandomFields")[notzeros]
      ## TO DO : idx[[3]] passt nicht, da sowohl fuer Daten
      ##         als auch coordinaten verwendet wird. Bei repet > 1
      ##         ist da ein Problem -- ueberpruefen ob repet=1
   

    }
  } else {
    notzeros <- TRUE
  }

  if (all$given$grid) {
    stop("not programmed yet")
  } else {
    ll <- .Call("getneighbours", ts.xdim, parts, locfactor, Ccumparts,
                neighbours, PACKAGE="RandomFields")[notzeros]
    less <- sapply(ll, function(x) sum(elms.in.boxes[x]) < minimum) | !shared
    ##                  if !shared then all(less)==TRUE

    if (any(less)) {
      not.considered.yet <- sapply(l[[1]], length) > 0   
      newll <- ll
      for (i in which(less)) {
        current <- ll[[i]]
        elements <- sum(elms.in.boxes[current] *
                        (shared | not.considered.yet[current]))# number of pts in a neighbourhood
        while (elements < minimum) {
          new <- unique(unlist(ll[current])) # neighbours of neighbours, but not
          new <- new[which(is.na(pmatch(new, current)))]# neighbours themselves
          nn <- elms.in.boxes[new] * (shared | not.considered.yet[new]) # how many pts are in each of these boxes?
        ordr <- order(nn)
        new <- new[ordr]
        nn <- nn[ordr]
        cs <- elements + cumsum(nn)
        smaller <- sum(cs <= maximum) ## now, check which neighbours of
        ## the neigbours can be included in the list of neighbours of i
        ## to increase the number of points in the kriging neighbourhood
        if (smaller == 0) break; ## none
        if (smaller == length(cs) || cs[smaller] >= minimum ||
            cs[smaller+1] > maxn) {
          if ( (elements <- cs[length(cs)]) <= maxn ) {            
            current <- c(current, new)            
          } else {
            current <- c(current, new[1:smaller])
            elements <- cs[smaller]
          }
          if (smaller != length(cs)) break
        } else {
          ## smaller < length(cs) && cs[smaller]<minimum && cs[smaller+1]<=maxn
          ## i.e., include the next one, but there is no chance to include
          ## more within the rules.
          elements <- cs[smaller+1]
          current <- c(current, new[1:(smaller+1)])
          break;
        }
      }
      current <- current[l1len[current] > 0]
      if (!shared) current <- current[not.considered.yet[current]]
      newll[[i]] <- current
      not.considered.yet[current] <- FALSE                            
      }
      newll <- newll[sapply(newll, length) > 0]
      l[[2]] <- newll
    } else l[[2]] <- ll
  }
  return(if (shared) l else lapply(l[[2]], function(x) unlist(l[[1]][x])))
}



FinishImputing <- function(data, all, n, spConform) {
  ## to do: grid
  
  #Print(data, all, tail(all$simu), spConform);
  if (is(data, "RFsp")) {
    if (spConform) {
      data@data[ , ] <- as.vector(all$simu)
      return(data)
    } else {
      values <- as.matrix(data@data)
      values[is.na(values)] <- all$simu
      
      #print(cbind(coordinates(data), values))
      return(cbind(coordinates(data), values))
    }
  } else { ## not RFsp
    #Print("for testing")
    coords <- all$fullgiven$x
    if (all$fullgiven$grid) {
      ## to do
      stop("not programmed yet")
    } else {
      ##  coords <- all$x
      colnames(coords) <- all$coord.names
      
      values <- data[, all$data.col]
      values[is.na(values)] <- all$simu
      
      if (!spConform)  return(cbind(coords, values))
      
      tmp.all <- conventional2RFspDataFrame(data=values, coords=coords,
                                            gridTopology=NULL,
                                            n=n, vdim=all$vdim,
                                            vdim_close_together=FALSE)
      all <- tmp.all@data         
      if (is(tmp.all, "RFspatialPointsDataFrame"))
        try(tmp.all <- as(tmp.all, "RFspatialGridDataFrame"), silent=TRUE)
      if (is(tmp.all, "RFpointsDataFrame"))
        try(tmp.all <- as(tmp.all, "RFgridDataFrame"), silent=TRUE)
    }
    return(tmp.all)
  }
}
back to top