Content - 4dce246b79b6470d3d8afd39f02d183aba9300fa

Permalink
Lexis <-
function(entry, exit, duration, entry.status=0, exit.status=0, id, data,
         merge=TRUE, states )
{
  nmissing <- missing(entry) + missing(exit) + missing(duration)

  if (nmissing > 2)
    stop("At least one of the arguments exit and duration must be supplied")

  only.exit <- missing( entry.status ) && !missing( exit.status )

  ## If data argument is supplied, use it to evaluate arguments
  if (!missing(data)) {
    if (!missing(entry)) {
      entry <- eval(substitute(entry), data, parent.frame())
    }
    if (!missing(exit)) {
      exit <- eval(substitute(exit), data, parent.frame())
    }
    if (!missing(duration)) {
      duration <- eval(substitute(duration), data, parent.frame())
    }
    entry.status <- eval(substitute(entry.status), data, parent.frame())
    exit.status <- eval(substitute(exit.status), data, parent.frame())
    if (!missing(id)) {
      id <- eval(substitute(id), data, parent.frame())
    }
    if (merge) {
      data <- as.data.frame(data)
    }
  }

  ## Check for missing values in status variables
  wh.miss <- any(is.na(entry.status)) + 2*any(is.na(exit.status))
  if ( wh.miss > 0 ) stop("Missing values in ",
                          switch( wh.miss, "entry status",
                                           "exit status",
                                           "entry AND exit status" ) )

  ## Adjust entry status mode according to exit status
  if( only.exit )
    {
    if( is.logical( exit.status ) )
        entry.status <- FALSE
    if( is.character( exit.status ) )
        exit.status <- factor( exit.status )
    if( is.factor( exit.status ) )
        {
        entry.status <- factor( rep( levels(exit.status)[1],
                                     length(exit.status)),
                                levels=levels(exit.status),
                                labels=levels(exit.status) )
        cat("NOTE: entry.status has been set to",
            paste( '"', levels(exit.status)[1], '"', sep='' ),
            "for all.\n" )
        }
    if( is.numeric( exit.status ) )
        entry.status <- rep( 0, length( exit.status ) )
    }

  ## Convert character states to factors
  if( is.character(entry.status) ) entry.status <- factor(entry.status)
  if( is.character( exit.status) )  exit.status <- factor( exit.status)

  ## Check compatibility of entry and exit status
  if (is.factor(entry.status) || is.factor(exit.status)) {
      if (is.factor(entry.status) && is.factor(exit.status)) {
          if (!identical(levels(entry.status),levels(exit.status))) {
              all.levels = union(levels(entry.status),levels(exit.status))
              entry.status <- factor( entry.status, levels=all.levels )
               exit.status <- factor(  exit.status, levels=all.levels )
            cat("Incompatible factor levels in entry.status and exit.status:\n",
                "both lex.Cst and lex.Xst now have levels:\n", all.levels, "\n")
          }
      }
      else {
          stop("Incompatible classes for entry and exit status")
      }
  }
  else {
      if (mode(entry.status) != mode(exit.status)) {
          stop("Incompatible mode for entry and exit status")
      }
  }
  
  ## If entry is missing and one of the others is given as a list of length
  ## one, entry is assumed to be 0 on this only timescale.
  if( nmissing==2 )
    {
    if( !missing(exit) )
      { if( length(exit)>1 )
          stop("If 'entry' is omitted, only one timescale can be specified.")
        else
        {
        entry <- exit
        entry[[1]] <- 0*entry[[1]]
        cat( "NOTE: entry is assumed to be 0 on the",names(exit),"timescale.\n")
        }
      }
    else
    if( !missing(duration) )
      { if( length(duration)>1 )
          stop("If 'entry' is omitted, only one timescale can be specified")
        else
        {
        entry <- duration
        entry[[1]] <- 0*entry[[1]]
        cat( "NOTE: entry is assumed to be 0 on the",names(duration),"timescale.\n")
        }
      }
    else
    stop("Either exit or duration must be supplied.")
    }

  ## Coerce entry and exit lists to data frames
  
  if(!missing(entry)) {
    entry <- as.data.frame(entry)
    if (is.null(names(entry)))
      stop("entry times have no names")
    if (any(substr(names(entry),1,4) == "lex."))
      stop("names starting with \"lex.\" cannot be used for time scales")
  }

  if(!missing(exit)) {
    exit <- as.data.frame(exit)
    if (is.null(names(exit)))
      stop("exit times have no names")
    if (any(substr(names(exit),1,4) == "lex."))
      stop("names starting with \"lex.\" cannot be used for time scales")
  }

  if(!missing(duration)) {
    duration <- as.data.frame(duration)
    if (is.null(names(duration)))
      stop("duration have no names")
    if (any(substr(names(duration),1,4) == "lex."))
      stop("names starting with \"lex.\" cannot be used for time scales")
  }

  if (missing(entry)) {
    ## Impute entry
    entry <- exit - duration
  }
  
  if (missing(duration)) {
    ## Impute duration 
    full.time.scales <- intersect(names(entry), names(exit))
    if (length(full.time.scales) == 0) {
      stop("Cannot calculate duration from entry and exit times")
    }
    duration <- exit[,full.time.scales[1]] - entry[,full.time.scales[1]]
  }

  if (missing(exit)) {
    all.time.scales <- names(entry)
  }
  else {
    ## We dont need the exit times but, if they are supplied, we must
    ## make sure they are consistent with the entry and duration.

    all.time.scales <- unique(c(names(entry), names(exit)))
    ## Fill in any missing entry times
    entry.missing <- setdiff(all.time.scales, names(entry))
    if (length(entry.missing) > 0) {
      entry <- cbind(entry, exit[,entry.missing, drop=FALSE] - duration)
      }
    ## Check that duration is the same on all time scales
    dura <- exit - entry[,names(exit),drop=FALSE]
    if (missing(duration)) {
      duration <- dura[,1] #BxC# apply( dura, 1, mean, na.rm=TRUE )
                           # Allows for timescales with missing values
      }
    ok <- sapply(lapply(dura, all.equal, duration), isTRUE)
#   ok <- sapply(lapply(dura, all.equal, duration),
#                function(x) identical(FALSE,x) )
    if (!all(ok)) {
      stop("Duration is not the same on all time scales")
    }
  }
  ## Check that duration is positive
  if (any(duration<0)) {
    stop("Duration must be non-negative")
  }
  
  ## Make sure id value - if supplied - is valid. Otherwise supply default id
  
  if (missing(id)) {
    id <- 1:nrow(entry)
  }
  else if (any(duplicated(id))) {
    ##Fixme: check for overlapping intervals
    ##stop("Duplicate values in id")
  }

  ## Return a data frame with the entry times, duration, and status
  ## variables Use the prefix "lex." for the names of reserved
  ## variables.
  if( is.data.frame( duration ) ) duration <- duration[,1]
  lex <- data.frame(entry, "lex.dur" = duration,
                           "lex.Cst" = entry.status,
                           "lex.Xst" = exit.status,
                           "lex.id"  = id )

  #### Addition by BxC --- support for states as factors
  # Convert states to factors if states are given
  if( !missing( states ) ) #is.character( states ) )
    {
    # This as.character-business is necessary because we cannot assume
    # that the values of states are 1,2, etc.
    st.lev <- unique( as.character( c(lex$lex.Cst,lex$lex.Xst) ) )
    lex$lex.Cst <- factor( as.character(lex$lex.Cst), levels=st.lev, labels=states )
    lex$lex.Xst <- factor( as.character(lex$lex.Xst), levels=st.lev, labels=states )
    }

  if (!missing(data) && merge) {
    duplicate.names <- intersect(names(lex), names(data))
    if (length(duplicate.names) > 0) {
      stop("Cannot merge data with duplicate names")
    }
    lex <- cbind(lex, data)
  }
  attr(lex,"time.scales") <- all.time.scales
  breaks <- vector("list", length(all.time.scales))
  names(breaks) <- all.time.scales
  attr(lex,"breaks") <- breaks
  class(lex) <- c("Lexis", class(lex))
  return(lex)
}

is.Lexis <- function(x)
{
  inherits(x, "Lexis")
}

check.time.scale <- function(lex, time.scale=NULL)
{

  ##Utility function, returns the names of the time scales in a Lexis object
  ##lex - a Lexis object
  ##time.scale - a numeric or character vector. The function checks that
  ##             these are valid time scales for the Lexis object. 
  ##Return value is a character vector containing the  names of the requested
  ##time scales
  
  all.names <- timeScales(lex)
  if (is.null(time.scale))
    return(all.names)

  nscale <- length(time.scale)
  scale.names <- character(nscale)
  if (is.character(time.scale)) {
    for (i in 1:nscale) {
      if (is.null(lex[[time.scale[i]]]))
        stop("invalid time scale name")
    }
  }
  else if (is.numeric(time.scale)) {
    if (any(time.scale) > length(all.names)  || any(time.scale) < 1)
      stop("invalid time scale column number")
    time.scale <- all.names[time.scale]
  }
  else {
    stop("invalid type for time scale")
  }
  return(time.scale)
}


plot.Lexis.1D <- function(x, time.scale=1, breaks="lightgray", 
                          type="l", col="darkgray", xlim, ylim, xlab, ylab,
                          ...)
{
  ## x Lexis object
  ## time.scale  name of time scale to plot

  if (length(time.scale) != 1)
    stop("Only one time scale allowed")
  
  time.entry <- x[,time.scale]
  time.exit <- x[,time.scale] + x$lex.dur
  id <- x$lex.id

  if (missing(xlim))
    xlim <- c(min(time.entry), max(time.exit))
  if (missing(ylim))
    ylim <- range(id)
  if (missing(xlab))
    xlab <- time.scale
  if (missing(ylab))
    ylab <- "id number"
  
  plot(time.entry, id, type="n", xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
       ...)
  if (type=="b" || type=="l") {
    segments(time.entry, id, time.exit, id, col=col, ...)
  }
  if (type=="b" || type=="p") {
    points(time.exit, id, col=col, ...)
  }
  ## Plot break points
  brk <- attr(x,"breaks")[[time.scale]]
  abline(v=brk, col=breaks, ...)
}

points.Lexis.1D <- function(x, time.scale, ...)
{
  time.exit <- x[,time.scale] + x$lex.dur
  points(time.exit, x$lex.id, ...)
}

lines.Lexis.1D <- function(x, time.scale, col="darkgray", breaks="lightgray",
                           ...)
{
  time.entry <- x[,time.scale]
  time.exit <- x[,time.scale] + x$lex.dur
  id <- x$lex.id
  segments(time.entry, id, time.exit, id, col=col, ...)
  ## Plot break points
  brk <- attr(x,"breaks")[[time.scale]]
  abline(v=brk, col=breaks, ...)

}

plot.Lexis.2D <- function(x, time.scale, breaks="lightgray",
                          type="l", col="darkgray",
                          xlim, ylim, xlab, ylab,
                          grid=FALSE,
                      col.grid="lightgray",
                      lty.grid=2,
                      coh.grid=FALSE,
                          ...)
{
  if (length(time.scale) != 2)
    stop("Two time scales are required")

  time.entry <- time.exit <- vector("list",2)
  for (i in 1:2) {
    time.entry[[i]] <- x[,time.scale[i]]
    time.exit[[i]] <- x[,time.scale[i]] + x$lex.dur
  }

  if (missing(xlim) && missing(ylim)) {
    ## If no axis limits are given, set the plotting region to be
    ## square, and adjust the axis limits to cover the same time interval.
    ## All life lines will then be at 45 degrees.
    opar <- par(pty="s")
    on.exit(par(opar))
    min.times <- sapply(time.entry, min)
    max.times <- sapply(time.exit, max)
    xywidth <- max(max.times - min.times)
    xlim <- min.times[1] + c(0, xywidth)
    ylim <- min.times[2] + c(0, xywidth)
  }
  else if (missing(xlim)) {
    xlim <- c(min(time.entry[[1]]), max(time.exit[[1]]))
  }
  else if (missing(ylim)) {
    ylim <- c(min(time.entry[[2]]), max(time.exit[[2]]))
  }
  
  if (missing(xlab))
    xlab <- time.scale[1]
  if (missing(ylab))
    ylab <- time.scale[2]
      
  plot(time.entry[[1]], time.entry[[2]], type="n",
       xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, ...)

# Set up the background grid(s):
  if (!missing(grid)) {
     if (is.logical(grid)) {
        if (grid) {
        vgrid <- pretty(xlim)
        hgrid <- pretty(ylim)
        } }
     else if (is.list(grid)) {
        vgrid <- grid[[1]]
        hgrid <- grid[[length(grid)]]
        }
     else if (is.numeric(grid)) {
       vgrid <- grid - min( grid ) + min( pretty( xlim )[pretty(xlim)>=par("usr")[1]] )
       hgrid <- grid - min( grid ) + min( pretty( ylim )[pretty(ylim)>=par("usr")[3]] )
        }
     else stop( "'grid' must be either logical, list or a numeric vector" )
   # and plot the grid:
     abline( v=vgrid, h=hgrid, col=col.grid, lty=lty.grid )
     box()
     }
  if (!missing(grid) & coh.grid) {
     # Make the 45-degree grids as fine as the finest grid on the axes
     for (yy in c(hgrid-diff(range(hgrid)),hgrid))
         abline( yy-min(vgrid), 1, col=col.grid, lty=lty.grid )
     for (yy in c(vgrid-diff(range(vgrid)),vgrid))
         abline( min(hgrid)-yy, 1, col=col.grid, lty=lty.grid )
     }
# End of background grid(s) (PHEW!)

  if (type=="b" || type=="l") {
    segments(time.entry[[1]], time.entry[[2]], time.exit[[1]], time.exit[[2]],
             col=col, ...)
  }
  if (type=="b" || type=="p") {
    points(time.exit[[1]], time.exit[[2]], col = col, ...)
  }
  if (type != "n") {
    ## Plot break points
    brk <- attr(x,"breaks")[time.scale]
    abline(v=brk[[1]], h=brk[[2]], col=breaks, ...)
  }
}

points.Lexis.2D <- function(x, time.scale, ...)
{
  time.exit <- vector("list",2)
  for (i in 1:2) {
    time.exit[[i]] <- x[,time.scale[i]] + x$lex.dur
  }
  points( time.exit[[1]], time.exit[[2]], ...)
}

lines.Lexis.2D <- function(x, time.scale, col="darkgray", ...)
{
  time.entry <- time.exit <- vector("list",2)
  for (i in 1:2) {
    time.entry[[i]] <- x[,time.scale[i]]
    time.exit[[i]] <- x[,time.scale[i]] + x$lex.dur
  }
  segments(time.entry[[1]], time.entry[[2]], time.exit[[1]], time.exit[[2]],
           col=col, ...)
}

### Plotting generic functions

plot.Lexis <- function(x, time.scale=NULL, breaks="lightgray", ...)
{
  time.scale <- check.time.scale(x, time.scale)
  if (length(time.scale) > 2)
    time.scale <- time.scale[1:2]
  
  if (length(time.scale) == 1)
    plot.Lexis.1D(x, time.scale=time.scale, breaks=breaks, ...)
  else if (length(time.scale) == 2)
    plot.Lexis.2D(x, time.scale=time.scale, breaks=breaks, ...)
}


lines.Lexis <- function(x, time.scale=NULL, ...)
{
  time.scale <- check.time.scale(x, time.scale)
  if (length(time.scale) > 2)
    time.scale <- time.scale[1:2]
  
  if (length(time.scale) == 1)
    lines.Lexis.1D(x, time.scale=time.scale, ...)
  else if (length(time.scale) == 2)
    lines.Lexis.2D(x, time.scale=time.scale, ...)
}

points.Lexis <- function(x, time.scale = NULL, ...)
{
  time.scale <- check.time.scale(x, time.scale)
  if (length(time.scale) > 2)
    time.scale <- time.scale[1:2]
  
  if (length(time.scale) == 1)
    points.Lexis.1D(x, time.scale=time.scale, ...)
  else if (length(time.scale) == 2)
    points.Lexis.2D(x, time.scale=time.scale, ...)
}

### Generic functions

### Methods for data.frame drop Lexis attributes, so we need a Lexis
### method that adds them again

subset.Lexis <- function(x, ...)
{
  y <-  subset.data.frame(x, ...)
  attr(y,"breaks") <- attr(x, "breaks")
  attr(y,"time.scales") <- attr(x, "time.scales")
  return(y)
}

merge.data.frame <- function(x, y, ...)
{
  if (is.Lexis(x))
    merge.Lexis(x, y, ...)
  else if (is.Lexis(y)) 
    merge.Lexis(y, x, ...)
  else
    base::merge.data.frame(x, y, ...)
}

merge.Lexis <- function(x, y, id, by, ...)
{
  if (!missing(id)) {
    if (!is.character(id) || length(id) != 1 || !(id %in% names(y))) {
      stop("id must be the name of a single variable in y")
    }
    if (any(duplicated(y[[id]]))) {
      stop("values of the id variable must be unique in y")
    }
    y$lex.id <- y[[id]]
  }
  else if (missing(by)) {
    by <- intersect(names(x), names(y))
    if (length(by)==0) {
      stop("x and y have no variable names in common")
    }
  }
  
  z <-  base::merge.data.frame(x, y, ...)
  attr(z,"breaks") <- attr(x, "breaks")
  attr(z,"time.scales") <- attr(x, "time.scales")
  class(z) <- c("Lexis", "data.frame")
  return(z)
}


## Extractor functions

entry <- function(x, time.scale = NULL)
{
    time.scale <- check.time.scale(x, time.scale)
    if (length(time.scale) > 1) {
        return(as.matrix(x[, time.scale]))
    }
    else {
        return(x[, time.scale])
    }
}

exit <- function(x, time.scale = NULL)
{
    time.scale <- check.time.scale(x, time.scale)
    if (length(time.scale) > 1) {
        return(as.matrix(x[, time.scale]) + x$lex.dur)
    }
    else {
        return(x[, time.scale] + x$lex.dur)
    }
}

dur <- function(x)
{
  return(x$lex.dur)
}


status <- function(x, at="exit")
{
  at <- match.arg(at, c("entry","exit"))
  switch(at, "entry"=x$lex.Cst, "exit"=x$lex.Xst)
}

timeScales <- function(x)
{
  return (attr(x,"time.scales"))
}


timeBand <- function(lex, time.scale, type="integer")
{
  time.scale <- check.time.scale(lex, time.scale)[1]
  breaks <- attr(lex, "breaks")[[time.scale]]

  time1 <- lex[[time.scale]]
  band <- findInterval(time1, breaks)

  ##Check that right hand side of interval falls in the same band
  abrk <- c(breaks, Inf)
  tol <- sqrt(.Machine$double.eps)
  if (any(time1 + lex$lex.dur > abrk[band+1] + tol)) {
    stop("Intervals spanning multiple time bands in Lexis object")
  }

  type <- match.arg(type, choices = c("integer","factor","left","middle",
                    "right"))
  if (type=="integer") {
     return(band)
  }

  I1 <- c(-Inf, breaks)
  I2 <- c(breaks, Inf)
  labels <- switch(type,
                   "factor" = paste("(", I1, ",", I2, "]", sep=""),
                   "left" = I1,
                   "right" = I2,
                   "middle" = (I1 + I2)/2)
                   
  if(type=="factor") {
    return(factor(band, levels=0:length(breaks), labels=labels))
  }
  else {
    return(labels[band+1])
  }
}

breaks <- function(lex, time.scale)
{
  time.scale <- check.time.scale(lex, time.scale)[1]
  return(attr(lex, "breaks")[[time.scale]])
}

transform.Lexis <- function(`_data`, ... )
{
    save.at <- attributes(`_data`)
    ## We can't use NextMethod here because of the special scoping rules
    ## used by transform.data.frame
    y <- base:::transform.data.frame(`_data`, ...)
    save.at[["names"]] <- attr(y, "names")
    attributes(y) <- save.at
    y
}