nsxD <- 
function (x, df = NULL, knots = NULL, intercept = FALSE, Boundary.knots = range(x), 
    derivs = if (cure) c(2, 1) else c(2, 2), log = FALSE, centre = FALSE, 
    cure = FALSE, stata.stpm2.compatible = FALSE) 
{
    nx <- names(x)
    x <- as.vector(x)
    nax <- is.na(x)
    if (nas <- any(nax)) 
        x <- x[!nax]
    if (!missing(Boundary.knots)) {
        Boundary.knots <- sort(Boundary.knots)
        outside <- (ol <- x < Boundary.knots[1L]) | (or <- x > 
            Boundary.knots[2L])
    }
    else outside <- FALSE
    if (!missing(df) && missing(knots)) {
        nIknots <- df - 1 - intercept + 4 - sum(derivs)
        if (nIknots < 0) {
            nIknots <- 0
            warning("'df' was too small; have used ", 1 + intercept)
        }
        knots <- if (nIknots > 0) {
            knots <- if (!cure) 
                seq.int(0, 1, length.out = nIknots + 2L)[-c(1L, 
                  nIknots + 2L)]
            else c(seq.int(0, 1, length.out = nIknots + 1L)[-c(1L, 
                nIknots + 1L)], 0.95)
            if (!stata.stpm2.compatible) 
                stats::quantile(x[!outside], knots)
            else stats::quantile(x[!outside], round(knots, 2), 
                type = 2)
        }
    }
    else nIknots <- length(knots)
    Aknots <- sort(c(rep(Boundary.knots, 4L), knots))
    if (any(outside)) {
        basis <- array(0, c(length(x), nIknots + 4L))
        if (any(ol)) {
            k.pivot <- Boundary.knots[1L]
            tt <- spline.des(Aknots, rep(k.pivot, sum(ol)), 4, 1)$design
            basis[ol, ] <- tt
        }
        if (any(or)) {
            k.pivot <- Boundary.knots[2L]
            tt <- spline.des(Aknots, rep(k.pivot, sum(or)), 4, 1)$design
            basis[or, ] <- tt
        }
        if (any(inside <- !outside)) 
            basis[inside, ] <- spline.des(Aknots, x[inside], 
                4, 1)$design
    }
    else basis <- spline.des(Aknots, x, 4, 1)$design
    const <- splineDesign(Aknots, rep(Boundary.knots, 3 - derivs), 
        4, c(derivs[1]:2, derivs[2]:2))
    if (!intercept) {
        const <- const[, -1, drop = FALSE]
        basis <- basis[, -1, drop = FALSE]
    }
    qr.const <- qr(t(const))
    q.const <- qr.Q(qr.const, complete=TRUE)[, -(1L:2L), drop = FALSE] # NEW
    basis <- as.matrix((t(qr.qty(qr.const, t(basis))))[, -(1L:nrow(const)), 
        drop = FALSE])
    n.col <- ncol(basis)
    if (nas) {
        nmat <- matrix(NA, length(nax), n.col)
        nmat[!nax, ] <- basis
        basis <- nmat
    }
    dimnames(basis) <- list(nx, 1L:n.col)
    if (centre) {
        centreBasis <- nsx(centre, knots = if (is.null(knots)) 
            numeric(0)
        else knots, Boundary.knots = Boundary.knots, intercept = intercept, 
            derivs = derivs, centre = FALSE, log = log)
        oldAttributes <- attributes(basis)
        basis <- t(apply(basis, 1, function(x) x - centreBasis))
        attributes(basis) <- oldAttributes
    }
    a <- list(degree = 3, knots = if (is.null(knots)) numeric(0) else knots, 
        Boundary.knots = Boundary.knots, intercept = intercept, 
        derivs = derivs, centre = centre, log = log, q.const = q.const)
    attributes(basis) <- c(attributes(basis), a)
    class(basis) <- c("nsxD", "basis", "matrix")
    basis
}
makepredictcall.nsxD <- 
function (var, call) 
{
    if (as.character(call)[1L] != "nsxD") 
        return(call)
    at <- attributes(var)[c("knots", "Boundary.knots", "intercept",
                            "derivs", "centre", "log")]
    xxx <- call[1L:2]
    xxx[names(at)] <- at
    xxx
}
predict.nsxD <- 
function (object, newx, ...) 
{
    if (missing(newx)) 
        return(object)
    a <- c(list(x = newx), attributes(object)[c("knots", "Boundary.knots", 
        "intercept", "derivs", "centre", "log")])
    do.call("nsxD", a)
}
nsxDD <- 
function (x, df = NULL, knots = NULL, intercept = FALSE, Boundary.knots = range(x), 
    derivs = if (cure) c(2, 1) else c(2, 2), log = FALSE, centre = FALSE, 
    cure = FALSE, stata.stpm2.compatible = FALSE) 
{
    nx <- names(x)
    x <- as.vector(x)
    nax <- is.na(x)
    if (nas <- any(nax)) 
        x <- x[!nax]
    if (!missing(Boundary.knots)) {
        Boundary.knots <- sort(Boundary.knots)
        outside <- (ol <- x < Boundary.knots[1L]) | (or <- x > 
            Boundary.knots[2L])
    }
    else outside <- FALSE
    if (!missing(df) && missing(knots)) {
        nIknots <- df - 1 - intercept + 4 - sum(derivs)
        if (nIknots < 0) {
            nIknots <- 0
            warning("'df' was too small; have used ", 1 + intercept)
        }
        knots <- if (nIknots > 0) {
            knots <- if (!cure) 
                seq.int(0, 1, length.out = nIknots + 2L)[-c(1L, 
                  nIknots + 2L)]
            else c(seq.int(0, 1, length.out = nIknots + 1L)[-c(1L, 
                nIknots + 1L)], 0.95)
            if (!stata.stpm2.compatible) 
                stats::quantile(x[!outside], knots)
            else stats::quantile(x[!outside], round(knots, 2), 
                type = 2)
        }
    }
    else nIknots <- length(knots)
    Aknots <- sort(c(rep(Boundary.knots, 4L), knots))
    if (any(outside)) {
        basis <- array(0, c(length(x), nIknots + 4L))
        if (any(ol)) {
            basis[ol, ] <- 0
        }
        if (any(or)) {
            basis[or, ] <- 0
        }
        if (any(inside <- !outside)) 
            basis[inside, ] <- spline.des(Aknots, x[inside], 
                4, 2)$design
    }
    else basis <- spline.des(Aknots, x, 4, 2)$design
    const <- splineDesign(Aknots, rep(Boundary.knots, 3 - derivs), 
        4, c(derivs[1]:2, derivs[2]:2))
    if (!intercept) {
        const <- const[, -1, drop = FALSE]
        basis <- basis[, -1, drop = FALSE]
    }
    qr.const <- qr(t(const))
    q.const <- qr.Q(qr.const, complete=TRUE)[, -(1L:2L), drop = FALSE] # NEW
    basis <- as.matrix((t(qr.qty(qr.const, t(basis))))[, -(1L:nrow(const)), 
        drop = FALSE])
    n.col <- ncol(basis)
    if (nas) {
        nmat <- matrix(NA, length(nax), n.col)
        nmat[!nax, ] <- basis
        basis <- nmat
    }
    dimnames(basis) <- list(nx, 1L:n.col)
    if (centre) {
        centreBasis <- nsx(centre, knots = if (is.null(knots)) 
            numeric(0)
        else knots, Boundary.knots = Boundary.knots, intercept = intercept, 
            derivs = derivs, centre = FALSE, log = log)
        oldAttributes <- attributes(basis)
        basis <- t(apply(basis, 1, function(x) x - centreBasis))
        attributes(basis) <- oldAttributes
    }
    a <- list(degree = 3, knots = if (is.null(knots)) numeric(0) else knots, 
        Boundary.knots = Boundary.knots, intercept = intercept, 
        derivs = derivs, centre = centre, log = log, q.const = q.const)
    attributes(basis) <- c(attributes(basis), a)
    class(basis) <- c("nsxDD", "basis", "matrix")
    basis
}
makepredictcall.nsxDD <- 
function (var, call) 
{
    if (as.character(call)[1L] != "nsxDD") 
        return(call)
    at <- attributes(var)[c("knots", "Boundary.knots", "intercept",
                            "derivs", "centre", "log")]
    xxx <- call[1L:2]
    xxx[names(at)] <- at
    xxx
}
predict.nsxDD <- 
function (object, newx, ...) 
{
    if (missing(newx)) 
        return(object)
    a <- c(list(x = newx), attributes(object)[c("knots", "Boundary.knots", 
        "intercept", "derivs", "centre", "log")])
    do.call("nsxDD", a)
}

## test nsxD and nsxDD
if (FALSE) {
    zeros <- function(mat,rows=1:nrow(mat),cols=1:ncol(mat)) "[<-"(mat,rows,cols,0)
    tm <- as.numeric(3:5)
    tm2 <- as.numeric(0:11)
    y <- rnorm(length(tm))
    lm1 <- lm(y~nsx(tm,df=4))
    lmD1 <- lm(y~nsxD(tm,df=4)-1)
    lmDD1 <- lm(y~nsxDD(tm,df=4)-1)
    eps <- 1e-5
    (lpmatrix.lm(lm1,newdata=data.frame(tm=tm2+eps)) - 
     lpmatrix.lm(lm1,newdata=data.frame(tm=tm2-eps)))/(2*eps) -
        cbind(0,lpmatrix.lm(lmD1,newdata=data.frame(tm=tm2))) # ok
    (lpmatrix.lm(lmD1,newdata=data.frame(tm=tm2+eps)) - 
     lpmatrix.lm(lmD1,newdata=data.frame(tm=tm2-eps)))/(2*eps) -
        lpmatrix.lm(lmDD1,newdata=data.frame(tm=tm2)) # ok
}

S0hat <- function(obj)
  {
    ## predicted survival for individuals (adjusted for covariates)
    newobj = survfit(obj,se.fit=FALSE)
    surv = newobj$surv
    surv[match(obj$y[,ncol(obj$y)-1],newobj$time)]
  }

## general link functions
setClass("aft", representation(args="list"), contains="mle2")

aft <- function(formula, data, smooth.formula = NULL, df = 3,
                tvc = NULL, cure.formula=~1,
                control = list(), init = NULL,
                weights = NULL, tvc.intercept=TRUE, tvc.integrated= FALSE,
                timeVar = "", time0Var = "",
                cure = FALSE, mixture = FALSE,
                contrasts = NULL, subset = NULL, ...) {
    dots = list(...)
    control.defaults = list(parscale = 1, maxit = 1000, constrOptim = FALSE,
                            use.gr = TRUE, reltol = 1.0e-8, trace = 0,
                            nNodes = 20, add.penalties = TRUE)
    control = modifyList(control.defaults, control)
    if (any(ioverlap <- names(dots) %in% names(control.defaults))) {
        overlap = names(dots)[ioverlap]
        for (name in overlap) {
            cat(sprintf("Deprecated argument: %s; use control argument\n", name))
            control[[name]] = dots[[name]]
        }
    }
    ## Special case
    if (mixture && df>2) {
        Call = match.call()
        Call$df = 2
        fitWeibullMixture = eval(Call,parent.frame())
    } else {
        fitWeibullMixture = NULL
    }
    ## parse the event expression
    eventInstance <- eval(lhs(formula),envir=data)
    stopifnot(length(lhs(formula))>=2)
    eventExpr <- lhs(formula)[[length(lhs(formula))]]
    delayed <- length(lhs(formula))>=4 # indicator for multiple times (cf. strictly delayed)
    surv.type <- attr(eventInstance,"type")
    if (surv.type %in% c("interval","interval2","left","mstate"))
        stop("aft not implemented for Surv type ",surv.type,".")
    counting <- attr(eventInstance,"type") == "counting"
    ## interval <- attr(eventInstance,"type") == "interval"
    timeExpr <- lhs(formula)[[if (delayed) 3 else 2]] # expression
    if (timeVar == "")
        timeVar <- all.vars(timeExpr)
    ## set up the formulae
    full.formula <- formula
    if (!is.null(smooth.formula))
        rhs(full.formula) <- rhs(formula) %call+% rhs(smooth.formula)
    rhs(full.formula) <- rhs(full.formula) %call+% quote(0)
    if (!is.null(tvc)) {
        tvc.formulas <-
            lapply(names(tvc), function(name)
                call(":",
                     call("as.numeric",as.name(name)),
                     as.call(c(quote(ns),
                               if (tvc.integrated) timeExpr else call("log",timeExpr),
                               vector2call(list(intercept=tvc.intercept,df=tvc[[name]]))))))
        if (length(tvc.formulas)>1)
            tvc.formulas <- list(Reduce(`%call+%`, tvc.formulas))
        tvc.formula <- as.formula(call("~",tvc.formulas[[1]]))
        rhs(full.formula) <- rhs(full.formula) %call+% rhs(tvc.formula)
    }
    ##
    ## set up the data
    ## ensure that data is a data frame
    ## data <- get_all_vars(full.formula, data) # but this loses the other design information
    ## restrict to non-missing data (assumes na.action=na.omit)
    .include <- apply(model.matrix(formula, data, na.action = na.pass), 1, function(row) !any(is.na(row))) &
        !is.na(eval(eventExpr,data)) & !is.na(eval(timeExpr,data))
    data <- data[.include, , drop=FALSE]
    ##
    ## parse the function call
    Call <- match.call()
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "contrasts", "weights"),
               names(mf), 0L)
    mf <- mf[c(1L, m)]
    ##
    ## Specials:
    bhazard = rep(0,nrow(data))
    specials.names <- c("bhazard")
    specials <- attr(terms.formula(full.formula, specials.names), "specials")
    spcall <- mf
    spcall[[1]] <- quote(stats::model.frame)
    spcall$formula <- terms(formula, specials.names, data = data)
    mf2 <- eval(spcall, parent.frame())
    lm.formula = full.formula
    if (any(!sapply(specials,is.null))) {
        bhazard.index <- specials$bhazard
        if (length(bhazard.index)>0) {
            bhazard <- mf2[, bhazard.index]
            bhazard.index2 <- attr(terms.formula(full.formula, "bhazard"), "specials")$bhazard
            termobj = terms(mf2)
            dropped.terms = if(length(attr(termobj,"term.labels"))==1) reformulate("1", response=termobj[[2L]], intercept=TRUE, env=environment(termobj)) else stats::drop.terms(terms(mf2), bhazard.index - 1, keep.response=TRUE)
            formula <- formula(dropped.terms)
            lm.formula <- formula(stats::drop.terms(terms(full.formula), bhazard.index2 - 1))
        } else {
            bhazard.index2 = NULL
        }
        ## rm(mf2,spcall)
    }
    ## get variables
    time <- eval(timeExpr, data, parent.frame())
    time0Expr <- NULL # initialise
    if (delayed) {
        time0Expr <- lhs(formula)[[2]]
        if (time0Var == "")
            time0Var <- all.vars(time0Expr)
        time0 <- eval(time0Expr, data, parent.frame())
    } else {
        time0 <- NULL
    }
    event <- eval(eventExpr,data)
    ## if all the events are the same, we assume that they are all events, else events are those greater than min(event)
    event <- if (length(unique(event))==1) rep(TRUE, length(event)) else event <- event > min(event)
    ## setup for initial values
    ## Cox regression
    coxph.call <- mf
    coxph.call[[1L]] <- as.name("coxph")
    coxph.call$formula = formula
    coxph.call$model <- TRUE
    coxph.obj <- eval(coxph.call, envir=parent.frame())
    y <- model.extract(model.frame(coxph.obj),"response")
    data$logHhat <- pmax(-18,log(-log(S0hat(coxph.obj))))
    ## now for the cure fraction
    glm.cure.call = coxph.call
    glm.cure.call[[1]] = as.name("glm")
    glm.cure.call$family = as.name("binomial")
    glm.cure.call$data = as.name("data")
    glm.cure.call$model = NULL
    data["*event*"] = event 
    lhs(glm.cure.call$formula) = as.name("*event*")
    rhs(glm.cure.call$formula) = rhs(cure.formula)
    ## glm(y ~ X, family=binomial)
    glm.cure.obj <- eval(glm.cure.call, envir=as.list(environment()), enclos=parent.frame())
    Xc = model.matrix(glm.cure.obj, data)
    ##
    ## pred1 <- predict(survreg1)
    data$logtstar <- log(time)
    ## data$logtstar <- log(time/pred1)
    ## initial values and object for lpmatrix predictions
    lm.call <- mf
    lm.call[[1L]] <- as.name("lm")
    ## lm.formula <- full.formula # ??
    lhs(lm.formula) <- quote(logtstar) # new response
    lm.call$formula <- lm.formula
    dataEvents <- data[event,]
    lm.call$data <- quote(dataEvents) # events only
    lm.obj <- eval(lm.call)
    coef1b <- coef(lm.obj)
    if (names(coef1b)[1]=="(Intercept)") coef1b <- coef1b[-1] # ???
    ## if (is.null(init)) {
    ##   init <- coef(lm.obj)
    ## }
    lm0.obj <- lm(logHhat~nsx(logtstar,df,intercept=TRUE)-1,dataEvents)
    ## lm0D.obj <- lm(logHhat~nsxD(logtstar,df,intercept=TRUE,cure=cure)-1,dataEvents)
    coef0 <- coef(lm0.obj) # log-log baseline
    ## design information for baseline survival
    design <- nsx(dataEvents$logtstar, df=df, intercept=TRUE, cure=cure)
    designD <- nsxD(dataEvents$logtstar, df=df, intercept=TRUE, cure=cure)
    designDD <- nsxDD(dataEvents$logtstar, df=df, intercept=TRUE, cure=cure)
    ##
    ## set up mf and wt
    mt <- terms(lm.obj)
    mf <- model.frame(lm.obj)
    ## wt <- model.weights(lm.obj$model)
    wt <- if (is.null(substitute(weights))) rep(1,nrow(data)) else eval(substitute(weights),data,parent.frame())
    ##
    ## XD matrix
    loglpfunc <- function(x,fit,data,var) {
        data[[var]] <- exp(x)
        lpmatrix.lm(fit,data)
    }
    ##
    X <- lpmatrix.lm(lm.obj,data)
    if (is.integer(X.index <- which.dim(X)))
        warning("Design matrix for the acceleration factor is not full rank")
    X <- X[, X.index, drop=FALSE]
    XD0 <- X0 <- XD <- matrix(0,1,ncol(X))
    X_list = list()
    gauss = gauss.quad(control$nNodes)
    if (delayed && all(time0==0)) delayed <- FALSE # CAREFUL HERE: delayed redefined
    if (tvc.integrated) {
        X_list = lapply(1:control$nNodes, function(i)
            lpmatrix.lm(lm.obj,
                        local({ data[[timeVar]] = (gauss$nodes[i]+1)/2*data[[timeVar]]; data}))[,X.index, drop = FALSE])
        if (delayed) {
            X_list0 = lapply(1:control$nNodes, function(i)
                lpmatrix.lm(lm.obj,
                            local({ data[[timeVar]] = (gauss$nodes[i]+1)/2*data[[time0Var]]; data}))[,X.index, drop = FALSE])
        }
    } else { # tvc using cumulative formulation 
        XD <- grad1(loglpfunc,log(data[[timeVar]]),lm.obj,data,timeVar,log.transform=FALSE)
        XD <- matrix(XD,nrow=nrow(X))[,X.index, drop = FALSE]
        if (delayed) {
            ind0 <- time0>0
            data0 <- data[ind0,,drop=FALSE] # data for delayed entry times
            data0[[timeVar]] <- data0[[time0Var]]
            X0 <- lpmatrix.lm(lm.obj, data0)
            XD0 <- grad1(loglpfunc,log(data0[[timeVar]]),lm.obj,data0,timeVar,
                         log.transform=FALSE)
            XD0 <- matrix(XD0,nrow=nrow(X))[,X.index, drop = FALSE]
            rm(data0)
        }
    }
    ## Weibull regression
    if (delayed) {
        if (requireNamespace("eha", quietly = TRUE)) {
            survreg1 <- eha::aftreg(formula, data)
            coef1 <- -coef(survreg1) # reversed parameterisation
            coef1 <- coef1[1:(length(coef1)-2)][X.index]
        } else coef1 <- rep(0,ncol(X))
    } else {
        survreg1 <- survival::survreg(formula, data)
        coef1 <- coef(survreg1)
        coef1 <- coef1[-1] # assumes intercept included in the formula
    }
    if (ncol(X)>length(coef1)) {
        coef1 <- c(coef1,rep(0,ncol(X) - length(coef1)))
        names(coef1) <- names(coef1b)[X.index]
    }
    coef2 = coef(glm.cure.obj)
    names(coef2) = paste0("cure.", names(coef2))
    if (is.null(init))
        init <- if (mixture) c(coef1, -coef2, coef0) else c(coef1, coef0) # -coef2 because the glm models for uncured!
    if (any(is.na(init) | is.nan(init)))
        stop("Some missing initial values - check that the design matrix is full rank.")
    if (!is.null(control) && "parscale" %in% names(control)) {
        if (length(control$parscale)==1)
            control$parscale <- rep(control$parscale,length(init))
        if (is.null(names(control$parscale)))
            names(control$parscale) <- names(init)
    }
    parscale <- rep(if (is.null(control$parscale)) 1 else control$parscale,length=length(init))
    names(parscale) <- names(init)
    args <- list(init=init,X=X,XD=XD,
                 X_list=X_list,
                 X_list0=if (delayed) X_list0 else list(matrix(0,0,0)),
                 X.index=X.index,
                 wt=wt,event=ifelse(event,1,0),time=time,y=y,
                 time0 = if (delayed) time0 else 0*time,
                 timeVar=timeVar,timeExpr=timeExpr,terms=mt,
                 delayed=delayed, X0=X0, XD0=XD0,
                 parscale=parscale, reltol=control$reltol,
                 Xc=if (mixture) Xc else matrix(0,0,0), maxit=control$maxit,
                 trace = as.integer(control$trace),
                 boundaryKnots=attr(design,"Boundary.knots"), q.const=t(attr(design,"q.const")),
                 interiorKnots=attr(design,"knots"), design=design, designD=designD,
                 designDD=designDD, cure=as.integer(cure), mixture = as.integer(mixture),
                 tvc.integrated=tvc.integrated,
                 data=data, lm.obj = lm.obj, glm.cure.obj = glm.cure.obj,
                 init_copy = init, return_type="optim",
                 gweights=gauss$weights, gnodes=gauss$nodes, bhazard=bhazard,
                 add.penalties = control$add.penalties, df=df, ci=rep(0, df+1),
                 constrOptim = control$constrOptim)
    getui = function(args) {
        q.const = t(args$q.const) # (df+2) x df
        n.coef = length(args$init)
        nr = nrow(q.const)
        df = ncol(q.const)
        (cbind(0,diag(nr-1))-cbind(diag(nr-1),0)) %*%
            cbind(matrix(0,nr,n.coef-df), q.const) # (df+1) x n.coef
    }
    args$ui = getui(args)
    negll <- function(beta, ...) {
        stopifnot(is.numeric(beta),
                  length(beta) == length(args$init))
        localargs <- args
        localargs$return_type <- "objective"
        localargs$init <- beta
        if (length(dots <- list(...)) > 0)
            localargs <- modifyList(localargs, dots)
        return(.Call("aft_model_output", localargs, PACKAGE="rstpm2"))
    }
    gradient <- function(beta, ...) {
        stopifnot(is.numeric(beta),
                  length(beta) == length(args$init))
        localargs <- args
        localargs$return_type <- "gradient"
        localargs$init <- beta
        if (length(dots <- list(...)) > 0)
            localargs <- modifyList(localargs, dots)
        return(as.vector(.Call("aft_model_output", localargs, PACKAGE="rstpm2")))
    }
    parnames(negll) <- names(init)
    args$negll = negll
    args$gradient = gradient
    if (!is.null(fitWeibullMixture)) {
        this.index = (length(coef1)+1):(length(coef1)+length(coef2))
        fixed = as.list(coef(fitWeibullMixture)[this.index])
        this.control = control
        for (name in c("constrOptim", "use.gr", "nNodes", "add.penalties"))
            this.control[[name]] = NULL
        this.control$parscale = this.control$parscale[-this.index]
        fixedfit <- if (control$use.gr) {
                    bbmle::mle2(negll, init, vecpar=TRUE, control=this.control,
                                fixed=fixed, gr=gradient, ...)
                } else {
                    bbmle::mle2(negll, init, vecpar=TRUE, control=this.control,
                                fixed=fixed, ...)
                }
        init = coef(fixedfit)
        rm(fixedfit)
    }
    ## MLE
    if (delayed && control$use.gr) { # initial search using nmmin (conservative -- is this needed?)
        args$return_type <- "nmmin"
        args$maxit <- 50
        fit <- .Call("aft_model_output", args, PACKAGE="rstpm2")
        args$maxit <- control$maxit
    }
    optim_step <- function(use.gr) {
        args$return_type <<- if (use.gr) "vmmin" else "nmmin"
        fit <- .Call("aft_model_output", args, PACKAGE="rstpm2")
        coef <- as.vector(fit$coef)
        hessian <- fit$hessian
        names(coef) <- rownames(hessian) <- colnames(hessian) <- names(init)
        args$init <<- coef
        ## we could use mle2() to calculate vcov by setting eval.only=FALSE
        mle2 <- if (use.gr) bbmle::mle2(negll, coef, vecpar=TRUE, control=control,
                                        gr=gradient, ..., eval.only=TRUE)
                else bbmle::mle2(negll, coef, vecpar=TRUE, control=control, ..., eval.only=TRUE)
        mle2@details$convergence <- fit$fail # fit$itrmcd
        vcov <- try(solve(hessian,tol=0), silent=TRUE)
        if (inherits(vcov, "try-error"))
            vcov <- try(solve(hessian+1e-6*diag(nrow(hessian)), tol=0), silent=TRUE)
        if (inherits(vcov, "try-error")) {
            if (!use.gr)
                message("Non-invertible Hessian")
            mle2@vcov <- matrix(NA,length(coef), length(coef))
        } else {
            mle2@vcov <- vcov
        }
        mle2
    }
    ## mle2 <-  bbmle::mle2(negll, init, vecpar=TRUE, control=control, ...)
    mle2 <- optim_step(control$use.gr)
    if (all(is.na(mle2@vcov)) && control$use.gr) {
        args$init <- init
        mle2 <- optim_step(FALSE)
    }
    out <- as(mle2, "aft")
    out@args <- args
    attr(out,"nobs") <- length(out@args$event) # for logLik method
    return(out)
}

setMethod("nobs", "aft", function(object, ...) length(object@args$event))

setMethod("predict", "aft",
          function(object,newdata=NULL,
                   type=c("surv","cumhaz","hazard","density","hr","sdiff","hdiff","loghazard","link","meansurv","meansurvdiff","odds","or","meanhaz","af","fail","accfac","gradh"),
                   grid=FALSE,seqLength=300,level=0.95,
                   se.fit=FALSE,link=NULL,exposed=incrVar(var),var=NULL,keep.attributes=TRUE,...) {
              type = match.arg(type)
              if (object@args$tvc.integrated)
                  predict.aft_integrated2(object, newdata, type, grid, seqLength, level, se.fit, link, exposed, var, keep.attributes, ...)
              else predict.aft_mixture2(object, newdata, type, grid, seqLength, level, se.fit, link, exposed, var, keep.attributes, ...)
          })

predict.aft_mixture2 <-
    function(object,newdata=NULL,
             type=c("surv","cumhaz","hazard","density","hr","sdiff","hdiff","loghazard","link",
                    "meansurv","meansurvdiff","odds","or","meanhaz","af","fail","accfac","gradh"),
             grid=FALSE,seqLength=300,level=0.95,
             se.fit=FALSE,link=NULL,exposed=incrVar(var),var=NULL,keep.attributes=TRUE,...) {
        type <- match.arg(type)
        args <- object@args
        if (type %in% c("fail")) {
            out <- 1-predict(object,newdata=newdata,type="surv",grid,seqLength,se.fit,link,
                             exposed,var,keep.attributes,...)
            if (se.fit) {temp <- out$lower; out$lower <- out$upper; out$upper <- temp}
            return(out)
        }
        if (is.null(exposed) && is.null(var) & type %in% c("hr","sdiff","hdiff","meansurvdiff",
                                                           "or","af","accfac"))
            stop('Either exposed or var required for type in ("hr","sdiff","hdiff","meansurvdiff","or","af","accfac")')
        ## exposed is a function that takes newdata and returns the revised newdata
        ## var is a string for a variable that defines a unit change in exposure
        if (is.null(newdata) && type %in% c("hr","sdiff","hdiff","meansurvdiff","or","af","accfac"))
            stop("Prediction using type in ('hr','sdiff','hdiff','meansurvdiff','or','af','accfac') requires newdata to be specified.")
        calcX <- !is.null(newdata)
        time <- NULL
        if (is.null(newdata)) {
            ##mm <- X <- model.matrix(object) # fails (missing timevar)
            X <- args$X
            XD <- args$XD
            ##y <- model.response(object@model.frame)
            y <- args$y
            time <- as.vector(y[,ncol(y)-1])
            newdata <- as.data.frame(args$data)
        }
        loglpfunc <- function(x,newdata,...) {
            newdata[[object@args$timeVar]] <- exp(x)
            lpmatrix.lm(object@args$lm.obj,newdata=newdata)
        }
        ## resp <- attr(Terms, "variables")[attr(Terms, "response")]
        ## similarly for the derivatives
        if (grid) {
            Y <- args$y
            event <- Y[,ncol(Y)]==1
            time <- args$data[[args$timeVar]]
            eventTimes <- time[event]
            tt <- seq(min(eventTimes),max(eventTimes),length=seqLength)[-1]
            data.x <- data.frame(tt)
            names(data.x) <- args$timeVar
            newdata[[args$timeVar]] <- NULL
            newdata <- merge(newdata,data.x)
            calcX <- TRUE
        }
        if (calcX)  {
            X <- lpmatrix.lm(args$lm.obj, newdata)[,args$X.index, drop = FALSE]
            XD <- grad1(loglpfunc,log(newdata[[object@args$timeVar]]),
                        log.transform=FALSE, newdata=newdata)
            XD <- matrix(XD,nrow=nrow(X))[,args$X.index, drop = FALSE]
            Xc <- lpmatrix.lm(args$glm.cure.obj, newdata)
            time <- eval(args$timeExpr,newdata)
        }
        if (type %in% c("hr","sdiff","hdiff","meansurvdiff","or","af","accfac")) {
            newdata2 <- exposed(newdata)
            X2 <- lpmatrix.lm(args$lm.obj, newdata2)[,args$X.index, drop = FALSE]
            XD2 <- grad1(loglpfunc,log(newdata2[[object@args$timeVar]]),
                         log.transform=FALSE, newdata=newdata2)
            XD2 <- matrix(XD2,nrow=nrow(X))[,args$X.index, drop = FALSE]
            time2 <- eval(args$timeExpr,newdata2) # is this always equal to time?
            Xc2 = model.matrix(args$glm.cure.obj, newdata2)
        }
        if (type %in% c("grad")) {
            return(predict.aft.ext(object, type=type, time=time, X=X, XD=XD))
        }
        ## colMeans <- function(x) colSums(x)/apply(x,2,length)
        local <-  function (object, newdata=NULL, type="surv", exposed, ...)
        {
            args <- object@args
            betafull <- coef(object)
            beta <- betafull[1:ncol(args$X)]
            betas <- betafull[-(1:ncol(args$X))]
            tt <- args$terms
            eta <- as.vector(X %*% beta)
            logtstar <- log(time) - eta
            etas <- as.vector(predict(args$design, logtstar) %*% betas)
            H <- exp(etas)
            S <- exp(-H)
            if (type=="cumhaz")
                return(H)
            if (type=="surv")
                return(S)
            if (type=="fail")
                return(1-S)
            if (type=="odds")
                return((1-S)/S)
            if (type=="meansurv")
                return(tapply(S,newdata[[object@args$timeVar]],mean))
            etaDs <- as.vector(predict(args$designD, logtstar) %*% betas)
            etaD <- as.vector(XD %*% beta)
            h <- H*etaDs*(1/time-etaD)
            Sigma = vcov(object)
            if (type=="link")
                return(eta)
            if (type=="density")
                return (S*h)
            if (type=="hazard")
                return(h)
            if (type=="loghazard")
                return(log(h))
            if (type=="meanhaz")
                return(tapply(S*h,newdata[[object@args$timeVar]],sum)/tapply(S,newdata[[object@args$timeVar]],sum))
            eta2 <- as.vector(X2 %*% beta)
            logtstar2 <- log(time2) - eta2
            etas2 <- as.vector(predict(args$design, logtstar2) %*% betas)
            H2 <- exp(etas2)
            S2 <- exp(-H2)
            if (type=="sdiff")
                return(S2-S)
            if (type=="or")
                return((1-S2)/S2/((1-S)/S))
            if (type=="meansurvdiff")
                return(tapply(S2,newdata[[object@args$timeVar]],mean) - tapply(S,newdata[[object@args$timeVar]],mean))
            etaDs2 <- as.vector(predict(args$designD, logtstar2) %*% betas)
            etaD2 <- as.vector(XD2 %*% beta)
            h2 <- H2*etaDs2*(1/time2-etaD2)
            if (type=="hdiff")
                return(h2 - h)
            if (type=="hr")
                return(h2/h)
            if (type=="af") {
                meanS <- tapply(S,newdata[[object@args$timeVar]],mean)
                meanS2 <- tapply(S2,newdata[[object@args$timeVar]],mean)
                return((meanS2 - meanS)/(1-meanS))
            }
            if (type=="accfac") {
                accfac <- -eta + log(1-etaD)
                accfac2 <- -eta2 + log(1-etaD2)
                return(exp(accfac2-accfac))
            }
        }
        local2 <-  function (object, newdata=NULL, type="surv", exposed, ...)
        {
            args <- object@args
            betafull <- coef(object)
            beta <- betafull[1:ncol(args$X)]
            betac <- betafull[(ncol(args$X)+1):(ncol(args$X)+ncol(args$Xc))]
            betas <- betafull[-(1:(ncol(args$X)+ncol(args$Xc)))]
            tt <- args$terms
            eta <- as.vector(X %*% beta)
            etac <- as.vector(Xc %*% betac)
            cure_frac <- exp(etac)/(1+exp(etac))
            logtstar <- log(time) - eta
            etas <- as.vector(predict(args$design, logtstar) %*% betas)
            Hu <- exp(etas)
            Su <- exp(-Hu)
            S <- cure_frac + (1-cure_frac)*Su
            if (type=="cumhaz")
                return(-log(cure_frac + (1-cure_frac)*exp(-Hu)))
            if (type=="surv")
                return(S)
            if (type=="fail")
                return(1-S)
            if (type=="odds")
                return((1-S)/S)
            if (type=="meansurv")
                return(tapply(S,newdata[[object@args$timeVar]],mean))
            etaDs <- as.vector(predict(args$designD, logtstar) %*% betas)
            etaD <- as.vector(XD %*% beta)
            hu <- Hu*etaDs*(1/time-etaD)
            h <- (1-cure_frac)*exp(-Hu)*hu/(cure_frac + (1-cure_frac)*exp(-Hu))
            Sigma = vcov(object)
            if (type=="link")
                return(eta)
            if (type=="density")
                return (S*h)
            if (type=="hazard")
                return(h)
            if (type=="loghazard")
                return(log(h))
            if (type=="meanhaz")
                return(tapply(S*h,newdata[[object@args$timeVar]],sum)/tapply(S,newdata[[object@args$timeVar]],sum))
            eta2 <- as.vector(X2 %*% beta)
            logtstar2 <- log(time2) - eta2
            etas2 <- as.vector(predict(args$design, logtstar2) %*% betas)
            etac2 <- as.vector(Xc2 %*% betac)
            cure_frac2 <- exp(etac2)/(1+exp(etac2))
            Hu2 <- exp(etas2)
            Su2 <- exp(-Hu2)
            S2 <- cure_frac2 + (1-cure_frac2)*Su2
            if (type=="sdiff")
                return(S2-S)
            if (type=="or")
                return((1-S2)/S2/((1-S)/S))
            if (type=="meansurvdiff")
                return(tapply(S2,newdata[[object@args$timeVar]],mean) - tapply(S,newdata[[object@args$timeVar]],mean))
            etaDs2 <- as.vector(predict(args$designD, logtstar2) %*% betas)
            etaD2 <- as.vector(XD2 %*% beta)
            hu2 <- Hu2*etaDs2*(1/time2-etaD2)
            h2 <- (1-cure_frac2)*exp(-Hu2)*hu2/(cure_frac2 + (1-cure_frac2)*exp(-Hu2))
            if (type=="hdiff")
                return(h2 - h)
            if (type=="hr")
                return(h2/h)
            if (type=="af") {
                meanS <- tapply(S,newdata[[object@args$timeVar]],mean)
                meanS2 <- tapply(S2,newdata[[object@args$timeVar]],mean)
                return((meanS2 - meanS)/(1-meanS))
            }
            if (type=="accfac") {
                accfac <- -eta + log(1-etaD)
                accfac2 <- -eta2 + log(1-etaD2)
                return(exp(accfac2-accfac))
            }
        }
        local <- if (args$mixture) local2 else local
        out <- if (!se.fit) {
                   local(object,newdata,type=type,exposed=exposed,
                         ...)
               } else {
                   if (is.null(link))
                       link <- switch(type,surv="cloglog",cumhaz="log",hazard="log",hr="log",sdiff="I",
                                      hdiff="I",loghazard="I",link="I",odds="log",or="log",meansurv="I",meanhaz="I",af="I",accfac="log")
                   invlinkf <- switch(link,I=I,log=exp,cloglog=cexpexp,logit=expit)
                   pred <- predictnl(object,local,link=link,newdata=newdata,type=type,gd=NULL,
                                     exposed=exposed,...)
                   ci <- confint.predictnl(pred, level = level)
                   out <- data.frame(Estimate=pred$fit,
                                     lower=ci[,1],
                                     upper=ci[,2])
                   if (link=="cloglog")
                       out <- data.frame(Estimate=out$Estimate,lower=out$upper,upper=out$lower)
                   invlinkf(out)
               }
        if (keep.attributes)
            attr(out,"newdata") <- newdata
        return(out)
    }

predict.aft_integrated2 =  function(object,newdata=NULL,
                                type=c("surv","cumhaz","hazard","density","hr","sdiff","hdiff","loghazard","link","meansurv","meansurvdiff","odds","or","meanhaz","af","fail","accfac","gradh"),
                                grid=FALSE,seqLength=300,level=0.95,
                                se.fit=FALSE,link=NULL,exposed=incrVar(var),var=NULL,keep.attributes=TRUE,...) {
    type <- match.arg(type)
    args <- object@args
    if (type %in% c("fail")) {
        out <- 1-predict(object,newdata=newdata,type="surv",grid,seqLength,se.fit,link,exposed,var,keep.attributes,...)
        if (se.fit) {temp <- out$lower; out$lower <- out$upper; out$upper <- temp}
        return(out)
    }
    if (is.null(exposed) && is.null(var) & type %in% c("hr","sdiff","hdiff","meansurvdiff","or","af","accfac"))
        stop('Either exposed or var required for type in ("hr","sdiff","hdiff","meansurvdiff","or","af","accfac")')
    ## exposed is a function that takes newdata and returns the revised newdata
    ## var is a string for a variable that defines a unit change in exposure
    if (is.null(newdata) && type %in% c("hr","sdiff","hdiff","meansurvdiff","or","af","accfac"))
        stop("Prediction using type in ('hr','sdiff','hdiff','meansurvdiff','or','af','accfac') requires newdata to be specified.")
    calcX <- !is.null(newdata)
    time <- NULL
    if (is.null(newdata)) {
        ##mm <- X <- model.matrix(object) # fails (missing timevar)
        X <- args$X
        X_list <- args$X_list
        XD <- args$XD
        ##y <- model.response(object@model.frame)
        y <- args$y
        time <- as.vector(y[,ncol(y)-1])
        newdata <- as.data.frame(args$data)
    }
    loglpfunc <- function(x,newdata,...) {
        newdata[[object@args$timeVar]] <- exp(x)
        lpmatrix.lm(object@args$lm.obj,newdata=newdata)
    }
    ## resp <- attr(Terms, "variables")[attr(Terms, "response")]
    ## similarly for the derivatives
    if (grid) {
        Y <- args$y
        event <- Y[,ncol(Y)]==1
        time <- args$data[[args$timeVar]]
        eventTimes <- time[event]
        tt <- seq(min(eventTimes),max(eventTimes),length=seqLength)[-1]
        data.x <- data.frame(tt)
        names(data.x) <- args$timeVar
        newdata[[args$timeVar]] <- NULL
        newdata <- merge(newdata,data.x)
        calcX <- TRUE
    }
    if (calcX)  {
        X <- lpmatrix.lm(args$lm.obj, newdata)[,args$X.index, drop = FALSE]
        XD <- grad1(loglpfunc,log(newdata[[object@args$timeVar]]),
                    log.transform=FALSE, newdata=newdata)
        XD <- matrix(XD,nrow=nrow(X))[,args$X.index, drop = FALSE]
        Xc <- lpmatrix.lm(args$glm.cure.obj, newdata)
        time <- eval(args$timeExpr,newdata)
        X_list = lapply(args$gnodes, function(gnode)
            lpmatrix.lm(args$lm.obj,
                    local({ newdata[[args$timeVar]] = (gnode+1)/2*newdata[[args$timeVar]]; newdata}))[,args$X.index, drop=FALSE])
    }
    if (type %in% c("hr","sdiff","hdiff","meansurvdiff","or","af","accfac")) {
        newdata2 <- exposed(newdata)
        X2 <- lpmatrix.lm(args$lm.obj, newdata2)[,args$X.index, drop = FALSE]
        XD2 <- grad1(loglpfunc,log(newdata2[[object@args$timeVar]]),
                     log.transform=FALSE, newdata=newdata2)
        XD2 <- matrix(XD2,nrow=nrow(X))[,args$X.index, drop = FALSE]
        time2 <- eval(args$timeExpr,newdata2) # is this always equal to time?
        Xc2 = model.matrix(args$glm.cure.obj, newdata2)
        X_list2 = lapply(args$gnodes, function(gnode)
            lpmatrix.lm(args$lm.obj,
                    local({ newdata2[[args$timeVar]] = (gnode+1)/2*newdata2[[args$timeVar]]; newdata2}))[,args$X.index, drop=FALSE])
    }
    if (type == "gradh") {
        return(predict.aft.ext(object, type="gradh", time=time, X=X, XD=XD,
                                          Xc=Xc, X_list=X_list))
    }
    ## colMeans <- function(x) colSums(x)/apply(x,2,length)
    local <-  function (object, newdata=NULL, type="surv", exposed, ...)
    {
        args <- object@args
        betafull <- coef(object)
        beta <- betafull[1:ncol(X)]
        betas <- betafull[-(1:ncol(X))]
        tt <- args$terms
        tstar = time*0
        scale = time/2.0
        for(i in 1:length(X_list)) {
            tstar = tstar + args$gweights[i]*scale * exp(-X_list[[i]] %*% beta)
        }
        ## eta <- as.vector(X %*% beta)
        logtstar <- log(tstar)
        etas <- as.vector(predict(args$design, logtstar) %*% betas)
        H <- exp(etas)
        S <- exp(-H)
        if (type=="cumhaz")
            return(H)
        if (type=="surv")
            return(S)
        if (type=="fail")
            return(1-S)
        if (type=="odds")
            return((1-S)/S)
        if (type=="meansurv")
            return(tapply(S,newdata[[object@args$timeVar]],mean))
        etaDs <- as.vector(predict(args$designD, logtstar) %*% betas)
        etaD <- as.vector(XD %*% beta)
        h <- drop(H*etaDs*exp(X %*% beta))
        Sigma = vcov(object)
        if (type=="link")
            return(logtstar) ## what should this be?
        if (type=="density")
            return (S*h)
        if (type=="hazard")
            return(h)
        if (type=="loghazard")
            return(log(h))
        if (type=="meanhaz")
            return(tapply(S*h,newdata[[object@args$timeVar]],sum)/tapply(S,newdata[[object@args$timeVar]],sum))
        tstar2 = time2*0
        scale2 = time2/2.0
        for(i in 1:length(X_list2)) {
            tstar2 = tstar2 + args$gweights[i]*scale2 * exp(-X_list2[[i]] %*% beta)
        }
        logtstar2 = log(tstar2)
        etas2 <- as.vector(predict(args$design, logtstar2) %*% betas)
        H2 <- exp(etas2)
        S2 <- exp(-H2)
        if (type=="sdiff")
            return(S2-S)
        if (type=="or")
            return((1-S2)/S2/((1-S)/S))
        if (type=="meansurvdiff")
            return(tapply(S2,newdata[[object@args$timeVar]],mean) - tapply(S,newdata[[object@args$timeVar]],mean))
        etaDs2 <- as.vector(predict(args$designD, logtstar2) %*% betas)
        etaD2 <- as.vector(XD2 %*% beta)
        h2 <- drop(H2*etaDs2*exp(X2 %*% beta))
        if (type=="hdiff")
            return(h2 - h)
        if (type=="hr")
            return(h2/h)
        if (type=="af") {
            meanS <- tapply(S,newdata[[object@args$timeVar]],mean)
            meanS2 <- tapply(S2,newdata[[object@args$timeVar]],mean)
            return((meanS2 - meanS)/(1-meanS))
        }
        if (type=="accfac") {
            return(exp(-(X2-X) %*% beta))
        }
    }
    local2 <-  function (object, newdata=NULL, type="surv", exposed, ...)
    {
        args <- object@args
        betafull <- coef(object)
        beta <- betafull[1:ncol(args$X)]
        betac <- betafull[(ncol(args$X)+1):(ncol(args$X)+ncol(args$Xc))]
        betas <- betafull[-(1:(ncol(args$X)+ncol(args$Xc)))]
        tt <- args$terms
        tt <- args$terms
        tstar = time*0
        scale = time/2.0
        for(i in 1:length(X_list)) {
            tstar = tstar + args$gweights[i]*scale * exp(-X_list[[i]] %*% beta)
        }
        ## eta <- as.vector(X %*% beta)
        etac <- as.vector(Xc %*% betac)
        cure_frac <- exp(etac)/(1+exp(etac))
        logtstar <- log(tstar)
        etas <- as.vector(predict(args$design, logtstar) %*% betas)
        Hu <- exp(etas)
        Su <- exp(-Hu)
        S <- cure_frac + (1-cure_frac)*Su
        if (type=="cumhaz")
            return(-log(cure_frac + (1-cure_frac)*exp(-Hu)))
        if (type=="surv")
            return(S)
        if (type=="fail")
            return(1-S)
        if (type=="odds")
            return((1-S)/S)
        if (type=="meansurv")
            return(tapply(S,newdata[[object@args$timeVar]],mean))
        etaDs <- as.vector(predict(args$designD, logtstar) %*% betas)
        etaD <- as.vector(XD %*% beta)
        hu <- drop(Hu*etaDs*exp(X %*% beta))
        h <- (1-cure_frac)*exp(-Hu)*hu/(cure_frac + (1-cure_frac)*exp(-Hu))
        Sigma = vcov(object)
        if (type=="link")
            return(logtstar) # is this correct?
        if (type=="density")
            return (S*h)
        if (type=="hazard")
            return(h)
        if (type=="loghazard")
            return(log(h))
        if (type=="meanhaz")
            return(tapply(S*h,newdata[[object@args$timeVar]],sum)/tapply(S,newdata[[object@args$timeVar]],sum))
        tstar2 = time2*0
        scale2 = time2/2.0
        for(i in 1:length(X_list2)) {
            tstar2 = tstar2 + args$gweights[i]*scale2 * exp(-X_list2[[i]] %*% beta)
        }
        logtstar2 = log(tstar2)
        ## eta2 <- as.vector(X2 %*% beta)
        etas2 <- as.vector(predict(args$design, logtstar2) %*% betas)
        etac2 <- as.vector(Xc2 %*% betac)
        cure_frac2 <- exp(etac2)/(1+exp(etac2))
        Hu2 <- exp(etas2)
        Su2 <- exp(-Hu2)
        S2 <- cure_frac2 + (1-cure_frac2)*Su2
        if (type=="sdiff")
            return(S2-S)
        if (type=="or")
            return((1-S2)/S2/((1-S)/S))
        if (type=="meansurvdiff")
            return(tapply(S2,newdata[[object@args$timeVar]],mean) - tapply(S,newdata[[object@args$timeVar]],mean))
        etaDs2 <- as.vector(predict(args$designD, logtstar2) %*% betas)
        etaD2 <- as.vector(XD2 %*% beta)
        hu2 <- drop(Hu2*etaDs2*exp(X2 %*% beta))
        h2 <- (1-cure_frac2)*exp(-Hu2)*hu2/(cure_frac2 + (1-cure_frac2)*exp(-Hu2))
        if (type=="hdiff")
            return(h2 - h)
        if (type=="hr")
            return(h2/h)
        if (type=="af") {
            meanS <- tapply(S,newdata[[object@args$timeVar]],mean)
            meanS2 <- tapply(S2,newdata[[object@args$timeVar]],mean)
            return((meanS2 - meanS)/(1-meanS))
        }
        if (type=="accfac") {
            return(exp(-(X2-X) %*% beta))
        }
    }
    local <- if(args$mixture)  local2 else local
    out <- if (!se.fit) {
               local(object,newdata,type=type,exposed=exposed,
                     ...)
           } else {
               if (is.null(link))
                   link <- switch(type,surv="cloglog",cumhaz="log",hazard="log",hr="log",sdiff="I",
                                  hdiff="I",loghazard="I",link="I",odds="log",or="log",meansurv="I",meanhaz="I",af="I",accfac="log")
               invlinkf <- switch(link,I=I,log=exp,cloglog=cexpexp,logit=expit)
               pred <- predictnl(object,local,link=link,newdata=newdata,type=type,gd=NULL,
                                 exposed=exposed,...)
               ci <- confint.predictnl(pred, level = level)
               out <- data.frame(Estimate=pred$fit,
                                 lower=ci[,1],
                                 upper=ci[,2])
               if (link=="cloglog")
                   out <- data.frame(Estimate=out$Estimate,lower=out$upper,upper=out$lower)
               invlinkf(out)
           }
    if (keep.attributes)
        attr(out,"newdata") <- newdata
    return(out)
}

plot.aft.meansurv <- function(x, y=NULL, times=NULL, newdata=NULL, type="meansurv", exposed=NULL, add=FALSE, ci=!add, rug=!add, recent=FALSE,
                          xlab=NULL, ylab=NULL, lty=1, line.col=1, ci.col="grey", seqLength=301, ...) {
    ## if (is.null(times)) stop("plot.meansurv: times argument should be specified")
    args <- x@args
    if (is.null(newdata)) newdata <- as.data.frame(args$data)
    if (is.null(times)) {
      Y <- args$y
      event <- Y[,ncol(Y)]==1
      time <- args$data[[args$timeVar]]
      eventTimes <- time[event]
      times <- seq(min(eventTimes),max(eventTimes),length=seqLength)[-1]
    }
    times <- times[times !=0]
    if (recent) {
        newdata <- do.call("rbind",
                           lapply(times, 
                                  function(time) {
                                      newd <- newdata
                                      newd[[args$timeVar]] <- newdata[[args$timeVar]]*0+time
                                      newd
                                  }))
        pred <- predict(x, newdata=newdata, type=type, se.fit=ci, exposed=exposed) # requires recent version
        if (type=="meansurv")
            pred <- if (ci) rbind(c(Estimate=1,lower=1,upper=1),pred) else c(1,pred)
    } else {
        pred <- lapply(times, 
                       function(time) {
                           newdata[[args$timeVar]] <- newdata[[args$timeVar]]*0+time
                           predict(x, newdata=newdata, type=type, se.fit=ci, grid=FALSE, exposed=exposed)
                       })
        pred <- do.call("rbind", pred)
        if (type=="meansurv")  {
            pred <- if (ci) rbind(c(Estimate=1,lower=1,upper=1),pred) else c(1,unlist(pred))
            times <- c(0,times)
            }
        }
    if (is.null(xlab)) xlab <- deparse(args$timeExpr)
    if (is.null(ylab))
        ylab <- switch(type,
                       meansurv="Mean survival",
                       af="Attributable fraction",
                       meansurvdiff="Difference in mean survival")
    if (!add) matplot(times, pred, type="n", xlab=xlab, ylab=ylab, ...)
    if (ci) {
        polygon(c(times,rev(times)),c(pred$lower,rev(pred$upper)),col=ci.col,border=ci.col)
        lines(times,pred$Estimate,col=line.col,lty=lty,...)
    } else {
        lines(times,pred,col=line.col,lty=lty,...)
    }
    if (rug) {
        Y <- args$y
        eventTimes <- Y[Y[,ncol(Y)]==1,ncol(Y)-1]
        rug(eventTimes,col=line.col)
    }
    return(invisible(y))
}

plot.aft.base <- 
          function(x,y,newdata=NULL,type="surv",
                   xlab=NULL,ylab=NULL,line.col=1,ci.col="grey",lty=par("lty"),
                   add=FALSE,ci=!add,rug=!add,
                   var=NULL,exposed=incrVar(var),times=NULL,...) {
              if (type %in% c("meansurv","meansurvdiff","af")) {
                  return(plot.aft.meansurv(x,times=times,newdata=newdata,type=type,xlab=xlab,ylab=ylab,line.col=line.col,ci.col=ci.col,
                                       lty=lty,add=add,ci=ci,rug=rug, exposed=exposed, ...))
              }
              args <- x@args
              if (is.null(newdata)) stop("newdata argument needs to be specified")
              y <- predict(x,newdata,type=switch(type,fail="surv",margfail="margsurv",type),var=var,exposed=exposed,
                           grid=!(args$timeVar %in% names(newdata)), se.fit=ci)
              if (type %in% c("fail","margfail")) {
                  if (ci) {
                      y$Estimate <- 1-y$Estimate
                      lower <- y$lower
                      y$lower=1-y$upper
                      y$upper=1-lower
                      } else y <- structure(1-y,newdata=attr(y,"newdata"))
              }
              if (is.null(xlab)) xlab <- deparse(args$timeExpr)
              if (is.null(ylab))
                  ylab <- switch(type,hr="Hazard ratio",hazard="Hazard",surv="Survival",density="Density",
                                 sdiff="Survival difference",hdiff="Hazard difference",cumhaz="Cumulative hazard",
                                 loghazard="log(hazard)",link="Linear predictor",meansurv="Mean survival",
                                 meansurvdiff="Difference in mean survival",odds="Odds",or="Odds ratio",
                                 margsurv="Marginal survival",marghaz="Marginal hazard",marghr="Marginal hazard ratio", haz="Hazard",fail="Failure",
                                 meanhaz="Mean hazard",margfail="Marginal failure",af="Attributable fraction",meanmargsurv="Mean marginal survival",
                                 uncured="Uncured distribution","Acceleration factor")
              xx <- attr(y,"newdata")
              xx <- eval(args$timeExpr,xx) # xx[,ncol(xx)]
              if (!add) matplot(xx, y, type="n", xlab=xlab, ylab=ylab, ...)
              if (ci) {
                  polygon(c(xx,rev(xx)), c(y[,2],rev(y[,3])), col=ci.col, border=ci.col)
                  lines(xx,y[,1],col=line.col,lty=lty,...)
              } else lines(xx,y,col=line.col,lty=lty,...)
              if (rug) {
                  Y <- args$y
                  eventTimes <- Y[Y[,ncol(Y)]==1,ncol(Y)-1]
                  rug(eventTimes,col=line.col)
              }
              return(invisible(y))
          }
setMethod("plot", signature(x="aft", y="missing"),
          function(x,y,newdata=NULL,type="surv",
                   xlab=NULL,ylab=NULL,line.col=1,ci.col="grey",lty=par("lty"),
                   add=FALSE,ci=!add,rug=!add,
                   var=NULL,exposed=incrVar(var),times=NULL,...)
              plot.aft.base(x=x, y=y, newdata=newdata, type=type, xlab=xlab,
                              ylab=ylab, line.col=line.col, lty=lty, add=add,
                              ci=ci, rug=rug, var=var, exposed=exposed, times=times, ...)
          )

predict.aft.ext <- function(obj, type=c("survival","haz","gradh"),
                            time=obj@args$time, X=obj@args$X, XD=obj@args$XD,
                            X_list=obj@args$X_list, Xc=obj@args$Xc) {
    type <- match.arg(type)
    localargs <- obj@args
    localargs$return_type <- type
    localargs$X <- X
    localargs$XD <- XD
    localargs$time <- time
    localargs$Xc <- Xc
    localargs$X_list <- X_list
    as.matrix(.Call("aft_model_output", localargs, PACKAGE="rstpm2"))
}

## ## simulate from Weibull with one binary covariate
## if (FALSE) {
##     require(rstpm2)
##     summary(aft0 <- aft(Surv(rectime,censrec==1)~hormon,data=brcancer,df=4))
##     aft1 <- aft(Surv(rectime,censrec==1)~hormon,data=brcancer,df=4,init=coef(aft1))
##     ##
##     require(rstpm2)
##     summary(aft0 <- aft(Surv(rectime,censrec==1)~hormon,data=brcancer,df=4))
##     plot(survfit(Surv(rectime,censrec==1)~hormon,data=brcancer),col=1:2)
##     plot(aft0,newdata=data.frame(hormon=0), add=TRUE, line.col="green", ci=FALSE)
##     plot(aft0,newdata=data.frame(hormon=1), add=TRUE, line.col="blue", ci=FALSE)
##     ##
##     summary(aft1 <- aft(Surv(rectime,censrec==1)~hormon,data=brcancer,df=4,smooth.formula=~hormon:ns(log(rectime),df=3)))
##     plot(survfit(Surv(rectime,censrec==1)~hormon,data=brcancer),col=1:2)
##     plot(aft1,newdata=data.frame(hormon=0), add=TRUE, line.col="green", ci=FALSE)
##     plot(aft1,newdata=data.frame(hormon=1), add=TRUE, line.col="blue", ci=FALSE)

##     require(rstpm2)
##     require(survival)
##     require(splines)
##     set.seed(12345)
##     n <- 1e4
##     x <- rep(c(0,1),n/2)
##     af <- exp(0.3*x)
##     time <- rweibull(n,2,5)*af
##     censor <- rexp(n,rate=1/10)
##     obstime <- pmin(time,censor)
##     event <- ifelse(obstime==time,1,0)
##     X <- cbind(x)
##     XD <- X*0
##     dat1 <- data.frame(obstime,event,x)
##     aft1 <- aft(Surv(obstime,event)~x,data=dat1, df=2, reltol=1e-12, control=list(maxit=2000))

##     plot(survfit(Surv(obstime,event)~x,data=dat1),col=1:2)
##     plot(aft1,newdata=data.frame(x=0), add=TRUE, line.col="green", ci=FALSE)
##     plot(aft1,newdata=data.frame(x=1), add=TRUE, line.col="blue", ci=FALSE)
    
##     head(rstpm2:::predict.aft.ext(aft1) - predict(aft1))
##     range(rstpm2:::predict.aft.ext(aft1,type="haz") - predict(aft1, type="haz"))
##     rstpm2:::predict.aft.ext(aft1,type="haz",time=aft1@args$time[1:6],X=aft1@args$X[1:6,,drop=FALSE],XD=aft1@args$XD[1:6,,drop=FALSE]) - head(predict(aft1, type="haz"))
##     predict.aft.ext.test <- function(obj, eps=1e-5) {
##         localargs <- obj@args
##         localargs$return_type <- "haz"
##         basecoef <- coef(obj)
##         sapply(1:length(basecoef),
##                function(i) {
##                    coef <- basecoef
##                    coef[i] <- coef[i]+eps
##                    localargs$init <- coef
##                    upper <- as.vector(.Call("aft_model_output", localargs, PACKAGE="rstpm2"))
##                    coef <- basecoef
##                    coef[i] <- coef[i]-eps
##                    localargs$init <- coef
##                    lower <- as.vector(.Call("aft_model_output", localargs, PACKAGE="rstpm2"))
##                    (upper-lower)/eps/2
##                })
##     }
##     temp <- predict.aft.ext.test(aft1)
##     range(rstpm2:::predict.aft.ext(aft1,type="gradh") - temp)
##     rstpm2:::predict.aft.ext(aft1,type="gradh") - head(temp)
##     range(predict(aft1,newdata=data.frame(obstime=aft1@args$time[1:6],x=x[1:6]),type="gradh") -
##           head(temp))
    
##     plot(aft1,newdata=data.frame(x=0), type="hazard", line.col="green", rug=FALSE)
##     plot(aft1,newdata=data.frame(x=1), type="hazard", add=TRUE, line.col="blue", ci=TRUE)

##     aft2 <- aft(Surv(obstime,event)~x,data=dat1, df=4, reltol=1e-12, control=list(maxit=2000), smooth.formula=~x:ns(log(obstime),df=3))

##     plot(survfit(Surv(obstime,event)~x,data=dat1),col=1:2)
##     plot(aft2,newdata=data.frame(x=0),line.col="green",add=TRUE,ci=FALSE)
##     plot(aft2,newdata=data.frame(x=1),line.col="blue",add=TRUE,ci=FALSE)

##     plot(aft2,newdata=data.frame(x=0),type="accfac",exposed=function(data) transform(data,x=1),se.fit=TRUE)

##     aft3 <- aft(Surv(obstime,event)~x,data=dat1, df=4, reltol=1e-12, control=list(maxit=2000), smooth.formula=~x:ns(obstime,df=3))
##     plot(aft3,newdata=data.frame(x=0),type="accfac",exposed=function(data) transform(data,x=1))
##     plot(aft2,newdata=data.frame(x=0),type="accfac",exposed=function(data) transform(data,x=1),ci=FALSE,add=TRUE,line.col="blue")
    

## ## f$fn is not the same (?!)
##     aft1$negll(aft1$par)
##     -sum(aft1$logh*event-aft1$H) # ok
##     -sum(aft1$f$report(aft1$par)$logh*event-aft1$f$report(aft1$par)$H) # ok
##     -sum(aft2$f$report(aft1$par)$logh*event-aft2$f$report(aft1$par)$H) # ok
##     aft1$f$fn(aft1$par) # ???
    
##     ## f$fn is not the same (?!)
##     aft2$negll(aft2$par)
##     -sum(aft2$logh*event-aft2$H) # ok
##     -sum(aft2$f$report(aft2$par)$logh*dat1$event-aft2$f$report(aft2$par)$H) # ok
##     aft2$f$fn(aft2$par) # ???

##     ## the events are the same
##     all(event == aft1$f$report(aft1$par)$event) # ok
##     all(event == aft2$f$report(aft2$par)$event) # ok
##     ## The H and logh vectors are very close
##     max(abs(aft1$f$report(aft1$par)$H - aft1$H))       # ok
##     max(abs(aft2$f$report(aft2$par)$H - aft2$H))       # ok
##     max(abs(aft1$f$report(aft1$par)$logh - aft1$logh)) # ok
##     max(abs(aft2$f$report(aft2$par)$logh - aft2$logh)) # ok
##     ##
##     ## the Xs and XDs matrices are very close
##     max(abs(aft1$f$report(aft1$par)$Xs - aft1$Xs))   # ok
##     max(abs(aft1$f$report(aft1$par)$XDs - aft1$XDs)) # ok
##     max(abs(aft2$f$report(aft2$par)$Xs - aft2$Xs))   # ok
##     max(abs(aft2$f$report(aft2$par)$XDs - aft2$XDs)) # ok


##     head(aft1$Xs)
##     head(aft2$Xs)
##     head(aft1$f$report(aft1$par)$Xs)
##     head(aft1$f$report(aft2$par)$Xs)
##     head(aft2$f$report(aft2$par)$Xs)
    
##     aft1$f$gr(aft1$par) # ok
##     delta=function(i,eps=1e-5) {new=rep(0,5); new[i]=eps; new}
##     for (i in 1:length(aft1$par))
##         print((aft1$f$fn(aft1$par+delta(i))-aft1$f$fn(aft1$par-delta(i)))/(2e-5))
##     ## Gradient at the 'final' value are not the same
##     for (i in 1:length(aft2$par))
##         print((aft2$negll(aft1$par+delta(i))-aft2$negll(aft1$par-delta(i)))/(2e-5))

##     ## gradient at the initial value are the same
##     aft1$f$gr(aft1$init) # ok
##     delta=function(i,eps=1e-5) {new=rep(0,5); new[i]=eps; new}
##     for (i in 1:length(aft1$par))
##         print((aft1$f$fn(aft1$init+delta(i))-aft1$f$fn(aft1$init-delta(i)))/(2e-5))

##     ## Objective at the initial values are the same
##     as.numeric(aft1$negll(aft1$init)) - aft1$f$fn(aft1$init) # ok
##     as.numeric(aft2$negll(aft2$init)) - aft2$f$fn(aft2$init) # ok

##     ## objectives at the 'final' values are NOT the same
##     as.numeric(aft1$negll(aft1$init+0.1)) - aft1$f$fn(aft1$init+0.1) # ??
##     as.numeric(aft1$negll(aft1$par)) - aft1$f$fn(aft1$par) # ???

##     undebug(aft1$negll)
##     aft1$negll(aft1$par)

##     aft1$init
##     aft1$f$par
##     aft1$f$fn(aft1$init)
##     aft1$f$fn(aft1$f$par)
##     aft1$f$fn(aft1$par)
##     aft1$f$fn(aft2$par)
    
## }


## ## KL using GausLaguerre Quadrature --numerically unstable
##
## KL <- function(object, true_density = "Weibull",
##                relative = TRUE, symmetric = FALSE,
##                shape = 1, scale = 1, shape2 = 1, mixing_par = 0.5,
##                intercept1 = 1, intercept2 = 1, beta = c(1,1),
##                rate =1, location = 0, n_nodes = 110)
##   {
##
##   GaussLaguerreQuadrature <- function(f, n_nodes) {
##
##     library(orthopolynom)
##
##     roots <- sapply(polyroot(laguerre.polynomials(n_nodes)[[n_nodes+1]]), Re)
##
##     weights <- rep(roots/
##       ((n_nodes+1)^2 * as.function(
##     laguerre.polynomials(n_nodes+1)[[n_nodes+2]])(roots)^2), each =
##     length(object@args$event))
##
##     ## transformation
##
##     g <- function(x) f(x*scale)*rep(exp(x)*scale, each = length(object@args$event))
##     g(roots) %*% weights
##   }
##
##   integrand <- function(x) {
##     true_density =  if(true_density == "Weibull") {
##       rep(dweibull(x, shape, scale), each = length(object@args$event))
##     } else if (true_density == "gamma") { rep(dgamma(x, shape, scale = scale),
##                                               each = length(object@args$event))
##     } else if (true_density == "norm") { dnorm(x, location, scale)
##     } else if (true_density == "logis") { dlogis(x, location, scale)
##     } else if (true_density == "mixture Weibull") {
##         ## densities at each quadrature point are evaluated for all covariate
##         ## patterns and then stacked
##       mixing_par *
##         c(t(do.call(rbind,
##                     lapply(x, dweibull, shape = shape,
##                            scale =
##                              exp(intercept1 + object@args$X %*% beta))))) +
##         (1-mixing_par) *
##         c(t(do.call(rbind,
##                     lapply(x, dweibull, shape = shape2,
##                            scale = exp(intercept2 + object@args$X %*% beta)))))
##
##     } else { true_density = true_density # user supplied density
##     }
##
##     newdata <- data.frame(X = do.call(rbind,
##                                       rep(list(object@args$X),length(x))))
##
##     colnames(newdata) <- as.character(
##       tail(as.list(attr(object@args$lm.obj$terms, "variables")),-2))
##     newdata[[object@args$timeVar]] <- rep(x, each = length(object@args$event))
##
##     model_density <-  predict.aft_mixture(object,
##                                    type = "density",
##                                    newdata = newdata)
##
##
##     model_density[model_density == 0] <- .Machine$double.xmin
##
##     if (symmetric) { true_density*log(true_density/model_density) +
##         model_density*log(model_density/true_density)
##     } else if (relative) { -true_density*log(model_density)
##     } else { true_density*log(true_density/model_density) }
##   }
##
##   GaussLaguerreQuadrature(integrand, n_nodes)/length(object@args$event)
## }

## KL using integrate --fast when the number of unique covariate patterns is small
KL_not_vectorized <- function(object, true_density = "Weibull",
                              relative = TRUE, symmetric = FALSE,
                              shape = c(1,1), scale = c(1,1), rate =1, location = 0,
                              mixing_par = 0.5, beta = c(1,1))
{

    integrand <- function(x, newdata) {
        true_density =  if(true_density == "Weibull") { dweibull(x, shape, scale)
                        } else if (true_density == "gamma") { dgamma(x, shape, scale = scale)
                        } else if (true_density == "norm") { dnorm(x, location, scale)
                        } else if (true_density == "logis") { dlogis(x, location, scale)
                        } else if (true_density == "mixture Weibull") {
                            mixing_par*
                                dweibull(x, shape[1], scale[1]) +
                                (1-mixing_par)*
                                dweibull(x, shape[2], scale[2])
                        } else { true_density = true_density # user supplied density
                        }
        names <- colnames(newdata)
        newdata <- data.frame(X = rep(newdata[[names]],length(x)))
        colnames(newdata) <- as.character(
            tail(as.list(attr(object@args$lm.obj$terms, "variables")),-2))
        newdata[[object@args$timeVar]] <- x
        model_density <-  predict(object,
                                  type = "density",
                                  newdata = newdata)
        if (symmetric) { true_density*log(true_density/model_density) +
                             model_density*log(model_density/true_density)
        } else if (relative) { -true_density*log(model_density)
        } else { true_density*log(true_density/model_density) }
    }
    out <- 0
    ## save unique covariate patterns with duplicate count
    uniqueCov <- data.frame(pre = object@args$X)
    uniqueCov <- aggregate(cbind(uniqueCov[0], nDuplic = 1), uniqueCov, length)
    ## calculate integral only for unique covariate patterns
    for (i in c(1:NROW(uniqueCov))) {
        newdata <- data.frame(X = uniqueCov[i,-NCOL(uniqueCov)])
        colnames(newdata) <-
            as.character(tail(as.list(attr(object@args$lm.obj$terms,"variables")),-2)[[1]])
        out <- out +
            integrate(integrand,
                      0, 30, newdata = newdata)$value*
                                              uniqueCov$nDuplic[i]/sum(uniqueCov$nDuplic)
    }
    return(out)
}