## package.skeleton(name="rstpm2", path="c:/usr/src/R", force=TRUE, namespace=TRUE, code_files="pm2-3.R") ## Local Windows setup: ## Rtools.bat ## R CMD INSTALL --html "c:/usr/src/R/rstpm2/pkg" ## R CMD build "c:/usr/src/R/rstpm2/pkg" ## R CMD INSTALL --build "c:/usr/src/R/rstpm2/pkg" ## R CMD CHECK "c:/usr/src/R/rstpm2/pkg" ## ## Local Ubuntu setup: ## R CMD INSTALL --html ~/src/R/rstpm2/pkg --library=~/R/x86_64-pc-linux-gnu-library/2.12 ## R CMD build ~/src/R/rstpm2/pkg ## R CMD build --binary ~/src/R/rstpm2/pkg ## ## testPackage <- TRUE ## if (testPackage) { ## require(splines) ## require(survival) ## require(bbmle) ## } ## Email from Paul S library(rstpm2) fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3,tvc=list(hormon=2)) ## Plot marginal HR ## Straight from plot function plot(fit, type = "meanhr", newdata = transform(brcancer, hormon = 0), exposed = function(data) transform(data, hormon = 1)) ## Using ggplot (need to manually extract predictions and plot) library(ggplot2) ## debugonce(rstpm2:::predict.stpm2.base) system.time(pred <- predict(fit, newdata = transform(brcancer, hormon = 0), grid = TRUE, full = TRUE, se.fit = TRUE, type = "meanhr", recent=TRUE, exposed = function(data) transform(data, hormon = 1))) system.time(pred2 <- predict(fit, newdata = transform(brcancer, hormon = 0), grid = TRUE, full = TRUE, se.fit = TRUE, type = "meanhr", exposed = function(data) transform(data, hormon = 1))) ## Plot ggplot(pred, aes(x = rectime, y = Estimate)) + geom_line() + geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3) + ylim(0,2) + xlab('Time (days)') + ylab('HR') + theme_bw(base_size = 15) ## Email for episig ## install.packages("extRemes") library(extRemes) library(survival) set.seed(12345) n<-500000 x1<-rnorm(n) x2<-rnorm(n) error<-revd(n, loc = 0, scale = 1) b0<- 1 b1<-0.5 b2<--1.2 c<-1 time<-exp(b0+b1*x1+b2*x2-c*error) status<-rep(1,n) survreg(Surv(time, status==1) ~ x1+x2,dist="exponential") library(rstpm2) object <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3,tvc=list(hormon=2)) newdata = data.frame(hormon=0) simulate(object, newdata=newdata) # currently returns just the times -- this will later return a data-frame simulate(object, newdata=newdata, start=1500) # left-truncation plot(object, newdata=newdata) grep_call = function(name,x) { local_function = function(x) if(length(x)==1) x==name else any(sapply(x, local_function)) which(sapply(x, local_function)) } gsm_design = function(object, newdata, inflate=100) { stopifnot(inherits(object, "stpm2"), is.list(newdata), is.numeric(inflate), length(inflate) == 1) ## Assumed patterns: ## timeEffect := (ns|nsx)(log(timeVar),knots,Boundary.knots,centre=FALSE,derivs=(c(2,2)|c(2,1))) ## effect := timeEffect | otherEffect:timeEffect | timeEffect:otherEffect terms = attr(object@model.frame, "terms") factors = attr(terms, "factors")[-1,,drop=FALSE] variables = attr(terms, "variables")[-(1:2)] predvars = attr(terms, "predvars")[-(1:2)] indices = grep_call(object@timeVar, variables) if(length(indices)==0) stop("No timeVar in the formula -- unexpected error") index_time_variables = grep_call(object@timeVar, variables) # time variables index_time_effects = grep(object@timeVar,colnames(factors)) # components in the rhs with time variables ## I want to know how wide is each term nms = names(coef(object)) term.labels = attr(terms, "term.labels") coef_index <- sapply(strsplit(nms, ":"), function(c) { if (length(c)>2) stop("current implementation only allows for main effects and two-way interactions") pmatchp = function(x, table) !is.na(pmatch(x, table)) if(length(c)==1) { for (i in seq_along(term.labels)) { t = term.labels[i] ## browser() if (pmatchp(t,c)) return(i) } if (c == "(Intercept)") return(0) return(-1) } ## => length(c) == 2 term.labels.split = strsplit(term.labels, ":") for (i in seq_along(term.labels)) { t = term.labels.split[[i]] if (all(pmatchp(t,c))) return(i) } return(-1) }) ## how to map from term.labels to factors to predvars? ## Can I replace each term with its predvars? parse_ns = function(mycall,x,index_time_effect) { df = length(c(mycall$knots, mycall$Boundary.knots)) - 1 stopifnot(mycall[[1]] == quote(nsx) || mycall[[1]] == quote(ns), length(mycall[[2]])>1, mycall[[2]][[1]] == quote(log), # assumes log mycall[[2]][[2]] == object@timeVar, # what about a scalar product or divisor? is.null(mycall$deriv) || (mycall$derivs[1] == 2 && mycall$derivs[2] %in% 1:2), mycall$centre == FALSE) # doesn't allow for centering cure = !is.null(mycall$derives) && all(mycall$derivs == c(2,1)) time = object@args$time q_const = attr(nsx(log(mean(time)), knots=mycall$knots, Boundary.knots=mycall$Boundary.knots, intercept=mycall$intercept), "q.const") ## browser() list(call = mycall, knots=mycall$knots, Boundary_knots=mycall$Boundary.knots, intercept=as.integer(mycall$intercept), gamma=coef(object)[which(coef_index %in% index_time_effect)], q_const = q_const, cure = as.integer(cure), x=x) } time = object@args$time newdata[[object@timeVar]] = mean(time) # NB: time not used Xp = predict(object, newdata=newdata, type="lpmatrix") index2 = which(coef_index %in% index_time_effects) etap = drop(Xp[, index2, drop=FALSE] %*% coef(object)[index2]) list(type="gsm", link_name=object@args$link, tmin = min(time), # not currently used? tmax = max(time), inflate=as.double(inflate), etap=etap, log_time=TRUE, terms = lapply(index_time_effects, function(i) { j = which(factors[,i] != 0) if (length(j)==1) return(parse_ns(predvars[[j]], rep(1, nrow(newdata)), i)) else { if(length(j)>3) stop("Current implementation only allows for two-way interaction terms") if (j[1] %in% index_time_variables) return(parse_ns(predvars[[j[1]]], eval(predvars[[j[2]]], newdata), i)) else return(parse_ns(predvars[[j[2]]], eval(predvars[[j[1]]], newdata), i)) } }) ) } library(rstpm2) library(microsimulation) object <- gsm(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3,tvc=list(hormon=2)) newdata = data.frame(hormon=0:1) inflate = 100 (design <- gsm_design(object,newdata)) replicate(10,.Call("test_read_gsm", design, PACKAGE="microsimulation")) set.seed(1002) simulate(fit1, nsim=10, newdata=nd) simulate(fit1, newdata=data.frame(hormon=0:1)) set.seed(1002) replicate(10,.Call("test_read_gsm", design, PACKAGE="rstpm2")) set.seed(1002) range(replicate(10,.Call("test_read_gsm", gsm_design(fit1,nd), PACKAGE="rstpm2")) - simulate(fit1, nsim=10, seed=1002, newdata=nd)) library(microsimulation) set.seed(1002) replicate(10,.Call("test_read_gsm", gsm_design(fit1,nd), PACKAGE="microsimulation")) simulate.stpm2 <- function(object, nsim=1, seed=NULL, newdata=NULL, lower=1e-6, upper=1e5, ...) { stopifnot(!is.null(newdata)) if (!is.null(seed)) set.seed(seed) ## assumes nsim replicates per row in newdata n = nsim * nrow(newdata) newdata = newdata[rep(1:nrow(newdata), each=nsim), , drop=FALSE] e <- rexp(n) objective <- function(time) { newdata[[object@timeVar]] <- time predict(object, type="cumhaz", newdata=newdata) - e } vuniroot(objective, lower=rep(lower,length=n), upper=rep(upper,length=n))$root } setMethod("simulate", "stpm2", simulate.stpm2) setMethod("simulate", "pstpm2", simulate.stpm2) ## Can we estimate the hessian externally? library(rstpm2) library(numDeriv) fit = stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=4) summary(fit) fit@vcov = solve(numDeriv::hessian(fit@minuslogl, coef(fit))) summary(fit) ## Can we also use the gradient? (solve(numDeriv::jacobian(fit@args$gradnegll, coef(fit))) - solve(numDeriv::hessian(fit@minuslogl, coef(fit)))) |> range() (solve(numDeriv::jacobian(fit@args$gradnegll, coef(fit))) - vcov(fit)) |> range() ## Can we improve on accuracy using nsxD? This would also include predictions using C++ code. ## library(rstpm2) m <- rstpm2::stpm2(Surv(time, status) ~ sex, data = lung) predict(m, newdata = data.frame(sex = c(1,1), time = c(100,200)), type = "rmst", se.fit=TRUE) # ok plot(m, newdata = data.frame(sex = 1), type="rmst") plot(m, newdata = data.frame(sex = 1), var="sex", type="rmstdiff") ## Predictions for differences ## Can we return the predictions, gradients and covariance matrix and use those for differences? ## preddiff = pred1 - pred0 ## graddiff = grad1 - grad0 ## vardiff = graddiff^T Sigma graddiff ## Note that we can silently return attributes d = structure(data.frame(a=1), attr1=1:100, class=c("Test","data.frame")) print(d) # print.Test() does not exist, so this uses print.data.frame() ## What about gradients on transformed scales, ## with a transformation g and an inverse transformation G? ## var(eta1) = grad_eta1^T Sigma grad_eta1 ## and pred1 = G(eta1) ## => var(pred1) = var(G(eta1)) = grad(G(eta1))^T Sigma grad(G(eta1)) ## where grad1 = grad(G(eta1)) = G'(eta1) grad_eta1?? (chain rule) ## preddiff = pred1 - pred0 ## graddiff = grad1 - grad0 ## vardiff = graddiff^T Sigma graddiff ## add cure for the AFT models library(rstpm2) fit0 = aft(Surv(rectime,censrec==1)~hormon,data=brcancer,df=4) fit = aft(Surv(rectime,censrec==1)~hormon,data=brcancer,df=4,cure=TRUE) par(mfrow=1:2) plot(fit0,newdata=data.frame(hormon=0),main="Without cure") plot(fit,newdata=data.frame(hormon=0),main="With cure") cov2cor(vcov(fit0)) ## cure models paper library(cuRe) colonDC <- subset(cuRe::colonDC, stage %in% c("Regional","Distant")) colonDC <- transform(colonDC, stage=factor(stage), stageDistant=as.numeric(stage == "Distant"), bhaz = general.haz(time = "FU", rmap = list(age="agedays", sex = "sex", year = "dx"), data = colonDC, ratetable = survexp.dk)) fit.lat <- stpm2(Surv(FUyear, status) ~ stageDistant + bhazard(bhaz), data = colonDC, df = 4, cure = TRUE) fit.lat.timevar <- stpm2(Surv(FUyear, status) ~ stageDistant + bhazard(bhaz), data = colonDC, df = 4, cure = TRUE, tvc.formula = ~nsx(log(FUyear), df=4, cure=TRUE):stageDistant) fit.lat.timevar2 <- stpm2(Surv(FUyear, status) ~ stageDistant + bhazard(bhaz), data = colonDC, df = 4, cure = TRUE, tvc = list(stageDistant = 4)) predict(fit.lat, newdata = data.frame(FUyear = 0, stageDistant = 0), type = "probcure", se.fit = TRUE) predict(fit.lat, newdata = data.frame(FUyear = 2, stageDistant = 0), type = "uncured", se.fit = TRUE) predict(fit.lat.timevar, newdata = data.frame(FUyear = 2, stageDistant = 0), type = "probcure", se.fit = TRUE) plot(fit.lat, newdata = data.frame(stageDistant = 0), type = "probcure") plot(fit.lat, newdata = data.frame(stageDistant = 0), type = "surv") plot(fit.lat, newdata = data.frame(stageDistant = 0), type = "uncured") ## solve A x = b set.seed(12345) A = matrix(rnorm(18),3,byrow=TRUE) b = c(0,0,1) x = qr.solve(A,b) # under-determined system drop(A %*% x - b) # this is a solution -- noice! qmat = qr.Q(qr(t(A)), complete=TRUE)[, -(1:3), drop=FALSE] A %*% qmat %*% rnorm(3) # should be close to zero... A %*% qmat library(rstpm2) library(splines) print(ns1 <- nsx(1:10,df=4,intercept=TRUE)) ## check how to calculate q.const Bstar <- splineDesign(sort(c(rep(1,4),attr(ns1,"knots"),rep(10,4))), x=c(1,10),derivs=c(2,2)) qr.Q(qr(t(Bstar)), complete=TRUE)[,-(1:2), drop=FALSE] - attr(ns1, "q.const") # OK ## Now, solve: Bstar x = c(0,0,1) Bstar <- splineDesign(sort(c(rep(1,4),attr(ns1,"knots"),rep(10,4))), x=c(1,10,10),derivs=c(2,2,1)) b = c(0,0,1) x = qr.solve(Bstar,b) # magic sauce drop(Bstar %*% x) - b # OK qmat = qr.Q(qr(t(Bstar)), complete=TRUE)[,-(1:3), drop=FALSE] theta = rnorm(3) drop(Bstar %*% (drop(qmat %*% theta) + x)) - b # OK Bstar %*% qmat %*% theta + Bstar %*% x - b # OK ## transformation: theta -> B %*% (drop(qmat %*% theta) + x) = B %*% qmat %*% theta + B %*% x ## nsx(..., derivs, target=rep(0,length(derivs))) for target=b ## returns an attribute for offset=x ## For fixed design matrix, we get N = B %*% qmat and C = B %*% x, so that theta -> N %*% theta + C ## How will this affect the linear predictor? ## Rather than N(t)^T theta, we will have N(t)^T theta + C ## For the derivative, we get N' = B' %*% qmat and C' = B' %*% x, ## so that we have N'(t)^T theta + C' xs = seq(1,10,l=301) B <- splineDesign(sort(c(rep(1,4),attr(ns1,"knots"),rep(10,4))), x=xs) N = B %*% qmat C = B %*% x theta = rnorm(3) plot(xs, N %*% theta + C, type="l") ## Plot of derivatives... B <- splineDesign(sort(c(rep(1,4),attr(ns1,"knots"),rep(10,4))), x=xs, derivs=1) N = B %*% qmat C = B %*% x theta = rnorm(3) plot(xs, N %*% theta + C, type="l") ## How to predict outside of the boundaries? ## test for smoothpwc library(rstpm2) library(dplyr) relative_survival <- function(model1, smoothpwc, ...) { transmat = matrix(c(NA,1,2, NA,NA,NA, NA,NA,NA),3,3,byrow=TRUE) rownames(transmat) <- colnames(transmat) <- c("Initial","Cause-specific death","Other causes of death") rstpm2::markov_msm(list(model1,smoothpwc), ..., trans = transmat) } ## use popmort colon2 = inner_join(survival::colon |> filter(etype==2), mutate(popmort,sex=2-sex,rate) |> filter(year==2000), by=c("age","sex")) |> mutate(t=time/365.25) excess = gsm(Surv(t,status)~factor(rx)+bhazard(rate), data=colon2, df=3) smoothpwc1 = with(filter(popmort,sex==1 & year==2000),smoothpwc(age+0.5-70,rate)) # example is for men aged 70 years rs = relative_survival(excess, smoothpwc1, newdata=data.frame(rx="Obs"), t = seq(0,7, length=301)) plot(rs,ggplot=TRUE) ## Spline interpolation library(schumaker) d <- data.frame(x=(0:9)+0.5,y=((1:10)-5.5)^2) xs=seq(0,20,length=301) plot(xs,Schumaker(d$x,d$y,Extrapolation="Constant")$Spline(xs),type="l") points(y~x,data=d) library(splines) plot(xs,splinefun(d$x,d$y,method="natural")(xs),type="l") points(y~x,data=d) d2 <- rbind(d,transform(tail(d,1)[rep(1,2),],x=c(9.5+1e-7,9.5+2e-7))) xs=seq(9.5-1e-3,9.5+1e-3,length=301) plot(xs,splinefun(d2$x,d2$y,method="natural")(xs),type="l") points(y~x,data=d2) abline(h=20.25,lty=2) splinefun(d2$x,d2$y,method="natural")(15)-20.25 splinefunx <- function(x, y, method="natural", constant.left=FALSE, constant.right=FALSE, ...) { xstar <- x ystar <- y if (constant.right) { xstar <- c(xstar,tail(x,1)+(1:10)*1e-7) ystar <- c(ystar,rep(tail(y,1),10)) } if (constant.left) { xstar <- c(xstar,x[1]-(1:10)*1e-7) ystar <- c(ystar,rep(y[1],10)) } splinefun(xstar, ystar, ..., method=method) } splinefunx(d$x,d$y,constant.right=TRUE)(15)-20.25 splinefunx(d$x,d$y,constant.right=TRUE)(15000)-20.25 splinefunx(d$x,d$y)(9.5)-20.25 ## library(numDeriv) xs=seq(0,20,length=301) plot(xs, numDeriv::grad(splinefunx(d$x,d$y,constant.right=TRUE), xs), type="l") rug(d$x) xs=seq(9.5-1e-3,9.5+1e-3,length=301) plot(xs, numDeriv::grad(splinefunx(d$x,d$y,constant.right=TRUE), xs), type="l") ## Rather than using mid-points, we could use intervals and then assume a monotone smoother... numdiff <- function(f,x,eps=1e-5) (f(x+eps)-f(x-eps))/2/eps d <- transform(data.frame(x0=0:9,x1=(0:9)+1,delta=1), xmid=(x0+x1)/2, yint=((x1-5)^3-(x0-5)^3)/3) set.seed(12345) d <- transform(d, yint_ = yint*rgamma(NROW(d),10,10)) xs=seq(0,20,length=301) ## cumd <- transform(d, H=cumsum(c(0,diff(x0)*head(y0,-1)))) cumd <- with(d, data.frame(x=c(x0,tail(x1,1)), H=cumsum(c(0,diff(c(x0,tail(x1,1)))*yint)))) sf.natural <- splinefun(cumd$x,cumd$H, method="natural") sf.mono <- splinefun(cumd$x,cumd$H, method="hyman") plot(xs, numdiff(sf.natural,xs), type="l", ylim=c(0,25)) # constant from rhs of last interval lines(xs, splinefunx(d$xmid,d$yint,constant.right=TRUE)(xs), col="blue") # constant from mid-point curve((x-5)^2, col="green", add=TRUE) lines(xs, numdiff(sf.mono,xs), type="l", col="orange") # extrapolates nicely if quadratic ## now with variation in the outcome cumd <- with(d, data.frame(x=c(x0,tail(x1,1)+1), H=cumsum(c(0,diff(c(x0,tail(x1,1)+1))*yint_)))) sf <- splinefun(cumd$x,cumd$H, method="hyman") plot(xs, numdiff(sf,xs), type="l", ylim=c(0,35)) lines(xs, splinefunx(d$xmid,d$yint_,constant.right=TRUE)(xs), col="blue") curve((x-5)^2, col="green", add=TRUE) ## gradient for penalty in the AFT model library(Rcpp) library(RcppArmadillo) fdiff = \(f,x,eps=1e-5) sapply(1:length(x), \(i) (f("[<-"(x,i,x[i]+eps)) - f("[<-"(x,i,x[i]-eps)))/2/eps) src = "#include using namespace arma; // [[Rcpp::depends(RcppArmadillo)]] // [[Rcpp::export]] mat differenceMatrix(int n) { return join_rows(zeros(n-1,1),eye(n-1,n-1)) - join_rows(eye(n-1,n-1),zeros(n-1,1)); } // [[Rcpp::export]] vec quadraticPenalty(mat Q, vec beta) { vec delta = differenceMatrix(Q.n_rows) * Q * beta; return delta % delta % (delta<0); } // [[Rcpp::export]] vec gradientPenalty(mat Q, vec beta) { // Q: (nbeta+2) x nbeta size_t n = Q.n_rows; mat D = join_rows(zeros(n-1,1),eye(n-1,n-1)) - join_rows(eye(n-1,n-1),zeros(n-1,1)); // (nbeta+1) x (nbeta+2) vec delta = D * Q * beta; // nbeta+1 mat M = Q.t() * D.row(0).t() * D.row(0) * Q * (delta(0)<0.0); // nbeta x nbeta for(size_t j=1; jbetaStar(i+1)) { D(i,i) = -1.0; D(i,i+1) = 1.0; } mat M = Q.t() * D.t() * D * Q; // nbeta x nbeta return 2*M*beta; } " sourceCpp(code=src) ## library(rstpm2) Qmat = attr(nsx(1:10,df=3,intercept = TRUE),"q.const") # 5x3 differenceMatrix(nrow(Qmat)) # 4x5 quadraticPenalty(Qmat, c(1,-1,1)) # 4x1 as.vector(gradientPenalty(Qmat, c(1,-1,1))) as.vector(gradientPenalty2(Qmat, c(1,-1,1))) fdiff(\(x) sum(quadraticPenalty(Qmat, x)), c(1,-1,1), 1e-5) as.vector(gradientPenalty(Qmat, c(1,-1,1))) - fdiff(\(x) sum(quadraticPenalty(Qmat, x)), c(1,-1,1), 1e-3) quadraticPenalty(Qmat, c(1,-1,2)) # 4x1 gradientPenalty(Qmat, c(1,-1,2)) ## Error report by Joshua (now fixed) ## Load package library(rstpm2) ## Define start time brcancer2 <- transform(brcancer, startTime = ifelse(hormon == 0, rectime * 0.5, 0)) ## Stpm2 call without explicit specification of the type argument stpm2(Surv(startTime, rectime, censrec == 1) ~ hormon, data = brcancer2, df = 3) ## Stpm2 call with explicit specification of the type argument ## Do not run, this will cause your R-session to terminate ## debug(gsm) stpm2(Surv(startTime, rectime, censrec == 1, type = "counting") ~ hormon, data = brcancer2, df = 3) ## Are 2D thin-plate spline approximations in mgcv dense matrices? library(mgcv) df=expand.grid(u=seq(0,1,length=100),v=seq(0,1,length=100)) set.seed(12345) df=transform(df,y=rnorm(nrow(df),sin(u*2*pi)*cos(v*2*pi),0.1)) gam1=gam(y~s(u,v,bs="ts"),data=df) ## Email from Grace library(rstpm2) fit = gsm(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3) fit@lm attr(fit@lm$terms, "predvars") nd = data.frame(rectime=1000, hormon=0) rstpm2:::lpmatrix.lm(fit@lm, nd) %*% coef(fit) # or, more simply predict(fit@lm, nd) ## eps=1e-4 (predict(fit@lm, data.frame(rectime=1000+eps, hormon=0)) - predict(fit@lm, data.frame(rectime=1000-eps, hormon=0)))/2/eps ## or (predict(fit@lm, data.frame(rectime=1000*exp(eps), hormon=0)) - predict(fit@lm, data.frame(rectime=1000*exp(-eps), hormon=0)))/2/eps/1000 ## exp(predict(fit@lm, nd))* (predict(fit@lm, data.frame(rectime=1000*exp(eps), hormon=0)) - predict(fit@lm, data.frame(rectime=1000*exp(-eps), hormon=0)))/2/eps/1000 predict(fit, newdata=nd, type="haz") plot(fit,type="haz", newdata=data.frame(hormon=0)) ## biostat3: coxphHaz and coxphHazList as.data.frame.coxphHaz = function(x, row.names=NULL, optional = FALSE, ...) { newdata = attr(x,"newdata") ## To avoid "row names were found from a short variable and have been discarded": rownames(newdata) = NULL data.frame(newdata, x=x$x, y=x$y) } as.data.frame.coxphHazList = function(x, row.names=NULL, optional = FALSE, ...) { do.call(rbind, lapply(x, as.data.frame)) } fit <- coxph(Surv(surv_mm/12,status=="Dead: cancer")~agegrp, data=colon) newdata <- data.frame(agegrp=levels(colon$agegrp)) haz <- suppressWarnings(coxphHaz(fit,newdata)) library(ggplot2) head(as.data.frame(haz)) ggplot(as.data.frame(haz), aes(x=x,y=y,ymin=y.lower,ymax=y.upper,col=agegrp,fill=agegrp)) + geom_ribbon(alpha=0.5) + geom_line(col="black") # + facet_grid(~agegrp) ## Constrained MLE ## relative survival and LEL library(rstpm2) library(biostat3) head(melanoma) head(popmort) merged = merge(transform(melanoma, ye = floor(yexit), se=ifelse(sex=="Male",1,2), event=status != "Alive"), popmort, by.x=c("age","ye","se"), by.y=c("_age","_year","sex"), all.x=TRUE) merged = subset(merged, sex="Female") ## sum(is.na(merged$rate)) model1 = stpm2(Surv(surv_mm,event)~bhazard(rate),data=merged,df=3) # only intercept - was buggy ## model2 = stpm2(Surv(surv_mm,event)~age+bhazard(rate)+cluster(age),data=merged,df=3) ## model1 = stpm2(Surv(surv_mm,event)~1,data=merged,df=3,bhazard=merged$rate) ## mean(subset(merged,event)$yexit) ## We need event counts and person-time in the popmort data-frame library(dplyr) temp = cbind(filter(popmort,sex==1) %>% mutate(males=rate,rate=NULL,sex=NULL), data.frame(females=filter(popmort,sex==2)$rate)) plot(males~females,temp,log="xy") abline(0,1,col=2) ## d = data.frame(y=c(1000,2000),sex=0:1,pt=1e5) # some data fit = glm(y~sex+offset(log(pt)),data=d, family=poisson) # Poisson regression pred = predict(fit,newdata=transform(d,pt=1),type="link",se.fit=TRUE) with(pred, exp(data.frame(Estimate=fit,Lower=fit-1.96*se.fit,Upper=fit+1-96*se.fit))) library(rstpm2) exp(confint(predictnl(fit,predict,newdata=transform(d,pt=1),type="response"))) set.seed(12345) n = 1000 x = factor(rbinom(n,1,0.5)) y = rnorm(n, x==1) fit = lm(y~x) predict(fit, newdata=data.frame(x=0)) # fails predict(fit, newdata=data.frame(x=1)) # fails predict(fit, newdata=data.frame(x=factor(0:1))) # okay predict(fit, newdata=data.frame(x=factor(0,0:1))) # okay ## library(expm) ## K = 2 Z = c(1,0,0) Y = c(1000,2000); N = 1e4; Sigma=diag(Y/N^2) lambda=Y/N ## alpha=cbind(c(-sum(lambda),lambda),matrix(0,K+1,K)) expm(alpha*10) # ok ## Z = c(1,rep(0,(1+2)*3-1)) alpha=cbind(c(-sum(lambda),lambda,-1,-1,1,0,0,1),matrix(0,length(Z),length(Z)-1)) expm(alpha) m=matrix(expm(alpha)[4:9,1],2,3) t(m) %*% Sigma %*% m # includes covariance terms expm(alpha*10) m=matrix(expm(alpha*10)[4:9,1],2,3) sqrt(diag(t(m) %*% Sigma %*% m)) # includes covariance terms ## This approach works! ## We can add further differential equations for the covariance matrix for P (for a time-dependent covariance matrix for the parameters) library(deSolve) func <- function(t, y, parms, ...) { ## browser() index = findInterval(t,parms$t) lambda = parms$lambda[index,] # length=K K = length(lambda) q = rbind(c(-sum(lambda),lambda),matrix(0,K,K+1)) # length=K+1 Qm = lapply(1:K,function(k) { m = matrix(0,K+1,K+1) m[1,1] = -1 m[1,k+1] = 1 m }) Sigma = parms$Sigma[[index]] # dim=K^2 P = y[1:(K+1)] # K+1 rowvec Pm = matrix(y[(2+K):(1+K+K*(K+1))],K+1,K) # (K+1)*K dPdt = P %*% q dPmdt = sapply(1:K, function(k) Pm[,k] %*% q + P %*% Qm[[k]]) # (K+1)*K dphidt = dPmdt %*% (Sigma %*% t(Pm)) + Pm %*% (Sigma %*% t(dPmdt)) # (K+1)^2 list(c(dPdt, dPmdt, dphidt)) # length=1+K+(K+1)*K+(K+1)^2 } Y = c(1000,2000); N = 1e4; Sigma=diag(Y/N^2); K=2 lambda = Y/N y = c(1,rep(0,K+K*(K+1)+(K+1)^2)) parms = list(t=c(0,Inf), lambda=rbind(lambda), Sigma=list(Sigma)) ode1 = ode(y,seq(0,10),func,parms) ode1[11,2:4] sqrt(diag(matrix(ode1[11,11:19],3,3))) ode1 ## Using markov_msm (with an *identity* link for comparability) fit1=glm(rate~1, data=data.frame(rate=0.1,N=1e4), family=poisson(link="identity"), weight=N) fit2=glm(rate~1, data=data.frame(rate=0.2,N=1e4), family=poisson(link="identity"), weight=N) test=markov_msm(list(fit1,fit2), trans=matrix(c(NA,1,2,NA,NA,NA,NA,NA,NA),3,3,TRUE), t=0:10, newdata=data.frame(N=1), tmvar="t") print(test,se=TRUE) test$Pu ## fit1=glm(rate~factor(findInterval(t,0:2))-1, data=data.frame(t=0:2,rate=c(0.1,0.2,0.3),N=1e4), family=poisson(link="identity"), weight=N) fit1=glm(Y~offset(log(N)), data=data.frame(Y=1000,N=1e4), family=poisson) fit2=glm(Y~offset(log(N)), data=data.frame(Y=2000,N=1e4), family=poisson) test=markov_msm(list(fit1,fit2), trans=matrix(c(NA,1,2,NA,NA,NA,NA,NA,NA),3,3,TRUE), t=0:10, newdata=data.frame(N=1), tmvar="t") print(test,se=TRUE) ## Inquiry library(survival) library(rstpm2) m <- rstpm2::stpm2(Surv(time, status) ~ sex, data = lung) plot(m, newdata=data.frame(sex=2)) predict(m, newdata = data.frame(sex = 2, time = 100), type = "rmst", se.fit=TRUE) ## plot(m, newdata=data.frame(sex=2), type="rmst") # fails ## predict(m, newdata = data.frame(sex = 1:2, time = 100), type = "rmst", se.fit=TRUE) # fails ## Inquiry ## Cross-validation with bias correction (Davison and Hinkley, 1997, p. 295) crossValidation = function(object,data,prediction,K = 10,correction=TRUE) { N = nrow(data) ndex = 1:N part = as.factor(sample(1:K, N, replace = TRUE)) folds = split(ndex, part) p = sapply(folds, length)/N sum(sapply(1:K, function(k) { training = data[unlist(folds[-k]), ] validation = data[folds[[k]], ] objectStar = update(object,data=training) prediction(objectStar,validation) - p[k]*prediction(objectStar,data)*correction })) + prediction(object,data)*correction } library(rstpm2) setMethod("update", "stpm2", function(object, ...) { # I have added this to rstpm2 on GitHub object@call = object@Call update.default(object, ...) }) fit = gsm(Surv(rectime,censrec==1)~hormon+x1+x2+x3,data=brcancer,df=3) set.seed(420) ## Currently, there is no simpler way to predict out-of-sample -2*log-likelihood for an stpm2 model predictDeviance = function(eventVar) function(object,newdata) { events = newdata[[eventVar]]>0 -2*(sum(predict(object,newdata=newdata[events,,drop=FALSE], type="loghazard"))- sum(predict(object,newdata=newdata,type="cumhaz"))) } crossValidation(fit, brcancer,predictDeviance("censrec")) set.seed(420) crossValidation(fit, brcancer,predictDeviance("censrec"),correction=FALSE) AIC(fit) BIC(fit) library(rstpm2) library(survival) library(timereg) library(ggplot2) library(lattice) ## Two states: Initial -> Final ## Note: this shows how to use markov_msm to estimate survival and risk probabilities based on ## smooth hazard models. two_states <- function(model, ...) { transmat = matrix(c(NA,1,NA,NA),2,2,byrow=TRUE) rownames(transmat) <- colnames(transmat) <- c("Initial","Final") markov_sde(list(model), ..., trans = transmat) } ## Note: the first argument is the hazard model. The other arguments are arguments to the ## markov_msm function, except for the transition matrix, which is defined by the new function. colon2 <- transform(survival::colon, Obs=(rx=="Obs")+0, Lev=(rx=="Lev")+0,Lev_5FU=(rx=="Lev+5FU")+0) death = aalen(Surv(time,status)~Lev+Lev_5FU, data=subset(colon2,etype==2)) cr = two_states(death, newdata=data.frame(Lev=0,Lev_5FU=0)) plot(cr,ggplot=TRUE,stacked=FALSE,which="L") # ok plot(cr,ggplot=TRUE,which="L") # ok - is this sensible? plot(cr) # ok -- no legend plot(cr,ggplot=TRUE) # ok plot(cr,lattice=TRUE) # ok -- no legend plot(cr,ggplot=TRUE,stacked=FALSE) # ok ## competing_risks <- function(models, ...) { transmat = matrix(c(NA,1,2, NA,NA,NA, NA,NA,NA),3,3,byrow=TRUE) rownames(transmat) <- colnames(transmat) <- c("Initial","Event 1","Event 2") markov_sde(models, ..., trans = transmat) } ## Note: the first argument is the hazard model. The other arguments are arguments to the ## markov_msm function, except for the transition matrix, which is defined by the new function. colon2 <- transform(survival::colon, Obs=(rx=="Obs")+0, Lev=(rx=="Lev")+0,Lev_5FU=(rx=="Lev+5FU")+0) progression = aalen(Surv(time,status)~Lev+Lev_5FU, data=subset(colon2,etype==1)) death = aalen(Surv(time,status)~Lev+Lev_5FU, data=subset(colon2,etype==2)) cr = competing_risks(list(progression,death), newdata=data.frame(Lev=0,Lev_5FU=0)) plot(cr) ## illness_death <- function(models, ...) { transmat = matrix(c(NA,1,2, NA,NA,3, NA,NA,NA),3,3,byrow=TRUE) rownames(transmat) <- colnames(transmat) <- c("Initial","Illness","Death") markov_sde(models, ..., trans = transmat) } ## Note: the first argument is the hazard model. The other arguments are arguments to the ## markov_msm function, except for the transition matrix, which is defined by the new function. colon2 <- transform(survival::colon, Obs=(rx=="Obs"), Lev=(rx=="Lev"),Lev_5FU=(rx=="Lev+5FU")) d1 = subset(colon2,etype==1) d2 = subset(colon2,etype==2) index = d1$time < d2$time & d1$status==1 # index for initial -> recurrence d12 = d1 # initial -> recurrence d23 = transform(d2[index,], enter=d1$time[index]) # recurrence -> death d13 = transform(d2, # initial -> death time=pmin(d1$time,d2$time), status=ifelse(d1$time==d2$time,d2$status,0)) progression = aalen(Surv(time,status)~Lev+Lev_5FU, data=d12) death1 = aalen(Surv(time,status)~Lev+Lev_5FU, data=d13) death2 = aalen(Surv(enter,time,status)~Lev+Lev_5FU, data=d23) cr = illness_death(list(progression,death1,death2), newdata=data.frame(Lev=FALSE,Lev_5FU=FALSE)) # fails plot(cr) ## illness_death = function(models, ...) { transmat = matrix(c(NA,1,2,NA, NA,NA,NA,3, NA,NA,NA,NA, NA,NA,NA,NA),4,4,byrow=TRUE) rownames(transmat) <- colnames(transmat) <- c("Initial","Illness","DirectDeath","DeathAfterRecurrence") markov_sde(models, ..., trans = transmat) } ## Note: the first argument is the hazard model. The other arguments are arguments to the ## markov_msm function, except for the transition matrix, which is defined by the new function. colon2 = transform(survival::colon, Obs=(rx=="Obs")+0, Lev=(rx=="Lev")+0, Lev_5FU=(rx=="Lev+5FU")+0) d1 = subset(colon2,etype==1) d2 = subset(colon2,etype==2) index = d1$time < d2$time & d1$status==1 # index for initial -> recurrence d12 = d1 # initial -> recurrence d23 = transform(d2[index,], enter=d1$time[index]) # recurrence -> death d13 = transform(d2, # initial -> death time=pmin(d1$time,d2$time), status=ifelse(d1$time==d2$time,d2$status,0)) progression = aalen(Surv(time,status)~Lev+Lev_5FU, data=d12) death1 = aalen(Surv(time,status)~Lev+Lev_5FU, data=d13) death2 = aalen(Surv(enter,time,status)~Lev+Lev_5FU, data=d23) cr = illness_death(list(progression,death1,death2), newdata=data.frame(Lev=0,Lev_5FU=0:1), weights=c(-1,1), los=FALSE) plot(cr,ggplot=TRUE) + facet_grid(~Lev_5FU) # ok plot(cr,ggplot=TRUE,stacked=FALSE) + facet_grid(state~Lev_5FU) # ok plot(cr,ggplot=TRUE,stacked=FALSE,which="L") + facet_grid(state~Lev_5FU) # ok plot(standardise(cr),stacked=FALSE,ggplot=TRUE) # CIs are *very* wide -- error? ## Non-parametric baseline: SDE approach due to Ryalen and colleagues markov_sde <- function(models, trans, newdata, init=NULL, nLebesgue=1e4+1, los=FALSE, nOut=300, weights=1) { transfun <- function(tmat) { indices <- sort(as.vector(tmat)); indices <- setdiff(indices,NA) nStates <- nrow(tmat) out <- do.call(rbind, lapply(indices, function(i) { index2 <- which(tmat == i) from <- (index2-1) %% nStates +1 to <- (index2-1) %/% nStates + 1 data.frame(from=from,to=to) })) matrix(as.integer(as.matrix(out)),nrow(out))-1L } ## TODO check parameters nStates <- nrow(trans) nTrans <- sum(!is.na(trans)) if (is.null(init)) { init <- c(1,rep(0,nStates-1)) } stopifnot(length(init)==nStates) ## init <- rep(init,nrow(newdata)) n <- sum(sapply(models,attr,"orig.max.clust")) cumHazList <- lapply(models, function(object) -t(log(predict(object, newdata=newdata, se=FALSE)$S0))) timesList <- lapply(models, function(model) model$cum[,1]) eventTimes <- times <- sort(unique(unlist(timesList))) hazList <- lapply(1:nrow(newdata), function(i) { hazMatrix <- matrix(0,nrow=nTrans,ncol=length(times)) for (j in 1:nTrans) { hazMatrix[j,match(timesList[[j]],times)] <- diff(c(0,cumHazList[[j]][,i])) } hazMatrix }) hazMatrix <- do.call(rbind,hazList) if (length(weights)==1 && weights != 0) weights <- rep(weights,nrow(newdata))/(weights*nrow(newdata)) vcov <- matrix(1,1,1) if (!los) { out <- .Call("plugin_P_by", n, nrow(newdata), hazMatrix, init, transfun(trans), weights, vcov, PACKAGE="rstpm2") out$P <- out$X out$P.se <- sqrt(out$variance) } else { out <- .Call("plugin_P_L_by", n, nrow(newdata), hazMatrix, init, transfun(trans), times, weights, nOut, vcov, nLebesgue, PACKAGE="rstpm2") PIndex <- 1:(nrow(out$X)/2) tr <- function(x) array(as.vector(x), dim=c(nStates,nrow(newdata),ncol(x))) out$P <- tr(out$X[PIndex,]) out$L <- tr(out$X[-PIndex,]) out$P.se <- sqrt(tr(out$variance[PIndex,])) out$L.se <- sqrt(tr(out$variance[-PIndex,])) } out$n <- n out$times <- if (los) out$time else times out$newdata <- newdata out$trans <- trans out$los <- los out$init <- init out$weights <- weights class(out) <- "markov_sde" if (!all(weights==0)) { stand <- out # Warning: copy! This may be a bad idea... if (!los) { stand$P <- stand$Y stand$P.se <- sqrt(stand$varY) } else { PIndex <- 1:(nrow(out$Y)/2) stand$P <- stand$Y[PIndex,] stand$L <- stand$Y[-PIndex,] stand$P.se <- sqrt(stand$varY[PIndex,]) stand$L.se <- sqrt(stand$varY[-PIndex,]) } ## tidy up stand$X <- stand$Y stand$variance <- stand$varY ## out$X <- out$variance <- out$Y <- out$varY <- stand$Y <- stand$varY <- NULL stand$newdata <- stand$newdata[1,,drop=FALSE] class(stand) <- "markov_sde" out$stand <- stand } out } standardise.markov_sde <- function(object) { structure(object$stand, class="markov_sde") } cr = two_states(death, newdata=data.frame(rx=levels(survival::colon$rx))) cr = two_states(death, newdata=data.frame(Obs=1)) plot.markov_sde <- function(x, y, stacked=TRUE, which=c("P","L"), xlab="Time", ylab=NULL, col=2:6, border=col, ggplot2=FALSE, lattice=FALSE, alpha=0.2, strata=NULL, ...) { stopifnot(inherits(x,"markov_sde")) which <- match.arg(which) if (!missing(y)) warning("y argument is ignored") ## ylab defaults if (is.null(ylab)) ylab <- if(which=='P') "Probability" else "Length of stay" if (ggplot2) rstpm2:::ggplot.markov_msm(x, which=which, stacked=stacked, xlab=xlab, ylab=ylab, alpha=alpha, ...) else if (lattice) rstpm2:::xyplot.markov_msm(x, which=which, stacked=stacked, xlab=xlab, ylab=ylab, col=col, border=border, strata=strata, ...) else { if (is.null(index) && nrow(x$newdata)>1) { warning("More than one set of covariates; defaults to weighted estimator") x <- x$stand # Warning: replacement index <- 1 } browser() if (is.null(index)) index <- 1 df <- merge(x$newdata[index,,drop=FALSE], as.data.frame(x)) states <- unique(df$state) if (stacked) { out <- graphics::plot(range(x$times, na.rm=TRUE),0:1, type="n", xlab=xlab, ylab=ylab, ...) lower <- 0 for (i in length(states):1) { # put the last state at the bottom df2 <- df[df$state==states[i],] if (length(lower)==1) lower <- rep(0,nrow(df2)) upper <- lower+df2[[which]] graphics::polygon(c(df2$time,rev(df2$time)), c(lower,rev(upper)), border=border[i], col=col[i]) lower <- upper } graphics::box() invisible(out) } else stop('Unstacked plot not implemented in base graphics; use ggplot2=TRUE or lattice=TRUE') } } plot(standardise(cr)) # ok (no legend) plot(standardise(cr), ggplot=TRUE) # ok (includes legend) plot(standardise(cr), lattice=TRUE) # ok (no legend) plot(cr, ggplot=TRUE) + facet_grid(~ rx) # ok plot(cr, index=1) # incorrect plot(cr, ggplot=TRUE, index=1) # incorrect plot(cr, lattice=TRUE, index=1) # incorrect plot(standardise(cr), ggplot=TRUE, stacked=FALSE) # incorrect: wrong CIs for standardised estimates plot(cr, ggplot=TRUE, stacked=FALSE) + facet_grid(~ rx) # incorrect as.data.frame.markov_sde <- function(x, row.names=NULL, ci=TRUE, P.conf.type="logit", L.conf.type="log", P.range=c(0,1), L.range=c(0,Inf), ...) { if (any(x$weights<0)) P.conf.type <- L.conf.type <- "plain" .id. <- 1:nrow(x$newdata) nStates <- nrow(x$trans) state.names <- rownames(x$trans) stateNames <- if (!is.null(rownames(x$trans))) rownames(x$trans) else 1:nrow(x$trans) out <- expand.grid(state=stateNames, .id.=.id., time=x$times) out <- cbind(x$newdata[out$.id.,],out) names(out)[1:ncol(x$newdata)] <- colnames(x$newdata) out$P <- as.vector(x$P) out$P.se <- as.vector(x$P.se) out <- out[order(out$.id.,out$state,out$time),] out$.id. <- NULL if (ci) { tmp <- rstpm2:::surv.confint(out$P,out$P.se, conf.type=P.conf.type, min.value=P.range[1], max.value=P.range[2]) out$P.lower <- tmp$lower out$P.upper <- tmp$upper } if (x$los) { out$L <- as.vector(x$L) out$L.se <- as.vector(x$L.se) if (ci) { tmp <- rstpm2:::surv.confint(out$L,out$L.se, conf.type=L.conf.type, min.value=L.range[1], max.value=L.range[2]) out$L.lower <- tmp$lower out$L.upper <- tmp$upper } } if(!is.null(row.names)) rownames(out) <- row.names out } temp = as.data.frame(cr) head(temp) ggplot(temp,aes(x=time,y=P,fill=rx,ymin=P.lower,ymax=P.upper)) + geom_line() + geom_ribbon(alpha=0.5) + facet_grid(state~rx) # ok rstpm2:::ggplot.markov_msm(temp) + facet_grid(~rx) # ok ## Competing risks ## Note: this shows how to adapt the markov_msm model for competing risks. competing_risks <- function(listOfModels, ...) { nRisks = length(listOfModels) transmat = matrix(NA,nRisks+1,nRisks+1) transmat[1,1+(1:nRisks)] = 1:nRisks rownames(transmat) <- colnames(transmat) <- c("Initial",names(listOfModels)) rstpm2::markov_msm(listOfModels, ..., trans = transmat) } ## Note: The first argument for competing_risks is a list of models. Names from that list are ## used for labelling the states. The other arguments are as per the markov_msm function, ## except for the transition matrix, which is defined by the competing_risks function. recurrence = gsm(Surv(time,status)~factor(rx), data=survival::colon, subset=(etype==1), df=3) death = gsm(Surv(time,status)~factor(rx), data=survival::colon, subset=(etype==2), df=3) cr = competing_risks(list(Recurrence=recurrence,Death=death), newdata=data.frame(rx=levels(survival::colon$rx)), t = seq(0,2500, length=301)) ## Plot the probabilities for each state for three different treatment arms plot(cr, ggplot=TRUE) + facet_grid(~ rx) ## And: differences in probabilities cr_diff = diff(subset(cr,rx=="Lev+5FU"),subset(cr,rx=="Obs")) plot(cr_diff, ggplot=TRUE, stacked=FALSE) ## Example using Crowther and Lambert (2018) library(readstata13) library(transform.hazards) library(timereg) library(rstpm2) # standardise mex.1 <- read.dta13("https://fmwww.bc.edu/repec/bocode/m/multistate_example.dta") transmat <- rbind("Post-surgery"=c(NA,1,2), "Relapsed"=c(NA,NA,3), "Died"=c(NA,NA,NA)) colnames(transmat) <- rownames(transmat) mex.2 <- transform(mex.1,osi=(osi=="deceased")+0) levels(mex.2$size)[2] <- ">20-50 mm" # fix typo mex <- mstate::msprep(time=c(NA,"rf","os"),status=c(NA,"rfi","osi"), data=mex.2,trans=transmat,id="pid", keep=c("age","size","nodes","pr_1","hormon")) mex <- transform(mex, size2=(unclass(size)==2)+0, # avoids issues with TRUE/FALSE size3=(unclass(size)==3)+0, hormon=(hormon=="yes")+0, Tstart=Tstart/12, Tstop=Tstop/12) ## Slow fitting... c.ar <- aalen(Surv(Tstart,Tstop,status) ~ const(age) + size2 + size3 + const(nodes) + pr_1 + const(hormon), data = subset(mex, trans==1)) c.ad <- aalen(Surv(Tstart, Tstop, status) ~ const(age) + const(size) + const(nodes) + const(pr_1) + const(hormon), data = subset(mex, trans==2)) c.rd <- aalen( Surv(Tstart,Tstop,status) ~ const(age) + const(size) + const(nodes) + pr_1 + const(hormon), data=subset(mex, trans==3)) ## nd <- expand.grid(nodes=seq(0,20,10), size=levels(mex$size)) nd <- transform(nd, age=54, pr_1=3, hormon=0, size2=(unclass(size)==2)+0, size3=(unclass(size)==3)+0) system.time(fit1 <- markov_sde(list(c.ar,c.ad,c.rd), trans=transmat, newdata=nd[c(1,2),], los=TRUE)) system.time(fit0 <- markov_sde(list(c.ar,c.ad,c.rd), trans=transmat, newdata=nd[c(1,2),])) plot(fit1) plot(fit1, ggplot=TRUE) plot(fit1, lattice=TRUE, stacked=FALSE, which="L") ## plot(fit1,which="L",xlim=NULL) # does this make sense? df1 <- as.data.frame(fit1) fit2 <- standardise(fit1) plot(fit2,ggplot=TRUE) df2 <- as.data.frame(fit2) diff1 <- diff(fit1) plot(diff1) ## type="af" library(rstpm2) fit = gsm(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3) plot(fit,type="af",newdata=brcancer, exposed=function(data) transform(data,hormon=1)) pred2 <- predict(fit,type="af",newdata=brcancer, grid=TRUE, exposed=function(data) transform(data,hormon=1), se.fit=TRUE,full=TRUE) with(pred2, matplot(rectime,cbind(Estimate,lower,upper),type="l",lty=c(1,2,2), col=1, xlab="Time since treatment (days)", ylab="PAF", ylim=c(0,0.5))) ## Bug report from Joshua # Loading rstpm2 package library(rstpm2) # Fit stmp2 model using breastcancer data set fpm_model <- stpm2(Surv(rectime, censrec) ~ hormon, data = brcancer, df = 3, tvc = list("hormon" = 3)) # Predict hazard difference comparing hormon users and non-users # using full=TRUE option for obtaining a full data set for ggplot() predict( fpm_model, type = "hdiff", newdata = data.frame(hormon = 0), var = "hormon", grid = TRUE, se.fit = TRUE, full = TRUE) ## test plots for PO models with random effects library(rstpm2) set.seed(12345) logit <- binomial()$linkfun expit <- binomial()$linkinv Spo <- function(eta) expit(eta) eta <- function(t) {} rPOgamma <- function(n,eta,theta,eps=1e-16) { ## U <- pmin(runif(n),1-eps) U <- runif(n) ## solve_t(U=expit(-eta(t))^theta), where eta(0)=-Inf and eta(Inf)=Inf V <- -logit(U^(1/theta)) if (any(is.na(V))) browser() c(list(U=U),vuniroot(function(t) eta(t)-V, lower=rep(1e-50,n), upper=rep(1e100,n))) } t0 <- rPOgamma(1e5, function(t) log(t), 1) with(t0,plot(U,root,log="y")) t1 <- rPOgamma(n=1e5, function(t) log(t), rgamma(10,1)) range(t0$root) range(t1$root) plot(density(log(t0$root))) ## Plots with three levels library(rstpm2) table(brcancer$x4) # cancer stage ## define indicators for the "exposed" levels (*R* needs these to be numeric) d <- transform(brcancer, x4.2=(x4==2)+0, x4.3=(x4==3)+0) ## fit the model with the indicators as main effects and with tvc fit <- stpm2(Surv(rectime,censrec==1)~x4.2+x4.3,data=d,df=3,tvc=list(x4.2=2,x4.3=2)) ## predict for each exposure level pred2 <- predict(fit,newdata=data.frame(x4.2=0,x4.3=0),type="hr",var="x4.2", grid=TRUE, full=TRUE, se.fit=TRUE) pred3 <- predict(fit,newdata=data.frame(x4.2=0,x4.3=0),type="hr",var="x4.3", grid=TRUE, full=TRUE, se.fit=TRUE) pred <- transform(rbind(pred2,pred3), x4=factor(ifelse(x4.2,2,3))) library(ggplot2) ggplot(pred, aes(x=rectime, y=Estimate, ymin=lower, ymax=upper, fill=x4, col=x4)) + geom_line() + coord_cartesian(ylim=c(0,10)) + geom_ribbon(alpha=0.3, col=NA) ## how to extract the design information library(rstpm2) fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3) names(fit@model.frame) attributes(fit@model.frame[[3]]) fit@lm$terms fit@x <- matrix() ls(environment(fit@model.frame)) ## withEnvs <- sapply(slotNames(fit),function(nm) !is.null(environment(slot(fit,nm)))) slotNames(fit)[withEnvs] lapply(slotNames(fit)[withEnvs],function(nm) ls(environment(slot(fit,nm)))) ## withEnvs <- sapply(fit@args,function(obj) !is.null(environment(obj))) names(fit@args)[withEnvs] lapply(fit@args[withEnvs],function(obj) ls(environment(obj))) ## bug in predict for meansurv library(rstpm2) library(ggplot2) fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3) ## Easy to use plot and lines functions plot(fit, newdata=transform(brcancer, hormon=0), type="meansurv") lines(fit, newdata=transform(brcancer, hormon=1), type="meansurv",lty=2) ## More tedious to do for different covariate patterns with ggplot2 pred0 <- predict(fit, newdata=transform(brcancer, hormon=0), type="meansurv", full=TRUE, se.fit=TRUE, grid=TRUE) pred1 <- predict(fit, newdata=transform(brcancer, hormon=1), type="meansurv", full=TRUE, se.fit=TRUE, grid=TRUE) ## bug with values returned AsIs - I'll try to fix this unAsIs <- function(object) if (inherits(object,"AsIs")) "class<-"(object, setdiff(class(object), "AsIs")) else object pred <- rbind(transform(unAsIs(pred1),hormon=1),transform(unAsIs(pred0),hormon=0)) pred <- transform(pred, Hormone=ifelse(hormon==1,"Yes","No")) ggplot(pred, aes(x=rectime,y=Estimate,ymin=lower,ymax=upper,fill=Hormone)) + xlab("Time since diagnosis (years)") + ylab("Standardised survival") + geom_ribbon(alpha=0.6) + geom_line() ## bug in predict for meansurv library(devtools) install_github("mclements/rstpm2", ref="develop") library(rstpm2) fit.tvc <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3, tvc=list(hormon=3)) out <- predict(fit.tvc, newdata=transform(brcancer,hormon=1),type="meansurv",grid=TRUE, se.fit=TRUE, full=TRUE) ## bug in plot(..., xlab="Something") library(rstpm2) fit <- stpm2(Surv(rectime, censrec)~hormon, data=brcancer,df=3) plot(fit, newdata=data.frame(hormon=1), type="hr", xlab="Time since diagnosis (years)", var="hormon", ylab="Hazard ratio", main="Lung cancer-BMI") ## predict linear predictor library(rstpm2) fit <- stpm2(Surv(rectime, censrec)~hormon, data=brcancer,df=3) predict(fit, newdata=data.frame(hormon=0:1, rectime=1000), type="link") predict(fit, newdata=data.frame(hormon=1, rectime=1000), type="link") - predict(fit, newdata=data.frame(hormon=0, rectime=1000), type="link") ## library(rstpm2) library(Hmisc) d <- local({ set.seed(12345) x <- rep(0:1,length=200) y <- rexp(length(x), exp(-3+x)) data.frame(x,y,e=TRUE) }) fit0 <- stpm2(Surv(y, e)~1, data=d,df=3) fit <- stpm2(Surv(y, e)~x, data=d,df=3) rcorr.cens(predict(fit0,type="link")-predict(fit0,newdata=transform(d,x=0),type="link"), with(d,Surv(y,e))) rcorr.cens(-predict(fit,type="link")+ predict(fit,newdata=transform(d,x=0),type="link"), with(d,Surv(y,e))) summary(fit) ## testing - Bug requires loading devtools *before* rstpm2 setwd("~/src/R/rstpm2") library(devtools) devtools::test() ## offset library(rstpm2) brcancer2 <- transform(brcancer,off=0.1) fit <- stpm2(Surv(rectime,censrec==1)~hormon, df = 2, data=brcancer2) fit2 <- stpm2(Surv(rectime,censrec==1)~hormon+offset(off), df = 2, data=brcancer2) fit fit2 zeroModel(fit) ## p-value for survival differences library(rstpm2) fit <- stpm2(Surv(rectime,censrec==1)~hormon, df = 2, data=brcancer,tvc=list(hormon=2)) test <- predict(fit, type="sdiff", var="hormon", newdata=data.frame(hormon=0,rectime=500),se.fit=TRUE) z <- test[1,1]/((test[1,3]-test[1,2])/2/qnorm(0.975)) 2*pnorm(-abs(z)) ## ## test the difference in survival rates test <- predictnl(fit, function(object,newdata=NULL) { lp1 <- predict(object, newdata=data.frame(hormon=1,rectime=500), type="surv") lp2 <- predict(object, newdata=data.frame(hormon=0,rectime=500), type="surv") lp1-lp2 }) with(test, c(fit=fit, se.fit=se.fit, statistic=fit/se.fit, p=2*pnorm(-abs(fit/se.fit)))) ## same p-value as predict(..., type="sdiff") ## s18 <- summary(survfit(Surv(rectime,censrec==1)~hormon,data=brcancer),time=500) z2<-diff(s18$surv)/sqrt(sum(s18$std.err^2)) 2*pnorm(-abs(z2)) ## library(bpcp) with(brcancer, fixtdiff(rectime,censrec,hormon,500,doall = TRUE)) ## bug #3 on GitHub library(rstpm2) # Function with model_formula paramter **ERRORS** f_bad <- function(model_formula) stpm2(formula = model_formula, df = 2, data=brcancer, link.type = "PH") # Function with formula paramter to match the name pf stpm2's formula parameter **PASSES** f_good <- function(formula) stpm2(formula = formula, df = 2, data=brcancer, link.type = "PH") f_bad (model_formula = Surv(rectime,censrec==1)~hormon) f_good (formula = Surv(rectime,censrec==1)~hormon) ## random draws library(rstpm2) predict.cumhaz <- function(object, newdata=NULL, ...) { stopifnot(inherits(object,"stpm2") || inherits(object,"pstpm2")) args <- object@args beta <- coef(object) if (is.null(newdata)) X <- args$X else if (inherits(object, "stpm2")) { X <- object@args$transX(lpmatrix.lm(object@lm, newdata), newdata) } else if (inherits(object, "pstpm2")) { X <- object@args$transX(predict(object@gam, newdata, type="lpmatrix"), newdata) } link <- object@link # cf. link for transformation of the predictions eta <- as.vector(X %*% beta) link$H(eta) } simulate <- function(object, nsim=nrow(as.data.frame(newdata)), newdata=as.data.frame(object@data), lower=1e-6, upper=1e5, ...) { e <- rexp(nsim) objective <- function(time) { newdata[[object@timeVar]] <- time predict.cumhaz(object, type="cumhaz", newdata=newdata) - e } vuniroot(objective, lower=rep(lower,length=nsim), upper=rep(upper,length=nsim))$root } fit1 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer) set.seed(12345) d <- do.call(rbind,lapply(1:100,function(i) brcancer)) system.time(r <- simulate(fit1, newdata=d)) length(r) library(rstpm2) fit1 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer) negll2 <- function(beta,svalues,missingIndicator,k=2) sapply(svalues, function(s) fit1@args$logli2(coef(fit1),ifelse(missingIndicator, s*coef(fit1)[k], 0))) head(negll2(coef(fit1),c(0,1),(1:686) %% 2)) ## values for the tests library(rstpm2) brcancer2 <- transform(rstpm2::brcancer, w=1) brcancer2$w[1] <- NA fit1 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w) nd <- data.frame(hormon=1,rectime=1000) predict(fit1, newdata=nd, type="surv") predict(fit1, newdata=nd, type="fail") predict(fit1, newdata=nd, type="haz") predict(fit1, newdata=nd, type="hr",var="hormon") predict(fit1, newdata=nd, type="hdiff", var="hormon") ## predictions for relative survival (email from Anke Richters) set.seed(12345) d <- with(list(t0=rexp(10000), # constant hazard t1=rweibull(10000, 1.5), # rising hazard bg=rexp(2*10000)), # constant background hazard (rate=1) data.frame(t=pmin(bg,c(t0,t1)), x=rep(0:1,each=10000), e=TRUE, bhazard=1)) library(rstpm2) uniroot(function(x) pexp(x)-pweibull(x,shape=1.5), c(1e-6,10)) # survival overlaps at 1 uniroot(function(x) 1-dweibull(x,shape=1.5)/pweibull(x,shape=1.5,lower=FALSE), c(1e-6,10)) # hazards overlap at 0.444 fit <- stpm2(Surv(t,e)~x,data=d,tvc=list(x=3),bhazard=d$bhazard) ## fit <- pstpm2(Surv(t,e)~1,data=d,smooth.formula=~s(log(t))+s(log(t),by=x),bhazard=d$bhazard) ts <- seq(0,4,length=301)[-1] ## hazard plot - ok plot(fit,newdata=data.frame(x=0),type="hazard",ylim=c(0,4)) lines(fit,newdata=data.frame(x=1),type="hazard",lty=2) abline(h=1,col="blue") # theorical lines(ts,dweibull(ts,shape=1.5)/pweibull(ts,shape=1.5,lower.tail=FALSE),lty=2,col="blue") # theorical ## survival plot - ok plot(fit,newdata=data.frame(x=0),type="surv") lines(fit,newdata=data.frame(x=1),type="surv",lty=2) lines(ts, pexp(ts,lower.tail=FALSE), col="blue") # theoretical lines(ts, pweibull(ts,shape=1.5,lower.tail=FALSE), lty=2, col="blue") # theoretical ## hr - now fixed plot(fit,newdata=data.frame(x=0),type="hr",var="x") lines(ts, dweibull(ts,shape=1.5)/pweibull(ts,shape=1.5,lower.tail=FALSE), lty=2, col="blue") # theoretical abline(h=1,lty=2) ## plot(fit,newdata=data.frame(x=0),type="hr",exposed=function(data) transform(data,x=1)) # same mistake plot(fit,newdata=data.frame(x=0),type="sdiff",var="x",ylim=c(-1,1)) S0 <- predict(fit,newdata=data.frame(x=0),grid=TRUE,full=TRUE) S1 <- predict(fit,newdata=data.frame(x=1),grid=TRUE,full=TRUE) lines(S0$t,S1$Estimate-S0$Estimate,col="blue") abline(h=0) abline(v=1) library(rstpm2) fit1 <- coxph(Surv(rectime,censrec==1)~ns(x1,df=2),data=brcancer) ## Missing values in predictions library(rstpm2) brcancer2 <- brcancer brcancer$rectime[1] <- NA fit1 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2) pred1 <- predict(fit1,newdata=brcancer2) head(pred1) fit1 summary(fit1) brcancer2 <- brcancer brcancer$hormon[1] <- NA fit1 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2) pred1 <- predict(fit1,newdata=brcancer2) head(pred1) fit1 summary(fit1) ## library(rstpm2) brcancer2 <- brcancer brcancer$rectime[1] <- NA fit1 <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2) pred1 <- predict(fit1,newdata=brcancer2) head(pred1) fit1 summary(fit1) brcancer2 <- brcancer brcancer$hormon[1] <- NA fit1 <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2) pred1 <- predict(fit1,newdata=brcancer2) head(pred1) fit1 summary(fit1) ## Examples using predictnl for Alessandro library(rstpm2) brcancer2 <- transform(brcancer, x4.23=x4 %in% 2:3) fit1 <- stpm2(Surv(rectime,censrec==1)~hormon*x4.23,data=brcancer2,df=3) summary(fit1) newd <- data.frame(hormon=0,x4.23=FALSE) plot(fit1, newdata=newd) RERI <- function(object, newdata, var1, val1=1, var2, val2=1) { exp1 <- function(data) {data[[var1]] <- val1; data} exp2 <- function(data) {data[[var2]] <- val2; data} s00 <- predict(object, newdata, type="surv") s10 <- predict(object, newdata=exp1(newdata), type="surv") s01 <- predict(object, newdata=exp2(newdata), type="surv") s11 <- predict(object, newdata=exp1(exp2(newdata)), type="surv") -(s11-s10-s01+s00)/(1-s00) } times <- seq(0,2500,length=301)[-1] reri <- RERI(fit1,newdata=transform(newd,rectime=times),var1="hormon",var2="x4.23",val2=TRUE) plot(times,reri,type="l") reri2 <- predictnl(fit1,fun=RERI,newdata=transform(newd,rectime=times),var1="hormon",var2="x4.23",val2=TRUE) with(reri2, matplot(times,fit+cbind(0,-1.96*se.fit,+1.96*se.fit),type="l",lty=c(1,2,2),col=1, xlab="Time since diagnosis", ylab="RERI")) abline(h=0,lty=3) RERI.hr <- function(object, newdata, var1, val1=1, var2, val2=1) { exp1 <- function(data) {data[[var1]] <- data[[var1]]+val1; data} exp2 <- function(data) {data[[var2]] <- data[[var2]]+val2; data} h00 <- predict(object, newdata, type="haz") h10 <- predict(object, newdata=exp1(newdata), type="haz") h01 <- predict(object, newdata=exp2(newdata), type="haz") h11 <- predict(object, newdata=exp1(exp2(newdata)), type="haz") (h11-h10-h01+h00)/h00 } RERI.hr(fit1,newdata=transform(newd,rectime=1000),var1="hormon",var2="x4.23",val2=TRUE) predictnl(fit1,fun=RERI.hr,newdata=transform(newd,rectime=1000),var1="hormon",var2="x4.23",val2=TRUE) ## testing of relative survival library(rstpm2) ayear <- 365.24 brcancer2 <- transform(brcancer, age=80*ayear, sex="male", year=as.Date("1980-01-01"), time=1, recyear=rectime/ayear) rate0 <- survexp(time~1,data=brcancer2,method="individual.h",scale=ayear) (fit1 <- stpm2(Surv(recyear,censrec==1)~hormon,data=brcancer2,df=2,cure=T,bhazard=rate0)) head(predict(fit1,type.relsurv="excess")) head(predict(fit1,type.relsurv="total")) head(brcancer2) ayear <- 365.24 timeVar <- substitute(times) scale <- ayear rmap <- substitute(list()) newdata <- data.frame(sex=c("male",rep("male",5)),age=ayear*60,year=2002,times=c(1,1:5)) survexp1 <- do.call(survexp, list(substitute(I(timeVar*scale)~1,list(timeVar=timeVar)), ratetable=survexp.us, scale=scale, rmap=rmap, cohort=FALSE, data=newdata)) plot(fit1, newdata=data.frame(hormon=1,age=80,sex="male",year=1980)) ## lines(fit1, newdata=data.frame(hormon=1,age=80,sex="male",year=1980)) ## Bug report from Alessandro for 1.4.0 library(rstpm2) data(kidney) fitg = stpm2(Surv(time, status) ~ age + sex, cluster = kidney$id, data = kidney, RandDist = "Gamma") head(predict(fitg)) fitln = stpm2(Surv(time, status) ~ age + sex, cluster = kidney$id, data = kidney, RandDist = "LogN") head(predict(fitln)) fitln = stpm2(Surv(time, status) ~ age + sex, cluster = kidney$id, data = kidney, Z=~age-1, RandDist = "LogN") head(predict(fitln)) ## test meanhr library(rstpm2) fit <- stpm2(Surv(rectime, censrec==1) ~ x4+x5, data = brcancer, df=3) fit <- stpm2(Surv(rectime, censrec==1) ~ x4+x5, data = brcancer, df=3) summary(fit) eform(fit) plot(fit, newdata=data.frame(hormon=0,x4=0,x5=0)) plot(fit, newdata=data.frame(hormon=0,x4=0,x5=0),type="hazard") plot(fit, newdata=data.frame(hormon=0,x4=0,x5=0), type="hr", exposed=function(data) transform(data, x4=1)) plot(fit, newdata=transform(brcancer,x4=1), type="meanhr", exposed=function(data) transform(data, x4=2)) plot(fit, newdata=transform(brcancer,x4=1), type="meanhaz") ## test rmst library(rstpm2) fit <- stpm2(Surv(rectime, censrec==1) ~ hormon, data = brcancer, df=3) plot(fit, newdata=data.frame(hormon=1)) predict(fit, newdata=data.frame(hormon=1,rectime=1000), type="rmst", se.fit=TRUE) predict(fit, newdata=data.frame(hormon=0,rectime=1000), type="rmst", se.fit=TRUE) library(devtools) install.packages("bbmle") devtools::install_github("mclements/rstpm2",ref="develop") ## 2017-06-21 ## Verify: the choice of basis dimension (default: k=10) for penalized regression splines is not sensitive to estimates ## Adjusted by a constant coefficient (e.g. alpha=2) to correct potential overfitting by GCV for lambda ## alpha = 1.4 suggested by Kim and Gu (2004) library(rstpm2) ## k = 7 pfit7 <- pstpm2(Surv(rectime, censrec==1) ~ hormon, data = brcancer, smooth.formula = ~ s(log(rectime), k=7), alpha=2) plot(pfit7, newdata = data.frame(hormon=0), type="hazard") ## k = 27 pfit27 <- pstpm2(Surv(rectime, censrec==1) ~ hormon, data = brcancer, smooth.formula = ~ s(log(rectime), k=27), alpha=2) plot(pfit27, newdata = data.frame(hormon=0), type="hazard") ## Estimated effective degree of freedom (EDF) pfit7@edf ## 5.36 pfit27@edf ## 5.96 require(coxme) ## Fix error in code for gradli library(rstpm2) data(brcancer) fit <- stpm2(Surv(rectime,censrec) ~ hormon,data=transform(brcancer,censrec=1)) fit <- stpm2(Surv(rectime,censrec==1) ~ hormon,data=brcancer,cure=TRUE) fit <- stpm2(Surv(rectime,censrec==1) ~ hormon,data=brcancer) plot(fit,newdata=data.frame(hormon=1),type="uncured",exposed=function(data) transform(data,rectime=2500)) X <- fit@args$X XD <- fit@args$XD args <- fit@args beta.est <- coef(fit) eta <- as.vector(X %*% beta.est) etaD <- as.vector(XD %*% beta.est) link <- switch(fit@args$link,PH=rstpm2:::link.PH,PO=rstpm2:::link.PO) h <- link$h(eta,etaD) # - as.vector(predict(fit, type="haz")) ## Ok! H <- link$H(eta) #- as.vector(predict(fit, type="cumhaz")) ## Ok! gradh <- as.matrix(link$gradh(eta,etaD, args)) gradH <- as.matrix(link$gradH(eta, args)) gradli <- residuals(fit, type="gradli") ## n*npar dim(gradli) gradli2 <- gradH - ifelse(fit@args$event,1/h,0)*gradh head(gradli + gradli2) ## Gamma frailty require(rstpm2) brcancer2 <- transform(brcancer, id=rep(1:(nrow(brcancer)/2),each=2)) brcancer2$hormon[1] <- NA fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2, cluster=brcancer2$id) summary(fit) plot(fit,newdata=data.frame(hormon=1),type="margsurv") fit2 <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2, cluster=brcancer2$id) summary(fit2) plot(fit2,newdata=data.frame(hormon=1),type="margsurv") # Aranda-Ordaz link refresh require(rstpm2) ## PH summary(fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="PH", df=3)) summary(fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="AO", df=3)) # Same: OK summary(fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="PO", df=3)) summary(fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="AO", theta.AO=1, df=3)) # Same: OK summary(fit <- pstpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="AO", theta.AO=0.5)) refresh require(rstpm2) ## PH summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,link="PH", df=3)) predict(fit, newdata=transform(brcancer,rectime=1000),type="meansurv",keep.attributes=FALSE,se.fit=TRUE,use.gr=F) predict(fit, newdata=transform(brcancer,rectime=1000),type="meansurv",keep.attributes=FALSE,se.fit=TRUE,use.gr=T) plot(fit,newdata=transform(brcancer,hormon=1),type="meansurv",times=seq(10,1500,by=10)) plot(fit,newdata=transform(brcancer,hormon=2),type="meansurv",times=seq(10,1500,by=10),lty=2,add=TRUE) newd <- merge(transform(brcancer,rectime=NULL), data.frame(rectime=c(500,1000))) unlist(predict(fit,newdata=newd,type="af",exposed=function(data) transform(data,hormon=1),keep.attributes=FALSE,se.fit=TRUE) - predict(fit,newdata=newd,type="af",exposed=function(data) transform(data,hormon=1),keep.attributes=FALSE,se.fit=TRUE,use.gr=FALSE)) system.time(plot(fit,type="af",exposed=function(data) transform(data,hormon=1),recent=TRUE)) system.time(plot(fit,type="af",exposed=function(data) transform(data,hormon=1),recent=FALSE)) plot(fit,newdata=NULL,type="meansurv",ci=F) plot(fit,newdata=NULL,type="meansurvdiff",exposed=function(data) transform(data,hormon=1)) plot(fit,newdata=data.frame(hormon=1),type="surv",ci=F) plot(fit,newdata=data.frame(hormon=1),type="fail",ci=T) unlist(predict(fit,newdata=newd,type="meansurvdiff",exposed=function(data) transform(data,hormon=1),keep.attributes=FALSE,se.fit=TRUE) - predict(fit,newdata=newd,type="meansurvdiff",exposed=function(data) transform(data,hormon=1),keep.attributes=FALSE,se.fit=TRUE,use.gr=FALSE)) unlist(predict(fit,newdata=newd,type="meansurv",keep.attributes=FALSE,se.fit=TRUE)- predict(fit,newdata=newd,type="meansurv",keep.attributes=FALSE,se.fit=TRUE,use.gr=FALSE)) ## comparison with AF ## Example 1: clustered data with frailty U require(AF) set.seed(12345) expit <- function(x) 1 / (1 + exp( - x)) n <- 100 m <- 2 alpha <- 1.5 eta <- 1 phi <- 0.5 beta <- 1 id <- rep(1:n,each=m) U <- rep(rgamma(n, shape = 1 / phi, scale = phi), each = m) Z <- rnorm(n * m) X <- rbinom(n * m, size = 1, prob = expit(Z)) ## Reparametrize scale as in rweibull function weibull.scale <- alpha / (U * exp(beta * X)) ^ (1 / eta) t <- rweibull(n * m, shape = eta, scale = weibull.scale) ## Right censoring cen <- runif(n * m, 0, 10) delta <- as.numeric(t < cen) t <- pmin(t, cen) d <- data.frame(t, delta, X, Z, id) require(rstpm2) fit2 <- stpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, df=1, cluster=d$id, smooth.formula=~log(t)) predict(fit2, type="af", newdata=transform(d,t=1),exposed=function(data) transform(data, X=0), se.fit=TRUE) plot(fit2, type="af", exposed=function(data) transform(data, X=0)) plot(fit2, type="meansurvdiff", exposed=function(data) transform(data, X=0)) plot(fit2, type="meansurv") fit3 <- pstpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, df=1, cluster=d$id) predict(fit3, type="af", newdata=transform(d,t=1),exposed=function(data) transform(data, X=0), se.fit=TRUE) plot(fit3, type="meansurv") predict(fit2, newdata=transform(d,t=1), type="meansurv") ## check analytical gradients for margsurv and marghaz predict(fit2, newdata=data.frame(t=1,X=1,Z=1), type="margsurv", use.gr=TRUE, se.fit=TRUE)-predict(fit2, newdata=data.frame(t=1,X=1,Z=1), type="margsurv", use.gr=FALSE, se.fit=TRUE) predict(fit2, newdata=data.frame(t=1,X=1,Z=1), type="marghaz", use.gr=TRUE, se.fit=TRUE)-predict(fit2, newdata=data.frame(t=1,X=1,Z=1), type="marghaz", use.gr=FALSE, se.fit=TRUE) predict(fit2, newdata=data.frame(t=1,X=1,Z=1), type="hazard", use.gr=TRUE, se.fit=TRUE)-predict(fit2, newdata=data.frame(t=1,X=1,Z=1), type="hazard", use.gr=FALSE, se.fit=TRUE) require(boot) meansurv <- function(data,index) predict(fit2, newdata=transform(data[index,,drop=FALSE],t=1), type="meansurv") meansurv(d,TRUE) boot1 <- boot(d, meansurv, R=1000) boot.ci(boot1) require(rstpm2) fit <- stpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, df=1) diag(vcov(fit)) fit <- stpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, frailty=FALSE, cluster=d$id, df=1) diag(vcov(fit)) fit <- stpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, cluster=d$id, df=1) diag(vcov(fit)) ## fit <- stpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, cluster = d$id, df=1) predict(fit,type="af",newdata=transform(d,t=1),exposed=function(data) transform(data,X=0),keep.attributes=FALSE,se.fit=TRUE) fit <- stpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, cluster = d$id, df=4) predict(fit,type="af",newdata=transform(d,t=1),exposed=function(data) transform(data,X=0),keep.attributes=FALSE,se.fit=TRUE) fit <- stpm2(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, cluster=d$id, df=1) predict(fit,type="af",newdata=transform(d,t=1),exposed=function(data) transform(data,X=0),keep.attributes=FALSE,se.fit=TRUE) ## Fit a frailty object library(stdReg) fit <- stdReg::parfrailty(formula = Surv(t, delta) ~ X + Z + X * Z, data = d, clusterid = "id") summary(fit) ## Estimate the attributable fraction from the fitted frailty model time <- c(seq(from = 0.2, to = 1, by = 0.2)) time <- 1 ## debug(AFfrailty) library(AF) AFfrailty_est <- AFparfrailty(object = fit, data = d, exposure = "X", times = time, clusterid = "id") AFfrailty_est ##AF:::summary.AF(AFfrailty_est) ## tvc for Maarten Coemans require(rstpm2) brcancer <- transform(brcancer, x1c=x1-mean(x1)) summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~ hormon+x1c, data=brcancer, df=3, tvc=list(hormon=2,x1c=2))) plot(fit.tvc,newdata=data.frame(hormon=0,x1c=-10),type="hr", var="hormon") plot(fit.tvc,newdata=data.frame(hormon=0,x1c=+10),type="hr", var="hormon", add=TRUE,ci=FALSE,line.col=2) ## same model summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~ hormon+x1c, data=brcancer, smooth.formula = ~ns(log(rectime),df=3)+hormon:ns(log(rectime),df=2)+x1c:ns(log(rectime),df=2))) ## and again... summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~ x1c, data=brcancer, smooth.formula = ~ns(log(rectime),df=3)+hormon:ns(log(rectime),df=3,intercept=TRUE)+x1c:ns(log(rectime),df=2))) ## new model with time different time transformation for the TVCs summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~ hormon+x1c, data=brcancer, smooth.formula = ~ns(log(rectime),df=3)+hormon:ns(rectime,df=2)+x1c:ns(rectime,df=2))) plot(fit.tvc,newdata=data.frame(hormon=0,x1c=-10),type="hr", var="hormon") plot(fit.tvc,newdata=data.frame(hormon=0,x1c=+10),type="hr", var="hormon", add=TRUE,ci=FALSE,line.col=2) ## ## not including the main effect and no intercept is not the same summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~ x1c, data=brcancer, smooth.formula = ~ns(log(rectime),df=3)+hormon:ns(log(rectime),df=2)+x1c:ns(log(rectime),df=2))) ## Standardised survival require(rstpm2) plot.meansurv <- function(x, y=NULL, times=NULL, newdata=NULL, add=FALSE, ci=!add, rug=!add, recent=FALSE, xlab=NULL, ylab="Mean survival", lty=1, line.col=1, ci.col="grey", ...) { if (is.null(times)) stop("plot.meansurv: times argument should be specified") if (is.null(newdata)) newdata <- x@data times <- times[times !=0] if (recent) { newdata <- do.call("rbind", lapply(times, function(time) { newdata[[x@timeVar]] <- newdata[[x@timeVar]]*0+time newdata })) pred <- predict(x, newdata=newdata, type="meansurv", se.fit=ci) # requires recent version pred <- if (ci) rbind(c(Estimate=1,lower=1,upper=1),pred) else c(1,pred) } else { pred <- lapply(times, function(time) { newdata[[x@timeVar]] <- newdata[[x@timeVar]]*0+time predict(x, newdata=newdata, type="meansurv", se.fit=ci) }) pred <- if (ci) rbind(c(Estimate=1,lower=1,upper=1),do.call("rbind", pred)) else c(1,unlist(pred)) } times <- c(0,times) if (is.null(xlab)) xlab <- deparse(x@timeExpr) if (!add) matplot(times, pred, type="n", xlab=xlab, ylab=ylab, ...) if (ci) { polygon(c(times,rev(times)),c(pred$lower,rev(pred$upper)),col=ci.col,border=ci.col) lines(times,pred$Estimate,col=line.col,lty=lty) } else { lines(times,pred,col=line.col,lty=lty) } if (rug) { Y <- x@y eventTimes <- Y[Y[,ncol(Y)]==1,ncol(Y)-1] rug(eventTimes,col=line.col) } return(invisible(y)) } brcancer <- transform(brcancer, x1c=x1-mean(x1)) summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~ hormon+x1c, data=brcancer, df=3, tvc=list(hormon=2,x1c=2))) times <- seq(0,3000,by=100) plot.meansurv(fit.tvc, newdata=transform(brcancer, hormon=1), times=times, ylim=c(0.2,1)) plot.meansurv(fit.tvc, times=times, newdata=transform(brcancer, hormon=0), line.col=2, add=TRUE) ## Examples using ns() for covariates - this was buggy. refresh require(rstpm2) summary(fit <- stpm2(Surv(rectime,censrec==1)~1, smooth.formula=~ns(log(rectime),df=3)+ns(x1,df=3), data=brcancer,link="PH")) summary(fit <- stpm2(Surv(rectime,censrec==1)~ns(x1,df=3), df=3,data=brcancer,link="PH")) summary(fit <- pstpm2(Surv(rectime,censrec==1)~ns(x1,df=3), data=brcancer,link="PH")) grad <- function(f,x,eps=1e-5) sapply(1:length(x), function(i) { lower <- upper <- x upper[i] <- x[i]+eps lower[i] <- x[i]-eps (f(upper)-f(lower))/2/eps }) link <- function(S,theta=0.5) log((S^(-theta)-1)/theta) S <- ilink <- function(eta,theta=0.5) exp(-log(theta*exp(eta)+1)/theta) H <- function(eta,theta=0.5) -log(S(eta,theta)) h <- function(eta,etaD,theta=0.5) exp(eta)*etaD/(theta*exp(eta)+1) gradH <- function(eta,X,theta=0.5) exp(eta)*X/(1+theta*exp(eta)) gradh <- function(eta,etaD,X,XD,theta=0.5) { eta <- as.vector(eta) etaD <- as.vector(etaD) ((theta*exp(2*eta)+exp(eta))*XD+exp(eta)*etaD*X) / (theta*exp(eta)+1)^2 } X <- cbind(1,1:2,1) # (constant, t, x) XD <- cbind(0,1:2,0) beta <- c(0.1, 0.2, 0.3) eta <- as.vector(X %*% beta) etaD <- as.vector(XD %*% beta) S(eta) H(eta) h(eta,etaD) - grad(function(t) H(cbind(1,t,1) %*% beta), 1) # OK gradH(eta,X) - grad(function(beta) H(X %*% beta), beta) # OK gradh(eta,etaD,X,XD) grad(function(beta) h(X %*% beta, XD %*% beta), beta) ilink(link(.1)) link(ilink(.1)) require(abind) X <- matrix(seq(0,1,length=5*10),nrow=10) beta <- seq(0,1,length=5) H <- exp(as.vector(X %*% beta)) dHdbeta <- X * H # row=indiv, col=beta d2Hdbeta2 <- aperm(abind(lapply(1:ncol(X), function(k) X[,k] * X * H),along=3),c(2,3,1)) abind(lapply(1:nrow(X), function(i) (X[i,] %*% t(X[i,])) * H[i]),along=3) - aperm(abind(lapply(1:ncol(X), function(k) X[,k] * X * H),along=3),c(2,3,1)) numder <- function(f,x,eps=1e-8) (f(x+eps)-f(x-eps))/2/eps expit <- function(x) 1/(1+exp(-x)) numder(expit,2) expit(2)*expit(-2) numder(dnorm,2) -dnorm(2)*2 require(mgcv) d <- data.frame(x = seq(0,1,length=100), x2=rnorm(100), y = rnorm(100)) fit <- gam(y~s(x)+s(x2,by=x), data=d) X <- predict(fit,d,type="lpmatrix") X0 <- predict(fit,transform(d,x=0),type="lpmatrix") Xstar <- X-X0 index0 <- rstpm2:::which.dim(Xstar) lapply(fit$smooth, function(s) { which((1:ncol(X) %in% index0)[s$first.para:s$last.para]) # index for S' }) lapply(fit$smooth, function(s) { range(which((1:ncol(X) %in% s$first.para:s$last.para)[index0])) }) lapply(fit$smooth,"[[","S") ## outline: given a full index=1:n, a reduced index set index0 and a smoother with first.para, last.para and a square matrix S, return a revised first.para', last.para' and matrix S' ## For S': refresh require(rstpm2) ## additive fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="AH", smooth.formula=~ns(rectime,df=4)+hormon:ns(rectime,df=3), optimiser="NelderMead") summary(fit) fit2 <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="AH", smooth.formula=~ns(rectime,df=4)+hormon:ns(rectime,df=3)) summary(fit2) plot(fit2,newdata=data.frame(hormon=0),type="haz") plot(fit2,newdata=data.frame(hormon=1),add=TRUE,lty=2,type="haz") fit <- pstpm2(Surv(rectime,censrec==1)~1,data=brcancer,link="AH", smooth.formula=~s(rectime)+s(rectime,by=hormon)) plot(fit,newdata=data.frame(hormon=0),type="haz") plot(fit,newdata=data.frame(hormon=1),add=TRUE,lty=2,type="haz") ## test robust estimators from penalized models ## most spline coefficients become statistically significant require(rstpm2) summary(pstpm2(Surv(rectime/365,censrec==1)~hormon,data=brcancer,robust=FALSE)) summary(pstpm2(Surv(rectime/365,censrec==1)~hormon,data=brcancer,robust=TRUE)) ## robust standard errors for clustered data refresh require(rstpm2) brcancer2 <- transform(brcancer, id=rep(1:(nrow(brcancer)/2),each=2)) fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer) summary(fit) fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer, cluster=brcancer2$id, robust=TRUE) summary(fit) ## require(rstpm2) brcancer2 <- transform(brcancer, id=rep(1:(nrow(brcancer)/2),each=2)) fit <- stpm2(Surv(rectime,censrec==1)~1,data=brcancer, cluster=brcancer2$id) summary(fit) predict(fit,type="gradli") ## Stata estimated coef for hormon ## PH: -.3614357 ## PO: -.474102 ## Probit: -.2823338 system.time(print( stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, stata=TRUE))) system.time(print(pfit <- pstpm2(Surv(rectime,censrec==1)~hormon,smooth.formula=~s(log(rectime))+s(x1),data=brcancer))) ## system.time(print( stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,type="PO"))) system.time(print(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,type="PO"))) ## system.time(print( stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,type="probit"))) system.time(print(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,type="probit"))) # slow summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,smooth.formula=~nsx(log(rectime), df=4, stata.stpm2.compatible = TRUE))) if (FALSE) { debug(pstpm2) pfit <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,sp=1) ## towards the end of the pstpm2 function... sum(diag(solve(optimHess(coef(mle2),negllsp,sp=1)) %*% optimHess(coef(mle2),negll0sp,sp=1))) sum(diag(solve(optimHess(coef(mle2),negllsp,sp=fit$sp)) %*% optimHess(coef(mle2),negll0sp,sp=fit$sp))) negllsp(coef(mle2),sp=1) negll0sp(coef(mle2),sp=1) } update.list <- function(list,...) { args <- list(...) for (name in names(args)) list[[name]] <- args[[name]] list } ## right censored ## Stata estimated coef for hormon (PH): -.3614357 refresh require(rstpm2) summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE),trace=0)) summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE),trace=0,optimiser="NelderMead")) summary(fit2 <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer)) summary(fit2 <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,optimiser="NelderMead")) ## delayed entry ## Stata estimated coef for hormon (PH): -1.162504 library(rstpm2) brcancer2 <- transform(brcancer,startTime=ifelse(hormon==0,rectime/2,0)) ## brcancer2 <- transform(brcancer,startTime=0.1) ##debug(rstpm2:::meat.stpm2) summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) summary(fit2 <- pstpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2,control=list(optimiser="NelderMead"))) # OK summary(fit2 <- pstpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2)) plot(fit,newdata=data.frame(hormon=1)) lines(fit2,newdata=data.frame(hormon=1),lty=2) head(predict(fit)) # OK head(predict(fit,se.fit=TRUE)) ## delayed entry and tvc (problems?) summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2, smooth.formula=~nsx(rectime,df=3)+hormon:nsx(rectime,df=3,stata=TRUE))) head(predict(fit,se.fit=TRUE)) pstpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2) ## left truncated with clusters library(rstpm2) brcancer2 <- transform(brcancer, startTime=ifelse(hormon==0,rectime/2,0), id=rep(1:(nrow(brcancer)/2),each=2)) ##debug(stpm2) summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2, cluster=brcancer2$id, control=list(optimiser="NelderMead"),recurrent=TRUE, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) summary(fit0 <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2, control=list(optimiser="NelderMead"), smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) require(foreign) require(rstpm2) stmixed <- read.dta("http://fmwww.bc.edu/repec/bocode/s/stmixed_example2.dta") stmixed2 <- transform(stmixed, start = ifelse(treat,stime/2,0)) summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat-1,adaptive=TRUE,optimiser="NelderMead")) summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat-1,adaptive=TRUE)) ## library(foreign) library(rstpm2) stmixed <- read.dta("http://fmwww.bc.edu/repec/bocode/s/stmixed_example2.dta") summary(r <- stpm2(Surv(stime,event)~treat,data=stmixed,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat-1)) ## non-adaptive system.time(print(summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat,adaptive=FALSE,nodes=20,optimiser="NelderMead")))) # slow and gradients not close to zero system.time(print(summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat,nodes=20,adaptive=FALSE)))) # gradients close to zero ## random intercept and random slope with 20 nodes summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat,nodes=20,adaptive=FALSE)) summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat,adaptive=FALSE,nodes=20)) # gradients close to zero ## Simple examples with no random effects and with a random intercept (check: deviances) summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed,df=3)) summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat-1)) ## check modes and sqrttau args <- r2@args args$return_type <- "modes" .Call("model_output", args, package="rstpm2") args$return_type <- "variances" # fudge .Call("model_output", args, package="rstpm2") ## check gradients args <- r2@args args$return_type <- "gradient" .Call("model_output", args, package="rstpm2") fdgrad <- function(obj,eps=1e-6) { args <- obj@args args$return_type <- "objective" sapply(1:length(args$init), function(i) { largs <- args largs$init[i] <- args$init[i]+eps f1 <- .Call("model_output", largs, package="rstpm2") largs$init[i] <- args$init[i]-eps f2 <- .Call("model_output", largs, package="rstpm2") data.frame(f1,f2,gradient=(f1-f2)/2.0/eps) }) } fdgrad(r2,1e-3) ## random intercept and random slope summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN",df=3,Z=~treat)) summary(stpm2(Surv(start,stime,event)~treat,data=stmixed2)) summary(r2 <- stpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN")) ## summary(r2 <- pstpm2(Surv(stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN")) ## summary(r2 <- stpm2(Surv(stime,event)~treat+factor(trial),data=stmixed2,cluster=stmixed$trial,RandDist="LogN",Z=~treat-1)) ## summary(r2 <- pstpm2(Surv(stime,event)~treat+factor(trial),data=stmixed2,cluster=stmixed$trial,RandDist="LogN",Z=~treat-1)) ## gradients for RE parameters require(expm) link.PH <- list(link=function(S) log(-log(S)), ilink=function(eta) exp(-exp(eta)), h=function(eta,etaD) etaD*exp(eta), H=function(eta) exp(eta), gradh=function(eta,etaD,obj) obj$XD*exp(eta)+obj$X*etaD*exp(eta), gradH=function(eta,obj) obj$X*exp(eta)) link.AH <- list(link=function(S) -log(S), ilink=function(eta) exp(-eta), h=function(eta,etaD) etaD, H=function(eta) eta, gradh=function(eta,etaD,obj) obj$XD, gradH=function(eta,obj) obj$X) corrtrans <- function(x) (1-exp(-x)) / (1+exp(-x)) l <- function(gamma=c(log(.3),log(.4),0.5)) { ## initially assume an additive model H <- function(eta) eta h <- function(eta,etaD) etaD delta <- 1 eta <- 1 etaD <- 2 Z <- c(1,2) d <- c(1,1) mode <- c(1,2) ## define sqrtmat var <- exp(gamma[c(1,2)]) rho <- corrtrans(gamma[3]) cov <- rho*sqrt(var[1]*var[2]) Sigma <- matrix(c(var[1],cov,cov,var[2]),2,2) ## SqrtSigma <- chol(Sigma) SqrtSigma <- sqrtm(as(Sigma,"symmetricMatrix")) b <- mode + SqrtSigma %*% d Zb <- sum(Z*b) l <- delta*log(h(eta+Zb,etaD))-H(eta+Zb) l } g <- c(log(.3),log(.4),0.5) ## gradient wrt g using finite differences sapply(1:3, function(i,eps=1e-6) { g[i] <- g[i]+eps val <- l(g) g[i] <- g[i]-2*eps (val - l(g))/2/eps }) bf <- function(gamma=c(log(.3),log(.4),0.5)) { Z <- c(1,2) d <- c(1,1) mode <- c(1,2) ## define sqrtmat var <- exp(gamma[c(1,2)]) rho <- corrtrans(gamma[3]) cov <- rho*sqrt(var[1]*var[2]) Sigma <- matrix(c(var[1],cov,cov,var[2]),2,2) ## SqrtSigma <- chol(Sigma) SqrtSigma <- sqrtm(as(Sigma,"symmetricMatrix")) b <- mode + SqrtSigma %*% d b } ## gradient for b wrt g using finite differences (gradbwrtg <- sapply(1:3, function(i,eps=1e-6) { g[i] <- g[i]+eps val <- bf(g) g[i] <- g[i]-2*eps (val - bf(g))/2/eps })) var <- exp(g[c(1,2)]) rho <- corrtrans(g[3]) cov <- rho*sqrt(var[1]*var[2]) ## wrt g[1] matrix(c(g[1]*var[1],cov*g[1]/2,cov*g[1]/2,0),2,2) ## A=1; B=0.5; D=2 M=matrix(c(A,B,B,D),2,2) tau <- A+D delta <- A*D-B*B s <- sqrt(delta) t <- sqrt(tau+2*s) sqrtM <- matrix(c(A+s,B,B,D+s),2,2)/t sqrtM %*% sqrtM lb <- function(b) { ## initially assume an additive model H <- function(eta) eta h <- function(eta,etaD) etaD delta <- 1 eta <- 1 etaD <- 2 Z <- c(1,2) d <- c(1,1) mode <- c(1,2) Zb <- sum(Z*b) l <- delta*log(h(eta+Zb,etaD))-H(eta+Zb) l } ## gradient wrt b using finite differences gradlwrtb <- sapply(1:2, function(i,eps=1e-6) { b[i] <- b[i]+eps val <- lb(b) b[i] <- b[i]-2*eps (val - lb(b))/2/eps }) t(gradbwrtg) %*% gradlwrtb ## gradient for a multivariate normal require(mvtnorm) fdgrad <- function(f,x, ..., eps=1.0e-6) sapply(1:length(x), function(i) { e <- rep(0,length(x)) e[i] <- 1 (f(x+e*eps, ...)-f(x-e*eps, ...))/2/eps }) ## hess(i,i) = (-f2 +16.0*f1 - 30.0*f0 + 16.0*fm1 -fm2)/(12.0*hi*hi); fdhessian <- function(f,x, ..., eps=1.0e-5) sapply(1:length(x), function(i) { ei <- rep(0,length(x)) ei[i] <- 1 sapply(1:length(x), function(j) { ej <- rep(0,length(x)) ej[j] <- 1 if (i==j) (-f(x+2*ei*eps, ...)+16*f(x+ei*eps, ...)-30*f(x,...)+16*f(x-ei*eps, ...)-f(x-2*ei*eps, ...))/12/eps/eps else (f(x+eps*ei+eps*ej)-f(x+eps*ei-eps*ej)-f(x-eps*ei+eps*ej)+f(x-eps*ei-eps*ej))/4/eps/eps }) }) -fdgrad(mvtnorm::dmvnorm, c(1,1), c(0,0), Sigma <- matrix(c(1,.5,.5,1),2), log=TRUE) ## fdhessian(mvtnorm::dmvnorm, c(1,1), c(0,0), Sigma <- matrix(c(1,.5,.5,1),2)) as.vector(solve(Sigma) %*% c(1,1)) summary(fit <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial,optimiser="NelderMead",recurrent=TRUE)) summary(fit <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial,recurrent=TRUE)) summary(fit <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN", optimiser="NelderMead", recurrent=TRUE)) summary(r2 <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN", recurrent=TRUE)) summary(fit <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial,optimiser="NelderMead")) summary(fit <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial)) summary(fit <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN", optimiser="NelderMead")) summary(r2 <- stpm2(Surv(start,stime,event)~treat,data=stmixed2,cluster=stmixed$trial,RandDist="LogN")) summary(r <- stpm2(Surv(start,stime,event)~treat,data=stmixed2)) ## check gradients args <- fit@args args$return_type <- "gradient" .Call("model_output", args, package="rstpm2") fdgrad <- function(obj,eps=1e-6) { args <- obj@args args$return_type <- "objective" sapply(1:length(args$init), function(i) { largs <- args largs$init[i] <- args$init[i]+eps f1 <- .Call("model_output", largs, package="rstpm2") largs$init[i] <- args$init[i]-eps f2 <- .Call("model_output", largs, package="rstpm2") data.frame(f1,f2,gradient=(f1-f2)/2.0/eps) }) } fdgrad(fit,1e-3) require(rstpm2) require(mgcv) x=seq(0,1,length=5001) set.seed(12345) y=rnorm(length(x),sin(2*pi*x)) i <- x>0.65 d=data.frame(x=x[i],y=y[i]) fit <- gam(y~s(x),data=d) ## plot(fit) plot(x,predict(fit,newdata=data.frame(x=x)),type="l") plot(x,y) ## weighted estimates refresh require(rstpm2) ## unequal weights brcancer2 <- transform(brcancer,w=ifelse(hormon==0,10,1)) ## unweighted summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) ## weighted estimates ## stpm2 summary(stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) ## stpm2 robust summary(stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w,robust=TRUE, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) summary(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2)) ## pstpm2 summary(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w)) summary(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w,robust=TRUE)) ## ## equal weights brcancer2 <- transform(brcancer,w=4) ## unweighted summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) ## weighted estimates ## stpm2 summary(stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) summary(stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w,robust=TRUE, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) ## pstpm2 summary(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2)) summary(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w)) summary(pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w,robust=TRUE)) refresh require(rstpm2) brcancer2 <- transform(brcancer,w=ifelse(hormon==0,10,1)) ##debug(rstpm2:::meat.stpm2) summary(fit <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2,weights=w,robust=TRUE, smooth.formula=~nsx(log(rectime),df=3,stata=TRUE))) summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer2, logH.formula=~nsx(log(rectime),df=3,stata=TRUE))) ## code for the SAS PROC ICPHREG examples read.textConnection <- function(text, ...) { conn <- textConnection(text) on.exit(close(conn)) read.table(conn, ...) } hiv <- read.textConnection("0 16 0 0 0 1 15 26 0 0 0 1 12 26 0 0 0 1 17 26 0 0 0 1 13 26 0 0 0 1 0 24 0 0 1 0 6 26 0 1 1 0 0 15 0 1 1 0 14 26 0 1 1 0 12 26 0 1 1 0 13 26 0 1 0 1 12 26 0 1 1 0 12 26 0 1 1 0 0 18 0 1 0 1 0 14 0 1 0 1 0 17 0 1 1 0 0 15 0 1 1 0 3 26 1 0 0 1 4 26 1 0 0 1 1 11 1 0 0 1 13 19 1 0 0 1 0 6 1 0 0 1 0 11 1 1 0 0 6 26 1 1 0 0 0 6 1 1 0 0 2 12 1 1 0 0 1 17 1 1 1 0 0 14 1 1 0 0 0 25 1 1 0 1 2 11 1 1 0 0 0 14 1 1 0 0") names(hiv) <- c("Left","Right","Stage","Dose","CdLow","CdHigh") ##hiv <- transform(hiv, Left=pmax(1e-5,Left)) hiv <- transform(hiv,Event = ifelse(Left==0,2, ifelse(Right>=26,0, 3))) hiv2 <- transform(hiv, Left = ifelse(Event==2,Right, ifelse(Event==0,Left, Left)), Right = ifelse(Event==2,NA, ifelse(Event==0,NA, Right))) library(rstpm2) summary(stpm2(Surv(Left,Right,Event,type="interval")~Stage, data=hiv2, df=2))[2] survreg(Surv(Left, Right, Event, type = "interval")~Stage, data=hiv2,dist="exponential") library(rms) psm(Surv(Left, Right, Event, type = "interval")~Stage, data=hiv2,dist="exponential") ## library(rstpm2) my.brcancer = brcancer my.brcancer$left = my.brcancer$rectime my.brcancer$right = ifelse(my.brcancer$censrec==1, my.brcancer$rectime, Inf) test.stpm2.C = rstpm2::stpm2(Surv(left, right, censrec, type = "interval")~hormon, data=my.brcancer,df=3) ## additive model summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2, logH.formula=~nsx(rectime,df=3), tvc.formula=~hormon:nsx(rectime,df=3,stata=TRUE))) require(foreign) require(rstpm2) stmixed <- read.dta("http://fmwww.bc.edu/repec/bocode/s/stmixed_example2.dta") system.time(r <- stpm2(Surv(stime,event)~treat,data=stmixed,cluster=stmixed$trial)) system.time(r <- stpm2(Surv(stime,event)~treat,data=stmixed,cluster=stmixed$trial,RandDist="LogN", nodes=20)) summary(r) require(mexhaz) system.time(mix <- mexhaz(formula=Surv(stime,event)~treat, data=stmixed, base="exp.bs",degree=3, random="trial", verbose=0)) ## Frailty model require(rstpm2) require(frailtypack) data(dataAdditive) ##debug(pstpm2) system.time(mod2n <- pstpm2(Surv(t1,t2,event)~var1, data=dataAdditive, RandDist="LogN", ##optimiser="NelderMead", smooth.formula=~s(log(t2)), sp.init=0.07723242, adaptive=TRUE, cluster=dataAdditive$group, nodes=10, trace=0)) summary(mod2n) localargs <- mod2n@args localargs$init <- mod2n@args$init*1.1 localargs$adaptive=TRUE localargs$return_type <- "gradient" .Call("model_output", localargs, package="rstpm2") fdgrad <- function(obj,eps=1e-6) { args <- obj@args args$init <- args$init*1.1 sapply(1:length(args$init), function(i) { args$return_type <- "objective" args$init[i] <- args$init[i]+eps f1 <- .Call("model_output", args, package="rstpm2") args$init[i] <- args$init[i]-2*eps f2 <- .Call("model_output", args, package="rstpm2") (f1-f2)/2/eps }) } fdgrad(mod2n) ## OK for adaptive=FALSE localargs <- mod2n@args localargs$return_type <- "variances" .Call("model_output", localargs, package="rstpm2") localargs$return_type <- "modes" .Call("model_output", localargs, package="rstpm2") system.time(mod2nb <- stpm2(Surv(t1,t2,event)~var1, data=dataAdditive, RandDist="LogN", logH.formula=~ns(log(t2),df=7), cluster=dataAdditive$group, nodes=20)) system.time(mod2g <- pstpm2(Surv(t1,t2,event)~var1, data=dataAdditive, RandDist="Gamma", smooth.formula=~s(log(t2)), cluster=dataAdditive$group)) mod1 <- frailtyPenal(Surv(t1,t2,event)~cluster(group)+var1,data=dataAdditive, n.knots=8,kappa=0.1,cross.validation=TRUE) mod1n <- frailtyPenal(Surv(t1,t2,event)~cluster(group)+var1,data=dataAdditive, n.knots=8,kappa=0.1,cross.validation=TRUE, RandDist="LogN") system.time(mod2 <- stpm2(Surv(t1,t2,event)~var1, # Gamma data=dataAdditive, logH.formula=~ns(t2,df=7), cluster=dataAdditive$group)) system.time(coxph1 <- coxph(Surv(t1,t2,event)~var1+frailty(group,distribution="gaussian"), data=dataAdditive)) summary(coxph1) system.time(mod2n <- stpm2(Surv(t1,t2,event)~var1, data=dataAdditive, RandDist="LogN", optimiser="NelderMead", logH.formula=~ns(log(t2),df=7), cluster=dataAdditive$group, nodes=20)) system.time(mod2nb <- stpm2(Surv(t1,t2,event)~var1, data=dataAdditive, RandDist="LogN", logH.formula=~ns(log(t2),df=7), cluster=dataAdditive$group, nodes=20)) system.time(mod3 <- pstpm2(Surv(t1,t2,event)~var1, data=dataAdditive, RandDist="LogN", criterion="BIC", smooth.formula=~s(log(t2)), cluster=dataAdditive$group, nodes=20)) system.time(mod3 <- coxph(Surv(t1,t2,event)~frailty(group,distribution="gamma")+var1,data=dataAdditive)) summary(mod2) coef2 <- coef(summary(mod2)) theta <- exp(coef2[nrow(coef2),1]) se.logtheta <- coef2[nrow(coef2),2] se.theta <- theta*se.logtheta test.statistic <- 1/se.logtheta pchisq(test.statistic,df=1,lower.tail=FALSE)/2 library(rstpm2) library(ICE) data(ICHemophiliac) ICHemophiliac2 <- transform(as.data.frame(ICHemophiliac),event=3) ## fit1 <- stpm2(Surv(left,right,event,type="interval")~1,data=ICHemophiliac2,df=3) fit1 <- pstpm2(Surv(left,right,event,type="interval")~1,data=ICHemophiliac2, smooth.formula=~s(left,k=7)) estimate <- ickde(ICHemophiliac, m=200, h=0.9) plot(estimate, type="l", ylim=c(0,0.20)) tt <- seq(0,20,length=301)[-1] lines(tt,predict(fit1,newdata=data.frame(left=tt),type="density"),col="blue") ## reg1 <- survreg(Surv(left,right,event,type="interval")~1,data=ICHemophiliac2) ## weibullShape <- 1/reg1$scale ## ## weibullScale <- exp(predict(reg1,type="lp")) ## weibullScale <- predict(reg1); ## tt <- seq(0,20,length=301) ## estimate <- ickde(ICHemophiliac, m=200, h=0.9) ## plot(estimate, type="l", ylim=c(0,0.15)) ## lines(tt,dweibull(tt,weibullShape,weibullScale),lty=2) library(rstpm2) library(survival) data(veteran) ## Re-define variables veteran <- dplyr::mutate(veteran, squamous = ifelse(celltype=="squamous",1,0), smallcell = ifelse(celltype=="smallcell",1,0), adeno = ifelse(celltype=="adeno",1,0), large = ifelse(celltype=="large",1,0), prior.ty = ifelse(prior==0,0,1), trt = ifelse(trt==2,1,0), high = ifelse(karno > 50,1,0)) lung<-subset(veteran, prior==0) ## patients with no prior therapy ## Why no optimal smoothing parameters?? divergence with version 1.3.3 pfit <-pstpm2(Surv(time,status==1) ~ adeno + smallcell + squamous, smooth.formula = ~s(log(time)) + s(karno), data=lung, link.type="PO", trace = 1) ## two-dimensional smoothers x1 <- x2 <- seq(0,1,length=11) dat <- expand.grid(x1=x1,x2=x2) dat$y <- rnorm(nrow(dat)) require(mgcv) fit <- gam(y~s(x1,x2),data=dat) fit$smooth system.time(print(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,type="probit"))) system.time(print(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,type="probit",use.rcpp=FALSE))) system.time(print(fit2 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, type="PO"))) system.time(print(fit2 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, type="PO",use.rcpp=FALSE))) system.time(print(stpm2Gen(Surv(rectime,censrec==1)~hormon,data=brcancer))) system.time(print(stpm2Gen(Surv(rectime,censrec==1)~hormon,data=brcancer, use.rcpp=FALSE))) head(predict(fit,se.fit=TRUE)) head(predict(fit,type="haz",se.fit=TRUE)) brcancer <- brcancer[rep(1:nrow(brcancer),each=500),] system.time(print(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer))) # faster than Stata! system.time(print(pfit <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer))) plot(pfit,newdata=data.frame(hormon=0)) refresh require(rstpm2) data(brcancer) system.time(print(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, tvc=list(hormon=3)))) system.time(print(pfit <- pstpm2(Surv(rectime,censrec==1)~1,data=brcancer,sp.init=c(0.0001,0.0001), tvc.formula=~s(log(rectime),by=hormon)))) print(pstpm2(Surv(rectime,censrec==1)~1,data=brcancer,init=coef(pfit)*100, tvc.formula=~s(log(rectime),by=hormon))) summary(pfit) plot(pfit,newdata=data.frame(hormon=0)) plot(pfit,newdata=data.frame(hormon=1),add=TRUE) plot(pfit,newdata=data.frame(hormon=0),type="haz") plot(pfit,newdata=data.frame(hormon=1),type="haz",add=TRUE) pfit <- pstpm2(Surv(rectime/365,censrec==1)~1,data=brcancer) # OK plot(pfit,newdata=data.frame(hormon=0)) system.time(print(pfit <- pstpm2(Surv(rectime/365,censrec==1)~1,data=brcancer, tvc.formula=~s(log(rectime/365),by=hormon)))) plot(pfit,newdata=data.frame(hormon=0)) # OK times <- seq(500,2000,by=500) meansurv1 <- t(sapply(times,function(time) predict(pfit,transform(brcancer,rectime=time,hormon=1),type="meansurv",se.fit=TRUE))) meansurv0 <- t(sapply(times,function(time) predict(pfit,transform(brcancer,rectime=time,hormon=0),type="meansurv",se.fit=TRUE))) matplot(times,meansurv1,type="l",lty=c(1,2,2),col=1) matlines(times,meansurv0,type="l",lty=c(1,2,2),col=2) meansurvdiff1 <- t(sapply(times,function(time) predict(pfit,transform(brcancer,rectime=time,hormon=0),type="meansurvdiff",var="hormon",se.fit=TRUE))) matplot(times,meansurvdiff1,type="l",lty=c(1,2,2),col=1) system.time(print(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,control=list(parscale=100,reltol=1e-10),use.rcpp=FALSE))) summary(fit) system.time(print(fit2 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,control=list(parscale=10000.0),reltol=1e-10,init=0.0001*coef(fit)))) summary(fit2) plot(fit2,newdata=data.frame(hormon=1)) brcancerN <- brcancer[rep(1:nrow(brcancer),each=100),] system.time(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancerN,use.rcpp=FALSE, control=list(parscale=0.1,reltol=1e-10))) summary(fit) system.time(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancerN,use.rcpp=TRUE)) summary(fit) ###### penalised likelihood ## environment(pstpm2) <- environment(rstpm2::pstpm2) ## require(rstpm2) try(detach("package:rstpm2",unload=TRUE)) ## source("/home/MEB/marcle/src/R/rstpm2/R/pm2-3.R") refresh require(rstpm2) data(brcancer) brcancer$recyear <- brcancer$rectime/365 system.time(fit0 <- stpm2(Surv(recyear,censrec==1)~hormon,data=brcancer,df=5)) system.time(pfit0 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer,sp.init=1)) system.time(pfit0.1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, smooth.formula=~s(log(recyear),k=15),sp.init=10,alpha=2)) system.time(pfit1.1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, smooth.formula=~s(log(recyear)),sp.init=10,criterion="BIC")) system.time(pfit2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, smooth.formula=~s(recyear),sp.init=10)) plot(pfit0,newdata=data.frame(hormon=1),line.col="red",type="hazard") plot(pfit0.1,newdata=data.frame(hormon=1),line.col="blue",add=TRUE,type="hazard") plot(pfit1.1,newdata=data.frame(hormon=1),line.col="orange",add=TRUE,type="hazard") plot(fit0,newdata=data.frame(hormon=1),line.col="green",type="hazard",add=TRUE) plot(pfit2,newdata=data.frame(hormon=1),line.col="black",type="hazard",add=TRUE) plot(pfit0,newdata=data.frame(hormon=1),line.col="red") plot(pfit0.1,newdata=data.frame(hormon=1),line.col="blue",add=TRUE) plot(pfit1.1,newdata=data.frame(hormon=1),line.col="pink",add=TRUE) plot(fit0,newdata=data.frame(hormon=1),line.col="green",add=TRUE) plot(pfit2,newdata=data.frame(hormon=1),line.col="black",add=TRUE) system.time(pfit0.check <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, sp=pfit0@sp, use.rcpp=FALSE)) system.time(pfit0.check2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, sp=pfit0@sp)) summary(pfit0) summary(pfit0.check) summary(pfit0.check2) system.time(pfit0 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=30),sp.init=1)) system.time(pfit0.1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=30),sp.init=1,criterion="BIC")) plot(pfit0,newdata=data.frame(hormon=1),line.col="red",type="hazard") plot(pfit0.1,newdata=data.frame(hormon=1),line.col="blue",add=TRUE,type="hazard") system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=30),sp=10,pen="h", smoother.parameters=list("log(recyear)"=list(var="recyear", inverse=exp, transform=log)))) plot(pfit1,newdata=data.frame(hormon=1),line.col="green",add=TRUE,type="hazard") system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=30),sp=10,criterion="GCV")) system.time(pfit2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=20),sp=10,criterion="BIC")) plot(pfit1,newdata=data.frame(hormon=1),type="hazard",ylim=c(0,0.25)) plot(pfit2,newdata=data.frame(hormon=1),add=TRUE,line.col="blue",type="hazard") system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=30),sp=1,criterion="GCV")) system.time(pfit2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=20),sp=1,use.rcpp=F,penalty="h")) system.time(pfit2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=20),sp=1,penalty="h")) plot(pfit2,newdata=data.frame(hormon=1),type="hazard") system.time(pfit2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=30),sp=1,use.rcp=FALSE)) plot(pfit1,newdata=data.frame(hormon=1),type="hazard") plot(pfit2,newdata=data.frame(hormon=1),type="hazard") system.time(pfit2.0 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=30),sp=0.055,penalty="h",cr="GCV")) system.time(pfit2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=30),sp=0.055,use.rcp=FALSE,penalty="h")) rstpm2:::gcv(pfit2) plot(pfit2,newdata=data.frame(hormon=1),line.col="red",add=TRUE,type="hazard") plot(pfit2.0,newdata=data.frame(hormon=1),line.col="green",add=TRUE,type="hazard") require(frailtypack) fpack1 <- frailtyPenal(Surv(recyear,censrec==1)~hormon, data=brcancer, cross.validation=TRUE, n.knots=10, kappa1=0.1) plot(fpack1) system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon+x3,data=brcancer, logH.formula=~s(log(recyear),k=20)+s(x3),sp=c(0.1,0.1))) system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon+x3,data=brcancer, logH.formula=~s(log(recyear),k=20)+s(x3))) system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=20)+s(x3))) plot(pfit1,newdata=data.frame(hormon=1,x3=20)) plot(pfit1,newdata=data.frame(hormon=0,x3=20),type="hazard") plot(pfit1,newdata=data.frame(hormon=1,x3=20),type="hazard",add=TRUE,line.col="blue",lty=1) summary(pfit1) brcancerN <- brcancer[rep(1:nrow(brcancer),each=100),] system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancerN, logH.formula=~s(log(recyear),k=20))) plot(pfit1,newdata=data.frame(hormon=1)) pfit1@gam$sp par(mfrow=c(2,2)) plot(pfit1,newdata=data.frame(hormon=1)) summary(pfit1@gam)$edf rstpm2:::gcv(pfit1) rstpm2:::gcvc(pfit1,nn) sps <- as.list(10^(seq(-4,2,by=0.5))) system.time(pfit2 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=20), sp=sps)) gcvs <- lapply(pfit2,rstpm2:::gcv) plot(sps,unlist(gcvs),type="l",log="x") plot(sapply(gcvs,attr,"negll"),sapply(gcvs,attr,"trace"),type="l",asp=1) plot(sapply(gcvs,attr,"trace"),sapply(gcvs,attr,"negll"),type="l",asp=1) plot(sps,sapply(pfit2,rstpm2:::aicc,nn=nn),type="l",log="x") plot(sps,sapply(pfit2,rstpm2:::bic,nn=nn),type="l",log="x") ##gcvc brcancer$recyear <- brcancer$rectime/365 sps <- 10^(seq(-4,2,by=0.5)) gcvcs <- sapply(sps, function(sp) { gcvc(pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=30), sp=sp),length(brcancer$recyear)) }) plot(sps,gcvcs,type="l",log="x") ###bic brcancer$recyear <- brcancer$rectime/365 sps <- 10^(seq(-4,2,by=0.5)) gcvcs <- sapply(sps, function(sp) { bic(pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=30), sp=sp),length(brcancer$recyear)) }) plot(sps,gcvcs,type="l",log="x") ###aicc brcancer$recyear <- brcancer$rectime/365 sps <- 10^(seq(-4,2,by=0.5)) gcvcs <- sapply(sps, function(sp) { aicc(pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=30), sp=sp),length(brcancer$recyear)) }) plot(sps,gcvcs,type="l",log="x") ######################### ### penalty functions require(mgcv) require(gaussquad) ## Outline: ## get w, lambda, X0, X1, X2, X3 ## calculate s0, s1, s2, s3 ## calculate h2 and pfun=integrate(h2^2,t) ## calculate dh2sq.dbeta and dpfun=integrate(dh2sq.dbeta,t) ## ## calculate w, lambda, X0, X1, X2, X3 derivativeDesign <- function (functn, lower = -1, upper = 1, rule = NULL, ...) { pred <- if (length(list(...)) && length(formals(functn)) > 1) function(x) functn(x, ...) else functn if (is.null(rule)) rule <- ## gaussquad::legendre.quadrature.rules(20)[[20]] data.frame(x = c(0.993128599185095, 0.963971927277914, 0.912234428251326, 0.839116971822219, 0.746331906460151, 0.636053680726515, 0.510867001950827, 0.37370608871542, 0.227785851141646, 0.0765265211334977, -0.0765265211334974, -0.227785851141645, -0.373706088715418, -0.510867001950827, -0.636053680726516, -0.746331906460151, -0.839116971822219, -0.912234428251326, -0.963971927277913, -0.993128599185094), w = c(0.0176140071391522, 0.040601429800387, 0.0626720483341092, 0.0832767415767053, 0.101930119817241, 0.11819453196152, 0.131688638449176, 0.14209610931838, 0.149172986472603, 0.152753387130726, 0.152753387130726, 0.149172986472603, 0.142096109318381, 0.131688638449175, 0.11819453196152, 0.10193011981724, 0.0832767415767068, 0.0626720483341075, 0.0406014298003876, 0.0176140071391522)) lambda <- (upper - lower)/(2) mu <- (lower + upper)/(2) x <- lambda * rule$x + mu w <- rule$w eps <- .Machine$double.eps^(1/8) X0 <- pred(x) X1 <- (-pred(x+2*eps)+8*pred(x+eps)-8*pred(x-eps)+pred(x-2*eps))/12/eps X2 <- (-pred(x+2*eps)/12+4/3*pred(x+eps)-5/2*pred(x)+4/3*pred(x-eps)-pred(x-2*eps)/12)/eps/eps X3 <- (-pred(x+3*eps)/8+pred(x+2*eps)-13/8*pred(x+eps)+ 13/8*pred(x-eps)-pred(x-2*eps)+pred(x-3*eps)/8)/eps/eps/eps return(list(x=x,w=w,lambda=lambda,X0=X0,X1=X1,X2=X2,X3=X3)) } hpfun <- function(beta,design) { lapply(design,function(obj) { s0 <- as.vector(obj$X0 %*% beta) s1 <- as.vector(obj$X1 %*% beta) s2 <- as.vector(obj$X2 %*% beta) s3 <- as.vector(obj$X3 %*% beta) h2 <- (s3+3*s1*s2+s1^3)*exp(s0) obj$lambda*sum(obj$w*h2^2) }) } hdpfun <- function(beta,design) { lapply(design, function(obj) { s0 <- as.vector(obj$X0 %*% beta) s1 <- as.vector(obj$X1 %*% beta) s2 <- as.vector(obj$X2 %*% beta) s3 <- as.vector(obj$X3 %*% beta) h2 <- (s3+3*s1*s2+s1^3)*exp(s0) dh2sq.dbeta <- 2*h2*(exp(s0)*(obj$X3+3*(obj$X1*s2+obj$X2*s1)+3*s1^2*obj$X1)+h2*obj$X0) obj$lambda*colSums(obj$w*dh2sq.dbeta) }) } smootherDesign <- function(gamobj,data) { d <- data[1,,drop=FALSE] ## how to get mean prediction values, particularly for factors? makepred <- function(var) { function(value) { d <- d[rep(1,length(value)),] d[[var]] <- value predict(gamobj,newdata=d,type="lpmatrix") } } lapply(gamobj$smooth, function(smoother) { var <- smoother$term pred <- makepred(var) derivativeDesign(pred,lower=min(data[[var]]),upper=max(data[[var]])) }) } ## example data d <- within(data.frame(x=seq(0,1,length=301)), { mu <- exp(x) y <- rnorm(301,mu,0.01) }) fit <- gam(y~s(x),data=d,family=gaussian(link="log")) beta <- coef(fit) design <- smootherDesign(fit,d) hpfun(beta,design) hdpfun(beta,design) ## Testing... require(mgcv) d <- within(data.frame(x=seq(0,2*pi,length=301)), { mu <- sin(x) dmu <- cos(x) y <- rnorm(301,mu,0.001) }) fit <- gam(y~s(x),data=d) mat <- predict(fit,type="lpmatrix") with(d,plot(x,y)) with(d,lines(x,mu,lwd=2)) with(d,lines(x,predict(fit),col="blue",lwd=2)) par(mfrow=c(3,2)) pred <- function(eps,obj=fit,data=d,var="x") { nd <- d nd[[var]] <- nd[[var]]+eps predict(obj,newdata=nd,type="lpmatrix") } ## First derivative eps <- .Machine$double.eps^(1/8) matD <- (pred(eps) - pred(-eps)) / 2 / eps with(d,plot(x,dmu,lwd=2,type="l")) with(d,lines(x,matD %*% coef(fit),col="blue",lwd=2)) ## ## 1/12 −2/3 0 2/3 −1/12 eps <- .Machine$double.eps^(1/8) matD <- (-pred(2*eps)+8*pred(eps)-8*pred(-eps)+pred(-2*eps))/12/eps with(d,plot(x,dmu,lwd=2,type="l")) with(d,lines(x,matD %*% coef(fit),col="blue",lwd=2)) ## ## Second derivative eps <- .Machine$double.eps^(1/8) matD2 <- (pred(eps)-2*pred(0)+pred(-eps))/eps/eps with(d,plot(x,-mu,lwd=2,type="l")) with(d,lines(x,matD2 %*% coef(fit),col="blue",lwd=2)) ## ## −1/12 4/3 −5/2 4/3 −1/12 eps <- .Machine$double.eps^(1/8) matD2 <- (-pred(2*eps)/12+4/3*pred(eps)-5/2*pred(0)+4/3*pred(-eps)-pred(-2*eps)/12)/eps/eps with(d,plot(x,-mu,lwd=2,type="l")) with(d,lines(x,matD2 %*% coef(fit),col="blue",lwd=2)) ## ## Third derivatives eps <- .Machine$double.eps^(1/8) matD3 <- (pred(2*eps)- 2*pred(eps)+ 2*pred(-eps)- pred(-2*eps))/2/eps/eps/eps with(d,plot(x,-dmu,lwd=2,type="l")) with(d,lines(x,matD3 %*% coef(fit),col="blue",lwd=2)) ## ## 1/8 −1 13/8 0 −13/8 1 −1/8 eps <- .Machine$double.eps^(1/8) matD3 <- (-pred(3*eps)/8+pred(2*eps)-13/8*pred(eps)+ 13/8*pred(-eps)-pred(-2*eps)+pred(-3*eps)/8)/eps/eps/eps with(d,plot(x,-dmu,lwd=2,type="l")) with(d,lines(x,matD3 %*% coef(fit),col="blue",lwd=2)) ## (browse-url "http://en.wikipedia.org/wiki/Finite_difference_coefficients") require(mgcv) data <- data.frame(x=1:10,y=1:10) fit <- gam(y~s(x,k=5,bs="ps"),data=data) round(cbind(1,(spline.des(knots=fit$smooth[[1]]$knots,x=data$x)$design %*% qr.Q(attr(fit$smooth[[1]],"qrc"),complete=TRUE))[,-1]) - predict(fit,type="lpmatrix"),1e-10) round(cbind(1,(spline.des(knots=fit$smooth[[1]]$knots,x=5:6)$design %*% qr.Q(attr(fit$smooth[[1]],"qrc"),complete=TRUE))[,-1]) - predict(fit,newdata=data.frame(x=5:6),type="lpmatrix"),1e-10) cbind(0,(spline.des(knots=fit$smooth[[1]]$knots,x=data$x,derivs=rep(1,nrow(data)))$design %*% qr.Q(attr(fit$smooth[[1]],"qrc"),complete=TRUE))[,-1]) - (predict(fit,newdata=transform(data,x=x+1e-5),type="lpmatrix")- (predict(fit,newdata=transform(data,x=x-1e-5),type="lpmatrix")))/2e-5 #######Optimal fitting####### ###GCV,AICC,BIC or GCVC to choose smoothing parameters### opt.val<-function(pstpm2.fit,nn){ like<-pstpm2.fit@like Hl<-numDeriv::hessian(like,coef(pstpm2.fit)) Hinv<-vcov(pstpm2.fit) trace<-sum(diag(Hinv%*%Hl)) loglike<-(like(coef(pstpm2.fit)))/nn gcv<-(trace-loglike)/nn aicc<-(-2*loglike+2*trace*nn/(nn-trace-1))/nn bic<-(-2*loglike+trace*log(nn))/nn gcvc<-(-2*loglike-2*nn*log(1-trace/nn))/nn out<-c(loglike,gcv,aicc,bic,gcvc) return(out) } ############################### ############################### # setClass("opt.fit", representation( # num.ind = "numeric", # cr = "numeric", # tops = "data.frame", # sp.opt = "numeric", # fun.min = "numeric" # ), # contains="pstpm2") # ######################### # opt.fit<-function(formula,data,logH.formula,sp.low,sp.upp,num.sp,timeVar = NULL){ # ###number of individual # num.ind <- nrow(data) # #####Censoring rate#### # ## set up the data # ## ensure that data is a data frame # data <- get_all_vars(formula, data) # # ## parse the function call # # Call <- match.call() # # mf <- match.call(expand.dots = FALSE) # # m <- match(c("formula", "data", "subset", "contrasts", "weights"), # # names(mf), 0L) # # mf <- mf[c(1L, m)] # stopifnot(length(lhs(formula))>=2) # eventExpr <- lhs(formula)[[length(lhs(formula))]] # delayed <- length(lhs(formula))==4 # timeExpr <- lhs(formula)[[if (delayed) 3 else 2]] # expression # if (is.null(timeVar)) # timeVar <- all.vars(timeExpr) # time <- eval(timeExpr, data) # if (delayed) { # time0Expr <- lhs(formula)[[2]] # time0 <- eval(time0Expr, data) # } # event <- eval(eventExpr,data) # cr <- sum(event > min(event))/num.ind # # # # cr=table(lhs(formula)[[if (delayed) 4 else 3]][2])/nn # ##nn<-length(brcancer$recyear) # # system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, # # logH.formula=~s(recyear,k=30), sp=1e-1)) # # plot(pfit1,newdata=data.frame(hormon=1)) # # #sps <- 10^(seq(-4,4,by=0.5)) # # sp.low=10^-4 # # sp.upp=4000 # # num.sp=30 # sps <- 10^(seq(log10(sp.low),log10(sp.upp),length=num.sp)) # optvals <- sapply(sps, function(sp) { # opt.val(pstpm2(formula,data,logH.formula=NULL, sp=sp),num.ind) # }) # tops<-t(optvals) # colnames(tops) <- c("loglike","gcv","aicc","bic","gcvc") # rownames(tops) <- rownames(tops, do.NULL = FALSE, prefix = "Obs.") # # tops<-as.data.frame(tops) # tops<-as.data.frame(tops) # ####Plot######### # #par(mfrow=c(1,2)) # ###to choose optimal smoothing parameter ### # ind.min <- sapply(2:5,function(x) order(tops[,x])[1]) # sp.opt <- sps[ind.min] # obj<-pstpm2(formula,data,logH.formula=NULL, sp=sp.opt[1]) # fun.min <- sapply(2:5,function(x) min(tops[,x])) # # if(ind.min[1]==1) # # stop("Hit left boundary, make sp.low smaller.") # # if(ind.min[1]==num.sp) # # stop("Hit right boundary, make sp.upp bigger.") # # with(tops,matplot(sps,tops[,-1],type="l",col=1:4,lty=1:4,xlab="x",ylab="y")) # # points(sp.opt,fun.min,pch=4,lwd=2,cex=1.2) # # lines(sp.opt,fun.min,err=-1,col=1:4,lty=1:4) # # ###Estimate final model with optimal value of sp### # # # # # summary(pfit.obj) # ######################################### # out <- as(obj,"opt.fit") # out <- new("opt.fit", # coef = pstpm2@coef, # fullcoef = pstpm2@fullcoef, # vcov = pstpm2@vcov, # min = pstpm2@min, # details = pstpm2@details, # minuslogl = pstpm2@minuslogl, # method = pstpm2@method, # data = data, # formula = pstpm2@formula, # optimizer = "optim", # xlevels = .getXlevels(pstpm2@terms,pstpm2@model.frame), # ##contrasts = attr(X, "contrasts"), # contrasts = NULL, # wrong! # logli = pstpm2@logli, # ##weights = weights, # Call = pstpm2@Call, # terms = pstpm2@terms, # model.frame = pstpm2@model.frame, # gam = pstpm2@gam, # timeVar = pstpm2@timeVar, # timeExpr = pstpm2@timeExpr, # like = pstpm2@like, # negll<-pstpm2@negll, # call.formula = pstpm2@call.formula, # x = pstpm2@x, # xd = pstpm2@xd, # termsd = pstpm2@termsd, # wrong! # y = pstpm2@y, # num.ind = num.ind, # cr = cr, # tops = tops, # sp.opt = sp.opt, # fun.min = fun.min) # # return(out) # } #####load data#### load("brcancer.rda") data(brcancer) brcancer$recyear <- brcancer$rectime/365 ####model fit### opt.fit(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear), sp.low=10^-4,sp.upp=4000, num.sp=30,timeVar = NULL) # ###methods for Plot ### # setMethod( # f= "plot", # signature(x="opt.fit", y="missing"), # definition=function (x,y,...){ # matplot(x@sps,x@tops[,-1],type="l",col=1:4,lty=1:4,xlab="",ylab="") # points(x@sp.opt,x@fun.min,pch=4,lwd=2,cex=1.2) # lines(x@sp.opt,x@fun.min,err=-1,col=1:4,lty=1:4) # } # ) # ####methods for print#### # setMethod ("print",signature(x="opt.fit", y="missing"), # function(x,...){ # cat("*** Class opt.fit, method Print *** \n") # cat("* Optimal SP ="); print (x@sp.opt) # cat("* GCV = \n"); print (x@fun.min[1]) # cat("******* End Print (opt.fit) ******* \n") # } # ) ########################## aplot <- function (x, y, ...) { .local <- function (x, y, newdata, type = "surv", xlab = NULL, ylab = NULL, line.col = 1, ci.col = "grey", lty = par("lty"), add = FALSE, ci = !add, rug = !add, var = NULL, ...) { browser() y <- predict(x, newdata, type = type, var = var, grid = TRUE, se.fit = TRUE) if (is.null(xlab)) xlab <- deparse(x@timeExpr) if (is.null(ylab)) ylab <- switch(type, hr = "Hazard ratio", hazard = "Hazard", surv = "Survival", sdiff = "Survival difference", hdiff = "Hazard difference", cumhaz = "Cumulative hazard") xx <- attr(y, "newdata") xx <- eval(x@timeExpr, xx) if (!add) matplot(xx, y, type = "n", xlab = xlab, ylab = ylab, ...) if (ci) polygon(c(xx, rev(xx)), c(y[, 2], rev(y[, 3])), col = ci.col, border = ci.col) lines(xx, y[, 1], col = line.col, lty = lty) if (rug) { Y <- x@y eventTimes <- Y[Y[, ncol(Y)] == 1, ncol(Y) - 1] rug(eventTimes, col = line.col) } return(invisible(y)) } .local(x, y, ...) } aplot(fit,newdata=data.frame(hormon=1)) apredict <- function (object, ...) { .local <- function (object, newdata = NULL, type = c("surv", "cumhaz", "hazard", "hr", "sdiff", "hdiff", "loghazard", "link"), grid = FALSE, seqLength = 300, se.fit = FALSE, link = NULL, exposed = incrVar(var), var, ...) { local <- function(object, newdata = NULL, type = "surv", exposed) { ## browser() tt <- object@terms if (is.null(newdata)) { X <- object@x XD <- object@xd y <- object@y time <- as.vector(y[, ncol(y) - 1]) } else { lpfunc <- function(delta, fit, data, var) { data[[var]] <- data[[var]] + delta lpmatrix.lm(fit, data) } X <- lpmatrix.lm(object@lm, newdata) XD <- grad(lpfunc, 0, object@lm, newdata, object@timeVar) XD <- matrix(XD, nrow = nrow(X)) if (type %in% c("hazard", "hr", "sdiff", "hdiff", "loghazard")) { time <- eval(object@timeExpr, newdata) } if (object@delayed) { newdata0 <- newdata newdata0[[object@timeVar]] <- newdata[[object@time0Var]] X0 <- lpmatrix.lm(object@lm, newdata0) } if (type %in% c("hr", "sdiff", "hdiff")) { if (missing(exposed)) stop("exposed needs to be specified for type in ('hr','sdiff','hdiff')") newdata2 <- exposed(newdata) X2 <- lpmatrix.lm(object@lm, newdata2) XD2 <- grad(lpfunc, 0, object@lm, newdata2, object@timeVar) XD2 <- matrix(XD, nrow = nrow(X)) } } beta <- coef(object) cumHaz = as.vector(exp(X %*% beta)) Sigma = vcov(object) if (type == "link") { return(as.vector(X %*% beta)) } if (type == "cumhaz") { if (object@delayed) return(cumHaz - as.vector(X0 %*% beta)) else return(cumHaz) } if (type == "surv") { return(exp(-cumHaz)) } if (type == "sdiff") return(as.vector(exp(-exp(X2 %*% beta))) - exp(-cumHaz)) if (type == "hazard") { return(as.vector(XD %*% beta) * cumHaz) } if (type == "loghazard") { return(as.vector(log(XD %*% beta)) + log(cumHaz)) } if (type == "hdiff") { return(as.vector((XD2 %*% beta) * exp(X2 %*% beta) - (XD %*% beta)/time * cumHaz)) } if (type == "hr") { cumHazRatio = exp((X2 - X) %*% beta) return(as.vector((XD2 %*% beta)/(XD %*% beta) * cumHazRatio)) } } type <- match.arg(type) if (is.null(newdata) && type %in% c("hr", "sdiff", "hdiff")) stop("Prediction using type in ('hr','sdiff','hdiff') requires newdata to be specified.") if (grid) { Y <- object@y event <- Y[, ncol(Y)] == 1 time <- object@data[[object@timeVar]] eventTimes <- time[event] X <- seq(min(eventTimes), max(eventTimes), length = seqLength)[-1] data.x <- data.frame(X) names(data.x) <- object@timeVar newdata <- merge(newdata, data.x) } pred <- if (!se.fit) { local(object, newdata, type = type, exposed = exposed, ...) } else { if (is.null(link)) link <- switch(type, surv = "cloglog", cumhaz = "log", hazard = "log", hr = "log", sdiff = "I", hdiff = "I", loghazard = "I", link = "I") predictnl(object, local, link = link, newdata = newdata, type = type, exposed = exposed, ...) } attr(pred, "newdata") <- newdata return(pred) } .local(object, ...) } environment(apredict) <- environment(stpm2) dim(apredict(fit,newdata=data.frame(hormon=1),grid=T)) # n=300 or 299?? apredict(fit,newdata=data.frame(hormon=1),grid=T) dim(apredict(fit,newdata=data.frame(hormon=1),grid=T,se.fit=T)) # n=300 or 299?? apredict(fit,newdata=data.frame(hormon=1),grid=T,se.fit=T) debug(rstpm2:::numDeltaMethod) try(suppressWarnings(detach("package:rstpm2",unload=TRUE)),silent=TRUE) require(rstpm2) data(brcancer) system.time(fit2 <- stpm2(Surv(rectime/365,censrec==1)~hormon,data=brcancer,df=5)) system.time(fit3 <- pstpm2(Surv(rectime/365,censrec==1)~hormon,data=brcancer,use.gr=F)) plot(fit3,newdata=data.frame(hormon=0),type="hazard") plot(fit2,newdata=data.frame(hormon=0),type="hazard",add=TRUE,ci=FALSE,rug=FALSE, line.col=2) ## penalised likelihood brcancer$recyear <- brcancer$rectime/365 system.time(pfit1 <- pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(log(recyear),k=30), sp=1e-1)) system.time(fit1 <- stpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~ns(log(recyear),df=4))) plot(pfit1,newdata=data.frame(hormon=1)) plot(fit1,newdata=data.frame(hormon=1),lty=2,add=TRUE,ci=F) rstpm2:::gcv(pfit1) sps <- 10^(seq(-4,2,by=0.5)) gcvs <- sapply(sps, function(sp) { rstpm2:::gcv(pstpm2(Surv(recyear,censrec==1)~hormon,data=brcancer, logH.formula=~s(recyear,k=30), sp=sp)) }) plot(sps,gcvs,type="l",log="x") ## system.time(fit <- rstpm2:::stpm2Old(Surv(rectime,censrec==1)~hormon,df=5,data=brcancer)) system.time(fit2 <- stpm2(Surv(rectime,censrec==1)~hormon,df=5,data=brcancer)) system.time(fit3 <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer)) ## plot(fit3,newdata=data.frame(hormon=0),type="hazard") plot(fit2,newdata=data.frame(hormon=0),type="hazard",add=TRUE,line.col=2,ci=FALSE) ## system.time(fit <- stpm2(Surv(rectime/365,censrec==1)~hormon,df=5,data=brcancer)) system.time(fit2 <- rstpm2:::stpm2Old(Surv(rectime/365,censrec==1)~hormon,df=5,data=brcancer)) ## system.time(fit3 <- pstpm2(Surv(rectime/365,censrec==1)~hormon,data=brcancer)) plot(fit3,newdata=data.frame(hormon=0),type="hazard") ## plot(fit2,newdata=data.frame(hormon=0),type="hazard",add=TRUE,line.col=2,ci=FALSE) ## plot(fit.tvc,newdata=data.frame(hormon=1),type="hr",var="hormon") ## summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3, tvc=list(hormon=3))) anova(fit,fit.tvc) # compare with and without tvc summary(fit.tvc <- stpm2Old(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3, tvc=list(hormon=3))) anova(fit,fit.tvc) # compare with and without tvc ## plot(fit.tvc,newdata=data.frame(hormon=0),type="hr",var="hormon") # no lines method: use add=TRUE plot(fit.tvc,newdata=data.frame(hormon=1),type="hr",var="hormon", add=TRUE,ci=FALSE,line.col=2) ## ## plain: identical results (good) stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer) stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, logH.formula=~ns(log(rectime),3)) rstpm2:::stpm2Old(Surv(rectime,censrec==1)~hormon,data=brcancer) ## cure: identical (requires bhazard to be sensible) rate0 <- 10^(-5+brcancer$x1/100) (fit1 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=2,cure=T,bhazard=rate0)) (fit2 <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, logH.formula=~nsx(log(rectime),df=2,cure=T,log=T),bhazard=rate0)) (fit3 <- rstpm2:::stpm2Old(Surv(rectime,censrec==1)~hormon,data=brcancer,cure=T,df=2,bhazard=rate0)) (fit4 <- rstpm2:::stpm2Old(Surv(rectime,censrec==1)~hormon,data=brcancer,bhazard=rate0, logH.formula=~nsx(log(rectime),2,cure=T))) ##### examples ##### require(foreign) if (FALSE) { # testing in open code install.packages("bbmle", repos="http://R-Forge.R-project.org") require(bbmle) brcancer=read.dta("brcancer.dta") brcancer=transform(brcancer,rate0=10^(-5+x1/100)) } try(suppressWarnings(detach("package:bbmle",unload=TRUE)),silent=TRUE) try(suppressWarnings(detach("package:rstpm2",unload=TRUE)),silent=TRUE) ## require(rstpm2) data(brcancer) summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, logH.formula=~nsx(log(rectime),df=3,stata=TRUE))) brcancer <- transform(brcancer,w=10) summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, weights=w,robust=TRUE, logH.formula=~nsx(log(rectime),df=3,stata=TRUE))) ## sandwich variance estimator (from the sandwich package) coeftest.stpm2 <- function (x, vcov. = NULL, df = NULL, ...) { est <- coef(x) if (is.null(vcov.)) se <- vcov(x) else { if (is.function(vcov.)) se <- vcov.(x) else se <- vcov. } se <- sqrt(diag(se)) if (!is.null(names(est)) && !is.null(names(se))) { anames <- names(est)[names(est) %in% names(se)] est <- est[anames] se <- se[anames] } tval <- as.vector(est)/se pval <- 2 * pnorm(abs(tval), lower.tail = FALSE) cnames <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)") mthd <- "z" rval <- cbind(est, se, tval, pval) colnames(rval) <- cnames class(rval) <- "coeftest" attr(rval, "method") <- paste(mthd, "test of coefficients") return(rval) } ## weights.stpm2 <- ## function (object, ...) ## { ## wts <- object@weights ## if (is.null(wts)) ## wts ## else napredict(object@na.action, wts) ## } require(sandwich) coxph1 <- coxph(Surv(rectime,censrec==1)~hormon,data=brcancer) update(coxph1,robust=TRUE) sandwich(coxph1) sandwich.stpm2(fit) # hurrah! ## require(lmtest) ## coeftest(coxph1) ## coeftest(coxph1,vcov.=sandwich(coxph1)) ## coeftest(fit,sandwich(fit)) sandwich(fit) sandwich(fit,bread.=bread.stpm2,meat.=meat.stpm2) ## some predictions head(predict(fit,se.fit=TRUE,type="surv")) head(predict(fit,se.fit=TRUE,type="hazard")) ## some plots plot(fit,newdata=data.frame(hormon=0),type="hazard") ## time-varying coefficient summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3, tvc=list(hormon=3))) anova(fit,fit.tvc) # compare with and without tvc plot(fit.tvc,newdata=data.frame(hormon=0),type="hr",var="hormon") # no lines method: use add=TRUE plot(fit.tvc,newdata=data.frame(hormon=1),type="hr",var="hormon", add=TRUE,ci=FALSE,line.col=2) plot(fit.tvc,newdata=data.frame(hormon=0),type="sdiff",var="hormon") plot(fit.tvc,newdata=data.frame(hormon=0),type="hdiff",var="hormon") plot(fit.tvc,newdata=data.frame(hormon=0),type="hazard") plot(fit.tvc,newdata=data.frame(hormon=1),type="hazard",line.col=2,ci=FALSE,add=TRUE) ## trace("predict", browser, exit=browser, signature = "stpm2") set.seed(10101) brcancer <- transform(brcancer, x=rlnorm(nrow(brcancer))) summary(fit.tvc <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,df=3, tvc.formula=~hormon:nsx(log(rectime),df=3))) ## cure model ## cf. http://www.pauldickman.com/survival/solutions/q37.do ### Data setup require(foreign) colon <- read.dta("http://www.pauldickman.com/survival/colon.dta") popmort <- read.dta("http://www.pauldickman.com/survival/popmort.dta") brcancer <- read.dta("http://www.stata-press.com/data/r11/brcancer.dta") popmort <- transform(popmort, age=`_age`, year=`_year`, `_age`=NULL, `_year`=NULL) save(colon,file="c:/usr/src/R/rstpm2/pkg/data/colon.rda") save(popmort,file="c:/usr/src/R/rstpm2/pkg/data/popmort.rda") save(brcancer,file="c:/usr/src/R/rstpm2/pkg/data/brcancer.rda") ## require(rstpm2) popmort2 <- transform(popmort,exitage=age,exityear=year,age=NULL,year=NULL) colon2 <- within(colon, { status <- ifelse(surv_mm>120.5,1,status) tm <- pmin(surv_mm,120.5)/12 exit <- dx+tm*365.25 sex <- as.numeric(sex) exitage <- pmin(floor(age+tm),99) exityear <- floor(yydx+tm) }) colon2 <- merge(colon2,popmort2) ## compare relative survival without and with cure summary(fit0 <- stpm2(Surv(tm,status %in% 2:3)~I(year8594=="Diagnosed 85-94"), data=colon2, bhazard=colon2$rate, df=5)) ## CHECKED: same year8594 estimate as Stata head(predict(fit0)) ## estimate of failure at the end of follow-up 1-predict(fit0,data.frame(year8594 = unique(colon2$year8594),tm=max(colon2$tm)),type="surv",se.fit=TRUE) plot(fit0,newdata=data.frame(year8594 = "Diagnosed 85-94"),ylim=0:1) plot(fit0,newdata=data.frame(year8594 = "Diagnosed 75-84"),add=TRUE,line.col="red",rug=FALSE) ## summary(fit <- stpm2(Surv(tm,status %in% 2:3)~I(year8594=="Diagnosed 85-94"), data=colon2, bhazard=colon2$rate, df=5,cure=TRUE)) head(predict(fit)) ## cure fractions (I need to add this to the predict function) 1-predict(fit,data.frame(year8594 = unique(colon2$year8594),tm=max(colon2$tm)),type="surv",se.fit=TRUE) newdata1 <- data.frame(year8594 = "Diagnosed 85-94") plot(fit,newdata=newdata1,add=TRUE,ci=FALSE,lty=2,rug=FALSE) plot(fit,newdata=data.frame(year8594="Diagnosed 75-84"),add=TRUE,rug=FALSE,line.col="red",ci=FALSE,lty=2) plot(fit,newdata=newdata1,type="hazard") plot(fit,newdata=newdata1,type="cumhaz") ## http://www.pauldickman.com/survival/r/melanoma.relsurv.r library(foreign) library(survival) library(relsurv) # Download rates files from http://www.mortality.org/ # # 6. Life Tables By year of death (period) 1x1 # Save tables by gender in text files # The transrate.hmd command translate these to R ratetables Finlandpop <- transrate.hmd("c:/usr/tmp/mltper_1x1.txt","c:/usr/tmp/fltper_1x1.txt") ## The relsurv package requires time in days (exit and dx are dates of exit and diagnosis) colon3 <- transform(colon2,tm.dd=as.numeric(exit-dx)) colon3$sex <- ifelse(colon2$sex==1,"male","female") as.date <- function(x) if (inherits(x,"Date")) as.date(as.numeric(x)+3653) else date::as.date(x) model1 <- rs.surv(Surv(tm.dd,status %in% 2:3)~year8594+ratetable(age=(X_age+0.5)*365.25,sex=sex,year=as.date(exit)),colon3,ratetable=Finlandpop) plot(model1,lty=1:2) oldx <- 0:100 oldy <- (oldx-50)^2 oldy[c(20,30)] <- 0 old <- data.frame(x=oldx,y=oldy) predict(lm(y~nsx(x,knots=c(25,50,75,95)),old)) # as per Stata newx <- seq(min(oldx)/1.05,max(oldx)*1.05,length=101) new <- data.frame(x=newx) plot(oldx,oldy) predict(lm(y~nsx(x,df=5,cure=TRUE),old)) sum(oldy) terms(lm(y~nsx(x,df=5,cure=TRUE),old)) lm(y~nsx(x,df=5),old) lines(newx, predict(lm(y~nsx(x,df=4,cure=FALSE),old),newdata=new), type="l") # oops lines(newx, predict(lm(y~nsx(x,df=3),old),newdata=new), lty=2) summary(fit <- stpm2(Surv(tm,status %in% 2:3)~I(year8594=="Diagnosed 85-94"), data=colon2, bhazard=colon2$rate, logH.formula=~nsx(log(tm),df=6,stata=TRUE))) # okay summary(fit <- stpm2(Surv(tm,status %in% 2:3)~I(year8594=="Diagnosed 85-94"), data=colon2, logH.formula=~nsx(log(tm),df=6,stata=TRUE))) # okay ## Stata ## stata.knots=c(4.276666164398193, 6.214608192443848, 6.7833251953125, 7.806289196014404) stataKnots <- function(x,df) { intKnots <- round((1:(df-1))/df,2) # yes, Paul implicitly rounded to 2 dp logx <- log(x) c(min(logx),quantile(logx,intKnots,type=2),max(logx)) } stata.knots <- stataKnots(subset(brcancer,censrec==1)$rectime,3) ## sapply(1:9,function(type) log(quantile(subset(brcancer,censrec==1)$rectime,c(0.33,0.67),type=type))) summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, logH.args=list(knots=stata.knots[2:3], Boundary.knots=stata.knots[c(1,4)]))) ## formula specification for logH summary(stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer, logH.formula=~ns(log(rectime),df=3))) pred <- predict(fit.tvc,newdata=data.frame(hormon=0:3),grid=TRUE,se.fit=TRUE,type="cumhaz") pred.all <- cbind(pred,attr(pred,"newdata")) ## require(lattice) ## xyplot(Estimate ~ rectime, data=pred.all, group=hormon,type="l",xlab="Time") ## relative survival brcancer <- transform(brcancer,rate0=10^(-5+x1/100)) summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer,bhazard=brcancer$rate0,df=3)) head(predict(fit,se.fit=TRUE)) ## delayed entry brcancer2 <- transform(brcancer,startTime=ifelse(hormon==0,rectime*0.5,0)) ## debug(stpm2) summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2, logH.formula=~nsx(log(rectime),df=3,stata=TRUE))) head(predict(fit,se.fit=TRUE)) ## delayed entry and tvc summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=brcancer2, tvc.formula=~hormon:nsx(log(rectime),df=3,stata=TRUE))) head(predict(fit,se.fit=TRUE)) ## multiple time scales brcancer <- transform(brcancer,recyr=rectime/365.25) ## predictions from a simple model summary(fit <- stpm2(Surv(recyr,censrec==1)~hormon+x1,data=brcancer, logH.formula=~nsx(log(recyr),df=3,centre=log(50)))) head(predict(fit)) grid.x1 <- with(brcancer, seq(40,70,length=300)) newdata0 <- with(brcancer, data.frame(recyr=5,x1=grid.x1,hormon=0)) matplot(grid.x1, predict(fit,type="hr",newdata=newdata0,var="hormon",se.fit=TRUE), type="l") ## predictions with multiple time scales summary(fit <- stpm2(Surv(recyr,censrec==1)~hormon,data=brcancer, logH.formula=~nsx(log(recyr),df=3,centre=log(50)), tvc.formula=~hormon:nsx(log(recyr+x1),df=2))) matplot(grid.x1, predict(fit,type="hr",newdata=newdata0,var="hormon",se.fit=TRUE), type="l") brcancer <- transform(brcancer,recyr=rectime/365.25,entry=recyr/2) summary(fit <- stpm2(Surv(entry,recyr,censrec==1)~hormon,data=brcancer, logH.formula=~nsx(log(recyr),df=3,centre=log(50)), tvc.formula=~hormon:nsx(log(recyr+x1),df=2))) summary(fit <- stpm2(Surv(recyr,censrec==1)~hormon+x1,data=brcancer, logH.formula=~nsx(log(recyr),df=3,centre=log(50)))) plot(grid.x1, predict(fit,type="hr",newdata=newdata0,var="hormon",se.fit=TRUE)$fit, type="l") plot(fit,newdata=data.frame(hormon=0,x1=50),var="hormon",type="hr") head(predict(fit,type="hazard",newdata=newdata0)) head(predict(fit,type="hazard",newdata=transform(newdata0,hormon=1))) newdata0 <- with(brcancer, data.frame(recyr=5+1,x1=grid.x1-1,hormon=0)) predict(fit,type="hr",newdata=newdata0,var="hormon") summary(fit <- stpm2(Surv(recyr,censrec==1)~hormon+x1,data=brcancer, logH.formula=~nsx(log(recyr),df=3,centre=log(50)),tvc=list(hormon=3))) brcancer <- transform(brcancer, startAge=x1, endAge=x1+rectime/365) summary(fit <- stpm2(Surv(startAge,endAge,censrec==1)~hormon,data=brcancer, logH.formula=~nsx(log(endAge),df=3,centre=log(50)),tvc=list(hormon=3))) ## some simulated data: H_{weibull}(t)=(t/b)^a n <- 1000 sim1 <- data.frame(age=seq(20,70,length=n),x=rep(0:1,each=n/2)) y <- rweibull(1000,shape=1,scale=1) with(brcancer, plot(density(x1[censrec==1]))) summary(fit <- stpm2(Surv(recyr,censrec==1)~hormon,data=brcancer,logH.formula=~nsx(log(recyr),df=3,stata=TRUE))) brcancer <- transform(brcancer,ageStart=rnorm(length(rectime),50,5)) brcancer <- transform(brcancer,ageStop=ageStart+rectime) summary(fit <- stpm2(Surv(ageStart,ageStop,censrec==1)~hormon,data=brcancer,df=3)) brcancer3 <- transform(brcancer,startTime=ifelse(censrec==1,0,10)) summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=subset(brcancer,rectime>10),df=3)) summary(fit <- stpm2(Surv(startTime,rectime,censrec==1)~hormon,data=subset(brcancer3,rectime>10),df=3)) ## check the performance time refresh require(rstpm2) data(brcancer) brcancer10 = do.call("rbind",lapply(1:10,function(i) brcancer)) system.time(summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,df=3,data=brcancer10))) system.time(summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,df=3,data=brcancer10))) system.time(summary(fit <- stpm2(Surv(rectime,censrec==1)~hormon,data=brcancer10, logH.formula=~ns(log(rectime),df=4)+hormon:ns(log(rectime),df=3)))) system.time(summary(fit <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer10))) system.time(summary(fit <- pstpm2(Surv(rectime,censrec==1)~1,data=brcancer10, smooth.formula=~s(log(rectime))+s(log(rectime),by=hormon)))) fit <- pstpm2(Surv(rectime,censrec==1)~hormon,data=brcancer10,trace=1) fit <- pstpm2(Surv(rectime,censrec==1)~1,data=brcancer10, smooth.formula=~s(log(rectime))+s(log(rectime),by=hormon),trace=1,sp.init=c(1,1), reltol=list(outer=1e-5,search=1e-10,final=1e-10)) system.time(summary(fit <- pstpm2(Surv(rectime,censrec==1)~1,data=brcancer10, smooth.formula=~s(log(rectime))+s(log(rectime),by=hormon), sp=c(0.006,0.0031),trace=1,outer_optim=2,criterion="GCV", reltol=list(outer=1e-5,search=1e-10,final=1e-10)))) ## > fit@sp ## [1] 0.06104312 0.31430954 system.time(fit <- pstpm2(Surv(rectime,censrec==1)~1,data=brcancer10, smooth.formula=~s(log(rectime))+s(log(rectime),by=hormon),sp=c(1,1))) nsx(1:10,df=3) - ns(1:10,df=3) nsx(1:10,df=3,centre=3) nsx(1:10,df=3,centre=3,Boundary.knots=c(2,8),derivs=c(1,1)) nsx(1:10,df=3,cure=TRUE) nsxDeriv(1:10,df=3) - nsDeriv(1:10,df=3) nsxDeriv(1:10,df=3,centre=5,derivs=c(1,1)) nsxDeriv(1:10,df=3,centre=5,cure=TRUE) nsDeriv(1:10,df=3) - nsDeriv2(1:10,df=3) ## bug with calling mle2 require(bbmle) mle2a <- function(...) mle2(...) ## some data x <- 0:10 y <- c(26, 17, 13, 12, 20, 5, 9, 8, 5, 4, 8) d <- data.frame(x,y) ## some fits (fit0 <- mle2(y~dpois(lambda=ymean),start=list(ymean=mean(y)),data=d)) # okay (fit0.2 <- mle2(y~dpois(lambda=ymean),start=list(ymean=mean(y)),data=d, control=list(parscale=2))) # okay (fit1 <- mle2a(y~dpois(lambda=ymean),start=list(ymean=mean(y)),data=d)) # okay (fit1.2 <- mle2a(y~dpois(lambda=ymean),start=list(ymean=mean(y)),data=d, control=list(parscale=2))) # FAILS ## stdReg::parfrailty documentation library(stdReg) library(survival) ## simulate data n <- 1000 m <- 3 alpha <- 1.5 eta <- 1 phi <- 0.5 beta <- 1 id <- rep(1:n, each=m) U <- rep(rgamma(n, shape=1/phi,scale=phi), each=m) X <- rnorm(n*m) ## reparametrize scale as in rweibull function weibull.scale <- alpha/(U*exp(beta*X))^(1/eta) T <- rweibull(n*m, shape=eta, scale=weibull.scale) ## right censoring C <- runif(n*m, 0,10) D <- as.numeric(TL incl <- ave(x=incl, id, FUN=sum)==m dd <- data.frame(L, T, D, X, id) dd <- dd[incl, ] ## fit <- parfrailty(formula=Surv(L, T, D)~X, data=dd, clusterid="id") summary(fit) ## library(rstpm2) fit2 <- stpm2(formula=Surv(L, T, D)~X, data=dd, cluster=dd$id, smooth.formula=~log(T)) summary(fit2) ## ignore left truncation fit <- parfrailty(formula=Surv(T, D)~X, data=dd, clusterid="id") summary(fit) fit2 <- stpm2(Surv(T, D)~X, data=dd, cluster=dd$id, smooth.formula=~log(T)) summary(fit2) ## normal random effect fit2 <- stpm2(formula=Surv(T, D)~X, data=dd, cluster=dd$id, smooth.formula=~log(T), RandDist="LogN") summary(fit2) ## end of examples ## ## ## * stata ## cd c:\Users\marcle\Documents\Home\ ## clear ## webuse brcancer ## use brcancer ## stset rectime, f(censrec==1) ## cap program drop dopredictions ## program define dopredictions ## preserve ## predict hr, hrnumerator(hormon 1) ci ## predict haz, hazard ci ## predict surv, surv ci ## predict sdiff, sdiff1(hormon 1) ci ## list hr* in 1/5 ## list haz* surv* in 1/5 ## list sdiff* in 1/5 ## restore ## end ## * basic model ## stpm2 hormon, df(3) scale(h) ## dopredictions ## * cure ## gen rate0=10^(-5+x1/100) ## stpm2 hormon, df(3) scale(h) cure bhazard(rate0) ## dopredictions ## * tvc ## stpm2 hormon, df(3) tvc(hormon) dftvc(3) scale(h) ## dopredictions ## * delayed entry ## preserve ## replace _t0 = rectime*0.5 if hormon==0 ## stpm2 hormon, df(3) scale(h) ## dopredictions ## restore ## * relative survival ## preserve ## gen rate0=10^(-5+x1/100) ## stpm2 hormon, df(3) scale(h) bhazard(rate0) ## dopredictions ## restore ## * test speed ## clear all ## set mem 100m ## use brcancer ## stset rectime, f(censrec==1) ## expand 100 ## timer clear ## timer on 1 ## stpm2 hormon, df(3) scale(h) ## timer off 1 ## timer list ## hazard.pm = function(obj,tm,X,XD) # obj$par ## { ## Xlocal=predict(X,newx=log(tm)) ## XDlocal=predict(XD,newx=log(tm)) ## with(obj, ## c((XDlocal %*% par)/tm*exp(Xlocal %*% par))) ## } ## with(list(df=df,x=seq(0,3,length=100)[-1]), ## { ## plot(x,hazard.pm(fit,x,X,XD),type="l",ylim=c(0,2)) ## lines(x,dweibull(x,shape=1)/pweibull(x,shape=1,lower=FALSE),lty=2) ## }) ## ## ## require(deSolve) ## temp <- as.data.frame(ode(y=0,times=seq(0,10,length=100)[-1], ## func=function(t,state,parameters=NULL) list(exp(sin(2*pi*log(t)))))) ## plot(temp,type="l") ## temp <- transform(temp, cum=`1`,logcum=log(`1`)) ## with(temp,plot(log(time),logcum)) ## temp1 <- temp[-1,] ## fit <- glm(log(cum)~log(time)+sin(2*pi*log(time))+cos(2*pi*log(time)),data=temp1) ## lines(log(temp1$time),predict(fit)) ## ## In summary: ## ## we can model using sine and cosine terms for the log-cumulative hazard - for log(time).