https://github.com/cran/season
Tip revision: 2c210d12a3968c93e922ec61ba016704a7f9668e authored by Adrian Barnett on 03 March 2010, 00:00:00 UTC
version 0.2-4
version 0.2-4
Tip revision: 2c210d1
casecross.R
## casecross.R
## time-stratified case-crossover
## Oct 2009
## assumes date variable is called 'date'
## quicker version
casecross<-function(formula,data,exclusion=2,stratalength=28,matchdow=FALSE,usefinalwindow=FALSE,matchconf='',confrange=0,stratamonth=FALSE){
attach(data, warn.conflicts = FALSE)
thisdata<-data
detach(data)
## Checks
if (class(thisdata$date)!="Date"){stop("date variable must be in date format, see ?Dates")}
if (exclusion<0){stop("Minimum value for exclusion is zero")}
parts<-paste(formula)
dep<-parts[2] # dependent variable
indep<-parts[3] # dependent variable
if (length(formula)<=2){stop("Must be at least one independent variable")}
## original call with defaults (see amer package)
ans <- as.list(match.call())
frmls <- formals(deparse(ans[[1]]))
add <- which(!(names(frmls) %in% names(ans)))
call<-as.call(c(ans, frmls[add]))
thisdata$dow<-as.numeric(format(thisdata$date,'%w'));
## Slim down the data
f<-as.formula(paste(parts[2],parts[1],parts[3],'+date+dow'))
if (substr(matchconf,1,1)!=""){
f<-as.formula(paste(dep,"~",indep,'+date+dow+',matchconf))
}
datatouse<-model.frame(f,data=thisdata,na.action=na.omit) # remove cases with missing covariates
datediff<-as.numeric(datatouse$date)-min(as.numeric(thisdata$date)) # use minimum data in entire sample
time<-as.numeric(datediff)+1 # used as strata number
## Create strata
if (stratamonth==TRUE){
month<-as.numeric(format(datatouse$date,'%m'));
year<-as.numeric(format(datatouse$date,'%Y'));
matchday<-as.numeric(format(datatouse$date,'%d'));
yrdiff<-year-min(year);
windownum<-(yrdiff*12)+month;
}
if (stratamonth==FALSE){
## Get the earliest time and difference all dates from this time
## Increase strata windows in jumps of 'stratalength'
windownum<-floor(datediff/stratalength)+1
nwindows<-floor(nrow(thisdata)/stratalength)+1
matchday<-datediff-((windownum-1)*stratalength)+1 # Day number in strata
## Exclude the last window if it is less than 'stratalength'
lastwindow<-datatouse[datatouse$windownum==nwindows,]
if (nrow(lastwindow)>0){ # only apply to data sets with some data in the final window
lastlength<-max(time[windownum==nwindows])-min(time[windownum==nwindows])+1
if (lastlength<stratalength&usefinalwindow==FALSE) datatouse<-datatouse[windownum<nwindows,]
}
}
## Create the case data
n<-nrow(datatouse)
cases<-datatouse
cases$case<-1 # binary indicator of case
cases$timex<-1 # Needed for conditional logistic regression
cases$windownum<-windownum
cases$time<-time
cases$diffdays<-NA
cases$matchday<-matchday
posout<-sum(as.numeric(names(datatouse)==as.character(f[2]))*(1:ncol(datatouse))) # get the position of the dependent variable
cases$outcome<-datatouse[,c(posout)]
nonzerocases<-cases[cases$outcome>0,] # days with one or more cases
# Create a case number for matching
if (substr(matchconf,1,1)==""){cases.tomerge<-subset(nonzerocases,select=c(matchday,time,outcome,windownum,dow))}
if (substr(matchconf,1,1)!=""){
also<-sum(as.numeric(names(nonzerocases)==matchconf)*(1:length(names(nonzerocases))))
cases.tomerge<-subset(nonzerocases,select=c(matchday,time,outcome,windownum,dow,also))
}
ncases<-nrow(nonzerocases)
cases.tomerge$casenum<-1:ncases
# Duplicate case series to make controls
maxwindows<-max(cases$windownum)
rowstorep<-NA
casenum<-NA
for (k in 1:maxwindows){
small=min(cases$time[cases$windownum==k])
large=max(cases$time[cases$windownum==k])
these<-rep(small:large,large-small+1)
rowstorep<-c(rowstorep,these)
casenum<-c(casenum,these[order(these)])
}
controls<-cases[rowstorep[2:length(rowstorep)],] # out of memory
controls<-subset(controls,select=c(-case,-timex,-time,-outcome))
# Replace case number
controls$casenum<-casenum[2:length(rowstorep)]
# Merge cases with controls by case number
controls<-merge(controls,cases.tomerge,by='casenum')
controls<-controls[controls$windownum.x==controls$windownum.y,] # must be in same stratum window
controls$case<-0 # binary indicator of case
controls$timex<-2 # Needed for conditional logistic regression
controls$diffdays<-abs(controls$matchday.x-controls$matchday.y)
controls<-controls[controls$diffdays>exclusion,] # remove the exclusion window
# match on day of the week
if (matchdow==TRUE){controls<-controls[controls$dow.x==controls$dow.y,]}
# match on a confounder
if (substr(matchconf,1,1)!=""){
one<- paste(matchconf,'.x',sep='')
two<- paste(matchconf,'.y',sep='')
find1<-grep(one,names(controls))
find2<-grep(two,names(controls))
matchdiff<-abs(controls[,find1]-controls[,find2])
controls<-controls[matchdiff<=confrange,]
controls<-subset(controls,select=c(-casenum,-dow.x,-dow.y,-matchday.x,-matchday.y,-windownum.x,-windownum.y,-find1,-find2))
findc<-sum(as.numeric(names(nonzerocases)==matchconf)*(1:length(names(nonzerocases))))
final.cases<-subset(nonzerocases,select=c(-dow,-matchday,-windownum,-findc))
}
if (substr(matchconf,1,1)==""){
controls<-subset(controls,select=c(-casenum,-dow.x,-dow.y,-matchday.x,-matchday.y,-windownum.x,-windownum.y))
final.cases<-subset(nonzerocases,select=c(-dow,-matchday,-windownum))
}
finished<-rbind(final.cases,controls)
## Remove empty controls
finished<-finished[finished$outcome>0,]
## Count the number of controls without a case
onlycntl<-finished[finished$case==0,]
ncases<-nrow(table(onlycntl$time))
ncontrols<-round(mean(as.numeric(table(onlycntl$time))),1)
## Run the conditional logistic regression
finalformula<-as.formula(paste('Surv(timex,case)~',indep,'+strata(time)'))
c.model<-coxph(finalformula,
weights=outcome,
data=finished,method=c("breslow"))
toret<-list()
toret$call<-call
toret$c.model<-c.model
class(toret$c.model)<-"coxph"
toret$ncases<-ncases
toret$ncontrols<-ncontrols
class(toret)<-'casecross'
return(toret)
}