Revision ade4b694b3f416d6b195ddcd584d6f89ef36ea2f authored by M. Helena Gon\xe7alves on 28 January 2012, 00:00:00 UTC, committed by Gabor Csardi on 28 January 2012, 00:00:00 UTC
1 parent 8cb2c8d
Raw File
bild.R

setClass("summary.bild", representation(coefficients = "matrix", se = "matrix", covariance = "matrix", correlation="matrix", 
		log.likelihood="numeric", message ="integer",n.cases="numeric", ni.cases="numeric", aic="numeric",call="language"))

setClass("bild", representation(coefficients = "matrix", se = "matrix", covariance = "matrix", correlation="matrix", 
		log.likelihood="numeric", message ="integer",n.cases="numeric", ni.cases="numeric", aic="numeric", residuals="numeric", 
		s.residuals="numeric",ind.probability="numeric", prob.matrix="matrix", Fitted="numeric", 
		Fitted.av="numeric", Time="numeric", model.matrix= "matrix", y.matrix="matrix",
		subset.data="data.frame", y.av="numeric", f.value="factor",call="language"))


setGeneric("getAIC",def=function(object) standardGeneric("getAIC"))


setGeneric("getLogLik",def=function(object) standardGeneric("getLogLik"))


bild<-function(formula = formula(data), data, time,id, subSET, aggregate=FALSE, start = NULL, trace = FALSE,
dependence="ind", method="BFGS", control=bildControl(), 
integrate=bildIntegrate())
{
# *****************DEFINITION OF INTERNAL FUNCTIONS ******************
# na.action for binary families

na.discrete.replace <- function(frame,  n.times, ti.repl)
	{
	vars <- names(frame)
	names(vars) <- vars
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	badlines<-NULL
	bad.ind<-NULL
	for(j in 1:length(vars)) 
	{k1<-1
	for (i in 1:n.cases)
	{k2<-cumti.repl[i]
	x <- frame[[j]][k1:k2]
	pos <- is.na(x)
	if(any(pos))
			if(j == 1){distance.between.na <- diff(seq(1, n.times)[!pos])
				if (any(distance.between.na > 2 ))
				{badlines<-c(badlines,c(k1:k2))
				bad.ind<-c(bad.ind,i)
				x[pos] <- -1}
				if (any(distance.between.na == 2 ) & length(distance.between.na)>1)
				{for (i2 in 1:(length(distance.between.na)-1))
				{if (distance.between.na[i2]==2 & distance.between.na[i2+1]==2)
				{badlines<-c(badlines,c(k1:k2))
				bad.ind<-c(bad.ind,i)
				x[pos] <- -1}}}
			x[pos] <- -1}
			else stop("NA's on covariates not allowed")
	frame[[j]][k1:k2]<-x
	k1<-k2+1
	} 
	}
		if (length(bad.ind)>=1)
		cat("Warning Message: Condition on NA's not respected:\nresults might be inaccurate\n")
		return(list(data=frame, badlines=badlines,bad.ind=bad.ind))
	}

#
# compute the fitted values for variuos families of distributions
#
	various.fitted <- function(Fitted)
	{
		#return(1/(1 + care.exp( - Fitted)))
		return(1/(1 +( - Fitted)))
	}
#
# compute numerical second derivatives
#
	num.info <- function(coefficients, FUN, X, data)
	{
		FUN <- get(FUN, inherits = TRUE)
		values <- FUN(coefficients, X, data)
		p <- length(values)
		Info <- matrix(0, p, p)
		h <- rep(0, p)
		delta <- cbind((abs(coefficients) + 1e-012) * 0.0001, rep(1e-012, p))
		delta <- apply(delta, 1, max)
		for(i in 1:p) {
			h[i] <- delta[i]
			new.values <- FUN(coefficients + h, X, data)
			Info[, i] <- (new.values - values)/delta[i]
			h[i] <- 0
		}
		Info
	}

# compute numerical second derivatives involving integrals

	num.infoI <- function(coefficients, FUN, X, data,integrate)
	{
		FUN <- get(FUN, inherits = TRUE)
		values <- FUN(coefficients, X, data,integrate)
		p <- length(values)
		Info <- matrix(0, p, p)
		h <- rep(0, p)
		delta <- cbind((abs(coefficients) + 1e-012) * 0.0001, rep(1e-012, p))
		delta <- apply(delta, 1, max)
		for(i in 1:p) {
			h[i] <- delta[i]
			new.values <- FUN(coefficients + h, X, data,integrate)
			Info[, i] <- (new.values - values)/delta[i]
			h[i] <- 0
		}
		Info
	}


########################################################################
# ------------------------------------------------------------------- 
	
logL.bin0.aux<- function(parameters, X, data, trace)
{
	loglik1<- function(param, X, y, trace)
	{#calculate logLi(beta/bi) for each individual
	npar <- as.integer(length(param)+1)
	beta <- as.double(param[1:(npar-1)])
	log.psi <- as.double(0)
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
	x <-matrix(as.double(X),nrow=n,ncol=(npar-1))
	theta <- work <- pij<-as.double(rep(0,n))
	logL <- L<-as.double(0)

		results <- .Fortran("mblik1",logL,pij,beta,log.psi, npar,x,y,theta,work,n,PACKAGE="bild")

	
	return(list(loglik=results[[1]],pij=results[[2]]))
	}
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	prob<-as.double(rep(0,length(y)))
	counts<-data[[3]]
	logL<-as.double(0)
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<- loglik1(param=parameters,X=X[k1:k2,],y=y[k1:k2],trace=trace)
	logL<-logL+counts[i]*z$loglik
	prob[k1:k2]<-z$pij
	k1<-k2+1
	}
		if(trace)	cat(paste("\t",(format( logL,digit=6)), collapse=" "), "\n")

return(list(nloglik=-logL, pij=prob))}
############

logL.bin0<- function(parameters, X, data,trace)
        logL.bin0.aux(parameters, X, data,trace)$nloglik
# ------------------------------------------------------------------- 
	
logL.bin1mf.aux<- function(parameters, X, data, trace)
{
	loglik1<- function(param, X, y, trace)
	{#calculate logLi(beta/bi) for each individual
	npar <- as.integer(length(param))
	beta <- as.double(param[1:(npar-1)])
	log.psi <- as.double(param[npar])
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
	x <-matrix(as.double(X),nrow=n,ncol=npar-1)
	theta <- work <- pij<-as.double(rep(0,n))
	logL <- L<-as.double(0)

		results <- .Fortran("mblik1",logL,pij,beta,log.psi, npar,x,y,theta,work,n,PACKAGE="bild")

	return(list(loglik=results[[1]],pij=results[[2]]))
	}
		if(trace)	cat(paste(format(parameters[length(parameters)], digit=4), collapse=" "), "\t")
	
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	prob<-as.double(rep(0,length(y)))
	counts<-data[[3]]
	logL<-as.double(0)
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<- loglik1(param=parameters,X=X[k1:k2,],y=y[k1:k2],trace=trace)
	logL<-logL+counts[i]*z$loglik
	prob[k1:k2]<-z$pij
	k1<-k2+1
	}
		if(trace)	cat(paste("\t",(format( logL,digit=6)), collapse=" "), "\n")

return(list(nloglik=-logL, pij=prob))}
############

logL.bin1mf <- function(parameters, X, data,trace)
        logL.bin1mf.aux(parameters, X, data,trace)$nloglik

# ------------------------------------------------------------------- 

logL.bin2mf.aux <- function(parameters, X, data, trace)
{
	loglik2 <- function(param, X, y,trace)
	{
	npar <-as.integer(length(param))
	beta <- as.double(param[1:(npar-2)])
	lpsi <- as.double(param[(npar-1):npar])
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
	x <-matrix(as.double(X),nrow=n,ncol=npar-2)
	theta <- work <- pij<-as.double(rep(0,n))
	logL <- L<-as.double(0)

		results <- .Fortran("blik2m",logL,pij,beta,lpsi,npar,x,y,theta,work,n,PACKAGE="bild")


	return(list(loglik=results[[1]],pij=results[[2]]))}

		if(trace)	cat(paste(format(parameters[length(parameters)-1], digit=3), collapse=" "), "\t")
		if(trace)	cat(paste("\t",(format(parameters[length(parameters)], digit=3)), collapse=" "), "\t")

	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	prob<-as.double(rep(0,length(y)))
	counts<-data[[3]]
	logL<-as.double(0)
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<- loglik2(param=parameters,X=X[k1:k2,],y=y[k1:k2],trace=trace)
	logL<-logL+counts[i]*z$loglik
	prob[k1:k2]<-z$pij
	k1<-k2+1
	}
		if(trace)	cat(paste("\t",(format( logL,digit=6)), collapse=" "), "\n")

return(list(nloglik=-logL, pij=prob))}

############

logL.bin2mf<- function(parameters, X, data,trace)
        logL.bin2mf.aux(parameters, X, data,trace)$nloglik

# ------------------------------------------------------------------- 

logL.bin1Ifm<- function(parameters, X, data, integrate, trace)
{
	loglik1 <- function(param, X, y,integrate,trace)
	{
	npar <-as.integer(length(param)-1)
	beta<- as.double(param[1:(npar-1)])
	bt<- as.double(param[1:(npar-1)])
	log.psi<-as.double(param[npar])
	omega<-as.double(param[npar+1])
	y[is.na(y)]<-(-1)
	y<- as.integer(y)
	n <- as.integer(length(y)) 
	x<-matrix(as.double(X),nrow=n,ncol=npar-1)
	theta<- work<-as.double(rep(0,n))
	logL <- as.double(0)

	li<-as.double(integrate$li)
	ls<-as.double(integrate$ls)
	epsabs<-as.double(integrate$epsabs)
	epsrel<-as.double(integrate$epsrel)
	limit<-as.integer(integrate$limit)
	key<-as.integer(integrate$key)

 		results <- .Fortran("integ1",logL,bt,beta,log.psi,omega,npar,
		x,y,theta,work,n,li,ls,epsabs,epsrel,key,limit,PACKAGE="bild")

	return(results[[1]])}
	
		if(trace)	cat(paste(format(parameters[length(parameters)-1], digit=4), collapse=" "), "\t")
		if(trace)	cat(paste("\t",(format(parameters[length(parameters)], digit=4)), collapse=" "), "\t")

	nparam<-length(parameters)
	omega1<-as.double(parameters[nparam])
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	mt<-as.double(rep(0,length(y)))
	logL1<-as.double(0)
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<-loglik1(param=parameters,X=X[k1:k2,], y=y[k1:k2],integrate=integrate,trace=trace)
	#logL1 gives the log-likelihood
	logL1<-logL1+counts[i]*z
	k1<-k2+1
	}
		if(trace)	cat(paste("\t",(format( logL1,digit=6)), collapse=" "), "\n")
return(nloglik=-logL1)}


######## to compute pij

logL.bin1.aux<- function(parameters, X, data, trace)
{
	loglik1 <- function(param, X, y,trace)
	{
	
	npar <-as.integer(length(param)-1)
	beta<- as.double(param[1:(npar-1)])
	bt<- as.double(param[1:(npar-1)])
	log.psi<-as.double(param[npar])
	omega<-as.double(param[npar+1])
	y<- as.integer(y)
	n <- as.integer(length(y)) 
	x<-matrix(as.double(X),nrow=n,ncol=npar-1)
	theta<- work<-pij<- prob<-as.double(rep(0,n))
	logL <- as.double(0)
	eta<-i.fit<- fit<-as.vector(npar-1)
	eta<-x%*%beta
	m<-glm(as.numeric(y)~offset(eta), family=binomial)
	bi<-coef(m)
	beta[1]<-beta[1]+bi
	y[is.na(y)]<-(-1)
	y<- as.integer(y)
	
	results <- .Fortran("mblik1",logL,prob,beta,log.psi, npar,x,y,theta,work,n,PACKAGE="bild")


	i.fit<-x%*%beta

	return(list(loglik=results[[1]],pij=results[[2]], fit=i.fit))}
	
		if(trace)	cat(paste(format(parameters[length(parameters)-1], digit=4), collapse=" "), "\t")
		if(trace)	cat(paste("\t",(format(parameters[length(parameters)], digit=4)), collapse=" "), "\t")

	nparam<-length(parameters)
	omega1<-as.double(parameters[nparam])
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	mt<-fitted<-as.double(rep(0,length(y)))
	logL1<-as.double(0)
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<-loglik1(param=parameters,X=X[k1:k2,], y=y[k1:k2],trace=trace)
	mt[k1:k2]<-z$pij
	fitted[k1:k2]<-z$fit
	k1<-k2+1
	}
		if(trace)	cat(paste("\t",(format( logL1,digit=6)), collapse=" "), "\n")

return(list(nloglik=-logL1, pij=mt, fit=fitted))}

# ------------------------------------------------------------------- 

logL.bin2Ifm<- function(parameters, X, data,integrate,trace)
{
	loglik2 <- function(param, X, y,integrate,trace)
	{
	npar <-as.integer(length(param)-1)
	beta<- as.double(param[1:(npar-2)])
	bt<- as.double(param[1:(npar-2)])
	log.psi<-as.double(param[(npar-1):npar])
	omega<-as.double(param[npar+1])
	y[is.na(y)]<-(-1)
	y<- as.integer(y)
	n <- as.integer(length(y)) 
	x<-matrix(as.double(X),nrow=n,ncol=npar-2)
	theta<- work<- as.double(rep(0,n))
	logL <- as.double(0)

	li<-as.double(integrate$li)
	ls<-as.double(integrate$ls)
	epsabs<-as.double(integrate$epsabs)
	epsrel<-as.double(integrate$epsrel)
	limit<-as.integer(integrate$limit)
	key<-as.integer(integrate$key)

 		results <- .Fortran("integ",logL,bt,beta,log.psi,omega,npar,
		x,y,theta,work,n,li,ls,epsabs,epsrel,key,limit,PACKAGE="bild")


	return(results[[1]])}

		if(trace)	cat(paste(format(parameters[length(parameters)-2], digit=4), collapse=" "), "\t")
		if(trace)	cat(paste("\t",(format(parameters[length(parameters)-1], digit=4)), collapse=" "), "\t")
		if(trace)	cat(paste("\t",(format(parameters[length(parameters)], digit=4)), collapse=" "), "\t")

	nparam<-length(parameters)
	omega1<-as.double(parameters[nparam])
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	mt<-as.double(rep(0,length(y)))
	logL1<-as.double(0)
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<-loglik2(param=parameters,X=X[k1:k2,], y=y[k1:k2],integrate=integrate,trace=trace)
	#logL1 gives the log-likelihood
	logL1<-logL1+counts[i]*z
	k1<-k2+1
	}
		if(trace)	cat(paste("\t",(format( logL1,digit=6)), collapse=" "), "\n")

return(nloglik=-logL1)}


########to compute pij

logL.bin2.aux<- function(parameters, X, data,trace)
{
	loglik2 <- function(param, X, y,trace)
	{
	npar <-as.integer(length(param)-1)
	beta<- as.double(param[1:(npar-2)])
	bt<- as.double(param[1:(npar-2)])
	lpsi<-as.double(param[(npar-1):npar])
	omega<-as.double(param[npar+1])
	y<- as.integer(y)
	n <- as.integer(length(y)) 
	x<-matrix(as.double(X),nrow=n,ncol=npar-2)
	theta<- work<- pij<-prob<-as.double(rep(0,n))
	logL <- as.double(0)
	eta<-i.fit<-fit<-as.vector(npar-2)
	eta<-x%*%beta
	m<-glm(as.numeric(y)~offset(eta), family=binomial)
	bi<-coef(m)
	beta[1]<-beta[1]+bi
	y[is.na(y)]<-(-1)
	y<- as.integer(y)

		results <- .Fortran("blik2m",logL,pij,beta,lpsi,npar,x,y,theta,work,n,PACKAGE="bild")

	i.fit<-x%*%beta

	return(list(loglik=results[[1]],pij=results[[2]],fit=i.fit ))}

		if(trace)	cat(paste(format(parameters[length(parameters)-2], digit=4), collapse=" "), "\t")
		if(trace)	cat(paste("\t",(format(parameters[length(parameters)-1], digit=4)), collapse=" "), "\t")
		if(trace)	cat(paste("\t",(format(parameters[length(parameters)], digit=4)), collapse=" "), "\t")

	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	prob<-fitted<-as.double(rep(0,length(y)))
	counts<-data[[3]]
	logL<-as.double(0)
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<- loglik2(param=parameters,X=X[k1:k2,],y=y[k1:k2],trace=trace)
	prob[k1:k2]<-z$pij
	fitted[k1:k2]<-z$fit
	k1<-k2+1
	}
		if(trace)	cat(paste("\t",(format( logL,digit=6)), collapse=" "), "\n")

return(list(nloglik=-logL, pij=prob, fit=fitted))}

########################################################################
# compute gradient loglik for binary response
#
gradlogL.bin0<- function(parameters, X,data, trace)
{
	gradient1<- function(param, X, y)
	{
	npar <- as.integer(length(param)+1)
	beta <- as.double(param[1:(npar-1)])
	log.psi <- as.double(0)
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
	theta <- work <- as.double(rep(0,n))
	gbeta <- dbeta <- dbeta1<- as.double(rep(0,npar-1))
	glpsi<- as.double(0)
	db <- matrix(as.double(0),nrow=3,ncol=npar-1)
	x <- matrix(as.double(X),nrow=n, ncol=npar-1)
	der<-as.double(rep(0,npar-1))

  		results <- .Fortran("mbgd1",gbeta,glpsi,beta,log.psi, 
             			npar,x,y,theta,work,der,db,dbeta,dbeta1,n,PACKAGE="bild")

		
	return(results[[1]])}

	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	gradlogL<-0
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<- gradient1(param=parameters, X=X[k1:k2,],y=y[k1:k2])
	gradlogL<-gradlogL+counts[i]*z
	k1<-k2+1
	}
return(-gradlogL)}	

# ------------------------------------------------------------------- 
gradlogL.bin1mf<- function(parameters, X,data, trace)
{
	gradient1<- function(param, X, y)
	{
	npar <- as.integer(length(param))
	beta <- as.double(param[1:(npar-1)])
	log.psi <- as.double(param[npar])
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
	theta <- work <- as.double(rep(0,n))
	gbeta <- dbeta <- dbeta1<- as.double(rep(0,npar-1))
	glpsi<- as.double(0)
	db <- matrix(as.double(0),nrow=3,ncol=npar-1)
	x <- matrix(as.double(X),nrow=n, ncol=npar-1)
	der<-as.double(rep(0,npar-1))

  		results <- .Fortran("mbgd1",gbeta,glpsi,beta,log.psi, 
             			npar,x,y,theta,work,der,db,dbeta,dbeta1,n,PACKAGE="bild")

		
	gradL<-c(results[[1]],results[[2]])
	return(gradL)
	}
	nparam <- as.integer(length(parameters))
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	gradlogL<-0
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<- gradient1(param=parameters, X=X[k1:k2,],y=y[k1:k2])
	gradlogL<-gradlogL+counts[i]*z
	k1<-k2+1
	}
return(-gradlogL)}	

# ------------------------------------------------------------------- 

gradlogL.bin2mf<- function(parameters, X,data, trace)
{
	gradient2 <-  function(param,X,y)
	{
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
  	npar <- as.integer(length(param))
	beta <- as.double(param[1:(npar-2)])
	lpsi <- as.double(param[(npar-1):npar])
  	theta <- work <- as.double(rep(0,n))
  	g.beta <- d.beta <- d.beta1<-d.beta2 <- der <- as.double(rep(0,npar-2))
  	g.lpsi1<-g.lpsi2 <- as.double(0)
  	db <- matrix(as.double(0),3,npar-2)
  	db1 <- matrix(as.double(0),4,npar-2)
  	db2 <- matrix(as.double(0),5,npar-2)
 	x <- matrix(as.double(X),nrow=n, ncol=npar-2)
  	
	result <- .Fortran("bgd2m",g.beta,g.lpsi1,g.lpsi2,beta,
	lpsi,npar,x,y,theta,work,d.beta,d.beta1,d.beta2,n,der,db,db1,db2,PACKAGE="bild")

	return(c(result[[1]],result[[2]],result[[3]]))
	}
	nparam <- as.integer(length(parameters))
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	gradlogL<-0
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<- gradient2(param=parameters, X=X[k1:k2,],y=y[k1:k2])
	gradlogL<-gradlogL+counts[i]*z
	k1<-k2+1
	}
return(-gradlogL)}
# ------------------------------------------------------------------- 

gradlogL.binI1fm <- function(parameters, X,data,integrate,trace)
{
	gradient1 <-  function(param,X,y,integrate)
	{
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
  	npar <- as.integer(length(param)-1)
	beta <- as.double(param[1:(npar-1)])
	bt <- as.double(param[1:(npar-1)])
	lpsi <- as.double(param[npar])
	omega<-as.double(param[npar+1])
  	theta <- work <- as.double(rep(0,n))
  	g.beta <- d.beta <- d.beta1<- der <- as.double(rep(0,npar-1))
  	g.lpsi1<-g.lpsi2 <- as.double(0)
  	d.beta <- matrix(as.double(0),3,npar-1)
	gvar<-as.double(0)
 	x <- matrix(as.double(X),nrow=n, ncol=npar-1)
  	db <- matrix(as.double(0),3,npar-1)

	li<-as.double(integrate$lig)
	ls<-as.double(integrate$lsg)
	epsabs<-as.double(integrate$epsabs)
	epsrel<-as.double(integrate$epsrel)
	limit<-as.integer(integrate$limit)
	key<-as.integer(integrate$key)


	result <- .Fortran("gint1",g.beta,g.lpsi1,gvar,bt,beta,lpsi,omega,npar,x,y,theta,work,n,
                  d.beta,d.beta1,der,db,li,ls,epsabs,epsrel,key,limit,PACKAGE="bild")


	return(c(result[[1]],result[[2]],result[[3]]))}

	loglik1 <- function(param, X, y,integrate)
	{
	npar <- as.integer(length(param)-1)
	beta <- as.double(param[1:(npar-1)])
	bt <- as.double(param[1:(npar-1)])
	lpsi <- as.double(param[npar])
	omega<-as.double(param[npar+1])
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
	x <-matrix(as.double(X),nrow=n,ncol=npar-1)
	theta <- work <-as.double(rep(0,n))
	logL <- as.double(0)

	li<-as.double(integrate$lig)
	ls<-as.double(integrate$lsg)
	epsabs<-as.double(integrate$epsabs)
	epsrel<-as.double(integrate$epsrel)
	limit<-as.integer(integrate$limit)
	key<-as.integer(integrate$key)

 		results <- .Fortran("integ1",logL,bt,beta,lpsi,omega,npar,
		x,y,theta,work,n,li,ls,epsabs,epsrel,key,limit,PACKAGE="bild")

return(results[[1]])}

	nparam <- as.integer(length(parameters)-1)
	omega1<-parameters[nparam+1]
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	dbeta<-as.double(rep(0,nparam-1))
	dlog.psi1<-0
	dvar<-0
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<-loglik1(param=parameters,X=X[k1:k2,], y=y[k1:k2],integrate=integrate)
	grad<-gradient1(param=parameters,X=X[k1:k2,], y=y[k1:k2],integrate=integrate)

	for (j in 1:(nparam-1))
	{
	dbeta[j]<-dbeta[j]+counts[i]*(grad[j]/(exp(z)*sqrt(2*pi)*exp(omega1/2)))	
	}
	dlog.psi1<-dlog.psi1+counts[i]*(grad[nparam]/(exp(z)*sqrt(2*pi)*exp(omega1/2)))		
 #using the chain rule
	dvar<-dvar+counts[i]*(grad[nparam+1]/(exp(z)*sqrt(2*pi)*exp(omega1/2)))*exp(omega1)
	k1<-k2+1
	}
gr<-c(dbeta,dlog.psi1,dvar)
 return(-gr)}

# ------------------------------------------------------------------- 

gradlogL.binI2fm <- function(parameters, X,data,integrate,trace)
{
	gradient2 <-  function(param,X,y,integrate)
	{
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
  	npar <- as.integer(length(param)-1)
	beta <- as.double(param[1:(npar-2)])
	bt <- as.double(param[1:(npar-2)])
	lpsi <- as.double(param[(npar-1):npar])
	omega<-as.double(param[npar+1])
  	theta <- work <- as.double(rep(0,n))
  	g.beta <- d.beta <- d.beta1<-d.beta2 <- der <- as.double(rep(0,npar-2))
  	g.lpsi1<-g.lpsi2 <- as.double(0)
  	d.beta <- matrix(as.double(0),3,npar-2)
	gvar<-as.double(0)
 	x <- matrix(as.double(X),nrow=n, ncol=npar-2)
  	der <- as.double(rep(0,npar-2))
  	db <- matrix(as.double(0),3,npar-2)
  	db1<- matrix(as.double(0),4,npar-2)
  	db2<- matrix(as.double(0),5,npar-2)

	li<-as.double(integrate$lig)
	ls<-as.double(integrate$lsg)
	epsabs<-as.double(integrate$epsabs)
	epsrel<-as.double(integrate$epsrel)
	limit<-as.integer(integrate$limit)
	key<-as.integer(integrate$key)

  	result <- .Fortran("gint",g.beta,g.lpsi1,g.lpsi2,gvar,
                  x,theta,work,y,lpsi,beta,bt,d.beta,d.beta1,d.beta2,
                  der,db,db1,db2,omega, npar,n,li,ls,epsabs,epsrel,key,limit,PACKAGE="bild")

	return(c(result[[1]],result[[2]],result[[3]],result[[4]]))}

	loglik2 <- function(param, X, y,integrate)
	{
	npar <- as.integer(length(param)-1)
	beta <- as.double(param[1:(npar-2)])
	bt <- as.double(param[1:(npar-2)])
	lpsi <- as.double(param[(npar-1):npar])
	omega<-as.double(param[npar+1])
	y[is.na(y)]<-(-1)
	y <- as.integer(y)
	n <- as.integer(length(y)) 
	x <-matrix(as.double(X),nrow=n,ncol=npar-2)
	theta <- work <-as.double(rep(0,n))
	logL <- as.double(0)

	li<-as.double(integrate$lig)
	ls<-as.double(integrate$lsg)
	epsabs<-as.double(integrate$epsabs)
	epsrel<-as.double(integrate$epsrel)
	limit<-as.integer(integrate$limit)
	key<-as.integer(integrate$key)

 		results <- .Fortran("integ",logL,bt,beta,lpsi,omega,npar,
		x,y,theta,work,n,li,ls,epsabs,epsrel,key,limit,PACKAGE="bild")


	return(results[[1]])}
	
	nparam <- as.integer(length(parameters)-1)
	omega1<-parameters[nparam+1]
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data[[2]]
	counts<-data[[3]]
	dbeta<-as.double(rep(0,nparam-2))
	dlog.psi1<-0
	dlog.psi2<-0
	dvar<-0
	k1<-1
	for (i in 1:n.cases)
	{
	k2<-cumti.repl[i]
	z<-loglik2(param=parameters,X=X[k1:k2,], y=y[k1:k2],integrate=integrate)
	grad<-gradient2(param=parameters,X=X[k1:k2,], y=y[k1:k2],integrate=integrate)

	for (j in 1:(nparam-2))
	{
	dbeta[j]<-dbeta[j]+counts[i]*(grad[j]/(exp(z)*sqrt(2*pi)*exp(omega1/2)))	
	}
	dlog.psi1<-dlog.psi1+counts[i]*(grad[nparam-1]/(exp(z)*sqrt(2*pi)*exp(omega1/2)))	
	dlog.psi2<-dlog.psi2+counts[i]*(grad[nparam]/(exp(z)*sqrt(2*pi)*exp(omega1/2)))	
 	#using the chain rule
	dvar<-dvar+counts[i]*(grad[nparam+1]/(exp(z)*sqrt(2*pi)*exp(omega1/2)))*exp(omega1)
	k1<-k2+1
	}
gr<-c(dbeta,dlog.psi1,dlog.psi2,dvar)
return(-gr)}

# ******************* MAIN PROGRAM *******************************
#
	call <- match.call()
#	vect.time <- F
	if(missing(data) || !is.data.frame(data))
		stop("a data.frame must be supplied")
	if(is.null(names(data)))
		stop("objects in data.frame must have a name")
	expr1 <- terms(formula, data=data)
	expr <- attr(expr1, "variables")
	var.names <- all.vars(expr)
	response <- all.vars(expr)[1]

	if(any(is.na(match(var.names, names(data)))))
		stop("Variables in formula not contained in the data.frame")

if(!missing(time)) {Time<-as.vector(data[[time]])}
if (missing(time)) { if (all (is.na(match(names(data), "time")))) stop ("time must be defined")
			else  Time<-as.vector(data$time)}
				
if(!missing(id)) {id<-as.vector(data[[id]])}
if (missing(id)){ if (all(is.na(match(names(data), "id")))) stop ("id must be defined")
		else id<-as.vector(data$id)}

# select subset if necessary
	if(!missing(subSET)) {id1 <- eval(substitute(subSET), data)
			      data<-subset(data, id1)}

	if(!missing(aggregate)) {f.name <- deparse(substitute(aggregate))
  			f.value <- as.factor(data[[f.name]])}
	

	if(missing(aggregate)) {f.value<-as.factor(0)}

#returns data of a subset
subset.data<-data

	ti.repl<-as.vector(0)
	i1<-1
	i2<-1
	for (i in 1:(length(data[[response]])-1))
	{ 
		if (id[i]==id[i+1])
		{ 	i2<-i2+1
			ti.repl[i1]<-i2}
		else {	ti.repl[i1]<-i2
			i1<-i1+1
			i2<-1}
	}

	n.cases <- length(ti.repl)
	n.tot<-cumsum(ti.repl)[n.cases]
	n.time<-length(unique(Time))
	ni.cases <- length(ti.repl)
	pos.ind<-cumsum(ti.repl)

	counts<-as.vector(0)

	if(is.na(match("counts", names(data))))
			counts <- data$counts <- rep(1, n.cases)
		else   {for (i in 1:n.cases)
			{counts[i]<-data$counts[pos.ind[i]]}
		}

	final.data <- data
	var.names <- c(var.names, "counts")
	data <- data[var.names]
	n.var <- length(data)
	Y.resp <- as.vector(data[[response]])
	if((all(Y.resp[!is.na(Y.resp)] == 1 | Y.resp[!is.na(Y.resp)] == 0)) == FALSE)
		stop("Unfeasible values of response variable: must be 0,1,NA")

# ********** creation of individual profile according to NA patterns *******************

	data2<-data
	final.data <- na.discrete.replace(frame=data,  n.times=n.time, ti.repl=ti.repl)
	
if (length(final.data$bad.ind)>=1)
	{data<-final.data$data
	data<-data[-final.data$badlines,]
	data2<-data2[-final.data$badlines,]
	counts<-counts[-final.data$bad.ind]
	ti.repl<-ti.repl[-final.data$bad.ind]
	n.cases <- length(ti.repl)
	n.tot<-cumsum(ti.repl)[n.cases]}
else {data<-final.data$data}


# ********** design matrices creation *******************
	# define a plausible starting point for the optimizer if not given
		data1 <- na.omit(data2)
		data1.resp <- data1[, response]
		data1.resp <- log((data1.resp + 0.5)/(1.5 - data1.resp))
		data1[, c(response)] <- data1.resp

		if (dependence=="MC1")	 init<-0
		else  if (dependence=="MC1R")  init<-c(0,0)
		else  if (dependence=="MC2")    init<-c(0,0)
		else  if (dependence=="MC2R")   init<-c(0,0,0)

		if(is.null(start) && dependence!="ind")
			start <- c(lm(formula, data1, weights = counts)$coefficients, init)
		else if(!is.null(start) && dependence!="ind")
			start <- c(lm(formula, data1, weights = counts)$coefficients, start)
		else if (dependence=="ind") start <- c(lm(formula, data1, weights = counts)$coefficients)

if (any(is.na(start))) stop("starting values produced by lm contains NA")

	id.not.na<-rep(TRUE,n.tot)
	X <- model.matrix(expr1, data, contrasts)
	names.output <- dimnames(X)[[2]]
	sum.ti <- sum(ti.repl)
	data <- list(ti.repl, data[[response]], counts)
	data2<-list(ti.repl, data2[[response]], counts)
	p <- dim(X)[2] + 1
	pr<-prob<-F.aux<-as.double(rep(0,length(data[[2]])))

	if (dependence=="ind")
	{	if(trace)	cat("\t log.likelihood\n")
	temp <-optim(par= start, fn = logL.bin0,  gr= gradlogL.bin0, method=method, 
	data = data, X = X, trace=trace,control=control)}
	else if (dependence=="MC1")
	{	if(trace)	cat("\n log.psi1\t log.likelihood\n")
	temp <-optim(par= start, fn = logL.bin1mf ,  gr= gradlogL.bin1mf, method=method, 
	data = data, X = X, trace=trace,control=control)}
	else  if (dependence=="MC1R")
	{	if(trace)	cat("\n log.psi1\t omega\t log.likelihood\n")
	temp <- optim(par = start, fn =logL.bin1Ifm,gr = gradlogL.binI1fm,  method=method, 
	data = data, X = X, integrate=integrate, trace=trace,control=control)}
	else  if (dependence=="MC2")
	{	if(trace)	cat("\n log.psi1\t log.psi2\t log.likelihood\n")
	temp <- optim(par= start, fn =logL.bin2mf, gr =gradlogL.bin2mf, method=method,
	 data = data, X = X, trace = trace,control=control)}
	else  if (dependence=="MC2R")
	{	if(trace)	cat("\n log.psi1\t log.psi2\t omega\t log.likelihood\n")
	temp <- optim(par = start, fn= logL.bin2Ifm, gr = gradlogL.binI2fm,method=method, 
	data = data, X = X,integrate=integrate, trace = trace,control=control)}

	coefficients <- temp$par
	log.lik <-  - temp$value
	if (trace) 
	cat("Convergence reached. Computing the information matrix now\n")

	 if (dependence=="ind")
	Info <- num.info(coefficients, "gradlogL.bin0", X, data)
	else if (dependence=="MC1")
	Info <- num.info(coefficients, "gradlogL.bin1mf", X, data)
	else  if (dependence=="MC1R")
	Info <- num.infoI(coefficients, "gradlogL.binI1fm", X, data, integrate=integrate)
	else  if (dependence=="MC2")
	Info <- num.info(coefficients, "gradlogL.bin2mf", X, data)
	else  if (dependence=="MC2R")
	Info <- num.infoI(coefficients, "gradlogL.binI2fm", X, data, integrate=integrate)

	se <- matrix(sqrt(diag(solve(Info))), ncol = 1)
	coefficients <- matrix(coefficients, ncol = 1)
	if (dependence=="ind")
	dimnames(coefficients) <- dimnames(se) <- list(names.output, " ")
	else if (dependence=="MC1")
	dimnames(coefficients) <- dimnames(se) <- list(c(names.output, "log.psi1"), " ")
	else  if (dependence=="MC1R")
	dimnames(coefficients) <- dimnames(se) <-  list(c(names.output, "log.psi1","omega"), " ")
	else if (dependence=="MC2")
	dimnames(coefficients) <- dimnames(se) <- list(c(names.output, "log.psi1","log.psi2"), " ")
	else if (dependence=="MC2R")
	dimnames(coefficients) <- dimnames(se) <- list(c(names.output, "log.psi1","log.psi2","omega" ), " ")

	covariance <- solve(Info)
	cr<- diag(1/sqrt(diag(covariance)))
	correlation <- cr %*% covariance %*% cr

	if (dependence=="ind")
	{dimnames(covariance) <- list(names.output, names.output)
	dimnames(correlation) <- list(names.output, names.output)}
	else if (dependence=="MC1")
	{dimnames(covariance) <- list(c(names.output, "log.psi1"), c(names.output, "log.psi1"))
	dimnames(correlation) <- list(c(names.output, "log.psi1"), c(names.output, "log.psi1"))}
	else  if (dependence=="MC1R")
	{dimnames(covariance) <- list(c(names.output, "log.psi1","omega"), c(names.output, 	"log.psi1","omega"))
	dimnames(correlation) <- list(c(names.output, "log.psi1","omega"), c(names.output, 	"log.psi1","omega"))}
	else if (dependence=="MC2")
	{dimnames(covariance) <- list(c(names.output, "log.psi1","log.psi2"), c(names.output,"log.psi1","log.psi2"))
	dimnames(correlation) <- list(c(names.output, "log.psi1","log.psi2"), c(names.output,"log.psi1","log.psi2"))}
	else if (dependence=="MC2R")
	{dimnames(covariance) <- list(c(names.output, "log.psi1","log.psi2","omega"), c(names.output, 		"log.psi1","log.psi2","omega"))
	dimnames(correlation) <- list(c(names.output, "log.psi1","log.psi2","omega"), c(names.output, 		"log.psi1","log.psi2","omega"))}

#### To compute estimated transition probabilities
 	if (dependence=="ind")
	{pr <- logL.bin0.aux (parameters=coefficients, X=X, data=data, trace=trace)
	prob<-pr$pij}
	else if (dependence=="MC1")
	{pr <- logL.bin1mf.aux (parameters=coefficients, X=X, data=data, trace=trace)
	prob<-pr$pij}
	else  if (dependence=="MC1R")
	{pr <- logL.bin1.aux (parameters=coefficients, X=X, data=data2, trace=trace)
	prob<-pr$pij}
	else  if (dependence=="MC2")
	{pr <- logL.bin2mf.aux(parameters=coefficients, X=X, data=data, trace=trace)
	prob<-pr$pij}
	else  if (dependence=="MC2R")
	{pr <- logL.bin2.aux(parameters=coefficients, X=X, data=data2, trace=trace)
	prob<-pr$pij}

#### To compute fitted values 
	Fitted <- rep(NA, n.tot)
 	if (dependence=="ind"|dependence=="MC1"|dependence=="MC2")
	{Fitted[id.not.na] <- X %*% coefficients[1:(p - 1)]}
	else  if (dependence=="MC1R")
	{F.aux<- logL.bin1.aux (parameters=coefficients, X=X, data=data2, trace=trace)
	Fitted<-F.aux$fit}
	else  if (dependence=="MC2R")
	{F.aux <- logL.bin2.aux(parameters=coefficients, X=X, data=data2, trace=trace)
	Fitted<-F.aux$fit}

	ncoef<-length(coefficients)
	aic<-(2*temp$value+2*ncoef)
	ti.repl<-data[[1]]
	cumti.repl<-cumsum(ti.repl)
	n.cases<- length(ti.repl)
	y<-data2[[2]]
	counts<-data[[3]]
	residuals<-wts<-freq<-as.double(rep(0,length(y)))
	ncounts<-sum(counts[1:n.cases])
	residuals<-(y-prob)/sqrt(prob*(1-prob))

	Fitted <- plogis(Fitted)
	Fitted[is.na(y)] <- NA
	res<-as.double(rep(0,n.time))
	
	for (j in 1: n.time)
	{ k2<-0
	soma.n<-soma.d<-0
     	for ( i in 1:n.cases)
	{ 
	k3<-k2+j
 	if(!is.na(y[k3]))
     		{
      		if(!is.na(prob[k3]))
      			{
    			soma.n<-soma.n+(y[k3]-prob[k3])
  			soma.d<-soma.d+(prob[k3]*(1-prob[k3]))
  			k2<-cumti.repl[i]
      			}
     		}
  	}
	res[j]<-soma.n/sqrt(soma.d)
	}

	y.matrix<-matrix(y,ncol=n.time,byrow=TRUE)
	y.av<-apply(y.matrix,2,mean,na.rm=TRUE)
	Fitted.matrix<-matrix(Fitted,ncol=n.time,byrow=TRUE)
	Fitted.av<-apply(Fitted.matrix,2,mean,na.rm=TRUE)
	prob.matrix<-matrix(prob,ncol=n.time,byrow=TRUE)


bl<- new("bild", coefficients = coefficients, se = se, covariance =covariance, correlation=correlation, 
	log.likelihood=- temp$value, message = temp$convergence, n.cases=n.cases, ni.cases=ni.cases, aic=aic,    
      	residuals=residuals, s.residuals=res, ind.probability=prob, prob.matrix=prob.matrix, Fitted=Fitted, 
	Fitted.av=Fitted.av, Time=Time, model.matrix=X, y.matrix=y.matrix, subset.data=subset.data,
	y.av=y.av, f.value=f.value, call=call)

}


back to top