https://github.com/jsollari/popABC
Raw File
Tip revision: 3ebe7b76043299bc4b6a541b5be0d7012895ca1c authored by Joao Sollari Lopes on 21 November 2017, 15:04:40 UTC
remove binaries folder
Tip revision: 3ebe7b7
loc2plot_d.r
# This plots countours for bivariate density 
#x = random variable 1
#y = random variable 2
#cprob = vector of hpd values for contours; depending on how many values in cprob 
#        determines the number of contour lines
#alpha = the smoothing parameter for locfit; small numbers, more jaggedy; large numbers
#          more smooth.
#xlim = the support limits of the density given as (xmin,ymin,xmax,ymax)
#gxlim = the limits of the density that are actually plotted.
#maxk = some number, the bigger it is the less likely locfit is to complain 
#       (but makes it slower)

loc2plot <- function(x,y,cprob=0.5,alpha=0.5,xlim,gxlim,maxk,...)
{
	sc1 <- sqrt(var(x))
	sc2 <- sqrt(var(y))
	if(missing(maxk)) maxk <- 100
	if(missing(xlim)) fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	maxk=maxk,mint=100,cut=0.8,maxit=100)
	else fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	xlim=xlim,maxk=maxk,mint=100,cut=0.8,maxit=100)
	lev <- sort(fitted(fit))[floor(cprob*length(x))]
	plot(fit,lev=lev,m=100,label=paste(as.character(100*(1-cprob)),"%",sep=""),
	xlim=gxlim,...)
}


#like above, but allows for the points to be weighted - i.e. for output of ABC regression
loc2plotw <- function(x,y,cprob=0.5,alpha=0.5,xlim,gxlim,wt,maxk,...)
{
	sc1 <- sqrt(var(x))
	sc2 <- sqrt(var(y))
	if(missing(maxk)) maxk <- 100
	if(missing(xlim)) fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	maxk=maxk,mint=100,cut=0.8,maxit=100,weight=wt)
	else fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	xlim=xlim,maxk=maxk,mint=100,cut=0.8,maxit=100,weight=wt)
	lev <- sort(fitted(fit))[floor(cprob*length(x))]
	plot(fit,lev=lev,m=100,label=paste(as.character(100*(1-cprob)),"%",sep=""),
	xlim=gxlim,...)
}

#gives the HPD value for an observation px,py in a density constructed from x, y.
gethpdprob2 <- function(x,y,px,py,alpha=0.5,xlim,gxlim,maxk,...)
{
	sc1 <- sqrt(var(x))
	sc2 <- sqrt(var(y))
	if(missing(maxk)) maxk <- 100
	if(missing(xlim)) fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	maxk=maxk,mint=100,cut=0.8,maxit=100)
	else fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	xlim=xlim,maxk=maxk,mint=100,cut=0.8,maxit=100)
#	d1 <- (x-px)^2+(y-py)^2
#	best <- d1 == min(d1)
#	lev <- mean(fitted(fit)[best])
	lev <- predict(fit,list(px,py))
	slev <- sort(fitted(fit))
	indic <- slev <= lev
	sum(indic)/length(x)
}
	
	
# finds the mode for a bivariate density

loc2mode <- function(x,y,alpha=0.5,xlim,maxk,...)
{
	sc1 <- sqrt(var(x))
	sc2 <- sqrt(var(y))
	if(missing(maxk)) maxk <- 100
	if(missing(xlim)) fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	maxk=maxk,mint=100,cut=0.8,maxit=100)
	else fit <- locfit(~x+y,alpha=alpha,scale=c(sc1,sc2),
	xlim=xlim,maxk=maxk,mint=100,cut=0.8,maxit=100)
	tt <- max(fitted(fit))
	wt <- fitted(fit) == tt
	c(x[wt][1],y[wt][1])
}


# this works for univariate data; gives a vector with 
# mode (global), hpd1_low, hpd1_high, hpd2_low, hpd2_hi, etc
#The reason for multiple hpd limits is if you have multimodal data. 
#prob = prob *outside* the limit; i.e for a normal distribution 0.05 is expected to give
#     the 0.025 and 0.975 quantiles.
# this won't work for weighted data, use loc1statsx instead.
# xlim is optional - use it to define the support of the density.
loc1stats <- function(x,prob,alpha=0.5,xlim,...) 
{
	if(missing(xlim)){
		fit <- locfit(~x,alpha=alpha)
	}
	else {
		fit <- locfit(~x,alpha=alpha,xlim=xlim)
	}
	fx <- fitted(fit)
	x.modef <- max(fx)
	x.mode <- x[fx == x.modef]
	if(!missing(xlim)){
		if(predict(fit,xlim[1]) > x.modef){
			x.modef <- predict(fit,xlim[1])
			x.mode <- xlim[1]
		}
		if(predict(fit,xlim[2]) > x.modef){
			x.modef <- predict(fit,xlim[2])
			x.mode <- xlim[2]
		}
	}
		
	if(length(x.mode)>1)x.mode <- x.mode[1]
	lev <- sort(fx)[floor(prob*length(x))]
#	print("log difference from max is ")
#	print(log(x.modef)-log(lev))
	l1 <- list()
	l1[[1]] <- x.mode
	indx <- order(x)
	ii <- 2
	flip <- TRUE
	for(j in 2:length(x)){
		if(flip && fx[indx[j]] > lev){
			l1[[ii]] <- x[indx[j-1]]
			if(j==2 && !missing(xlim)){
				if(predict(fit,xlim[1]) >= lev)l1[[ii]] <- xlim[1]
			}
			flip <- FALSE
			ii <- ii+1
		}
		else if(!flip && fx[indx[j]] < lev){
			l1[[ii]] <- x[indx[j]]
			flip <- TRUE
			ii <- ii+1
		}
		if(!flip && !missing(xlim) && j == length(x)){
			l1[[ii]] <- xlim[2]
			flip <- TRUE
		}
	}
	if(!flip)stop("HPD interval not closed")
	as.numeric(l1)
}

#this does the hpd calculation in a different way. Should give similar answers
#to loc1stats. It is best to put xlim in directly. wt is a vector of weights and is optional.
#numpoint is the number of points to do interpolation - the more the better
loc1statsx <- function(x,prob,alpha=0.5,xlim,wt,numpoint=10000,...) 
{
	if(missing(xlim)){
		if(min(x) < 0)x.min <- 1.1*min(x)
		else x.min <- min(x)*0.9
		if(max(x) < 0)x.max <- 0.9*max(x)
		else x.max <- 1.1*max(x)
		print(paste("putting in these xlimits from the data:",x.min,x.max))
		xlim <- c(x.min,x.max)
	}

	if(missing(wt))fit <- locfit(~x,alpha=alpha,xlim=xlim)
	else fit <- locfit(~x,alpha=alpha,xlim=xlim,weight=wt)
	xx <- seq(xlim[1],xlim[2],len=numpoint)
	yy <- predict(fit,xx)
	sum1 <- sum(yy)
	x.modef <- max(yy)
	x.mode <- xx[yy == x.modef]
	if(length(x.mode)>1)x.mode <- x.mode[1]
	
	yy2 <- sort(yy)
	pval <- 0
	for(j in 1:numpoint){
		pval <- pval+yy2[j]/sum1
		if(pval > prob)break
	}
	lev <- yy2[j]
#	print("log difference from max is ")
#	print(log(x.modef)-log(lev))
	l1 <- list()
	l1[[1]] <- x.mode
	ii <- 2
	flip <- TRUE
	for(j in 2:length(xx)){
		if(flip && yy[j] > lev){
			l1[[ii]] <- xx[j-1]
			flip <- FALSE
			ii <- ii+1
		}
		else if(!flip && yy[j] < lev){
			l1[[ii]] <- xx[j]
			flip <- TRUE
			ii <- ii+1
		}
		if(!flip &&  j == length(xx)){
			l1[[ii]] <- xx[j]
			flip <- TRUE
		}
	}
	if(!flip)stop("HPD interval not closed")
	as.numeric(l1)

}

#gives the HPD value for an observation px,in a univariate density constructed from x.
#uses same method as loc1statsx, so need to specify xlim. wt is optional weight.
#numpoint is the number of points to do interpolation - the more the better
gethpdprob1 <- function(x,px,alpha=0.5,xlim,wt,numpoint=10000,...)
{

	if(missing(wt))fit <- locfit(~x,alpha=alpha,xlim=xlim)
	else fit <- locfit(~x,alpha=alpha,xlim=xlim,weight=wt)
	xx <- seq(xlim[1],xlim[2],len=numpoint)
	yy <- predict(fit,xx)
	lev <- predict(fit,px)
	sum1 <- sum(yy)

	indic <- yy <= lev
	sum(yy[indic])/sum1
}


#given a function described by x,y points, this works like loc1stats.
#uses spline interpolation between points.
#numpoint is the number of points to do interpolation - the more the better
linestats <- function(x,y,prob,numpoint=10000)
{
	n <- length(x)
	minval <- x[1]
	maxval <- x[n]
	# This bit just guarantees that irrespective of what is 
	# in x and y, we have numpoint *evenly* spaced (interpolated) points between
	# max and min
	res1 <- spline(x,y,numpoint,xmin=minval,xmax=maxval)
	xx <- res1$x
	yy <- res1$y
	sum1 <- sum(yy)
	x.modef <- max(yy)
	x.mode <- xx[yy == x.modef]
	if(length(x.mode)>1)x.mode <- x.mode[1]
	
	yy2 <- sort(yy)
	pval <- 0
	for(j in 1:numpoint){
		pval <- pval+yy2[j]/sum1
		if(pval > prob)break
	}
	lev <- yy2[j]
#	print("log difference from max is ")
#	print(log(x.modef)-log(lev))
	l1 <- list()
	l1[[1]] <- x.mode
	ii <- 2
	flip <- TRUE
	for(j in 2:length(xx)){
		if(flip && yy[j] > lev){
			l1[[ii]] <- xx[j-1]
			flip <- FALSE
			ii <- ii+1
		}
		else if(!flip && yy[j] < lev){
			l1[[ii]] <- xx[j]
			flip <- TRUE
			ii <- ii+1
		}
		if(!flip &&  j == length(xx)){
			l1[[ii]] <- xx[j]
			flip <- TRUE
		}
	}
	if(!flip)stop("HPD interval not closed")
	as.numeric(l1)
}

back to top