https://github.com/jsollari/popABC
Raw File
Tip revision: 3ebe7b76043299bc4b6a541b5be0d7012895ca1c authored by Joao Sollari Lopes on 21 November 2017, 15:04:40 UTC
remove binaries folder
Tip revision: 3ebe7b7
make_pd2.r
makepd4 <- function(target,x,sumstat,tol,gwt,rejmethod=T,transf="none",bb=c(0,0))
{
# target is the set of target summary stats
# x is the parameter vector (long vector of numbers from the simulations) and is the dependent variable for the regression
# sumstat is an array of simulated summary stats (i.e. independent variables).
# NBB this function originally used lm() and assumed 4 summary stats, and I edited by hand for other numbers. 
# NBB I've now modified it using lsfit() (following Shola Ajayi) so that it will take an arbitrary number of summary stats.
# tol is the required proportion of points nearest the target values
# gwt is a vector with T/F weights, weighting out any 'bad' values (determined by the simulation program - i.e. nan's etc)
# if rejmethod=T it doesn't bother with the regression, and just does rejection.


# If rejmethod=F it returns a list with the following components:-

# $x regression adjusted values
# $vals - unadjusted values in rejection region (i.e. normal rejection)
# $wt - the regression weight (i.e. the Epanechnikov weight)
# $ss - the sumstats corresponding to these points
# $predmean - estimate of the posterior mean
# $fv - the fitted value from the regression

if(sum(transf == c("none","log","logit")) == 0){
	stop("transf must be none, log, or logit")
	
}
if(transf=="logit"){
	if(bb[1] >= bb[2]){
		stop("bounds wrong for logit")
		
	}
}

if(missing(gwt))gwt <- rep(T,length(sumstat[,1]))

nss <- length(sumstat[1,])


# scale everything 

    scaled.sumstat <- sumstat
    
    for(j in 1:nss){
    
    	scaled.sumstat[,j] <- normalise(sumstat[,j],sumstat[,j][gwt])
    }
    target.s <- target
    
    for(j in 1:nss){
    
    	target.s[j] <- normalise(target[j],sumstat[,j][gwt])
    }
    
# calc euclidean distance

    sum1 <- 0
    for(j in 1:nss){
    	sum1 <- sum1 + (scaled.sumstat[,j]-target.s[j])^2
   }
   dst <- sqrt(sum1)
# includes the effect of gwt in the tolerance
    dst[!gwt] <- floor(max(dst[gwt])+10)
    

# wt1 defines the region we're interested in 
    abstol <- quantile(dst,tol)
    wt1 <- dst < abstol
    
    if(transf == "log"){
    	if(min(x) <= 0){
    		print("log transform: val out of bounds - correcting")
    		x.tmp <- ifelse(x <= 0,max(x),x)
    		x.tmp.min <- min(x.tmp)
    		x <- ifelse(x <= 0, x.tmp.min,x)
    	}
    	x <- log(x)
    }
    else if(transf == "logit"){
    	if(min(x) <= bb[1]){
    		x.tmp <- ifelse(x <= bb[1],max(x),x)
    		x.tmp.min <- min(x.tmp)
    		x <- ifelse(x <= bb[1], x.tmp.min,x)
    		
    	}
    	if(max(x) >= bb[2]){
    		x.tmp <- ifelse(x >= bb[2],min(x),x)
    		x.tmp.max <- max(x.tmp)
    		x <- ifelse(x >= bb[2], x.tmp.max,x)
    		
    	}
    	x <- (x-bb[1])/(bb[2]-bb[1])
    	x <- log(x/(1-x))
    }
    
    if(rejmethod){
        l1 <- list(x=x[wt1],wt=0)
    }
    else{
        regwt <- 1-dst[wt1]^2/abstol^2
        
        fit1 <- lsfit(scaled.sumstat[wt1,],x[wt1],wt=regwt)
        
        predmean <- fit1$coeff %*% c(1,target.s)
        
        l1 <- list(x=fit1$residuals+predmean,vals=x[wt1],wt=regwt,ss=sumstat[wt1,],predmean=predmean,fv = x[wt1]-fit1$residuals,transf=transf)
        
    }
    if(transf == "log"){
    	l1$x <- exp(l1$x)
    	l1$vals <- exp(l1$vals)
    }
    else if(transf == "logit"){
    	l1$x <- exp(l1$x)/(1+exp(l1$x))
    	l1$x <- l1$x*(bb[2]-bb[1])+bb[1]
    	l1$vals <- exp(l1$vals)/(1+exp(l1$vals))
    	l1$vals <- l1$vals*(bb[2]-bb[1])+bb[1]
    }
    
    l1
}


normalise <- function(x,y){

if(var(y) == 0)return (x - mean(y))
(x-(mean(y)))/sqrt(var(y))
}


back to top