Revision 49cb69c5a18fdb262964fbfeb47ab2099eb32c5c authored by Wesley Tansey on 03 May 2018, 19:46:59 UTC, committed by Wesley Tansey on 03 May 2018, 19:46:59 UTC
tryADMM_cholcache.R
library(genlasso)
library(FDRreg)
library(locfdr)
source("ADMMutils.R")
source("ADMM_cholcache.R")
n = 128
d = n^2
D = genlasso::getD2dSparse(n,n) # appears to fastcount on rows
# Construct a true blocky 2D image
beta_true = matrix(-10, nrow=n, ncol=n)
block1_i = 10:25
block1_j = 10:25
beta_true[block1_i, block1_j] = 0
block2_i = 40:50
block2_j = 50:60
beta_true[block2_i, block2_j] = 2
# Plot the truth
mybreaks = seq(0,1,length=21)
mycols = grey(seq(1,0,length=20)^0.5)
filled.contour(1:n, 1:n, ilogit(beta_true), levels=mybreaks, col=mycols)
beta_true_vec = as.numeric(beta_true)
w_true = ilogit(beta_true_vec)
gamma_true = rbinom(d, 1, w_true)
z = rnorm(d)
z[gamma_true == 1] = rnorm(sum(gamma_true), 0, 3)
image(1:128, 1:128, matrix(z, nrow=n, ncol=n), col=mycols, xlab='', ylab='')
# Pre-compute a sparse Cholesky factorization: L^t L = D^t D + I
# Use a fill-reducing permutation to keep L as sparse as possible
chol_factor = Matrix::Cholesky(Matrix::crossprod(D) + Matrix::Diagonal(d), perm=TRUE, super=FALSE)
#image(chol_factor)
# Quick timing test
arg = runif(ncol(chol_factor), -1,1)
system.time(replicate(100, (Matrix::solve(chol_factor, arg))))
# EM
beta_hat = rep(0,d)
f0 = dnorm(z,0,1)
f1 = dnorm(z,0,3)
#####
#### Russ data example
#####
# Load in a single slice from Russ Poldrack's data
Z_scores = read.csv("../data/zscores_russdata_hslice37.csv")
Z_scores = as.matrix(Z_scores)
par(mar=c(4,4,3,1))
# Color scale
mybreaks = c(seq(0,5, length=21), 20)
mycols = c(grey(seq(1,.3,length=20)^0.5), 'red')
image(1:nrow(Z_scores), 1:ncol(Z_scores), abs(Z_scores), breaks=mybreaks, col=mycols,
main="Raw z scores from a single horizontal section", xlab='x', ylab='y',
cex.main=0.9,
xlim=c(20,110), ylim=c(0, 105), las=1)
# Very high z scores
image(1:nrow(Z_scores), 1:ncol(Z_scores), 0+{abs(Z_scores) > 8}, breaks=c(-0.5,0.5,1.5), col=c('white', 'black'),
main="Findings under FDR control", xlab='x', ylab='y',
cex.main=0.8,
xlim=c(20,110), ylim=c(0, 105), las=1)
# Very high z scores
image(1:nrow(Z_scores), 1:ncol(Z_scores), 0+{abs(Z_scores) > 8}, breaks=c(-0.5,0.5,1.5), col=c('white', 'black'),
main="Findings under FDR control", xlab='x', ylab='y',
cex.main=0.8,
xlim=c(20,110), ylim=c(0, 105), las=1)
x_length = 128
y_length = 128
xy_grid = expand.grid(1:x_length, 1:y_length)
z_full = as.numeric(Z_scores)
brain_area = which(z_full != 0)
z = z_full[brain_area]
d = length(brain_area)
# Sanity check on the area we think is the brain
plot(xy_grid[brain_area,], pch=15, cex=0.6)
# Get the oriented incidence matrix for the retained nodes
D = genlasso::getD2dSparse(x_length,y_length) # appears to fastcount on rows
D = D[,brain_area]
scrub_edges = which(rowSums(abs(D)) != 2)
D = D[-scrub_edges,]
chol_factor = Matrix::Cholesky(Matrix::crossprod(D) + Matrix::Diagonal(d), perm=TRUE, super=FALSE)
# Fit empirical null and nonparametric alternative
# e1 = efron(z, nulltype='empirical')
e2 = locfdr(z)
mu0 = e2$fp0[3,1]
sig0 = e2$fp0[3,2]
pr1 = prfdr(z, mu0=mu0, sig0=sig0)
f0 = pr1$f0_z
f1 = pr1$f1_z
par(mar=c(2,4,3,1))
hist(z, 200, prob=TRUE, axes=FALSE,
main='z scores from fMRI experiment', xlab='',
col='lightgrey', border='grey', xlim=c(-6,6), las=1)
curve(0.745231088*dnorm(x), add=TRUE, lty='dotted')
curve(0.955349473*dnorm(x,mu0,sig0), add=TRUE)
legend('topright', legend=c('Theoretical Null: 0.75*N(0, 1)', 'Empirical Null: 0.96*N(-0.1, 1.3)'),
cex=0.6, lty=c('dotted', 'solid'), bty='n')
axis(2, las=1, tick=FALSE)
axis(1, at=-5:5, las=1, tick=FALSE, line=-1)
mtext('z', side=1, line=1)
lines(pr1$x_grid, pr1$f1_grid, lty='dashed', col='grey')
# EM
beta_hat = rep(0,d)
# Initialization
fl0 = list(x = rep(mean(z), d), # likelihood term
z = rep(0, d), # slack variable for likelihood
r = rep(0, nrow(D)), # penalty term
s = rep(0, nrow(D)), # slack variable for penalty
u_dual = rep(0,length(z)), # scaled dual variable for constraint x = z
t_dual = rep(0,nrow(D))) # scaled dual variable for constraint r = s
lambda = .38
rel_tol = 1e-6
travel = 1
prior_prob = ilogit(beta_hat)
old_objective = sum(-log(prior_prob*f1 + (1-prior_prob)*f0)) + lambda * sum(abs(fl0$r))
converged = FALSE
while(!converged) {
# E step
m1 = prior_prob*f1
m0 = (1-prior_prob)*f0
post_prob = m1/(m1+m0)
# Partial M step: one ADMM iteration, analogous to a single Newton iteration
weights = prior_prob*(1-prior_prob)
y = beta_hat - (prior_prob - post_prob)/weights
fl0 = fit_graphfusedlasso_cholcache(y, lambda=lambda, D=D, chol_factor=chol_factor, weights=weights,
initial_values = fl0, rel_tol = rel_tol, alpha=1.8, adaptive=FALSE)
beta_hat = fl0$x
prior_prob = ilogit(beta_hat)
# Check relative convergence
new_objective = sum(-log(prior_prob*f1 + (1-prior_prob)*f0)) + lambda * sum(abs(fl0$r))
travel = abs(new_objective - old_objective)
old_objective = new_objective
converged = {travel/(old_objective + rel_tol) < rel_tol}
}
Pi_smoothed = matrix(0, nrow=x_length, ncol=y_length)
Pi_smoothed[brain_area] = prior_prob
Z_empirical = (abs(Z_scores-mu0))/sig0
Z_empirical[Z_scores == 0] = 0
par(mfrow=c(2,2), mar=c(1,1,3,1))
# Color scale
mybreaks = c(seq(0,5, length=21), 20)
mycols = c(grey(seq(0.98,.1,length=21)^0.5))
image(1:nrow(Z_scores), 1:ncol(Z_scores), Z_empirical, breaks=mybreaks, col=mycols,
main="Raw z scores from a single horizontal section", xlab='x', ylab='y',
cex.main=0.8, axes=FALSE,
xlim=c(110,20), ylim=c(0, 105), las=1)
# Compare with BH
is_finding_BH = BenjaminiHochberg(z, 0.05)
findings_matrixBH = matrix(0, nrow=x_length, ncol=y_length)
findings_matrixBH[brain_area[which(is_finding_BH==1)]] = 1
image(1:nrow(Z_scores), 1:ncol(Z_scores), findings_matrixBH, breaks=c(-0.5,0.5,1.5), col=c('white', 'black'),
main="Findings using the Benjamini-Hochberg method", xlab='x', ylab='y',
cex.main=0.8, axes=FALSE,
xlim=c(110,20), ylim=c(0, 105), las=1)
mybreaks = c(0, seq(0.3,0.75,length=19), 1)
n_levels = 10
mybreaks = c(0,seq(1e-5,1, length=n_levels))
mycols = grey(c(1,seq(0.97,0.1,length=n_levels-1)))
image(1:nrow(Z_scores), 1:ncol(Z_scores), Pi_smoothed, breaks=mybreaks, col=mycols,
main="Estimated local fraction of signals", xlab='x', ylab='y',
cex.main=0.8, axes=FALSE,
xlim=c(110,20), ylim=c(0, 105), las=1)
local_fdr = (1-prior_prob)*pr1$f0_z
local_fdr = local_fdr / {local_fdr + prior_prob*pr1$f1_z}
is_finding = {getFDR(1-local_fdr)$FDR < 0.05}
findings_matrix = matrix(0, nrow=x_length, ncol=y_length)
findings_matrix[brain_area[is_finding]] = 1
image(1:nrow(Z_scores), 1:ncol(Z_scores), findings_matrix, breaks=c(-0.5,0.5,1.5), col=c('white', 'black'),
main="Findings using FDR smoothing", xlab='x', ylab='y',
cex.main=0.8, axes=FALSE,
xlim=c(110,20), ylim=c(0, 105), las=1)
write.csv(Z_empirical, file='Z_empiricalnull.csv', row.names=FALSE)
write.csv(Pi_smoothed, file='Pi_smoothed.csv', row.names=FALSE)
write.csv(findings_matrix, file='findings_matrix.csv', row.names=FALSE)
write.csv(findings_matrixBH, file='findings_matrix_BH.csv', row.names=FALSE)
par(mar=c(3,3,3,1))
filled.contour(1:n, 1:n, ilogit(beta_true), levels=mybreaks, col=mycols, main="True prior probability of signal")
zlevels = c(seq(0,2,length=25), max(z))
zcolors = c(grey(seq(1,0,length=24)^0.75), 'red')
image(1:n, 1:n, matrix(abs(z), nrow=n, ncol=n),
breaks=zlevels, col=zcolors,
main="Observed z score")
filled.contour(1:n, 1:n, matrix(prior_prob, nrow=n, ncol=n), levels=mybreaks, col=mycols, main="Estimated prior probability")
filled.contour(1:n, 1:n, matrix(beta_hat, nrow=n, ncol=n), col=mycols, main="Estimated prior probability")
Computing file changes ...