https://github.com/cyctbdbw/Shape-Asymmetry-Signature
cca_sig_test.R
## Script name: cca_sig_test
##
## Purpose of script: Non-parametric significance testing between a composite variable (e.g. a variate from a CCA or PLS analysis)
## and a matrix of the original metrics used to compute the composite variable (e.g. SAS_mean). It also supports an optional
## grouping argument that keeps family structure intact if you use it for the HCP data.
## Author: Yu-Chi Chen and Sidhant Chopra
##
## Inputs:
##
## variate = a numeric matrix of dim n x 1.
## edgeMetric = a numeric matrix of dimensions n x e (where e is the number of edges)
## b = an integer giving how many bootstraps (method 1) or permutations (method 2) to compute.
## method = an integer with value 0, 1, or 2:
## Method 0: Returns uncorrected p-values (same as method 1, but with no FDR correction applied).
## Method 1: Returns FDR-corrected p-values by:
## 1) Using bootstrapping to compute a standard error (SE) at each edge
## 2) Converting the observed correlations to Z-scores (r/SE)
## 3) Converting the Z-scores to two-tailed p-values and applying an FDR correction to them
## (see the first toy sketch below)
## Method 2 (Experimental): Returns FWE-corrected p-values by:
## 1) Using permutation to compute a null distribution of r values at each edge
## 2) Building an FWE null distribution by saving the maximum permuted r-value from each edge
## 3) Computing an FWE p-value from the observed r value in step 1 and the distribution of maximum r values from step 2
## (see the second toy sketch below)
##
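## First toy sketch (Method 1's FDR step; illustration only, not part of the function,
## and the numbers are made up):
## toy_r  <- c(0.30, 0.05, -0.22)        # observed correlations at three edges
## toy_se <- c(0.08, 0.07, 0.09)         # bootstrap standard errors at those edges
## toy_z  <- toy_r / toy_se              # Z-scores
## toy_p  <- 2 * pnorm(-abs(toy_z))      # two-tailed p-values
## toy_p_fdr <- p.adjust(toy_p, method = "fdr")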
##
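## Second toy sketch (Method 2's max-statistic FWE step; illustration only; toy_null is a
## hypothetical nperm x 3 matrix of permuted correlations, one column per edge, and toy_r
## is reused from the first sketch):
## toy_max  <- apply(abs(toy_null), 2, max)                      # per-edge maximum |r| under the null
## toy_pfwe <- sapply(abs(toy_r), function(r0) mean(toy_max >= r0))
##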
## OPTIONAL:
## grouping = a vector with a unique ID for each group (e.g. family) whose structure you want to (approximately) preserve in the bootstrap.
## You can generate the vector with PALM (https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/PALM).
## In MATLAB: EB=hcp2blocks('restricted_....csv'); grouping_id=EB(:,3); % You need to change the file name
## (see the example usage with grouping below).
##
## ---------------------------
## Outputs: for methods 0 and 1, a matrix with one row per edge containing the p-values and bootstrap standard errors;
## for method 2, a vector of FWE-corrected p-values.
##
## ----------------------------
## Notes on choosing between FDR (method 1) and FWE (method 2):
## Method 1 aims for a robust estimate of each correlation that accounts for the sampling error of the statistic.
## Method 2 asks whether each correlation is larger than expected under the null.
## The FDR method is usually preferred and is the one used in the literature.
## ---------------------------
## Example usage
## mat <- rmatio::read.mat("a2_cca.mat")
## variate <- as.matrix(mat[["U"]][,1])
## edgeMetric <- read.table("patient_FC.txt")
## p <- cca_sig_test(variate, edgeMetric, method = 1, b=500)
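## With the optional grouping vector (a sketch; "grouping_id.txt" is a hypothetical file
## containing one family/group ID per subject, e.g. exported from the PALM/hcp2blocks step above):
## grouping_id <- scan("grouping_id.txt")
## p <- cca_sig_test(variate, edgeMetric, method = 1, b = 500, grouping = grouping_id)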
##
## ---------------------------
## Required packages: base R only (the example above additionally uses the rmatio package to read the .mat file)
## ===========================
cca_sig_test <- function(variate, edgeMetric, method=1, b=500, grouping=NULL, cl=1) {
variate <- as.matrix(variate)
edgeMetric <- as.matrix(edgeMetric)
observed_cor <- cor(variate, edgeMetric) # 1 x e matrix of observed correlations
if(method == 0 | method == 1) {
boot.se <- NULL
for (e in 1:dim(edgeMetric)[2]){
n <- dim(variate)[1]
boot.cor.all<-NULL
for (i in 1:b){
if(is.null(grouping)) {
index <- sample(1:n, replace=T)
} else {
index <- 1:n
grouping <- c(grouping)
data <- as.data.frame(cbind(index, grouping))
data$grouping <- as.factor(data$grouping)
resample <- function(dat, cluster, replace) {
# exit early for trivial data
# if(nrow(dat) == 1 || all(replace==FALSE))
# return(dat)
# sample the clustering factor
cls <- sample(unique(dat[[cluster[1]]]), replace=replace[1])
# subset on the sampled clustering factors
sub <- lapply(cls, function(b) subset(dat, dat[[cluster[1]]]==b))
# sample lower levels of hierarchy (if any)
if(length(cluster) > 1)
sub <- lapply(sub, resample, cluster=cluster[-1], replace=replace[-1])
# join and return samples
do.call(rbind, sub)
}
index <- resample(data, 'grouping', TRUE)[,1]
while (length(index) < n) {
index <- resample(data, 'grouping', TRUE)[,1]
}
index <- index[1:n]
}
# index both the edge values and the variate with the same bootstrap indices,
# so that subject pairs are resampled jointly
boot.edge <- as.matrix(edgeMetric[index, e])
boot.brain_var <- as.matrix(variate[index, 1])
boot.cor <- cor(boot.edge, boot.brain_var)
boot.cor.all <- c(boot.cor.all, boot.cor)
# print(paste("Edge", e, ": bootstrap", i, "of", b, "complete."))  # optional per-iteration progress (verbose)
}
## Bootstrap standard error
boot.se[e] <- sd(boot.cor.all)
print(paste("Variable :", e, b, "bootstraps complete."))
}
#hist(boot.se)
Z_score <- observed_cor/boot.se
#compute 2 sided pvalues
pvalue2sided <- 2*pnorm(-abs(Z_score))
#if method = 0, return uncorrected p values
if(method==0) {
return(cbind(t(pvalue2sided),boot.se))
}
#do a FDR correction
if(method==1) {
pvalue2sided_fdr <- p.adjust(pvalue2sided, method = "fdr", n = length(pvalue2sided))
return(cbind(pvalue2sided_fdr, boot.se))
}
}
if (method == 2) {
#set.seed(419) # Make the results reproducible
nperm <- b
variate.perm <- numeric(nperm)
n <- nrow(edgeMetric)
max.vec <- NULL
for (i in 1:dim(edgeMetric)[2]){
for (ii in 1:nperm) {
ind <- sample(n, replace = F)
variate.perm[ii] <- cor(edgeMetric[ind,i], variate[,1])
}
# keep the largest absolute permuted correlation for this edge (builds the FWE null distribution)
max.vec[i] <- max(abs(variate.perm))
message(paste("Variable:", i, nperm, "permutations."))
}
# FWE-corrected p-value: for each edge, the proportion of per-edge maximum null
# correlations that are at least as large as the observed correlation
p_vec_fwe <- NULL
for (p in 1:ncol(observed_cor)) {
p_vec_fwe[p] <- sum(max.vec >= abs(observed_cor[p])) / length(max.vec)
}
return(as.numeric(p_vec_fwe))
}
}
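## ---------------------------
## Demo on simulated data (a minimal sketch, not part of the original script;
## the toy_* objects and the small b are illustrative only):
## set.seed(1)
## toy_variate <- matrix(rnorm(100), ncol = 1)         # n = 100 subjects
## toy_edges   <- matrix(rnorm(100 * 20), nrow = 100)  # e = 20 edges
## toy_out     <- cca_sig_test(toy_variate, toy_edges, method = 1, b = 100)
## head(toy_out)  # column 1: FDR-corrected p-values; column 2: bootstrap SEs
## ---------------------------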
