testDA_GLMM_adjusted_outofCD45+.R
#' Adjusted version of the DA-GLMM method. It only includes the immune
#' cell populations of interest in the statistical analysis for differential
#' abundance; it specifies the contrasts between disease groups.
testDA_GLMM_adjusted_outofCD45 <- function (d_counts, formula, contrast, min_cells = 100,
min_samples = NULL, normalize = FALSE, norm_factors = "TMM",
contrast_extra = F, contrast_extra_con = NULL, settings)
{
# if (is.null(min_samples)) {
# min_samples <- ncol(d_counts)/3
# }
counts <- assays(d_counts)[["counts"]]
counts <- counts[!rownames(counts) %in% c("Unknown", "CD45-", "Undefined"), ] # to reduce multiple testing correction
cluster_id <- rowData(d_counts)$cluster_id
cluster_id <- cluster_id[!cluster_id %in% c("Unknown", "CD45-", "Undefined")] # to reduce multiple testing correction
## Threshold of minimum number of cells per cluster:
# tf <- counts >= min_cells
# ix_keep <- apply(tf, 1, function(r) sum(r) >= min_samples)
ix_keep <- apply(counts, 1, function(r) sum(r) > min_cells) # threshold per cluster, not per sample
counts <- counts[ix_keep, , drop = FALSE]
cluster_id <- cluster_id[ix_keep]
if (normalize & norm_factors == "TMM") {
norm_factors <- calcNormFactors(counts, method = "TMM")
}
if (normalize) {
n_cells_smp <- colSums(counts)/norm_factors
}
else { # Import table with absolute counts of CD45+ immune cells:
absolute_tab <- fread(paste0("~/Documents/Data analysis/CyTOF/CP_CyTOF_data/",
settings$tissue, "/subset/original/",
settings$tissue, "_absolute_table.csv"))
absolute_immune <- absolute_tab[pop != "CD45-", sum(n), by = "sample_id"]
setnames(absolute_immune, "V1", "n_cells_smp")
}
# if (ncol(contrast) == 1 & nrow(contrast) > 1) {
# contrast <- t(contrast)
# }
p_vals <- rep(NA, length(cluster_id))
for (i in seq_along(cluster_id)) {
p_vals[i] <- tryCatch({ # Create table with proportions out of total CD45+ cells:
i_counts <- as.data.table(counts[i, ], keep.rownames = T)
setnames(i_counts, c("V1", "V2"), c("sample_id", "n"))
data_i <- merge(i_counts, absolute_immune, by = "sample_id")
data_i <- merge(data_i, formula$data, by = "sample_id")
data_i[, y := n/n_cells_smp]
data_i <- data_i[, c("y", "n_cells_smp", "condition", "sample_id")]
if ("age" %in% colnames(data_i)) {
data_i$age <- as.numeric(data_i$age) # if added to the model
}
if ("pmd_hours" %in% colnames(data_i)) {
data_i$pmd_hours <- as.numeric(data_i$pmd_hours) # if added to the model
}
fit <- tryCatch(
glmer(formula$formula, data = data_i, family = "binomial",
weights = n_cells_smp)
, warning = function(w) NULL
)
if (is.null(fit)) {
fit <- tryCatch(
glmer(formula$formula,
data = data_i,
family = "binomial",
weights = n_cells_smp,
control=glmerControl(optimizer="bobyqa", # to reach convergence (https://rstudio-pubs-static.s3.amazonaws.com/33653_57fc7b8e5d484c909b615d8633c01d51.html; https://biologyforfun.wordpress.com/2018/04/09/help-i-have-convergence-warnings/)
optCtrl=list(maxfun=2e6)))
, warning = function(w) NULL
)
}
if (is.null(fit)) {
fit <- glmer(formula$formula,
data = data_i,
family = "binomial",
weights = n_cells_smp)
warn <- (paste0("*** Warning_", cluster_id[i]))
print(warn)
}
print(cluster_id[i]) # print cluster name
print(summary(fit)) # print model
### Create contrast (from https://publicifsv.sund.ku.dk/~jufo/courses/rm2018/nlmePackage.pdf):
name.coef <- names(coef(fit)[["sample_id"]])
n.coef <- length(name.coef)
contr <- matrix(0, nrow = 1, ncol = n.coef,
dimnames = list("contrast", name.coef))
contr[1, contrast] <- 1 # choose among: conditionms, conditionad, age, genderm, pmd_hours
if (contrast_extra) {
contr[1, contrast_extra_con] <- -1
}# C[1, "conditionms"] <- -1 # optional, if you want to compare to other than intercept (control)
test <- glht(fit, contr)
print(summary(test))
print(contrast_extra)
cat("\n")
summary(test)$test$pvalues
}, error = function(e) NA)
}
p_adj <- p.adjust(p_vals, method = "fdr")
stopifnot(length(p_vals) == length(p_adj))
out <- data.frame(p_val = p_vals, p_adj = p_adj, stringsAsFactors = FALSE)
row_data <- as.data.frame(matrix(as.numeric(NA), nrow = nlevels(cluster_id),
ncol = ncol(out)))
colnames(row_data) <- colnames(out)
cluster_id_nm <- as.numeric(cluster_id)
row_data[cluster_id_nm, ] <- out
row_data <- cbind(cluster_id = rowData(d_counts)$cluster_id,
row_data)
res <- d_counts
rowData(res) <- row_data
if (normalize) {
metadata(res)$norm_factors <- norm_factors
}
res
}