plot_percentages.R
#' Functions to plot the percentage of cell populations
#' and display differences based on the previous analyses.
### Create frequency file
create_freq_file <- function(abs_props = T, settings){
if (settings$norm) {
normalised_folder <- "normalised/"
} else {
normalised_folder <- "original/"
}
if (settings$subsetting) {
subset_folder <- "subset/"
} else {
subset_folder <- "whole/"
}
if (settings$zoom) {
zoom_folder <- paste0(settings$zoom_pop, "/")
if (abs_props == T) {
name = "_total_prop_table.csv"
} else {
name = "_prop_table.csv"
}
} else {
zoom_folder <- NULL
name = "_prop_table.csv"
}
tissue <- settings$tissue
freqs <- fread(paste0("./", tissue, "/",
subset_folder, normalised_folder, zoom_folder, tissue, name))
# Add condition
freqs[, condition := ifelse(grepl("control", sample_id), "control",
ifelse(grepl("ms", sample_id), "ms",
ifelse(grepl("ad", sample_id), "ad", NA)))]
freqs[, perc := prop*100]
# Summarise medians
med_sum <- freqs[,
.(fq_median = median(prop)),
by = c("pop", "condition")]
# Keep only significant (before correction)
ms <- fread(paste0("./", tissue, "/",
subset_folder, normalised_folder, zoom_folder, "/", tissue,
"_topTable_da_res_ms.csv"))
ad <- fread(paste0("./", tissue, "/",
subset_folder, normalised_folder, zoom_folder, "/", tissue,
"_topTable_da_res_ad.csv"))
adms <- fread(paste0("./", tissue, "/",
subset_folder, normalised_folder, zoom_folder, "/", tissue,
"_topTable_da_res_advsms.csv"))
p_vals_table <- rbindlist(list(
ms[, comparison := "ms_con"],
ad[, comparison := "ad_con"],
adms[, comparison := "ad_ms"]))
return(list(freqs = freqs, med_sum = med_sum, p_vals_table = p_vals_table))
}
### Organise freqs for plots
organise_freqs <- function(freqs, settings) {
# Re-order from more to less abundant, for a cleaner plot
freqs$pop <- reorder(freqs$pop, -freqs$perc)
# Split for visibility
if (settings$tissue == "cp") {
abundant <- freqs[pop %in% c("Myeloid", "Granulocytes", "Naive B cells",
"NK cells", "B cell lineage", "CD4+ T cells", "CD8+ T cells",
"CD8 c3", "DN", "CD8 c4", "CD8 c1",
"NK CD56dim")]
mid_ab <- freqs[pop %in% c("Monocytes",
"CD8 effector memory", "CD8 naive", "CD8 Temra", "CD8 Temra resident",
"CD8 c4", "CD8 c3")]
low_ab <- freqs[pop %in% c("B cell lineage", "gd T cells",
"ASCs", "B cells", "NK CD56dim", "NK CD56bright",
"CD8 transitional RA/RO", "CD4 effector memory PD1-",
"DN", "CD4 naive", "gd T cells",
"CD8 c5", "DN", "CD4 c3", "gd T"
)] # excluding "unknown'
non_ab <- freqs[!pop %in% c("Myeloid", "Granulocytes", "Naive B cells",
"NK cells", "B cell lineage", "CD4+ T cells", "CD8+ T cells",
"CD8 c3", "DN", "CD8 c4", "CD8 c1",
"NK CD56dim")]
}
if (settings$tissue == "septum") {
abundant <- freqs[pop %in% c("Microglia", "Myeloid",
"CD8 c3", "CD4 c2", "CD4 c1", "CD8 c1", "CD8 c2")]
mid_ab <- freqs[pop %in% c("CD8+ T cells", "Granulocytes", "CD4+ T cells",
"CD4 effector memory", "CD8 Temra",
"CD8 effector memory",
"CD8 c3", "CD8 c4", "CD4 c2"
)]
low_ab <- freqs[!(pop %in% c("Microglia", "Myeloid", "Undefined", "CD45-",
"CD8 Trm", "CD4 Trm", "CD8 Trm CD103+",
"CD4 effector memory", "CD8 Temra", "CD8 effector memory",
"CD8+ T cells", "Granulocytes", "CD4+ T cells",
"CD8 c1", "CD8 c2", "CD4 c1", "CD8 c3", "CD8 c4", "CD4 c2"))]
non_ab <- freqs[!pop %in% c("Microglia", "Myeloid",
"CD8 c3", "CD4 c2", "CD4 c1", "CD8 c1", "CD8 c2")]
}
if (settings$tissue == "blood") {
abundant <- freqs[pop %in% c("Granulocytes", "NK cells")]
mid_ab <- freqs[pop %in% c("CD4+ T cells", "CD8+ T cells", "Monocytes")]
low_ab <- freqs[pop %in% c("mDCs", "pDCs", "B cells", "TCRgd T cells")]
}
freqs$condition <- factor(freqs$condition, levels = c("control", "ms", "ad"))
# Factor so the plot recognises the position per group of abundance:
abundant$pop <- factor(abundant$pop)
if (settings$tissue != "cp") {
mid_ab$pop <- factor(mid_ab$pop)
}
low_ab$pop <- factor(low_ab$pop)
return(list(freqs = freqs, abundant = abundant,
mid_ab = mid_ab, low_ab = low_ab, non_ab = non_ab))
}
### Create function for plots
plot_perc <- function(populations, p_vals_table, x = pop, y = perc, fill = condition,
settings, abs_prop = abs_prop) {
if (settings$zoom) {
if (abs_prop == T) {
main_pop <- "CD45+"
} else {
main_pop <- settings$zoom_pop
}
} else {
main_pop <- "CD45+"
}
# Plot colors
colors = c("#043741", "#189cb3", "#e79d24")
linecol = "gray30"
black = c("black", "black", "black")
#Create plot
populations$condition <- factor(populations$condition, levels = c("control", "ad", "ms"))
p <- ggplot(populations, aes(x = pop, y = perc, fill = condition)) +
geom_boxplot(width = 0.75, color = linecol, lwd = 0.25,
outlier.shape = NA, alpha = 0.5) +
geom_point(position = position_jitterdodge(jitter.width = 0.1,
jitter.height = 0,
dodge.width = 0.75),
# position = position_dodge(width=0.5),
# position = position_jitter(w = 0.15, h = 0),
size = 1, shape = 21, stroke = 0.1,
# color = "black",
aes(color = condition, fill = condition)
# fill = "white"
# color = "black"
) +
xlab(NULL) +
ylab(paste0("Percentage of ", main_pop ," cells (%)")) +
expand_limits(y=0) +
# scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values=colors, labels = c('Control','Dementia','MS'), name = NULL) +
scale_color_manual(values=colors, labels = c('Control','Dementia','MS'), name = NULL) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 1)
, legend.position = "none"
)
max_height <- 0
for (cluster in populations[, unique(pop)]) {
height <- populations[pop == cluster, max(perc, na.rm = T)]
for (comp in c('ad_con', 'ad_ms', 'ms_con')) {
cond_a <- unlist(str_split(comp, "_"))[1]
cond_b <- unlist(str_split(comp, "_"))[2]
if (cond_a == "con") {cond_a <- "control"}
if (cond_b == "con") {cond_b <- "control"}
cond_pos <- data.table(cond = c("control", "ad", "ms"), pos = c(-0.33, 0, +0.33))
pos_a <- cond_pos[cond == cond_a, pos]
pos_b <- cond_pos[cond == cond_b, pos]
p_label <- data.table(ref_p = c(0.1, 0.05, 0.01), label = c("*", "**", "***"))
my_p_value <- p_vals_table[comparison == comp & cluster_id == cluster, p_adj]
label <- p_label[my_p_value <= ref_p][.N][, label]
if (length(label) == 0) {
print(paste("Skipping", comp, cluster))
next
} else {
height <- height * 1.1
}
p <- p + geom_segment(
x=as.numeric(factor(cluster, levels = levels(as.factor(populations$pop)))) + pos_a,
y=height * 1.1,
xend=as.numeric(factor(cluster, levels = levels(as.factor(populations$pop)))) + pos_b,
yend=height * 1.1
) + geom_text(
x = (as.numeric(factor(cluster, levels = levels(as.factor(populations$pop)))) + pos_a +
as.numeric(factor(cluster, levels = levels(as.factor(populations$pop)))) + pos_b) / 2,
y = height * 1.1 + 0.02,
label = label
)
}
max_height <- max(max_height, height)
}
p <- p + scale_y_continuous(expand = c(0,0), limits = c(0, max_height*1.2))
print(p)
}
### Create function for separate plots per pop
plot_perc_sep <- function(freqs, p_vals_table, x = pop, y = perc, fill = condition,
settings, abs_prop = abs, accuracy = 0.01) {
if (settings$zoom) {
if (abs_prop == T) {
main_pop <- "CD45+"
} else {
main_pop <- settings$zoom_pop
}
} else {
main_pop <- "CD45+"
}
# Plot colors
colors = c("#043741", "#189cb3", "#e79d24")
linecol = "gray30"
black = c("black", "black", "black")
#Create plot
populations <- unique(freqs$pop)
freqs$condition <- factor(freqs$condition, levels = c("control", "ad", "ms"))
for (population in populations) {
p <- ggplot(freqs[pop == population], aes(x = pop, y = perc, fill = condition)) +
geom_boxplot(width = 0.75, color = linecol, lwd = 0.25,
outlier.shape = NA, alpha = 0.5) +
geom_point(position = position_jitterdodge(jitter.width = 0.1,
jitter.height = 0,
dodge.width = 0.75),
# position = position_dodge(width=0.5),
# position = position_jitter(w = 0.15, h = 0),
size = 1, shape = 21, stroke = 0.1,
# color = "black",
aes(color = condition, fill = condition)
# fill = "white"
# color = "black"
) +
xlab(NULL) +
ylab(paste0("Percentage of ", main_pop ," cells (%)")) +
expand_limits(y=0) +
# scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values=colors, labels = c('Control','Dementia','MS'), name = NULL) +
scale_color_manual(values=colors, labels = c('Control','Dementia','MS'), name = NULL) +
theme_classic() +
theme(#axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
)
max_height <- 0
height <- freqs[pop == population, max(perc, na.rm = T)]
for (comp in c('ad_con', 'ad_ms', 'ms_con')) {
cond_a <- unlist(str_split(comp, "_"))[1]
cond_b <- unlist(str_split(comp, "_"))[2]
if (cond_a == "con") {cond_a <- "control"}
if (cond_b == "con") {cond_b <- "control"}
cond_pos <- data.table(cond = c("control", "ad", "ms"), pos = c(0.75, 1, 1.25))
pos_a <- cond_pos[cond == cond_a, pos]
pos_b <- cond_pos[cond == cond_b, pos]
p_label <- data.table(ref_p = c(0.1, 0.05, 0.01), label = c("*", "**", "***"))
my_p_value <- p_vals_table[comparison == comp & cluster_id == population, p_adj]
label <- p_label[my_p_value <= ref_p][.N][, label]
if (length(label) == 0) {
print(paste("Skipping", comp, population))
next
} else {
height <- height * 1.1
}
p <- p + geom_segment(
x= pos_a,
y=height * 1.1,
xend=pos_b,
yend=height * 1.1
) + geom_text(
x = as.numeric(pos_a + pos_b) / 2,
y = height * 1.1 + 0.02,
label = label
)
}
max_height <- max(max_height, height)
if (population %in% abundant) {
p <- p + scale_y_continuous(expand = c(0,0), limits = c(0, max_height*1.2), labels = scales::number_format(accuracy = accuracy))
} else {
p <- p + scale_y_continuous(expand = c(0,0), limits = c(0, max_height*1.2), labels = scales::number_format(accuracy = accuracy))
}
set_dev(plot_path(settings, paste0("perc_", population)), height=2, width = 1.5)
print(p)
unset_dev()
}
}