### Load packages
library(HDCytoData)
library(readxl)
library(CATALYST)
library(ggplot2)
library(cowplot)
library(diffcyt)
library(SummarizedExperiment)
library(data.table)
library(stringr)
library(tidyverse)
library(SingleCellExperiment)
library(lme4)
library(multcomp)
library(ComplexHeatmap) # for DA heatmaps
library(dplyr)
library(RColorBrewer)
library(devtools)
setwd("~/Documents/Data analysis/CyTOF/CP_CyTOF_data/")
############################
### CHOOSE YOUR ANALYSIS ###
# Settings for parent clustering (choose tissue, no zoom)
PRESETTINGS <- list(
experiment_dir = normalizePath("."),
SAVE_PLOTS = T, # Export plots
SAVE_TABLES = T, # Export tables
norm = F, # Normalisation
subsetting = T, # Subsetting samples by cell number
zoom = F, # Zoom into cell pops?
zoom_pop = NULL, # Zoom into cell pops?
tissue = 'cp' # Choose tissue
)
# Settings for zoom clustering
SETTINGS <- list(
experiment_dir = normalizePath("."),
SAVE_PLOTS = T, # Export plots
SAVE_TABLES = T, # Export tables
norm = F, # Normalisation
subsetting = T, # Subsetting samples by cell number
zoom = T, # Zoom into cell pops?
zoom_pop = "NKB", # Zoom into cell pops? (Tcells/CD4/CD8/Myeloid/BtoPlasma/NKB/NK)
tissue = 'cp' # Choose tissue
)
source("./Custom_functions/CP_CyTOF_functions.R") # Load created functions
source("./Custom_functions/CP_CyTOF_zoom_functions.R")
source("./Custom_functions/clustering.R")
### LOAD SCE ###
sce <- sce_for_zoom(PRESETTINGS)
### SELECT CLUSTERS
selec_clus <- select_clusters(SETTINGS)
sub_panel <- selec_clus$sub_panel
pop_selec <- selec_clus$pop_selec
### CREATE SCE_POP ###
sce_pop <- create_sce_pop(sce)
####################
### CLUSTERING ###
####################
source("./Custom_functions/clustering.R")
set.seed(1234) # set random seed for reproducibility
### Adjust type markers for the specific subpopulation
pop_markers <- fread(paste0("./",
sub_panel))
pop_markers <- pop_markers[marker_class == "type", antigen]
sce_pop <- run_clustering(sce = sce_pop, settings = SETTINGS, features = pop_markers, skip_checkpoint = T)
plot_elbow_plot(sce_pop, SETTINGS)
# ADJUST k:
clusters <- "meta8"
plot_heatmap_cellpops(sce_pop, SETTINGS, clusters, features = pop_markers, name = "heatmap_cellpops")
plot_heatmap_cellpops(sce_pop, SETTINGS, clusters, features = pop_markers, bin_anno = T,
name = "heatmap_cellpops_anno")
plot_distribution_markers(sce_pop, clusters, SETTINGS)
plot_specific_marker_heatmap(sce_pop, "CD45RA", clusters, SETTINGS)
sce_pop <- run_tsne_and_umap(sce_pop, SETTINGS, skip_checkpoint = T)
plot_tsne_and_umap(sce_pop, clusters, SETTINGS)
plot_umap_per_sample(sce_pop, clusters, SETTINGS)
plot_umap_per_condition(sce_pop, clusters, SETTINGS)
plot_umap_markers(sce_pop, type_markers(sce_pop), SETTINGS)
# Compare tSNE and PCA (good practice)
plotCodes(sce_pop, k = clusters)
#####################################
### CLUSTER MERGING AND ANOTATION ###
#####################################
### Manual cluster merging and annotation based on heatmaps ###
sce_checkpoint <- object_path(SETTINGS, "sce_with_clustering_with_tsne_and_umap.RDS")
# saveRDS(sce, sce_checkpoint)
if (!exists("sce_pop")) {
sce_pop <- readRDS(sce_checkpoint)
}
clusters <- "meta8"
sce_pop <- apply_manual_merging(sce_pop, clusters, SETTINGS, name = "CP_CyTOF-cluster-merging-1.csv")
plot_umap_merged(sce_pop, SETTINGS)
# If you want to check how many clusterings the SCE object contains:
colnames(metadata(sce_pop)$cluster_codes)
plot_heatmap_cellpops_2layers(sce_pop, clusters, SETTINGS)
plot_heatmap_cellpops_2layers_merged(sce_pop, clusters, features = pop_markers, SETTINGS)
plot_umap_manual_vs_auto(sce_pop, SETTINGS, meta = clusters)
### ### ### ### ### ### ### ###
### DIFFERENTIAL ANALYSIS ###
### ### ### ### ### ### ### ###
FDR_cutoff <- 0.1
# Import adjusted functions:
source("./Custom_functions/differential_analysis.R")
source("./Custom_functions/diffcyt_adjusted.R")
source("./Custom_functions/plotDiffHeatmap_adjusted.R")
### ### ### ### ### ### ### ### ### ### ### ###
### Differential cell population abundance ###
stack_and_boxplots(sce_pop, SETTINGS)
export_prop_table(sce = sce_pop, merge = "merge1", name = "prop_table", SETTINGS)
export_absolute_table(sce = sce_pop, merge = "merge1", name = "absolute_table", SETTINGS)
export_absolute_props(sce = sce_pop, merge = "merge1", name = "total_prop_table", SETTINGS)
da_formulas <- create_da_formulas(sce_pop, SETTINGS) # We don't include age, gender, etc as covariates
da_formula1 <- da_formulas$da_formula1
da_formula2 <- da_formulas$da_formula2
if (SETTINGS$zoom_pop == "Tcells") {
different = F
abs = F
}
if (SETTINGS$zoom_pop == "NKB") {
different = T
abs = T
}
### MS vs control:
differential_abundance(sce_pop, SETTINGS, contrast = "conditionms", formula = da_formula1,
clustering_to_use = "merge1", group1 = "control", group2 = "ms",
different_prop = different, # if T, proportions are calculated out of total CD45+ cells
name = "da_res_ms")
### AD vs control:
differential_abundance(sce_pop, SETTINGS, contrast = "conditionad", formula = da_formula2,
clustering_to_use = "merge1", group1 = "control", group2 = "ad",
different_prop = different, # if T, proportions are calculated out of total CD45+ cells
name = "da_res_ad")
### AD vs MS
differential_abundance(sce_pop, SETTINGS, contrast = "conditionad", formula = da_formula1,
clustering_to_use = "merge1", group1 = "ms", group2 = "ad",
contrast_extra = T, contrast_extra_con = "conditionms",
different_prop = different, # if T, proportions are calculated out of total CD45+ cells
name = "da_res_advsms")
### Non-parametric tests
df <- get_freqs(sce_pop, merge = "merge1") # Get frequencies
kruskal_per_cluster(df, SETTINGS, name = "kruskall") ## Do Kruskall-Wallis per cluster and export
posthoc_pairwise(df) # Post hoc pairwise control, MS, AD; not exporting
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### Differential analysis of marker expression stratified by cell population ###
plot_median_expression(sce_pop, SETTINGS)
ds_formulas <- create_ds_formulas(sce_pop) # We don't include age, gender, etc as covariates
ds_formula1 <- ds_formulas$ds_formula1
state_dt <- select_markers(
markers_of_interest = paste0("CP_CyTOF_state_markers_", SETTINGS$zoom_pop, ".csv"))
### MS vs control
differential_expression(sce_pop, SETTINGS, contrast = "conditionms", formula = ds_formula1,
clustering_to_use = "merge1", group1 = "control", group2 = "ms",
name = "ds_res_ms")
### AD vs control
differential_expression(sce_pop, SETTINGS, contrast = "conditionad", formula = ds_formula1,
clustering_to_use = "merge1", group1 = "control", group2 = "ad",
name = "ds_res_ad")
### AD vs MS
differential_expression(sce_pop, SETTINGS, contrast = "conditionad", formula = ds_formula1,
clustering_to_use = "merge1", group1 = "ms", group2 = "ad",
contrast_extra = T, contrast_extra_con = "conditionms",
name = "ds_res_advsms")
plot_heatmap_expresssion(sce_pop, fdr = FDR_cutoff, SETTINGS)
###########################
### PLOTTING DA RESULTS ###
###########################
source("./Custom_functions/plot_percentages.R")
### Create frequency file
freq_file <- create_freq_file(abs_prop = abs, SETTINGS)
freqs <- freq_file$freqs
med_sum <- freq_file$med_sum
p_vals_table <- freq_file$p_vals_table
### Organise frequency file by abundance, per tissue:
org_freqs <- organise_freqs(freqs, SETTINGS)
abundant <- org_freqs$abundant
mid_ab <- org_freqs$mid_ab
low_ab <- org_freqs$low_ab
non_ab <- org_freqs$non_ab
### Plot
w <- length(unique(abundant$pop))
set_dev(plot_path(SETTINGS, "perc_abundant"), height=3, width = 1+w*0.5)
plot_perc(abundant, p_vals_table, settings = SETTINGS, abs_prop = abs)
unset_dev()
w <- length(unique(non_ab$pop))
set_dev(plot_path(SETTINGS, "perc_non_abundant"), height=3, width = 1+w*0.5)
plot_perc(non_ab, p_vals_table, settings = SETTINGS, abs_prop = abs)
unset_dev()
if (!is.null(mid_ab)) {
w <- length(unique(mid_ab$pop))
set_dev(plot_path(SETTINGS, "perc_mid_abundant"), height=3, width = 1+w*0.5)
plot_perc(mid_ab, p_vals_table, settings = SETTINGS, abs_prop = abs)
unset_dev()
}
# Plot percentages for each population individually/separately:
plot_perc_sep(abundant, p_vals_table, settings = SETTINGS, accuracy = 0.1)
plot_perc_sep(non_ab, p_vals_table, settings = SETTINGS, accuracy = 0.01)
###########################
### PLOTTING DE RESULTS ###
###########################
source("./Custom_functions/plot_exprs.R")
### Create expression file (p-value < 0.1)
expression_file <- create_expr_file(SETTINGS)
expr_for_plot <- expression_file$expr_for_plot # for plotting below
expression <- expression_file$expression # for obtaining median values if you're curious
p_vals_table <- expression_file$p_vals_table
### Plot
set_dev(plot_path(SETTINGS, "plots_expr"), height=3, width = 3)
plot_expr(expr_for_plot, p_vals_table, SETTINGS) # adj_p_value; ref_p = c(0.1, 0.05, 0.01), label = c("*", "**", "***")
unset_dev()