https://github.com/MolecularCellBiologyImmunology/cytof-periventricular-ms
Raw File
Tip revision: 70e973c2d935e4ff2cb080d7feef0dd08c23e061 authored by sabelarl on 07 July 2021, 12:56:38 UTC
Update README.md
Tip revision: 70e973c
CP_CyTOF_analysis_zoom.R
### Load packages
library(HDCytoData)
library(readxl)
library(CATALYST)
library(ggplot2)
library(cowplot)
library(diffcyt)
library(SummarizedExperiment)
library(data.table)
library(stringr)
library(tidyverse)
library(SingleCellExperiment)
library(lme4)
library(multcomp)
library(ComplexHeatmap) # for DA heatmaps
library(dplyr)
library(RColorBrewer)
library(devtools)

setwd("~/Documents/Data analysis/CyTOF/CP_CyTOF_data/")

############################
### CHOOSE YOUR ANALYSIS ###
# Settings for parent clustering (choose tissue, no zoom)
PRESETTINGS <- list(
    experiment_dir = normalizePath("."),
    SAVE_PLOTS = T, # Export plots
    SAVE_TABLES = T, # Export tables
    norm = F, # Normalisation
    subsetting = T, # Subsetting samples by cell number
    zoom = F, # Zoom into cell pops?
    zoom_pop = NULL, # Zoom into cell pops?
    tissue = 'cp' # Choose tissue
)

# Settings for zoom clustering
SETTINGS <- list(
    experiment_dir = normalizePath("."),
    SAVE_PLOTS = T, # Export plots
    SAVE_TABLES = T, # Export tables
    norm = F, # Normalisation
    subsetting = T, # Subsetting samples by cell number
    zoom = T, # Zoom into cell pops? 
    zoom_pop = "NKB", # Zoom into cell pops? (Tcells/CD4/CD8/Myeloid/BtoPlasma/NKB/NK)
    tissue = 'cp' # Choose tissue
)

source("./Custom_functions/CP_CyTOF_functions.R") # Load created functions
source("./Custom_functions/CP_CyTOF_zoom_functions.R")
source("./Custom_functions/clustering.R")

### LOAD SCE ###
sce <- sce_for_zoom(PRESETTINGS)

### SELECT CLUSTERS
selec_clus <- select_clusters(SETTINGS)
sub_panel <- selec_clus$sub_panel
pop_selec <- selec_clus$pop_selec

### CREATE SCE_POP ###
sce_pop <- create_sce_pop(sce)

####################
###  CLUSTERING  ###
####################
source("./Custom_functions/clustering.R")

set.seed(1234) # set random seed for reproducibility

### Adjust type markers for the specific subpopulation
pop_markers <- fread(paste0("./", 
                            sub_panel))
pop_markers <- pop_markers[marker_class == "type", antigen]

sce_pop <- run_clustering(sce = sce_pop, settings = SETTINGS, features = pop_markers, skip_checkpoint = T)

plot_elbow_plot(sce_pop, SETTINGS)

# ADJUST k:
clusters <- "meta8"

plot_heatmap_cellpops(sce_pop, SETTINGS, clusters, features = pop_markers, name = "heatmap_cellpops")

plot_heatmap_cellpops(sce_pop, SETTINGS, clusters, features = pop_markers, bin_anno = T, 
                      name = "heatmap_cellpops_anno")

plot_distribution_markers(sce_pop, clusters, SETTINGS)

plot_specific_marker_heatmap(sce_pop, "CD45RA", clusters, SETTINGS)

sce_pop <- run_tsne_and_umap(sce_pop, SETTINGS, skip_checkpoint = T)

plot_tsne_and_umap(sce_pop, clusters, SETTINGS)

plot_umap_per_sample(sce_pop, clusters, SETTINGS)

plot_umap_per_condition(sce_pop, clusters, SETTINGS)

plot_umap_markers(sce_pop, type_markers(sce_pop), SETTINGS)

# Compare tSNE and PCA (good practice)
plotCodes(sce_pop, k = clusters)

##################################### 
### CLUSTER MERGING AND ANOTATION ###
#####################################

### Manual cluster merging and annotation based on heatmaps ###

sce_checkpoint <- object_path(SETTINGS, "sce_with_clustering_with_tsne_and_umap.RDS")
# saveRDS(sce, sce_checkpoint)

if (!exists("sce_pop")) {
    sce_pop <- readRDS(sce_checkpoint)
}

clusters <- "meta8"

sce_pop <- apply_manual_merging(sce_pop, clusters, SETTINGS, name = "CP_CyTOF-cluster-merging-1.csv")

plot_umap_merged(sce_pop, SETTINGS)

# If you want to check how many clusterings the SCE object contains:
colnames(metadata(sce_pop)$cluster_codes)

plot_heatmap_cellpops_2layers(sce_pop, clusters, SETTINGS)

plot_heatmap_cellpops_2layers_merged(sce_pop, clusters, features = pop_markers, SETTINGS)

plot_umap_manual_vs_auto(sce_pop, SETTINGS, meta = clusters)


### ### ### ### ### ### ### ### 
### DIFFERENTIAL ANALYSIS ###
### ### ### ### ### ### ### ### 
FDR_cutoff <- 0.1

# Import adjusted functions:
source("./Custom_functions/differential_analysis.R")
source("./Custom_functions/diffcyt_adjusted.R") 
source("./Custom_functions/plotDiffHeatmap_adjusted.R") 

### ### ### ### ### ### ### ### ### ### ### ### 
### Differential cell population abundance ###
stack_and_boxplots(sce_pop, SETTINGS)

export_prop_table(sce = sce_pop, merge = "merge1", name = "prop_table", SETTINGS)
export_absolute_table(sce = sce_pop, merge = "merge1", name = "absolute_table", SETTINGS)
export_absolute_props(sce = sce_pop, merge = "merge1", name = "total_prop_table", SETTINGS)

da_formulas <- create_da_formulas(sce_pop, SETTINGS) # We don't include age, gender, etc as covariates
da_formula1 <- da_formulas$da_formula1
da_formula2 <- da_formulas$da_formula2

if (SETTINGS$zoom_pop == "Tcells") {
    different = F
    abs = F
}
if (SETTINGS$zoom_pop == "NKB") {
    different = T
    abs = T
}

### MS vs control:
differential_abundance(sce_pop, SETTINGS, contrast = "conditionms", formula = da_formula1, 
                       clustering_to_use = "merge1", group1 = "control", group2 = "ms",
                       different_prop = different, # if T, proportions are calculated out of total CD45+ cells
                       name = "da_res_ms")

### AD vs control:
differential_abundance(sce_pop, SETTINGS, contrast = "conditionad", formula = da_formula2, 
                       clustering_to_use = "merge1", group1 = "control", group2 = "ad",
                       different_prop = different, # if T, proportions are calculated out of total CD45+ cells
                       name = "da_res_ad")

### AD vs MS
differential_abundance(sce_pop, SETTINGS, contrast = "conditionad", formula = da_formula1, 
                       clustering_to_use = "merge1", group1 = "ms", group2 = "ad",
                       contrast_extra = T, contrast_extra_con = "conditionms", 
                       different_prop = different, # if T, proportions are calculated out of total CD45+ cells
                       name = "da_res_advsms")

### Non-parametric tests
df <- get_freqs(sce_pop, merge = "merge1") # Get frequencies

kruskal_per_cluster(df, SETTINGS, name = "kruskall") ## Do Kruskall-Wallis per cluster and export

posthoc_pairwise(df) # Post hoc pairwise control, MS, AD; not exporting

### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### Differential analysis of marker expression stratified by cell population ###
plot_median_expression(sce_pop, SETTINGS)

ds_formulas <- create_ds_formulas(sce_pop) # We don't include age, gender, etc as covariates
ds_formula1 <- ds_formulas$ds_formula1

state_dt <- select_markers(
    markers_of_interest = paste0("CP_CyTOF_state_markers_", SETTINGS$zoom_pop, ".csv"))


### MS vs control
differential_expression(sce_pop, SETTINGS, contrast = "conditionms", formula = ds_formula1, 
                        clustering_to_use = "merge1", group1 = "control", group2 = "ms",
                        name = "ds_res_ms")

### AD vs control
differential_expression(sce_pop, SETTINGS, contrast = "conditionad", formula = ds_formula1, 
                        clustering_to_use = "merge1", group1 = "control", group2 = "ad",
                        name = "ds_res_ad")

### AD vs MS
differential_expression(sce_pop, SETTINGS, contrast = "conditionad", formula = ds_formula1, 
                        clustering_to_use = "merge1", group1 = "ms", group2 = "ad",
                        contrast_extra = T, contrast_extra_con = "conditionms",
                        name = "ds_res_advsms")

plot_heatmap_expresssion(sce_pop, fdr = FDR_cutoff, SETTINGS)

###########################
### PLOTTING DA RESULTS ###
###########################
source("./Custom_functions/plot_percentages.R")

### Create frequency file
freq_file <- create_freq_file(abs_prop = abs, SETTINGS)
freqs <- freq_file$freqs
med_sum <- freq_file$med_sum
p_vals_table <- freq_file$p_vals_table

### Organise frequency file by abundance, per tissue:
org_freqs <- organise_freqs(freqs, SETTINGS)
abundant <- org_freqs$abundant
mid_ab <- org_freqs$mid_ab
low_ab <- org_freqs$low_ab
non_ab <- org_freqs$non_ab

### Plot
w <- length(unique(abundant$pop))
set_dev(plot_path(SETTINGS, "perc_abundant"), height=3, width = 1+w*0.5)
plot_perc(abundant, p_vals_table, settings = SETTINGS, abs_prop = abs)
unset_dev()

w <- length(unique(non_ab$pop))
set_dev(plot_path(SETTINGS, "perc_non_abundant"), height=3, width = 1+w*0.5)
plot_perc(non_ab, p_vals_table, settings = SETTINGS, abs_prop = abs)
unset_dev()

if (!is.null(mid_ab)) {
    w <- length(unique(mid_ab$pop))
    set_dev(plot_path(SETTINGS, "perc_mid_abundant"), height=3, width = 1+w*0.5)
    plot_perc(mid_ab, p_vals_table, settings = SETTINGS, abs_prop = abs)
    unset_dev()
}

# Plot percentages for each population individually/separately:
plot_perc_sep(abundant, p_vals_table, settings = SETTINGS, accuracy = 0.1)
plot_perc_sep(non_ab, p_vals_table, settings = SETTINGS, accuracy = 0.01)

###########################
### PLOTTING DE RESULTS ###
###########################
source("./Custom_functions/plot_exprs.R")

### Create expression file (p-value < 0.1)
expression_file <- create_expr_file(SETTINGS)
expr_for_plot <- expression_file$expr_for_plot # for plotting below
expression <- expression_file$expression # for obtaining median values if you're curious
p_vals_table <- expression_file$p_vals_table

### Plot
set_dev(plot_path(SETTINGS, "plots_expr"), height=3, width = 3)
plot_expr(expr_for_plot, p_vals_table, SETTINGS) # adj_p_value; ref_p = c(0.1, 0.05, 0.01), label = c("*", "**", "***")
unset_dev()

back to top