https://github.com/MolecularCellBiologyImmunology/cytof-periventricular-ms
Revision 70e973c2d935e4ff2cb080d7feef0dd08c23e061 authored by sabelarl on 07 July 2021, 12:56:38 UTC, committed by GitHub on 07 July 2021, 12:56:38 UTC
1 parent b8c7f95
Raw File
Tip revision: 70e973c2d935e4ff2cb080d7feef0dd08c23e061 authored by sabelarl on 07 July 2021, 12:56:38 UTC
Update README.md
Tip revision: 70e973c
CP_CyTOF_analysis.R
### Load packages
library(HDCytoData)
library(readxl)
library(CATALYST)
library(ggplot2)
library(cowplot)
library(diffcyt)
library(SummarizedExperiment)
library(data.table)
library(stringr)
library(tidyverse)
library(SingleCellExperiment)
library(lme4)
library(multcomp)
library(ComplexHeatmap) # for DA heatmaps
library(dplyr)
library(gplots)
library(RColorBrewer)
library(devtools)

setwd("~/Documents/Data analysis/CyTOF/CP_CyTOF_data/")

############################
### CHOOSE YOUR ANALYSIS ###
############################

# Make list with all options
SETTINGS <- list(
    experiment_dir = normalizePath("."),
    SAVE_PLOTS = T, # Export plots
    SAVE_TABLES = T, # Export tables
    norm = F, # Normalisation
    subsetting = T, # Subsetting samples by cell number
    zoom = F, # Zoom into cell pops?
    zoom_pop = NULL, # Zoom into cell pops?
    tissue = 'septum' # Choose tissue
)

source("./Custom_functions/CP_CyTOF_functions.R") # Load created functions

###################
### DATA IMPORT ###
###################

source("./Custom_functions/data_import.R")

experiment <- load_experiment(SETTINGS)

sce <- construct_sce(experiment)

########################
### DIAGNOSTIC PLOTS ###
########################

source("./Custom_functions/diagnostic_plots.R")

plot_marker_expression_distributions(sce, SETTINGS)

plot_marker_expression_distributions(sce, SETTINGS, do_log = TRUE)

plot_n_cells_per_sample(sce, SETTINGS)

plot_mds(sce, SETTINGS)

plot_heatmap(sce, SETTINGS)

plot_redundancy_score(sce, SETTINGS)

####################
###  CLUSTERING  ###
####################

source("./Custom_functions/clustering.R")

set.seed(1234) # set random seed for reproducibility

sce <- run_clustering(sce, SETTINGS, skip_checkpoint = T)

plot_elbow_plot(sce, SETTINGS)

# ADJUST k:
clusters <- "meta20"

plot_heatmap_cellpops(sce, SETTINGS, clusters, name = "heatmap_cellpops")

plot_heatmap_cellpops(sce, SETTINGS, clusters, bin_anno = T, name = "heatmap_cellpops_anno")

plot_distribution_markers(sce, clusters, SETTINGS)

plot_specific_marker_heatmap(sce, "CD45RA", clusters, SETTINGS)

sce <- run_tsne_and_umap(sce, SETTINGS, skip_checkpoint = T)

plot_tsne_and_umap(sce, clusters, SETTINGS)

plot_umap_per_sample(sce, clusters, SETTINGS)

plot_umap_per_condition(sce, clusters, SETTINGS)

plot_umap_markers(sce, type_markers(sce), SETTINGS)

##################################### 
### CLUSTER MERGING AND ANOTATION ###
#####################################

### Manual cluster merging and annotation based on heatmaps ###

sce_checkpoint <- object_path(SETTINGS, "sce_with_clustering_with_tsne_and_umap.RDS")
# saveRDS(sce, sce_checkpoint)

if (!exists("sce")) {
    sce <- readRDS(sce_checkpoint)
}

clusters <- "meta20"

sce <- apply_manual_merging(sce, clusters, SETTINGS)

plot_umap_merged(sce, SETTINGS)
if (SETTINGS$tissue == "septum") {
    sce_immune <- filterSCE(sce, cluster_id != "CD45-", k = "merge1")
    plot_umap_merged(sce_immune, SETTINGS, name = "umap_merged_clusters_immune")
}


# If you want to check how many clusterings the SCE object contains:
colnames(metadata(sce)$cluster_codes)

plot_heatmap_cellpops_2layers(sce, clusters, SETTINGS)

plot_heatmap_cellpops_2layers_merged(sce, clusters, features = "type", SETTINGS)
if (SETTINGS$tissue == "septum") {
    plot_heatmap_cellpops_2layers_merged(sce_immune, clusters, 
                                         features = "type", SETTINGS,
                                         name = "heatmap_2layers_merged_immune")
}

plot_umap_manual_vs_auto(sce, SETTINGS)

### ### ### ### ### ### ### ### 
### DIFFERENTIAL ANALYSIS ###
### ### ### ### ### ### ### ### 
FDR_cutoff <- 0.1

# Import adjusted functions:
source("./Custom_functions/differential_analysis.R")
source("./Custom_functions/diffcyt_adjusted.R") 
source("./Custom_functions/plotDiffHeatmap_adjusted.R") 

### ### ### ### ### ### ### ### ### ### ### ### 
### Differential cell population abundance ###
stack_and_boxplots(sce, SETTINGS)

# Proportion table (out of CD45+ immune cells):
export_prop_table(sce = sce, merge = "merge1", name = "prop_table", SETTINGS)
export_absolute_table(sce = sce, merge = "merge1", name = "absolute_table", SETTINGS)

da_formulas <- create_da_formulas(sce, SETTINGS) # We don't include age, gender, etc as covariates
da_formula1 <- da_formulas$da_formula1
da_formula2 <- da_formulas$da_formula2

### MS vs control:
differential_abundance(sce, SETTINGS, contrast = "conditionms", formula = da_formula1, 
                       clustering_to_use = "merge1", group1 = "control", group2 = "ms",
                       name = "da_res_ms")

### AD vs control:
differential_abundance(sce, SETTINGS, contrast = "conditionad", formula = da_formula2, 
                       clustering_to_use = "merge1", group1 = "control", group2 = "ad",
                       name = "da_res_ad")

### AD vs MS
differential_abundance(sce, SETTINGS, contrast = "conditionad", formula = da_formula1, 
                       clustering_to_use = "merge1", group1 = "ms", group2 = "ad",
                       contrast_extra = T, contrast_extra_con = "conditionms", 
                       name = "da_res_advsms")

### Non-parametric tests
df <- get_freqs(sce, merge = "merge1") # Get frequencies

kruskal_per_cluster(df, SETTINGS, name = "kruskall") ## Do Kruskall-Wallis per cluster and export

posthoc_pairwise(df) # Post hoc pairwise control, MS, AD; not exporting

### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### Differential analysis of marker expression stratified by cell population ###
plot_median_expression(sce, SETTINGS)

ds_formulas <- create_ds_formulas(sce) # We don't include age, gender, etc as covariates
ds_formula1 <- ds_formulas$ds_formula1

state_dt <- select_markers(markers_of_interest = "CP_CyTOF_state_markers_per_pop.csv")

### MS vs control
differential_expression(sce, SETTINGS, contrast = "conditionms", formula = ds_formula1, 
                       clustering_to_use = "merge1", group1 = "control", group2 = "ms",
                       name = "ds_res_ms")

### AD vs control
differential_expression(sce, SETTINGS, contrast = "conditionad", formula = ds_formula1, 
                        clustering_to_use = "merge1", group1 = "control", group2 = "ad",
                        name = "ds_res_ad")

### AD vs MS
differential_expression(sce, SETTINGS, contrast = "conditionad", formula = ds_formula1, 
                        clustering_to_use = "merge1", group1 = "ms", group2 = "ad",
                        contrast_extra = T, contrast_extra_con = "conditionms",
                        name = "ds_res_advsms")

plot_heatmap_expresssion(sce, fdr = FDR_cutoff, SETTINGS)


###########################
### PLOTTING DA RESULTS ###
###########################
source("./Custom_functions/plot_percentages.R")

### Create frequency file
freq_file <- create_freq_file(abs_prop = T, SETTINGS)
freqs <- freq_file$freqs
med_sum <- freq_file$med_sum
p_vals_table <- freq_file$p_vals_table

### Organise frequency file by abundance, per tissue:
org_freqs <- organise_freqs(freqs, SETTINGS)
abundant <- org_freqs$abundant
mid_ab <- org_freqs$mid_ab
low_ab <- org_freqs$low_ab
non_ab <- org_freqs$non_ab

### Plot
w <- length(unique(abundant$pop))
set_dev(plot_path(SETTINGS, "perc_abundant"), height=3, width = 1+w*0.5)
plot_perc(abundant, p_vals_table, settings = SETTINGS)
unset_dev()

w <- length(unique(low_ab$pop))
set_dev(plot_path(SETTINGS, "perc_low_abundant"), height= 3, width = 1+w*0.5)
plot_perc(low_ab, p_vals_table, settings = SETTINGS)
unset_dev()

if (!is.null(mid_ab)) {
    w <- length(unique(mid_ab$pop))
    set_dev(plot_path(SETTINGS, "perc_mid_abundant"), height=3, width = 1+w*0.5)
    plot_perc(mid_ab, p_vals_table, settings = SETTINGS)
    unset_dev()
}

# Plot percentages for each population individually/separately:
plot_perc_sep(abundant, p_vals_table, settings = SETTINGS, accuracy = 0.1)
plot_perc_sep(non_ab, p_vals_table, settings = SETTINGS, accuracy = 0.01)

###########################
### PLOTTING DE RESULTS ###
###########################
source("./Custom_functions/plot_exprs.R")

### Create expression file (p-value < 0.1)
expression_file <- create_expr_file(SETTINGS)
expr_for_plot <- expression_file$expr_for_plot # for plotting below
expression <- expression_file$expression # for obtaining median values if you're curious
p_vals_table <- expression_file$p_vals_table

### Plot
set_dev(plot_path(SETTINGS, "plots_expr"), height=5, width = 5)
plot_expr(expr_for_plot, p_vals_table, SETTINGS) # adj_p_value; ref_p = c(0.1, 0.05, 0.01), label = c("*", "**", "***")
unset_dev()


back to top