# plot_exprs.R
#' Functions to plot the marker expressions per cell population
#' and display differences based on the previous analyses.

### Build the long-format expression tables and the pooled p-value table
### for the marker / cell-population combinations that we want to plot
#'
#' Reads the three per-comparison result files (ms, ad, ad vs ms) for the
#' tissue given in `settings`, pools them and reshapes the pooled table so
#' that every row holds one sample's median expression.
#'
#' @param settings List-like object; the fields read here are `norm`,
#'   `subsetting`, `zoom`, `zoom_pop`, `experiment_dir` and `tissue`.
#' @return A list with three data.tables:
#'   `expr_for_plot` (long table restricted to rows with `p_val < 0.1`),
#'   `expression` (long table, one row per `unique` x `sample_id`) and
#'   `p_vals_table` (the three input tables stacked, with a `comparison`
#'   column set to "ms_con", "ad_con" or "ad_ms").
create_expr_file <- function(settings){
    # Folder names follow from the analysis settings
    normalised_folder <- if (settings$norm) "normalised/" else "original/"
    subset_folder <- if (settings$subsetting) "subset/" else "whole/"
    zoom_folder <- if (settings$zoom) settings$zoom_pop else NULL
    tissue <- settings$tissue

    # All three result files share the same location and file-name stem;
    # a NULL zoom_folder simply contributes nothing to the path
    file_stem <- paste0(settings$experiment_dir, "/", tissue, "/",
                        subset_folder, normalised_folder, zoom_folder, "/",
                        tissue)
    read_results <- function(suffix) {
        fread(paste0(file_stem, suffix))
    }
    ms <- read_results("_ds_res_ms_results.csv")
    ad <- read_results("_ds_res_ad_results.csv")
    adms <- read_results("_ds_res_advsms_results.csv")

    # Pool the comparisons together
    pooled <- rbind(ms, ad, adms)

    ### Melt table: columns 6 to 27 are treated as the per-sample columns
    sample_cols <- colnames(pooled)[6:27]
    long_expr <- melt(pooled,
                      id.vars = c("unique", "cluster_id", "marker_id", "p_val"),
                      measure.vars = sample_cols,
                      value.name = "median_expr",
                      variable.name = "sample_id")

    # Add condition, derived from the sample name
    # (checked in the order control, ms, ad; NA when none matches)
    long_expr[, condition := ifelse(
        grepl("control", sample_id), "control",
        ifelse(grepl("ms", sample_id), "ms",
               ifelse(grepl("ad", sample_id), "ad", NA)))]

    # Keep only significant rows (raw p-value, before correction)
    significant <- long_expr[p_val < 0.1]
    # Keep only unique combinations of marker-population and sample
    deduplicated <- unique(long_expr, by = c("unique", "sample_id"))

    # Tag every input table with its comparison, then stack them
    ms[, comparison := "ms_con"]
    ad[, comparison := "ad_con"]
    adms[, comparison := "ad_ms"]
    p_vals_table <- rbindlist(list(ms, ad, adms))

    list(expr_for_plot = significant,
         expression = deduplicated,
         p_vals_table = p_vals_table)
}

### Plot
#' Print, for every marker in `expr`, a box plot of the per-sample median
#' expression per cell population, split by condition, and add a
#' significance bracket for every comparison that passes a p-value threshold.
#'
#' @param expr data.table with (at least) the columns `marker_id`,
#'   `cluster_id`, `median_expr` and `condition`; the condition levels used
#'   for plotting are "control", "ad" and "ms".
#' @param p_vals_table data.table with (at least) the columns `comparison`
#'   ("ad_con", "ad_ms" or "ms_con"), `cluster_id`, `marker_id` and `p_adj`.
#' @param settings Not used by this function; kept so existing calls work.
#' @return `NULL`, invisibly. One plot per marker is printed as a side effect.
plot_expr <- function(expr, p_vals_table, settings){
    colors <- c("#043741", "#189cb3", "#e79d24")
    linecol <- "gray30"

    # Loop invariants ------------------------------------------------------
    # Horizontal offset of each condition relative to its cluster position
    cond_pos <- c(control = -0.33, ad = 0, ms = 0.33)
    # The two conditions joined by the bracket of each comparison
    comparisons <- list(ad_con = c("ad", "control"),
                        ad_ms = c("ad", "ms"),
                        ms_con = c("ms", "control"))
    # Thresholds ordered from loosest to strictest, with their labels
    ref_p <- c(0.1, 0.05, 0.01)
    p_stars <- c("*", "**", "***")

    markers <- unique(expr$marker_id)
    for (m in markers) {
        ex <- expr[marker_id == m] # keep only one marker
        # Order the clusters by decreasing mean of median_expr; NAs are
        # ignored so one missing sample does not push a cluster to the end
        ex$cluster_id <- reorder(ex$cluster_id, -ex$median_expr,
                                 FUN = mean, na.rm = TRUE)
        # Level order must match the colours / legend labels below
        ex$condition <- factor(ex$condition, levels = c("control", "ad", "ms"))
        print(m)

        p <- ggplot(ex, aes(x = cluster_id, y = median_expr, fill = condition)) +
            geom_boxplot(width = 0.75, color = linecol, lwd = 0.25,
                         outlier.shape = NA, alpha = 0.5) +
            geom_point(mapping = aes(color = condition, fill = condition),
                       position = position_jitterdodge(jitter.width = 0.1,
                                                       jitter.height = 0,
                                                       dodge.width = 0.75),
                       size = 1, shape = 21, stroke = 0.1) +
            xlab(NULL) +
            ylab(paste(m, "expression")) + # dots are individual median expression
            expand_limits(y = 0) +
            scale_fill_manual(values = colors,
                              labels = c('Control', 'Dementia', 'MS'),
                              name = NULL) +
            scale_color_manual(values = colors,
                               labels = c('Control', 'Dementia', 'MS'),
                               name = NULL) +
            theme_classic() +
            theme(axis.text.x = element_text(angle = 45, hjust = 1),
                  text = element_text(size = 7))

        cluster_levels <- levels(ex$cluster_id)
        max_height <- 0
        for (cluster in as.character(unique(ex$cluster_id))) {
            # Position of this cluster on the (discrete) x axis
            x_centre <- match(cluster, cluster_levels)
            height <- ex[cluster_id == cluster, max(median_expr, na.rm = TRUE)]

            for (comp in names(comparisons)) {
                conds <- comparisons[[comp]]
                x_a <- x_centre + cond_pos[[conds[1]]]
                x_b <- x_centre + cond_pos[[conds[2]]]

                # [1]: a missing row yields NA (-> skipped below) and a
                # duplicated row cannot make the comparison recycle
                my_p_value <- p_vals_table[comparison == comp &
                                               cluster_id == cluster &
                                               marker_id == m, p_adj][1]
                passed <- which(my_p_value <= ref_p)
                if (length(passed) == 0) {
                    print(paste("Skipping", m, comp, cluster))
                    next
                }
                label <- p_stars[max(passed)] # strictest threshold passed

                # Every drawn bracket is stacked 10% above the previous one
                height <- height * 1.1
                y_bracket <- height * 1.1

                # annotate() draws each element exactly once; geom_segment()
                # / geom_text() with constant aesthetics would inherit the
                # plot data and redraw the element for every row of `ex`
                p <- p +
                    annotate("segment",
                             x = x_a, xend = x_b,
                             y = y_bracket, yend = y_bracket) +
                    annotate("text",
                             x = (x_a + x_b) / 2,
                             y = y_bracket + 0.02,
                             label = label)
            }
            max_height <- max(max_height, height)
        }
        # Leave head-room above the highest bracket
        p <- p + scale_y_continuous(expand = c(0, 0),
                                    limits = c(0, max_height * 1.5))

        print(p)
    }
    invisible(NULL)
}

# End of plot_exprs.R