clustering.R
#' Uses `cluster()` function from CATALYST to perform `FlowSOM` clustering
#' & `ConsensusClusterPlus` metaclustering.
run_clustering <- function(sce, settings, skip_checkpoint = FALSE, features = type_markers(sce)) {
### Use CATALYST function cluster() to build SOM with FlowSOM and do
### metaclustering with ConsensusClusterPlus()
# Check if RDS already there, open if so
checkpoint_path <- object_path(settings, "sce_with_clustering.RDS")
if (!skip_checkpoint && file.exists(checkpoint_path)) {
cat(paste0("Loading result from checkpoint ", checkpoint_path))
sce <- readRDS(checkpoint_path)
return(sce)
}
# Create new folder and RDS if there's none from before
dir.create(object_path(settings, NULL))
sce <- CATALYST::cluster(sce, features = features,
xdim = 10, ydim = 10, maxK = 20, seed = 1234)
saveRDS(sce, checkpoint_path)
# max n of clusters as an overestimation
return(sce)
}
plot_elbow_plot <- function(sce, settings) {
# Delta plot or elbow plot to find the optimal number of clusters
set_dev(plot_path(settings, "elbow_plot"), height=10, width=5)
p <- metadata(sce)$delta_area
print(p)
unset_dev()
return(p)
}
plot_heatmap_cellpops <- function(sce, settings, clusters, features = "type", bin_anno = F,
name = "heatmap_cellpops") {
### Heatmap
set_dev(plot_path(settings, name))
# p <- plotExprHeatmap(sce, #hm2 = NULL, cluster_anno = TRUE, draw_freqs = TRUE,
# k = clusters, m = NULL)
p <- plotExprHeatmap(sce, features = features, fun = "median",
by = "cluster_id", k = clusters,
bars = TRUE, perc = TRUE, bin_anno = bin_anno)
print(p)
unset_dev()
return(p)
}
plot_distribution_markers <- function(sce, clusters, settings) {
# Distributions of marker intensities
set_dev(plot_path(settings, "distrib_cellpops"))
p <- plotClusterExprs(sce, k = clusters, features = "type")
print(p)
unset_dev()
return(p)
}
plot_specific_marker_heatmap <- function(sce, markers, clusters, settings) {
# Visualize median marker expression in the 100 SOM codes
p <- plotClusterHeatmap(sce,
hm2 = markers, k = "som100", m = clusters,
cluster_anno = FALSE, draw_freqs = TRUE)
return(p)
}
run_tsne_and_umap <- function(sce, settings, skip_checkpoint = FALSE) {
checkpoint_path <- object_path(settings, "sce_with_clustering_with_tsne_and_umap.RDS")
if (!skip_checkpoint && file.exists(checkpoint_path)) {
cat(paste0("Loading result from checkpoint ", checkpoint_path))
sce <- readRDS(checkpoint_path)
return(sce)
}
# Run t-SNE/UMAP on at most 500/1000 cells per sample
sce <- runDR(sce, dr = "TSNE", cells = 500, features = "type")
sce <- runDR(sce, dr = "UMAP", cells = 1e3, features = "type")
saveRDS(sce, checkpoint_path)
return(sce)
}
plot_tsne_and_umap <- function(sce, clusters, settings) {
# Compare tSNE to uMAP
p1 <- plotDR(sce, "TSNE", color_by = clusters) +
theme(legend.position = "none")
p2 <- plotDR(sce, "UMAP", color_by = clusters)
lgd <- get_legend(p2 + guides(color = guide_legend(ncol = 2, override.aes = list(size = 3))))
p2 <- p2 + theme(legend.position = "none")
set_dev(plot_path(settings, "tsne_umap"))
p <- plot_grid(p1, p2, lgd, nrow = 1, rel_widths = c(5, 5, 2))
print(p)
unset_dev()
return(p)
}
plot_umap_per_sample <- function(sce, clusters, settings) {
# Facet per sample
set_dev(plot_path(settings, "umap_sample"))
p <- plotDR(sce, "UMAP", color_by = clusters) +
facet_wrap("sample_id") +
guides(color = guide_legend(ncol = 2, override.aes = list(size = 3)))
print(p)
unset_dev()
return(p)
}
plot_umap_per_condition <- function(sce, clusters, settings) {
# Facet per condition
set_dev(plot_path(settings, "umap_condition"))
p <- plotDR(sce, "UMAP", color_by = clusters) +
facet_wrap("condition") +
guides(color = guide_legend(ncol = 2, override.aes = list(size = 3)))
print(p)
unset_dev()
return(p)
}
plot_umap_markers <- function(sce, features = type_markers(sce), settings) {
set_dev(plot_path(settings, "umap_per_marker"))
p <- plotDR(sce, "UMAP", color_by = features, ncol = 7, a_pal = rev(brewer.pal(11, "RdYlBu"))) # UMAP
print(p)
unset_dev()
return(p)
}
apply_manual_merging <- function(sce, clusters, settings, name = "CP_CyTOF-cluster-merging-1.csv") {
merge1 <- fread(object_path(settings, name))[, 1:2]
# convert to factor with merged clusters in desired order
cluster_order <- unique(merge1$new_cluster)
merge1$new_cluster <- factor(merge1$new_cluster,
levels = cluster_order)
merge1$new_cluster <- factor(merge1$new_cluster)
# apply manual merging
sce <- mergeClusters(sce, k = clusters,
table = merge1, id = "merge1")
return(sce)
}
plot_umap_merged <- function(sce, settings, name = "umap_merged_clusters") {
set_dev(plot_path(settings, name = name), height=5, width=5)
p <- plotDR(sce, "UMAP", color_by = "merge1") # UMAP
print(p)
unset_dev()
return(p)
}
plot_heatmap_cellpops_2layers <- function(sce, clusters, settings) {
### Heatmap cell pops
# clustering to use for computing cluster medians is specified with k
# for visualization, we specify a second layer of cluster annotations with m
set_dev(plot_path(settings, "heatmap_2layers"))
p <- plotClusterHeatmap(sce, k = clusters, m = "merge1")
print(p)
unset_dev()
return(p)
}
plot_heatmap_cellpops_2layers_merged <- function(sce, clusters,
features = NULL, settings,
name = "heatmap_2layers_merged") {
# with merged clusters and freqs
set_dev(plot_path(settings, name), height = 5)
p <- plotExprHeatmap(sce, features = features,
by = "cluster_id", k = "merge1",
bars = TRUE, perc = TRUE)
print(p)
unset_dev()
return(p)
}
plot_umap_manual_vs_auto <- function(sce, settings, meta = "meta20") {
p1 <- plotDR(sce, "UMAP", color_by = "merge1")
p2 <- plotDR(sce, "UMAP", color_by = meta)
set_dev(plot_path(settings, "manual_vs_auto"))
p <- plot_grid(p1, p2, ncol = 1, align = "v", labels = c("A", "B"))
print(p)
unset_dev()
return(p)
}