--- title: "Adult scRNAseq datasets analysis" subtitle: "Scanpy PAGA" author: "Renaud Mevel" output: html_document: self_contained: yes toc: true toc_float: true df_print: paged number_sections: false editor_options: chunk_output_type: console --- ```{r setup, echo=FALSE, message=FALSE, results='hide'} library(knitr) knitr::opts_chunk$set(cache=TRUE, error=FALSE, cache.lazy = TRUE) ``` ## Objective Import data generated in Scanpy/PAGA and plot in R. Directories need to be adapted throughout the scripts. ## Prepare the environment ```{r , warning=FALSE, message=FALSE} # Data wrangling library(plyr) library(dplyr) library(tidyverse) library(data.table) # Plots library(gridExtra) library(ggpubr) library(viridis) # sc library(Seurat) library(sctransform) library(MAST) library(org.Mm.eg.db) library(DoubletFinder) # GO library(gprofiler2) # Palettes library(pals) pal25 <- as.character(pals::cols25(n=25)) pal.trt <- c("#a1e186", "#b9006e") pal.rfp <- c("#ea4749", "#479bea") pal.runs <- c("#ec0016", "#ffc554", "#20a4ff") pal.lobe = c("#272873", "#45A5A7", "#cb5155") pal.pop <- c( "#7a0177", "#dd3497", # cas AP "#f768a1", "#fa9fb5", # cas DLP "#fc9272", "#cb181d", # cas "#CC6677", "#AA4466", # cas VP "#081d58", "#225ea8", # hn AP "#7fcdbb", "#7fb8cd", # hn DLP "#67a9cf", "#b2d3e7" # hn VP ) pal.cl <- c( "#B2B8E0", #LumA "#4A6FE3", #LumB "#1037AA", #LumC "#D33F6A", #LumD "#EF9708", #LumE "#F0B98D", #LumF "#8DD593" #Basal ) # Directories setwd(dir = "~/set-directory/") pdf.dir <- "~/set-directory/" fig.dir <- "~/set-directory/" # Functions source("Adult_functions.R") # Seed set.seed(1) ``` ## Functions 1 ```{r} plotPAGAClustGraph <- function(embedding, edges, thresh = 0, colour = "in_silico_clusters") { is_discrete <- is.factor(embedding[[colour]]) gg <- ggplot(embedding, aes(x = X, y = Y)) if (is_discrete) { gg <- gg + geom_segment(data = filter(edges, Connectivity > thresh), aes(x = FromX, y = FromY, xend = ToX, yend = ToY, #alpha = Connectivity colour = Connectivity), size = (edges$Connectivity)*10 ) + scale_colour_viridis_c(direction = 1, begin = 0.2, end = 0.6, option="inferno") } else { gg <- gg + geom_segment(data = filter(edges, Connectivity > thresh), aes(x = FromX, y = FromY, xend = ToX, yend = ToY, alpha = Connectivity), colour = "grey30") + scale_alpha(limits = c(0, 1)) + scale_fill_viridis_c() } gg <- gg + geom_point(aes(fill = !!ensym(colour), size = Size), shape = 21) + scale_fill_manual(values = pal.cl) + geom_text(aes(label = in_silico_clusters)) + scale_size(range = c(5, 20)) + theme_void() + theme(legend.position = "none") return(gg) } ### ----------------------------------------- plotPAGACellGraph <- function(embedding, edges, thresh = 0, colour = "in_silico_clusters", label = FALSE) { is_discrete <- is.factor(embedding[[colour]]) gg <- ggplot(embedding, aes(x = X, y = Y, colour = !!ensym(colour))) + geom_segment(data = filter(edges, Connectivity > thresh), aes(x = FromX, y = FromY, xend = ToX, yend = ToY), size = 0.1, colour = "grey50") + geom_point(size = 0.5, aes(colour = in_silico_clusters)) + scale_colour_manual(values = pal.cl) + theme_void() + theme(legend.position = "none") if (!is_discrete) { gg <- gg + scale_color_viridis_c() } if (label) { clust_data <- embedding %>% group_by(in_silico_clusters) %>% summarise(X = mean(X), Y = mean(Y)) gg <- gg + #geom_point(data = clust_data, aes(fill = in_silico_clusters), # size = 10, shape = 21, colour = "white") + scale_fill_manual(values = pal.cl) + geom_text(data = clust_data, aes(label = in_silico_clusters), colour = "black") } return(gg) } ### ----------------------------------------- plotPAGACompare <- function(clust_embedding, clust_edges, clust_thresh = 0, cell_embedding, cell_edges, cell_thresh = 0, colour = NA, label = FALSE) { clusts <- plotPAGAClustGraph(clust_embedding, clust_edges, clust_thresh, colour) cells <- plotPAGACellGraph(cell_embedding, cell_edges, cell_thresh, colour, label) cowplot::plot_grid(clusts, cells, nrow = 1) } ``` ## Functions 2 Function to plot genes in PAGA and FA ```{r} ##### In PAGA space --------------------------------------------------------- plotPAGAgene <- function(scdata, cl.embedding, cl.edges, sc.embedding, gene = "Runx1", legend=TRUE, thresh=0, show.clusters=FALSE, rm.cl = NA) { # remove cluster if asked if (!is.na(rm.cl)) { cl.embedding <- dplyr::filter(cl.embedding, in_silico_clusters != rm.cl) cl.edges <- dplyr::filter(cl.edges, From != rm.cl, To != rm.cl) } sc.embedding[[gene]] <- logcounts(scdata)[gene, ] paga_gene <- sc.embedding %>% dplyr::select(-Cell, -X, -Y) %>% group_by(in_silico_clusters) %>% summarise_all(mean) paga_gene[, gene][paga_gene[, gene] == 0] <- NA max.scale = max(paga_gene[[gene]]) min.scale = min(paga_gene[[gene]], na.rm = TRUE) paga_embedding <- left_join(cl.embedding, paga_gene, by = "in_silico_clusters") cl.edges <- cl.edges %>% mutate(Connectivity = ifelse(Connectivity < thresh, NA, Connectivity)) g <- ggplot(paga_embedding, aes(x = X, y = Y)) + geom_segment(data = cl.edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY), size = (cl.edges$Connectivity)*10, alpha=0.5) + geom_point(aes(fill = !!ensym(gene), size = Size), shape = 21, colour="grey30") + scale_fill_viridis(option = "plasma", na.value = "grey80", begin = 0, end = 0.9, limits = c(0, max.scale)) + scale_size(range = c(6, 12), guide = FALSE) + theme_void() if (legend==FALSE) { g <- g + theme(legend.position = "none") } if (show.clusters==TRUE) { g <- g + geom_text(aes(label = in_silico_clusters)) } g } ##### In ForceAtlas space --------------------------------------------------------- plotFAgene <- function(scdata, cell.embedding, cell.edges, gene, legend=TRUE, pt.size=0.8, show.connections=TRUE, thresh=0) { cell.embedding[[gene]] <- logcounts(scdata)[gene, ] cell.embedding[, 5][cell.embedding[, 5] == 0] <- NA # replace 0 with NA max.scale = max(cell.embedding[[gene]]) min.scale = min(cell.embedding[[gene]], na.rm = TRUE) cell.edges <- cell.edges %>% mutate(Connectivity = ifelse(Connectivity < thresh, NA, Connectivity)) g <- ggplot(cell.embedding, aes(x = X, y = Y)) if (show.connections==TRUE) { g <- g + geom_segment(data = cell.edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY, size = Connectivity, alpha = Connectivity), colour="grey50") g <- g + scale_alpha_continuous(limits = c(0, 1), range = c(0.2, 1), guide = FALSE) g <- g + scale_size(range = c(0.1, 0.5), guide = FALSE) } g <- g + geom_point(aes(colour = !!ensym(gene)), size=pt.size) g <- g + scale_colour_viridis_c(option = "plasma", na.value = "grey80", end = 1, limits = c(min.scale, max.scale)) g <- g + theme_void() if (legend==FALSE) { g <- g + theme(legend.position = "none") } g } ``` ## Load data ```{r} seu <- readRDS(file.path("r_save/sce.sc.umap.rds")) sce <- as.SingleCellExperiment(seu, assay = "RNA") ``` ## Load PAGA ```{r} # cluster embeddings-------------------------------------------------------------------------- clust_embedding <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cluster_embedding.csv", col_types = cols(.default = col_double())) %>% mutate(Size = as.numeric(table(colData(sce)$in_silico_clusters))) %>% dplyr::rename(Cluster = in_silico_clusters) %>% mutate(Cluster = factor(Cluster)) %>% mutate(in_silico_clusters = factor(names(table(colData(sce)$in_silico_clusters)), levels = levels(colData(sce)$in_silico_clusters))) # cluster edges-------------------------------------------------------------------------------- clust_edges <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cluster_edges.csv", col_types = cols(.default = col_double())) %>% mutate(To = factor(To, levels = levels(clust_embedding$Cluster)), From = factor(From, levels = levels(clust_embedding$Cluster))) %>% left_join(clust_embedding, by = c("From" = "Cluster")) %>% dplyr::rename(FromX = X, FromY = Y, From_in_silico_clusters = in_silico_clusters) %>% dplyr::select(-Size) %>% left_join(clust_embedding, by = c("To" = "Cluster")) %>% dplyr::rename(ToX = X, ToY = Y, To_in_silico_clusters = in_silico_clusters) %>% dplyr::select(-Size) # remove 'Cluster'---------------------------------------------------------------------------- clust_embedding <- dplyr::select(clust_embedding, -Cluster) clust_edges <- clust_edges %>% dplyr::mutate(From = From_in_silico_clusters, To = To_in_silico_clusters) %>% dplyr::select(-From_in_silico_clusters, -To_in_silico_clusters) # cell embeddings---------------------------------------------------------------------------- cell_embedding <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cell_embedding.csv", col_types = cols(.default = col_double(), Cell = col_character()) ) %>% mutate(in_silico_clusters = colData(sce)$in_silico_clusters) # cell edges --------------------------------------------------------------------------------- cell_edges <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cell_edges.csv", col_types = cols(.default = col_double(), From = col_character(), To = col_character()) ) %>% dplyr::left_join(cell_embedding, by = c("From" = "Cell")) %>% dplyr::rename(FromX = X, FromY = Y) %>% dplyr::select(-in_silico_clusters) %>% dplyr::left_join(cell_embedding, by = c("To" = "Cell")) %>% dplyr::rename(ToX = X, ToY = Y) %>% dplyr::select(-in_silico_clusters) ``` ## Visualise PAGA ### Cluster graph ```{r} plotPAGAClustGraph(clust_embedding, clust_edges, thresh = 0.0) ``` ### Edges threshold Number of selected edges for different threshold connectivities. ```{r} plot_data <- tibble( Threshold = seq(0, 1, 0.01) ) %>% mutate(Edges = map_int(Threshold, function(thresh) { sum(clust_edges$Connectivity > thresh) })) con_thresh <- 0.7 ggplot(plot_data, aes(x = Threshold, y = Edges)) + geom_point() + geom_line() + geom_vline(xintercept = con_thresh, colour = "red") + xlab("Connectivity threshold") + ylab("Number of edges") + theme_minimal() ``` ### Cell graph ```{r} plotPAGACellGraph(cell_embedding, cell_edges, thresh = 0.03) ``` ### Compare ```{r} plotPAGACompare(clust_embedding, clust_edges, clust_thresh = 0, cell_embedding, cell_edges, cell_thresh = 0) ``` ### Genes ```{r} known_genes <- c( # Luminal "Spink1", "Msmb", "Pbsn", "Psca", # Basal "Krt5", "Krt14", "Trp63", "Apoe", # Of interest "Runx1", "Nkx3-1", "Tacstd2", # Regressed "Lpl", "Basp1", "Car2", "Crym" ) for (gene in known_genes) { cell_embedding[[gene]] <- logcounts(sce)[gene, ] } clust_genes <- cell_embedding %>% dplyr::select(-Cell, -X, -Y) %>% group_by(in_silico_clusters) %>% summarise_all(mean) clust_embedding <- left_join(clust_embedding, clust_genes, by = "in_silico_clusters") ``` ```{r} plotPAGACompare(clust_embedding, clust_edges, clust_thresh = 0, cell_embedding, cell_edges, cell_thresh = 0, colour = 'Runx1') ``` ### Gene expression Bac a sable In PAGA ```{r} plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Nkx3-1") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Runx1") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Tacstd2") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Mki67") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Psca") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt4") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Nupr1") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Meis2") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Shh") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Ar") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Foxa1") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Etv1") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Etv4") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Gata2") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Trp63") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Apoe") ( plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt5") / plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt14") / plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt7") / plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt19") ) | ( plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt8")/ plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt18")/ plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt17")/ plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt4") ) plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Gsto1") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Cd24a") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Itga6") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Dpp4") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt20") plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Sox2") ``` In ForceAtlas ```{r} plotFAgene(sce, cell_embedding, cell_edges, gene = "Runx1", pt.size = 1, thresh=0, show.connections = TRUE, legend=FALSE) plotFAgene(sce, cell_embedding, cell_edges, gene = "Nkx3-1", pt.size = 1, thresh=0, show.connections = TRUE, legend=FALSE) plotFAgene(sce, cell_embedding, cell_edges, gene = "Shh", pt.size = 1, thresh=0, show.connections = TRUE) ``` ### Figures Coloured connections ```{r} ggplot(clust_embedding, aes(x = X, y = Y)) + geom_segment(data = clust_edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY, colour = Connectivity, alpha = Connectivity), size = (clust_edges$Connectivity)*8) + scale_colour_viridis_c(direction = 1, begin = 0.2, end = 0.6, option="inferno") + scale_alpha_continuous(limits = c(0, 1), range = c(0.7, 0.9), guide = FALSE) + geom_point(aes(fill = in_silico_clusters, size = Size), shape = 21, colour="white") + scale_fill_manual(values = pal.cl) + scale_size(range = c(5, 10), guide = FALSE) + theme_void() #ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters_c.png"), device = "png", width = 7, height = 4, dpi = 300) ggplot(clust_embedding, aes(x = X, y = Y)) + geom_segment(data = clust_edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY, colour = Connectivity, alpha = Connectivity), size = (clust_edges$Connectivity)*8) + scale_colour_viridis_c(direction = 1, begin = 0.2, end = 0.6, option="inferno") + scale_alpha_continuous(limits = c(0, 1), range = c(0.7, 0.9), guide = FALSE) + geom_point(aes(fill = in_silico_clusters, size = Size), shape = 21, colour="white") + scale_fill_manual(values = pal.cl) + scale_size(range = c(5, 10), guide = FALSE) + theme_void() + theme(legend.position = "none") #ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters_c_nolegend.png"), device = "png", width = 6, height = 4, dpi = 300) ``` Black connections ```{r} ggplot(clust_embedding, aes(x = X, y = Y)) + geom_segment(data = clust_edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY, alpha = Connectivity), size = (clust_edges$Connectivity)*8) + scale_alpha_continuous(limits = c(0, 1), range = c(0.7, 0.9), guide = FALSE) + geom_point(aes(fill = in_silico_clusters, size = Size), shape = 21, colour="white") + scale_fill_manual(values = pal.cl) + scale_size(range = c(5, 10), guide = FALSE) + theme_void() #ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters.png"), device = "png", width = 7, height = 4, dpi = 300) ggplot(clust_embedding, aes(x = X, y = Y)) + geom_segment(data = clust_edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY, alpha = 0.5), size = (clust_edges$Connectivity)*10) + geom_point(aes(fill = in_silico_clusters, size = Size), shape = 21, colour="white") + scale_fill_manual(values = pal.cl) + scale_size(range = c(6, 12), guide = FALSE) + theme_void() + theme(legend.position = "none") #+ scale_y_reverse() + coord_flip() ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters_nolegend.png"), device = "png", width = 5, height = 5, dpi = 300) ``` ```{r} ggplot(cell_embedding, aes(x = X, y = Y)) + geom_segment(data = cell_edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY, size = Connectivity, alpha = Connectivity), colour="grey50") + geom_point(aes(colour = in_silico_clusters), size = 0.5) + scale_colour_manual(values = pal.cl) + scale_alpha_continuous(limits = c(0, 1), range = c(0.05, 0.5), guide = FALSE) + scale_size(range = c(0.1, 0.5), guide = FALSE) + theme_void() #ggsave(filename = file.path(fig.dir, "4_paga/PAGA_cells.png"), device = "png", width = 7, height = 4, dpi = 300) ggplot(cell_embedding, aes(x = X, y = Y)) + geom_segment(data = cell_edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY, size = Connectivity, alpha = Connectivity), colour="grey50") + geom_point(aes(colour = in_silico_clusters), size = 0.5) + scale_colour_manual(values = pal.cl) + scale_alpha_continuous(limits = c(0, 1), range = c(0.05, 0.5), guide = FALSE) + scale_size(range = c(0.1, 0.5), guide = FALSE) + theme_void() + theme(legend.position = "none") ggsave(filename = file.path(fig.dir, "4_paga/PAGA_cells_nolegend.png"), device = "png", width = 5, height = 5, dpi = 300) ``` -------------------------------------------------------------------------------- ## Session Information ```{r} sessionInfo() ```