Explant_3_sc_explant_AUCell.Rmd
---
title: "Explant scRNAseq datasets analysis"
subtitle: "AUCell analysis"
author: "Renaud Mevel"
output:
html_document:
self_contained: yes
toc: true
toc_float: true
df_print: paged
number_sections: false
editor_options:
chunk_output_type: console
---
```{r setup, echo=FALSE, message=FALSE, results='hide'}
library(knitr)
knitr::opts_chunk$set(cache=TRUE, error=FALSE, cache.lazy = TRUE)
```
## Objective
Perform Gene Set Enrichment Analysis on the epithelial Adult Single Cell data, using the AUCell package.
## Prepare the environment
```{r , warning=FALSE, message=FALSE}
# Data wrangling
library(plyr)
library(dplyr)
library(tidyverse)
library(data.table)
# Plots
library(gridExtra)
library(ggpubr)
library(viridis)
# sc
library(Seurat)
library(sctransform)
library(MAST)
library(org.Mm.eg.db)
library(DoubletFinder)
# GO
library(gprofiler2)
library(scran)
library(scater)
library(SingleCellExperiment)
library(AUCell)
library(GSEABase)
library(biomaRt)
library(mixtools)
# Palettes
library(ggthemes)
library(ggbeeswarm)
library(RColorBrewer)
set2 <- brewer.pal(n = 8, name = "Set2")
library(viridisLite)
pal.sa.vi <- viridisLite::viridis(n = 4, begin = 0.05, end = 0.95)
library(pals)
pal25 <- as.character(pals::cols25(n=25))
kel <- as.character(pals::kelly(n=22)[3:22])
col4 <- c("#FFC300", "#d73027", "#4294ff", "#20027c")
# Custom palettes
#pal.hto <- c("#C687D6","#DC2B19", "#F7A400", "#72ABF8", "#434CA7","#9DE79D")
pal.hto.vi <- c("#FF679A", pal.sa.vi, "#43C3FF")
pal.all <- c("#F3C600", "#895295", "#E88DAC", "#008A53", "#C0132C", "#A1C9F4")
pal.sa <- c("#DC2B19", "#F7A400", "#72ABF8", "#434CA7")
pal.cl <- c(
"#F09900", #C0
"#F1BA88", #C1
"#D63A69", #C2
"#9DDED6", #C3
"#8DD593", #C4
"#428654", #C5
"#B2B8E0", #C6
"#4A6FE3", #C7
"#1037AA" #C8
)
# Directories
setwd(dir = "~/set-directory/")
pdf.dir <- "~/set-directory/"
fig.dir <- "~/set-directory/"
# Functions
source("Explant_functions.R")
# Seed
set.seed(1)
```
## Load and prepare data
Load if need to start here
```{r}
sce <- readRDS(file.path("r_save/sce_final_paga_fa_dpt.rds"))
```
Extract expression matrix
```{r}
SCE <- as.SingleCellExperiment(sce, assay = "RNA")
exprMatrix <- counts(SCE)
exprMatrix <- as.matrix(exprMatrix)
dim(exprMatrix)
```
Extract plot for dimension reduction
```{r}
# Extract embeddings
fa.sce <- reducedDims(SCE)[["FA"]]
# Make new column names
colnames(fa.sce) <- c("FA_1", "FA_2")
# Plot
plot(fa.sce, pch=16, cex=.3)
dev.off()
```
## Load Custom Gene sets
Load from a directory containing the output of marker genes obtained from DE analysis.
```{r directories, warning=F, message=F}
# directory of the data generated by QuPath
geneset.dir <- "/sc_adult_dataset/r_export/custom_gene_sets/"
# create a path to all files
file.list <- list.files(path = geneset.dir, full.names = TRUE, recursive = FALSE)
file.names <- list.files(path = geneset.dir, full.names = FALSE, recursive = FALSE)
# read it in a list of data franes, and name them appropriately
geneset.list <- lapply(file.list, read.delim, sep = "\t")
file.names <- gsub(".txt", "", file.names)
names(geneset.list) <- file.names
# Extract gene list
for ( i in seq_along(file.names) ) {
geneset.list[[i]] <-
geneset.list[[i]] %>%
#top_n(n = 80, wt = avg_logFC) %>%
filter(avg_logFC >= 0.5) %>%
dplyr::select(gene) %>%
pull() %>%
as.character()
}
# convert to GeneSet format
GeneSetFormat.list <- list()
for ( i in seq_along(file.names) ) {
GeneSetFormat.list[[i]] <- GeneSet(geneset.list[[i]], setName=names(geneset.list)[i])
}
# Make a GeneSetCollection object
geneSets <- GeneSetCollection(unlist(GeneSetFormat.list))
```
Check how many of these genes are in the expression matrix:
```{r}
gS <- AUCell::subsetGeneSets(geneSets = geneSets, rownames(exprMatrix))
cbind(nGenes(geneSets))
# Add number of genes in the name
geneSets <- setGeneSetNames(geneSets, newNames=paste(names(geneSets), " (", nGenes(geneSets) ,"g)", sep=""))
```
## 1. Gene rankings
```{r}
cells_rankings <- AUCell_buildRankings(exprMatrix, nCores=4, plotStats=TRUE)
cells_rankings
#save(cells_rankings, file=file.path("r_save/AUCell_rankings.RData"))
```
## 2. AUC gene signatures
```{r}
cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings)
#save(cells_AUC, file="r_save/AUCell_calcAUC.RData")
#load(file="r_save/AUCell_calcAUC.RData")
```
## 3. Determine cells given gene signatures or active gene sets
AUCell_exploreThresholds / cells assignments
```{r, eval=FALSE}
set.seed(123)
par(mfrow=c(3,3))
cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist=TRUE, assign=FALSE, smallestPopPercent=0.1)
dev.off()
#Show warnings
warningMsg <- sapply(cells_assignment, function(x) x$aucThr$comment)
warningMsg[which(warningMsg!="")]
```
## 4. Visualise Gene set activity
Histograms
```{r}
AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "histogram",
cellsAUC=cells_AUC[1:1,])
# Start ################################### Loop to export plots
list <- seq_along(cells_AUC)
plotnames <- names(cells_AUC[1:length(list),])
for (i in seq_along(list)){
plotn <- list[i]
plotname <- plotnames[i]
mypath <- file.path("r_figures/8_AUCell/", paste("AUCell_hist_", plotname, ".png", sep = ""))
png(file=mypath, width = 6, height = 4, units = "in", res = 300)
AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "histogram",
cellsAUC=cells_AUC[plotn,])#, thresholds=selectedThresholds)
dev.off()
}
# End Loop ###################################
```
On reduced dimensions
```{r}
AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "AUC",
cellsAUC=cells_AUC[1:1,],cex=0.5, alphaOn = 1)
# Start ################################### Loop to export plots
list <- seq_along(cells_AUC)
plotnames <- names(cells_AUC[1:length(list),])
for (i in seq_along(list)){
plotn <- list[i]
plotname <- plotnames[i]
mypath <- file.path("r_figures/8_AUCell/", paste("AUCell_", plotname, ".png", sep = ""))
png(file=mypath, width = 6, height = 6, units = "in", res = 300)
AUCell_plotTSNE(
tSNE=fa.sce,
exprMat=exprMatrix,
plots = "AUC",
cellsAUC=cells_AUC[plotn,])
dev.off()
}
# End Loop ###################################
```
## 5. Export AUCell scores
```{r}
# Extract per cell scores
AUC_scores <- cells_AUC@assays@data@listData[["AUC"]]
# Add as an independent assay
sce[["AUC"]] <- CreateAssayObject(data = AUC_scores)
# Plot
DefaultAssay(sce) <- "AUC"
seurat_genesets <- rownames(sce)
# X vs all
FeaturePlot(sce, reduction = "FA", features = seurat_genesets[c(9)], pt.size = 1, order=FALSE) +
scale_colour_viridis(option = "plasma", end = 0.8) #+ NoLegend() + NoAxes() + ggtitle("")
```
Function and extract coord.
```{r}
source("r_scripts/functions.R")
sce.FA <- as.data.frame(Embeddings(sce[["FA"]]))
sce.FA <- sce.FA %>%
dplyr::mutate(CellBarcode = colnames(sce),
Clusters = sce$seurat_clusters,
Sample = sce$sample)
```
Batch / Selection genesets - GGplot + legend
```{r, eval=FALSE}
DefaultAssay(sce) <- "AUC"
#sel <- seurat_genesets[c(2,4,8,10,12,15)]
sel <- seurat_genesets[c(2,4,6,8,10,12)]
for (i in seq_along(sel) ) {
geneset.name <- sel[i]
fplot <- plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes")
ggsave(fplot, filename = file.path(fig.dir, paste0("8_AUCell/AUCell_legend_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
}
```
## 6. Figures
test
```{r, eval=FALSE}
geneset.name <- "LumA-vs-intact (49g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)
geneset.name <- "LumB-vs-intact (269g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)
geneset.name <- "LumC-vs-intact (96g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)
geneset.name <-"LumD-vs-intact (205g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)
geneset.name <- "LumEF-vs-all (100g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)
geneset.name <- "Bas-vs-LumABCD (358g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)
```
Manual settings
Scale all luminal genesets from 0 (min) to max (0.46) of all luminal genesets (in Lum-D population). Scale basal independently to min-max of the geneset.
```{r}
geneset.name <- "LumA-vs-intact (49g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "LumB-vs-intact (269g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "LumC-vs-intact (96g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <-"LumD-vs-intact (205g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "LumEF-vs-all (100g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "Bas-vs-LumABCD (358g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(NA,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
```
Auto max settings
```{r}
geneset.name <- "LumA-vs-intact (49g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "LumB-vs-intact (269g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "LumC-vs-intact (96g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <-"LumD-vs-intact (205g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "LumEF-vs-all (100g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
geneset.name <- "Bas-vs-LumABCD (358g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(NA,NA)) #min max auto
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA)) #min max auto
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
```
Histograms
```{r}
# subset cells_AUC with only sets of interest
AUCsub <- cells_AUC[c(2,4,6,8,10,12)]
# Start ################################### Loop to export plots
list <- seq_along(AUCsub)
plotnames <- names(AUCsub[1:length(list),])
for (i in seq_along(list)){
plotn <- list[i]
plotname <- plotnames[i]
mypath <- file.path(fig.dir, paste0("8_AUCell/AUCell_Histogram_", plotname, ".png"))
png(file=mypath, width = 5, height = 3, units = "in", res = 300)
AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "histogram", cellsAUC=AUCsub[plotn,], thresholds=FALSE, xlim = c(0,0.6))
dev.off()
}
# End Loop ###################################
```
Heatmap
```{r}
DoHeatmap(object = sce, features = as.character(sel)[2:6], group.colors = pal.cl2, assay = "AUC", slot = "data", size = 4,
draw.lines = TRUE, lines.width = 10) + scale_fill_viridis(option = "inferno", na.value = "white")
```
## 7. Plot AUCscores
Violin plot of AUCscore for each gene sets.
sce.FA contains cell + cluster info
AUC_scores contains cell _ scores info
We need to combine both.
```{r}
meta = sce@meta.data %>%
tibble::rownames_to_column(var = "CellBarcode") %>%
dplyr::select(CellBarcode, in_silico_clusters, sample)
tAUC <- t(AUC_scores)
tAUC <- as.data.frame(tAUC) %>%
tibble::rownames_to_column(var = "CellBarcode") # %>% tidyr::gather(-CellBarcode)
# merge with metadata
df.score <- left_join(tAUC, meta)
# convert to long
df.long = df.score %>% gather(c(3,5,7,9,11,13), key = geneset, value = AUCscore)
# boxplot
bx <- ggplot(df.long, aes(x=geneset, y=AUCscore, colour=in_silico_clusters)) +
geom_boxplot(outlier.size = 0) +
#geom_violin(fill=NA, scale="width") +
scale_colour_manual(values=pal.cl) +
theme_bw() +
theme(legend.position = "right",
panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank")#, axis.text.x = element_text(angle = 45, hjust = 1)
) + labs(x = paste0("Genesets"), y = "AUC score") #+ coord_flip()
bx
ggsave(bx, filename = file.path(fig.dir, paste0("8_AUCell/AUCscores_boxplot_vert", ".png")), device = "png", width = 6, height = 3, dpi = 300)
```
--------------------------------------------------------------------------------
## Session Information
```{r}
sessionInfo()
```