Raw File
Explant_3_sc_explant_AUCell.Rmd
---
title: "Explant scRNAseq datasets analysis"
subtitle: "AUCell analysis"
author: "Renaud Mevel"
output:
  html_document:
    self_contained: yes
    toc: true
    toc_float: true
    df_print: paged
    number_sections: false
editor_options: 
  chunk_output_type: console
---
```{r setup, echo=FALSE, message=FALSE, results='hide'}
library(knitr)
knitr::opts_chunk$set(cache=TRUE, error=FALSE, cache.lazy = TRUE)
```

## Objective

Perform Gene Set Enrichment Analysis on the epithelial Adult Single Cell data, using the AUCell package.  
  
## Prepare the environment

```{r , warning=FALSE, message=FALSE}
# Data wrangling
library(plyr)
library(dplyr)
library(tidyverse)
library(data.table)

# Plots
library(gridExtra)
library(ggpubr)
library(viridis)

# sc
library(Seurat)
library(sctransform)
library(MAST)
library(org.Mm.eg.db)
library(DoubletFinder)

# GO
library(gprofiler2)
library(scran)
library(scater)
library(SingleCellExperiment)
library(AUCell)
library(GSEABase)
library(biomaRt)
library(mixtools)

# Palettes
library(ggthemes)
library(ggbeeswarm)
library(RColorBrewer)
set2 <- brewer.pal(n = 8, name = "Set2")

library(viridisLite)
pal.sa.vi <- viridisLite::viridis(n = 4, begin = 0.05, end = 0.95)

library(pals)
pal25 <- as.character(pals::cols25(n=25))
kel <- as.character(pals::kelly(n=22)[3:22])
col4 <- c("#FFC300", "#d73027", "#4294ff", "#20027c")

# Custom palettes
#pal.hto <- c("#C687D6","#DC2B19", "#F7A400", "#72ABF8", "#434CA7","#9DE79D")
pal.hto.vi <- c("#FF679A", pal.sa.vi, "#43C3FF")
pal.all <- c("#F3C600", "#895295", "#E88DAC", "#008A53", "#C0132C", "#A1C9F4")
pal.sa <- c("#DC2B19", "#F7A400", "#72ABF8", "#434CA7")

pal.cl <- c(
  "#F09900", #C0
  "#F1BA88", #C1
  "#D63A69", #C2
  "#9DDED6", #C3
  "#8DD593", #C4
  "#428654", #C5
  "#B2B8E0", #C6
  "#4A6FE3", #C7
  "#1037AA"  #C8 
)

# Directories
setwd(dir = "~/set-directory/")
pdf.dir <- "~/set-directory/"
fig.dir <- "~/set-directory/"

# Functions
source("Explant_functions.R")

# Seed
set.seed(1)
```

## Load and prepare data

Load if need to start here
```{r}
sce <- readRDS(file.path("r_save/sce_final_paga_fa_dpt.rds"))
```

Extract expression matrix
```{r}
SCE <- as.SingleCellExperiment(sce, assay = "RNA")
exprMatrix <- counts(SCE)
exprMatrix <- as.matrix(exprMatrix)
dim(exprMatrix)
```

Extract plot for dimension reduction
```{r}
# Extract embeddings
fa.sce <- reducedDims(SCE)[["FA"]] 
# Make new column names
colnames(fa.sce) <- c("FA_1", "FA_2")
# Plot
plot(fa.sce, pch=16, cex=.3)
dev.off()
```

## Load Custom Gene sets

Load from a directory containing the output of marker genes obtained from DE analysis.
```{r directories, warning=F, message=F}
# directory of the data generated by QuPath
geneset.dir <- "/sc_adult_dataset/r_export/custom_gene_sets/"

# create a path to all files
file.list <- list.files(path = geneset.dir, full.names = TRUE, recursive = FALSE)
file.names <- list.files(path = geneset.dir, full.names = FALSE, recursive = FALSE)

# read it in a list of data franes, and name them appropriately
geneset.list <- lapply(file.list, read.delim, sep = "\t")
file.names <- gsub(".txt", "", file.names)
names(geneset.list) <- file.names

# Extract gene list
for ( i in seq_along(file.names) ) {
  geneset.list[[i]] <- 
    geneset.list[[i]] %>% 
    #top_n(n = 80, wt = avg_logFC) %>% 
    filter(avg_logFC >= 0.5) %>% 
    dplyr::select(gene) %>% 
    pull() %>% 
    as.character()
}

# convert to GeneSet format
GeneSetFormat.list <- list()
for ( i in seq_along(file.names) ) {
  GeneSetFormat.list[[i]] <- GeneSet(geneset.list[[i]], setName=names(geneset.list)[i])
}

# Make a GeneSetCollection object
geneSets <- GeneSetCollection(unlist(GeneSetFormat.list))
```

Check how many of these genes are in the expression matrix:
```{r}
gS <- AUCell::subsetGeneSets(geneSets = geneSets, rownames(exprMatrix)) 
cbind(nGenes(geneSets))

# Add number of genes in the name
geneSets <- setGeneSetNames(geneSets, newNames=paste(names(geneSets), " (", nGenes(geneSets) ,"g)", sep=""))
```

## 1. Gene rankings

```{r}
cells_rankings <- AUCell_buildRankings(exprMatrix, nCores=4, plotStats=TRUE)
cells_rankings
#save(cells_rankings, file=file.path("r_save/AUCell_rankings.RData"))
```

## 2. AUC gene signatures

```{r}
cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings)
#save(cells_AUC, file="r_save/AUCell_calcAUC.RData")
#load(file="r_save/AUCell_calcAUC.RData")
```

## 3. Determine cells given gene signatures or active gene sets

AUCell_exploreThresholds / cells assignments
```{r, eval=FALSE}
set.seed(123)
par(mfrow=c(3,3)) 
cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist=TRUE, assign=FALSE, smallestPopPercent=0.1)
dev.off()

#Show warnings
warningMsg <- sapply(cells_assignment, function(x) x$aucThr$comment)
warningMsg[which(warningMsg!="")]
```

## 4. Visualise Gene set activity

Histograms
```{r}
AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "histogram", 
                cellsAUC=cells_AUC[1:1,])

# Start ################################### Loop to export plots
list <- seq_along(cells_AUC)
plotnames <- names(cells_AUC[1:length(list),])

for (i in seq_along(list)){
  
  plotn <- list[i]
  plotname <- plotnames[i]
  
  mypath <- file.path("r_figures/8_AUCell/", paste("AUCell_hist_", plotname, ".png", sep = ""))

 png(file=mypath, width = 6, height = 4, units = "in", res = 300)

 AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "histogram", 
                cellsAUC=cells_AUC[plotn,])#, thresholds=selectedThresholds)
   
   dev.off()
 
}
# End Loop ################################### 
```

On reduced dimensions
```{r}
AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "AUC", 
                cellsAUC=cells_AUC[1:1,],cex=0.5, alphaOn = 1)

# Start ################################### Loop to export plots
list <- seq_along(cells_AUC)
plotnames <- names(cells_AUC[1:length(list),])

for (i in seq_along(list)){
  
  plotn <- list[i]
  plotname <- plotnames[i]
  
  mypath <- file.path("r_figures/8_AUCell/", paste("AUCell_", plotname, ".png", sep = ""))

 png(file=mypath, width = 6, height = 6, units = "in", res = 300)

   AUCell_plotTSNE(
     tSNE=fa.sce,
     exprMat=exprMatrix, 
     plots = "AUC", 
     cellsAUC=cells_AUC[plotn,])
   
   dev.off()
 
}
# End Loop ################################### 
```

## 5. Export AUCell scores

```{r}
# Extract per cell scores
AUC_scores <- cells_AUC@assays@data@listData[["AUC"]]

# Add as an independent assay
sce[["AUC"]] <- CreateAssayObject(data = AUC_scores)

# Plot
DefaultAssay(sce) <- "AUC"
seurat_genesets <- rownames(sce)

# X vs all
FeaturePlot(sce, reduction = "FA", features = seurat_genesets[c(9)], pt.size = 1, order=FALSE) + 
  scale_colour_viridis(option = "plasma", end = 0.8) #+ NoLegend() + NoAxes() + ggtitle("")
```

Function and extract coord.
```{r}
source("r_scripts/functions.R")

sce.FA <- as.data.frame(Embeddings(sce[["FA"]]))
sce.FA <- sce.FA %>% 
  dplyr::mutate(CellBarcode = colnames(sce),
                Clusters = sce$seurat_clusters,
                Sample = sce$sample)
```

Batch / Selection genesets - GGplot + legend
```{r, eval=FALSE}
DefaultAssay(sce) <- "AUC"

#sel <- seurat_genesets[c(2,4,8,10,12,15)]
sel <- seurat_genesets[c(2,4,6,8,10,12)]

for (i in seq_along(sel) ) {
  geneset.name <- sel[i]
  fplot <- plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes")
  ggsave(fplot, filename = file.path(fig.dir, paste0("8_AUCell/AUCell_legend_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
}
```

## 6. Figures

test
```{r, eval=FALSE}
geneset.name <- "LumA-vs-intact (49g)" 
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)

geneset.name <- "LumB-vs-intact (269g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)

geneset.name <- "LumC-vs-intact (96g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)

geneset.name <-"LumD-vs-intact (205g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)

geneset.name <- "LumEF-vs-all (100g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)

geneset.name <- "Bas-vs-LumABCD (358g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
#ggsave(filename = file.path(fig.dir, paste0("7_AUCell/AUCell_legtest_", geneset.name, ".png")), device = "png", width = 5, height = 5, dpi = 300)
```

Manual settings  
Scale all luminal genesets from 0 (min) to max (0.46) of all luminal genesets (in Lum-D population). Scale basal independently to min-max of the geneset.
```{r}
geneset.name <- "LumA-vs-intact (49g)" 
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "LumB-vs-intact (269g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "LumC-vs-intact (96g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <-"LumD-vs-intact (205g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "LumEF-vs-all (100g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,0.46))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,0.48))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "Bas-vs-LumABCD (358g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(NA,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_man_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
```

Auto max settings
```{r}
geneset.name <- "LumA-vs-intact (49g)" 
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "LumB-vs-intact (269g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes",  lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "LumC-vs-intact (96g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <-"LumD-vs-intact (205g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "LumEF-vs-all (100g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(0,NA))
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)

geneset.name <- "Bas-vs-LumABCD (358g)"
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "minimal", lim=c(NA,NA)) #min max auto
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_", geneset.name, ".png")), device = "png", width = 4, height = 4, dpi = 300)
plotFA_colByGene(sce, sce.FA, geneset.name, ptsize = 1, alpha = 1, legend = "yes", lim=c(NA,NA)) #min max auto
ggsave(filename = file.path(fig.dir, paste0("8_AUCell/AUCell_auto_leg_", geneset.name, ".png")), device = "png", width = 6, height = 4, dpi = 300)
```

Histograms
```{r}
# subset cells_AUC with only sets of interest
AUCsub <- cells_AUC[c(2,4,6,8,10,12)]

# Start ################################### Loop to export plots
list <- seq_along(AUCsub)
plotnames <- names(AUCsub[1:length(list),])

for (i in seq_along(list)){
  
 plotn <- list[i]
 plotname <- plotnames[i]
  
 mypath <- file.path(fig.dir, paste0("8_AUCell/AUCell_Histogram_", plotname, ".png"))

 png(file=mypath, width = 5, height = 3, units = "in", res = 300)

 AUCell_plotTSNE(tSNE=fa.sce, exprMat=exprMatrix, plots = "histogram", cellsAUC=AUCsub[plotn,], thresholds=FALSE, xlim = c(0,0.6))
   
 dev.off()
 
}
# End Loop ################################### 
```

Heatmap
```{r}
DoHeatmap(object = sce, features = as.character(sel)[2:6], group.colors = pal.cl2, assay = "AUC", slot = "data", size = 4,
          draw.lines = TRUE, lines.width = 10) +  scale_fill_viridis(option = "inferno", na.value = "white")
```

## 7. Plot AUCscores

Violin plot of AUCscore for each gene sets.

sce.FA contains cell + cluster info
AUC_scores contains cell _ scores info
We need to combine both.
```{r}
meta = sce@meta.data %>% 
  tibble::rownames_to_column(var = "CellBarcode") %>% 
  dplyr::select(CellBarcode, in_silico_clusters, sample)
  

tAUC <- t(AUC_scores)
tAUC <- as.data.frame(tAUC) %>% 
  tibble::rownames_to_column(var = "CellBarcode") # %>% tidyr::gather(-CellBarcode)

# merge with metadata
df.score <- left_join(tAUC, meta)

# convert to long
df.long = df.score %>% gather(c(3,5,7,9,11,13), key = geneset, value = AUCscore)

# boxplot
bx <- ggplot(df.long, aes(x=geneset, y=AUCscore, colour=in_silico_clusters)) +
  geom_boxplot(outlier.size = 0) +
  #geom_violin(fill=NA, scale="width") +
  scale_colour_manual(values=pal.cl) +
  theme_bw() +
  theme(legend.position = "right",
        panel.grid.major = element_line(linetype = "blank"), 
        panel.grid.minor = element_line(linetype = "blank")#, axis.text.x = element_text(angle = 45, hjust = 1)
        ) + labs(x = paste0("Genesets"), y = "AUC score") #+ coord_flip()

bx 

ggsave(bx, filename = file.path(fig.dir, paste0("8_AUCell/AUCscores_boxplot_vert", ".png")), device = "png", width = 6, height = 3, dpi = 300)
```


--------------------------------------------------------------------------------
## Session Information
```{r}
sessionInfo()
```
back to top