Raw File
Explant_1_sc_explant_process_lognorm.Rmd
---
title: "Explant scRNAseq datasets analysis"
subtitle: "Run GL60_03"
author: "Renaud Mevel"
output:
  html_document:
    self_contained: yes
    toc: true
    toc_float: true
    df_print: paged
    number_sections: false
editor_options: 
  chunk_output_type: console
---
```{r setup, echo=FALSE, message=FALSE, results='hide'}
library(knitr)
knitr::opts_chunk$set(cache=TRUE, error=FALSE, cache.lazy = TRUE)
```

## Objective

* Demultiplex the MULTIseq tags/poulations  
* Perform QC and filter high quality cells  
* Process in Seurat and characterise cell populations  
* Filter cells of interest (prostate differentiation) for downstream analysis  
  
Once we have clustered and subsetted the cells of interest, we compute diffusion components and we will use scanpy to layout the cells using the ForceAtlas2 algorithm initiated on PAGA. We will then use the FA layout to estimate pseudotime using slingshot.  
  
## Prepare the environment

```{r , warning=FALSE, message=FALSE}
# Data wrangling
library(plyr)
library(dplyr)
library(tidyverse)
library(data.table)

# Plots
library(gridExtra)
library(ggpubr)

# sc
library(MAST)
library(loomR)
library(Seurat)
library(sctransform)
library(org.Mm.eg.db)
library(DoubletFinder)
library(scran)
library(scater)
library(slingshot)
library(SingleCellExperiment)

# GO
library(gprofiler2)

# Palettes
library(ggthemes)
library(ggbeeswarm)
library(RColorBrewer)
set2 <- brewer.pal(n = 8, name = "Set2")

library(viridisLite)
pal.sa.vi <- viridisLite::viridis(n = 4, begin = 0.05, end = 0.95)

library(pals)
pal25 <- as.character(pals::cols25(n=25))
kel <- as.character(pals::kelly(n=22)[3:22])
col4 <- c("#FFC300", "#d73027", "#4294ff", "#20027c")

# Custom palettes
#pal.hto <- c("#C687D6","#DC2B19", "#F7A400", "#72ABF8", "#434CA7","#9DE79D")
pal.hto.vi <- c("#FF679A", pal.sa.vi, "#43C3FF")
pal.all <- c("#F3C600", "#895295", "#E88DAC", "#008A53", "#C0132C", "#A1C9F4")
pal.sa <- c("#DC2B19", "#F7A400", "#72ABF8", "#434CA7")
pal.cc <- c("#4B6EB3", "#FFCC4C", "#FF7A7D") #cell cycle phases

pal.cla <- c(
  "#F09900", #C0
  "#F1BA88", #C1
  "#D63A69", #C2
  "#9DDED6", #C3
  "#8DD593", #C4
  "#428654", #C5
  "#B2B8E0", #C6
  "#4A6FE3", #C7
  "#1037AA", #C8 
  "grey60"   #C9  
)

pal.cl <- c(
  "#F09900", #C0
  "#F1BA88", #C1
  "#D63A69", #C2
  "#9DDED6", #C3
  "#8DD593", #C4
  "#428654", #C5
  "#B2B8E0", #C6
  "#4A6FE3", #C7
  "#1037AA"  #C8 
)

# Directories
setwd(dir = "~/set-directory/")
pdf.dir <- "~/set-directory/"
fig.dir <- "~/set-directory/"

# Functions
source("Explant_functions.R")

# Seed
set.seed(1)
```

## Load 10x data

```{r}
# This contains the  output of cell ranger count
cr <- Read10X(data.dir = file.path("raw_data/filtered_feature_bc_matrix/")) # HISEQ RUN
colnames(cr) <- gsub("-1", "", colnames(cr))

sc <- CreateSeuratObject(counts = cr, project = "GL60_03", assay = "RNA")
```

## Load multiseq data

```{r}
bar.table <- readRDS(file = file.path("raw_data/multiseq_count_matrix/bar.table.rds"))
```

Transform and plot data
```{r}
# transform
tbar <- t(bar.table)
tbar <- tbar[1:4, ]
htos <- as.data.frame(tbar)

# make long format and plot log read counts
df.long <- as.data.frame(tbar) %>% 
  tibble::rownames_to_column(var = "tag") %>% 
  tidyr::gather(cell_barcode, value, -tag) %>% 
  dplyr::mutate(Log = log10(value+1))

ggplot(df.long, aes(x=cell_barcode)) + 
  geom_point(aes(y=Log, colour = factor(tag)), alpha = 0.5) + 
  theme(panel.grid.minor = element_blank(),
        axis.text.x=element_blank()) +  # turn off minor grid
  facet_grid(~tag) +
  scale_colour_manual(values = pal.sa) +
  ylab("Log10 of tag read count") +
  xlab("Unique cell Barcodes") +
  labs(color='tags')
```

## A. Prepare dataset

### Demultiplex

Add hto data
```{r}
# Add HTO data as a new assay independent from RNA
sc[["HTO"]] <- CreateAssayObject(counts = htos)

# Number of cells
cat("Number of cells:", length(colnames(sc)))
```

Normalize HTO data, here we use centered log-ratio (CLR) transformation
```{r}
DefaultAssay(object = sc) <- "HTO"
sc <- NormalizeData(sc, assay = "HTO", normalization.method = "CLR")

# 0.9
sc <- HTODemux(sc, assay = "HTO", positive.quantile = 0.9, kfunc = "kmeans")
table(sc$HTO_classification.global)
table(sc$hash.ID)
```

Show barcodes in UMAP space. Some cells appear to be wrongly classified. We will deal with them later by applying a stricter threshold determined empirically to remove cells with low barcode counts.
```{r}
sc.umap <- RunUMAP(sc, assay = "HTO", features = rownames(sc@assays$HTO))
DimPlot(sc.umap, reduction = "umap", cols = pal.hto.vi, pt.size = 0.5)

ggsave(filename = file.path(fig.dir, "UMAP_tags_unfiltered.png"), device = "png", width = 6, height = 4, dpi = 300)
```

Add tag data to sc object
```{r}
# add metadata under tag name
sc <- AddMetaData(object = sc, metadata = sc$hash.ID, col.name = "tag")
```

### Add metadata

Add population/sample name to the tag information
```{r}
#read in metadata
meta <- read.delim(file.path("metadata/metadata_explant.txt"), "\t", header = T, stringsAsFactors = F)
meta <- dplyr::rename(meta, final.ids = bar_id)

#extract cell ids/htos
df.tags <- as.data.frame(sc$hash.ID)

#make the datafrane
df.pop <- tibble::rownames_to_column(.data = df.tags)
df.pop <- dplyr::rename(df.pop, final.ids = `sc$hash.ID`)
df.pop <- left_join(df.pop, meta, "final.ids")

df.sa <- dplyr::select(df.pop, rowname, sample)
table(df.sa$sample)

#make right vector to add metadata
df.sa <- tibble::column_to_rownames(.data = df.sa, var = "rowname")
df.sa$sample <- factor(df.sa$sample, levels = c("D0", "D1", "D3", "D6"))

#add metadata
sc <- AddMetaData(object = sc, metadata = df.sa, col.name = "sample")
```

### Filter low tags

Remove cells which have low tag counts as potential outliers.
```{r, eval=FALSE}
ggplot(df.long, aes(x=cell_barcode)) + 
  geom_point(aes(y=Log, colour = factor(tag)), alpha = 0.5) + 
  theme(panel.grid.minor = element_blank(),
        axis.text.x=element_blank()) +  # turn off minor grid
  facet_grid(~tag) +
  scale_colour_manual(values = pal.sa) +
  ylab("Log10 of tag read count") +
  xlab("Unique cell Barcodes") +
  labs(color='tags')
```
  

*Cut-offs found by HTODemux:*  
* Cutoff for Bar1 : 20 reads  
* Cutoff for Bar2 : 29 reads  
* Cutoff for Bar3 : 55 reads  
* Cutoff for Bar4 : 373 reads  
  
  
*Manual cut-offs:*  
* Bar1: 50  
* Bar2: 50  
* Bar3: 60  
* Bar4: 375  
  
  
```{r}
DefaultAssay(sc) <- "HTO"

Bar1 <- subset(sc, idents = "Bar1", subset = Bar1>50, slot = "counts")
Bar2 <- subset(sc, idents = "Bar2", subset = Bar2>50, slot = "counts")
Bar3 <- subset(sc, idents = "Bar3", subset = Bar3>70, slot = "counts")
Bar4 <- subset(sc, idents = "Bar4", subset = Bar4>370, slot = "counts")

sc <- merge(Bar1, y = c(Bar2,Bar3,Bar4))
rm(Bar1,Bar2,Bar3,Bar4)

#output numbers
table(sc$HTO_classification.global)
table(sc$hash.ID)

# Plot in barcode space
sc.umap <- RunUMAP(sc, assay = "HTO", features = rownames(sc@assays$HTO))
DimPlot(sc.umap, reduction = "umap", cols = pal.sa.vi, pt.size = 0.5)
ggsave(filename = file.path(fig.dir, "UMAP_tags_filtered.png"), device = "png", width = 6, height = 4, dpi = 300)
```

### Singlets

```{r}
# If manual filtering
singlets <- names(sc$HTO_classification.global)
sc <- subset(sc, cells = singlets)

# Number of cells
cat("Number of cells:", length(colnames(sc)))

# Else do,
#Idents(sc) <- "HTO_classification.global"
#sc <- subset(sc, idents = "Singlet", invert = FALSE)

# make a copy at this stage for future use if needed
sc_save <- sc

# relevel identity
DefaultAssay(sc) <- "RNA"
Idents(sc) <- "sample"
levels <- c("D0", "D1", "D3", "D6")
Idents(sc) <- factor(Idents(sc), levels = levels)
```

### Filter QC

QC plots
```{r}
sc[["percent.mt"]] <- PercentageFeatureSet(sc, pattern = "^mt-")
plot1 <- VlnPlot(sc, features = c("percent.mt"), cols = pal.sa.vi, pt.size = 0.1)
plot2 <- VlnPlot(sc, features = c("percent.mt"), y.max = 10, cols = pal.sa.vi, pt.size = 0.1)
plot1+plot2

plot1 <- FeatureScatter(sc, feature1 = "nCount_RNA", feature2 = "percent.mt", cols = pal.sa.vi, pt.size = 0.2)
plot2 <- FeatureScatter(sc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = pal.sa.vi, pt.size = 0.2)
plot1+plot2

VlnPlot(sc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, cols = pal.sa.vi, pt.size = 0.1)
```

Save individual plots before QC
```{r}
t = theme(axis.text.x = element_text(angle = 0))

VlnPlot(sc, features = c("nFeature_RNA"), cols = pal.sa.vi, pt.size = 0) + NoLegend() + ggtitle("Number of genes") + t
ggsave(filename = file.path(fig.dir, "QC_before_genes.png"), device = "png", width = 4, height = 3, dpi = 300)

VlnPlot(sc, features = c("nCount_RNA"), cols = pal.sa.vi, pt.size = 0) + NoLegend() + ggtitle("Number of UMIs") + t
ggsave(filename = file.path(fig.dir, "QC_before_UMIs.png"), device = "png", width = 4, height = 3, dpi = 300)

VlnPlot(sc, features = c("percent.mt"), cols = pal.sa.vi, pt.size = 0) + NoLegend() + ggtitle("% mitochondrial transcripts") + t
ggsave(filename = file.path(fig.dir, "QC_before_mito.png"), device = "png", width = 4, height = 3, dpi = 300)
```

Filter
```{r}
# Filter
sc <- subset(sc, subset = nFeature_RNA > 1000 & percent.mt < 7.5)
VlnPlot(sc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, cols = col4)
```

Save individual plots after QC
```{r}
t = theme(axis.text.x = element_text(angle = 0))

VlnPlot(sc, features = c("nFeature_RNA"), cols = pal.sa.vi, pt.size = 0) + NoLegend() + ggtitle("Number of genes") + t
ggsave(filename = file.path(fig.dir, "QC_after_genes.png"), device = "png", width = 4, height = 3, dpi = 300)

VlnPlot(sc, features = c("nCount_RNA"), cols = pal.sa.vi, pt.size = 0) + NoLegend() + ggtitle("Number of UMIs") + t
ggsave(filename = file.path(fig.dir, "QC_after_UMIs.png"), device = "png", width = 4, height = 3, dpi = 300)

VlnPlot(sc, features = c("percent.mt"), cols = pal.sa.vi, pt.size = 0) + NoLegend() + ggtitle("% mitochondrial transcripts") + t
ggsave(filename = file.path(fig.dir, "QC_after_mito.png"), device = "png", width = 4, height = 3, dpi = 300)
```

## B. All cells

* Use Seurat default normalisation/scaling procedure  
* Regress cell cycle effect  
* Identify and characterise main clusters  
* Subset the population of interest for downstream analysis  
  
### UMAP/PCA -reg

```{r}
DefaultAssay(sc) <- "RNA"
sc <- NormalizeData(sc, normalization.method = "LogNormalize", scale.factor = 1e4)
sc <- FindVariableFeatures(sc, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
sc <- ScaleData(sc, verbose = T, assay = "RNA", features = rownames(sc))
sc <- RunPCA(sc, npcs = 50, verbose = T)
sc <- RunUMAP(sc, reduction = "pca", dims = 1:50, seed.use = 1)
sc <- FindNeighbors(sc, reduction = "pca", dims = 1:50, k.param = 20L)
sc <- FindClusters(sc, resolution = 0.3)

DimPlot(sc, reduction = "umap", cols = kel, label = TRUE, pt.size = 0.5)
```

While cells cluster by main cell types, there is also a very strong unerlying cell cycle effect:
```{r}
FeaturePlot(sc, c("Top2a", "Mki67", "Mcm6", "Ung"))
```

Save intermediate object
```{r, eval=FALSE}
if ( file.exists(file.path("r_save/sc_lognorm_unregressed.rds")) ) {
  sc <- readRDS(file = "r_save/sc_lognorm_unregressed.rds")
} else { 
  saveRDS(sc, file = "/sc_explant_dataset/r_save/sc_lognorm_unregressed.rds")
  }
```

### Cell Cycle regression

Tutorial here: https://satijalab.org/seurat/v3.0/cell_cycle_vignette.html  
```{r}
# Marker list from Tirosh et al, 2015. Segregate this list into markers of G2/M phase and markers of S phase.0
s.genes <- cc.genes.updated.2019$s.genes
g2m.genes <- cc.genes.updated.2019$g2m.genes

# Function to convert each gene to mouse nomenclature by changing case (checked that gene names exist in ensembl too)
firstup <- function(x) { 
  x <- tolower(x)
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
  x
}

s.genes <- firstup(s.genes)
g2m.genes <- firstup(g2m.genes)
```

Make sure to specify the "assay" parameter 
```{r}
cc <- CellCycleScoring(
  sc,
  s.features = s.genes,
  g2m.features = g2m.genes,
  assay = 'RNA',
  set.ident = TRUE
)

head(cc[[]])
```

Visualize the distribution of cell cycle markers across
```{r}
RidgePlot(cc, features = c("Ube2c", "Top2a", "Mcm6", "Ung"), ncol = 2)
```

Running a PCA on cell cycle genes reveals, that cells separate by phase
```{r}
# PCA on cell cycle features
cc <- RunPCA(cc, features = c(s.genes, g2m.genes))
pca0 <- DimPlot(cc, reduction = "pca", cols=pal.cc)

# Overlay on UMAP computed on gene expression previously
umap0 <- DimPlot(cc, reduction = "umap", group.by = "Phase", cols=pal.cc, pt.size = 0.8)

pca0/umap0
ggsave(filename = file.path(fig.dir, "Allcells_before_ccReg.png"), device = "png", width = 5, height = 7, dpi = 300)

umap0 <- umap0 + NoLegend() + NoAxes()
ggsave(umap0, filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_before_ccReg_nolegend.png"), device = "png", width = 6, height = 6, dpi = 300)
```

*Time consuming step!*  
Regress/Scale using computed cell cycle scores  
If this has already been calculated, we load the previous file instead
```{r}
if ( file.exists(file.path("r_save/cc_lognorm_regressed.rds")) ) {
  cc <- readRDS(file = "r_save/cc_lognorm_regressed.rds")
} else { 
  cc <- ScaleData(cc, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(cc))
  saveRDS(cc, file = "/sc_explant_dataset/r_save/cc_lognorm_regressed.rds")
  }
```

Compare the before/after regression
```{r}
# cell cycle features
cc <- RunPCA(cc, features = c(s.genes, g2m.genes))
pca1 <- DimPlot(cc, reduction = "pca", cols=pal.cc)

pca0/pca1
ggsave(filename = file.path(fig.dir, "Allcells_PCA_before_vs_after_ccReg.png"), device = "png", width = 5, height = 7, dpi = 300)
```

### Load / Start here

```{r, eval=FALSE}
cc <- readRDS(file = "r_save/cc_lognorm_regressed.rds")
```

### UMAP/PCA +reg

Find neighbors and clusters
```{r}
DefaultAssay(cc) <- "RNA"
cc <- RunPCA(cc, verbose = T)
cc <- RunUMAP(cc, dims = 1:50, verbose = T, seed.use = 42, n.neighbors = 20L)
cc <- FindNeighbors(cc, reduction = "pca", dims = 1:50, k.param = 20L)
cc <- FindClusters(cc, resolution = 0.3)

DimPlot(cc, reduction = "umap", cols=kel, label = TRUE, label.size = 5)
```

Samples
```{r}
DimPlot(cc, reduction = "umap", group.by = "sample", cols = pal.sa)
ggsave(filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_days.png"), device = "png", width = 5, height = 4, dpi = 300)
```

Cell cycle before/after
```{r}
umap1 <- DimPlot(cc, reduction = "umap", group.by = "Phase", cols=pal.cc, pt.size=1)
umap1
ggsave(filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_cellcycle.png"), device = "png", width = 5, height = 4, dpi = 300)

#before/after
umap0/umap1
ggsave(filename = file.path(fig.dir, "Allcells_UMAP_before_vs_after_ccReg.png"), device = "png", width = 5, height = 7, dpi = 300)

#umap1 after
umap1 <- DimPlot(cc, reduction = "umap", group.by = "Phase", cols=pal.cc, pt.size=0.8)
umap1 <- umap1 + NoLegend() + NoAxes()
ggsave(umap1, filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_after_ccReg_nolegend.png"), device = "png", width = 6, height = 6, dpi = 300)
```

### Clustree

```{r, eval=FALSE}
library(clustree)
scc <- FindClusters(
  cc,
  reduction.type = "pca", 
  dims.use = 1:50,
  k.param = 20L,
  resolution = seq(0, 1, 0.1),
  save.SNN = TRUE,
  print.output = FALSE)

#Plot tree
clustree(scc)
clustree(scc, node_colour = "sc3_stability")

#Bac a sable
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.1", cols=kel)
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.2", cols=kel)
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.3", cols=kel)
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.4", cols=kel)
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.5", cols=kel)
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.6", cols=kel)
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.7", cols=kel)
DimPlot(object = scc, reduction = "umap", label=TRUE, group.by = "RNA_snn_res.0.8", cols=kel)
```

### Relabel clusters

Plot all clusters
```{r}
DimPlot(cc, reduction = "umap", cols=kel, label = TRUE, label.size = 5)
```

Merge clusters corresponding to prostate differentiation of interest
```{r}
cluster_label <- c(
  "Prostatic", #0
  "Prostatic", #1
  "Prostatic", #2
  "Prostatic", #3
  "Mesonephric 1", #4
  "Mesonephric 2", #5
  "Prostatic", #6
  "Urothelium", #7
  "Mesonephric 2", #8
  "Hypoxic/Stressed", #9
  "Mesenchyme"  #10
  )

names(cluster_label) <- levels(cc$seurat_clusters)
cc <- RenameIdents(cc, cluster_label)

#relevel
relevel <- c(
  "Prostatic", 
  "Mesonephric 1", 
  "Mesonephric 2", 
  "Urothelium", 
  "Mesenchyme", 
  "Hypoxic/Stressed"  
  )

Idents(cc) <- factor(Idents(cc), levels = relevel)
cc$cluster_label <- Idents(cc)

DimPlot(object = cc, reduction = "umap", label=FALSE, label.size = 4, cols = pal.all)
```

Figures
```{r}
# Labelled clusters
DimPlot(object = cc, reduction = "umap", label=FALSE, label.size = 4, cols = pal.all)
ggsave(filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_labelled_clusters.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = cc, reduction = "umap", label=FALSE, label.size = 4, cols = pal.all) + NoAxes() + NoLegend()
ggsave(filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_labelled_clusters_nolegend.png"), device = "png", width = 6, height = 6, dpi = 300)

# Days
DimPlot(object = cc, reduction = "umap", group.by = "sample", label=FALSE, label.size = 4, cols = pal.sa.vi)
ggsave(filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_labelled_days.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = cc, reduction = "umap", group.by = "sample", label=FALSE, label.size = 4, cols = pal.sa.vi) + NoAxes() + NoLegend()
ggsave(filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_labelled_days_nolegend.png"), device = "png", width = 6, height = 6, dpi = 300)
```

### Save/load  labelled object
```{r, eval=FALSE}
if ( file.exists(file.path("r_save/cc_lognorm_regressed_labelled.rds")) ) {
  cc <- readRDS(file = "r_save/cc_lognorm_regressed_labelled.rds")
} else { 
  saveRDS(cc, file =  "r_save/cc_lognorm_regressed_labelled.rds") 
  }
```

### Find markers

```{r}
if ( file.exists(file.path("r_export/Allcells.clusters.markers.txt")) ) {
  
  all.markers <- read.delim("r_export/Allcells.clusters.markers.txt", sep = "\t")
  top.all <- all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)

} else { 

  all.markers <- FindAllMarkers(cc, only.pos = TRUE, min.pct = 0.10, logfc.threshold = 0.25, test.use = "MAST") ## Repeat
  top.all <- all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
  write.table(all.markers, "r_export/Allcells.clusters.markers.txt", sep = "\t", row.names = T)
  
  }
```

### Visualise Genes

```{r}
# Extract UMAPs coordinates
cc.UMAP <- as.data.frame(Embeddings(cc[["umap"]]))
cc.UMAP <- cc.UMAP %>% 
  dplyr::mutate(CellBarcode = colnames(cc),
                Clusters = cc$seurat_clusters,
                Sample = cc$sample)
```

```{r}
# Wolfian duct markers
plotUMAP_colByGene(cc, cc.UMAP, "Pax2", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Pax8", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Lhx1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Upk3a", ptsize = .5, alpha = 1, legend = "yes") # uroplakin
plotUMAP_colByGene(cc, cc.UMAP, "Trp63", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Krt5", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Foxa1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Shh", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Vim", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Col1a1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Col3a1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Ero1l", ptsize = .5, alpha = 1, legend = "yes")
```

Prostate differentiating cells: 0,1,2,3,6
```{r, eval=FALSE}
plotUMAP_colByGene(cc, cc.UMAP, "Hoxb13", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Hoxd13", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Ar", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Nkx3-1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Shh", ptsize = .5, alpha = 1, legend = "yes")

# Sfrp1, Psap1l, Notch1
```

4,5,8: wolfia/mullerian duct remanants / mesonephric
```{r, eval=FALSE}
plotUMAP_colByGene(cc, cc.UMAP, "Pax2", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Pax8", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Lhx1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Gata3", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Hoxb7", ptsize = .5, alpha = 1, legend = "yes")
```

7: bladder / urothelium
```{r, eval=FALSE}
plotUMAP_colByGene(cc, cc.UMAP, "Upk3a", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Foxq1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Foxa1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Krt5", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Krt7", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Plaur", ptsize = .5, alpha = 1, legend = "yes")
```

10: mesenchyme
```{r, eval=FALSE}
plotUMAP_colByGene(cc, cc.UMAP, "Vim", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(cc, cc.UMAP, "Col3a1", ptsize = .5, alpha = 1, legend = "yes")
```

9: hypoxic/stressed cells
```{r}
plotUMAP_colByGene(cc, cc.UMAP, "Ero1l", ptsize = .5, alpha = 1, legend = "yes")
```

Dotplot
```{r, eval=FALSE}
# Plot
DefaultAssay(cc) <- "RNA"
markers.to.plot <- 
  c(
    "Epcam", "Krt8", "Krt5", "Krt14", "Krt15", "Ar", "Shh", "Hoxb13", "Hoxd13", "Nkx3-1",
    "Hoxb7", "Wfdc2", "Gata3", "Sox17", "Pax8", "Pax2", "Lhx1", 
    "Upk3a", "Foxq1", "Krt7", "Plaur", "Krt20",
    "Vim", "Col3a1", "Col1a1", "Pdgfra", "Zeb1",
    "Ero1l","Slc2a1","Aldoa","Bnip3","Hspa5"
    )

DotPlot(cc, 
        features = rev(markers.to.plot), 
        group.by = "cluster_label",
        cols = c("darkblue", "tomato"), dot.scale = 6) + RotatedAxis() + FontSize(x.text = 12, y.text = 12)

ggsave(filename = file.path(fig.dir, "Allcells_dotplot.png"), device = "png", width = 12, height = 4, dpi = 300)
```

Heatmap
```{r, eval=FALSE}
markers.to.plot <- 
  c(#"Runx1", "Runx2", "Runx3", "Cbfb",
    "Epcam", "Krt8", "Krt5", "Krt14", "Krt15", "Shh", "Hoxb13", "Hoxd13", "Ar", "Nkx3-1",
    "Hoxb7", "Wfdc2", "Gata3", "Pax8", "Pax2", "Lhx1", "Crip1", "Sox17",
    "Upk3a", "Foxq1", "Krt7", "Plaur", "Krt20",
    "Vim", "Col3a1", "Col1a1", "Pdgfra", "Zeb1",
    "Ero1l","Slc2a1","Aldoa","Bnip3","Hspa5"
    )

DoHeatmap(object = cc, features = as.character(markers.to.plot), group.colors = pal.all, draw.lines = TRUE, lines.width = 10,
          assay = "RNA", slot = "data", size = 4) + scale_fill_viridis(option = "inferno",na.value = "white")
ggsave(filename = file.path(fig.dir, "2_all_cells/Allcells_heatmap_selection.png"), device = "png", width = 8, height = 6, dpi = 300)
```

### GO

Cluster 9 consists mainly of explants harvested at Day 1, and the marker genes have a very strong signature associated with hypoxia / cell death / response to stress, therefore we remove these cells for downstream analysis.

Hypoxic/Stressed cells
```{r}
go <- gost(query = all.markers %>% filter(cluster == "Hypoxic/Stressed" & avg_logFC > 0.5) %>% pull(gene), 
           organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = "Hypoxic/Stressed")

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep("#A1C7F7",10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate() + theme(axis.title.x=element_blank())
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, "2_all_cells/GO_Hypoxic_Stressed_cluster.png"), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, "2_all_cells/GO_Hypoxic_Stressed_cluster_nolegend.png"), device = "png", width = 4, height = 2.5, dpi = 300)
```

### Figures / Genes

```{r}
gene <- "Hoxb13"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Hoxd13"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Ar"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Nkx3-1"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Shh"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Pax2"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Pax8"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Lhx1"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Gata3"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Hoxb7"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Upk3a"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Foxq1"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Foxa1"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Krt5"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Krt7"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Plaur"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Vim"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Col3a1"
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, ".png")), device = "png", width = 6, height = 4, dpi = 300)
plotUMAP_colByGene(cc, cc.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("2_all_cells/Allcells_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)
```

### Number of cells

```{r}
### --- Number of epithelial cells per day ------------------------------------

meta = cc@meta.data

stat <- 
  meta %>% 
  group_by(sample) %>% 
  summarise(count=n()) %>% 
  mutate(perc=(count/sum(count)*100))

ggplot(stat, aes(x=sample, y=count, fill=sample)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(y=count,label = count, hjust=-0.2)) +
  scale_fill_manual(values = pal.sa.vi) +
  labs(x="Timepoint", y="Number of cells") +
  scale_y_continuous(limits=c(0, max(stat$count) + 0.2*max(stat$count))) +
   theme(
          
     axis.title.x=element_blank(),
     axis.title.y=element_blank(),
     axis.text.y=element_blank(),
     
     legend.position = "none",
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()

ggsave(filename = file.path(fig.dir, "1_QC/Meta_n_cells_days.png"), device = "png", width = 3, height = 2, dpi = 300)
```


## C. Main cluster

### Filter

```{r}
DimPlot(object = cc, reduction = "umap", cols=pal.all)
sce <- subset(cc, idents = c("Prostatic"), invert = FALSE)
DefaultAssay(sce) <- "RNA"
```

### UMAP/PCA

```{r}
sce$RNA_snn_res.0.3 <- NULL # remove previous clustering
sce <- RunPCA(sce, verbose = T)
sce <- RunUMAP(sce, dims = 1:50, verbose = T, seed.use = 1, min.dist = 0.3, n.neighbors = 20L)
sce <- FindNeighbors(sce, dims = 1:50, verbose = T, k.param = 20)
sce <- FindClusters(sce, resolution = 0.5, verbose = T)

DimPlot(sce, reduction = "umap", label = TRUE, label.size = 5, cols = kel, group.by = "seurat_clusters") 
DimPlot(object = sce, reduction = "umap", group.by = "sample", cols = pal.sa)
```

Check cell cycle phases
```{r}
DimPlot(sce, reduction = "umap", group.by = "Phase", cols = pal.cc)
```

Save
```{r, eval=FALSE}
if ( file.exists(file.path("r_save/sce_lognorm_regressed.rds")) ) {
  sce <- readRDS(file = "r_save/sce_lognorm_regressed.rds")
} else { 
  saveRDS(sce, file = "r_save/sce_lognorm_regressed.rds") 
  }
```

### Clustree

```{r, eval=FALSE}
library(clustree)
scc <- FindClusters(
  sce,
  reduction.type = "pca", 
  dims.use = 1:50,
  k.param = 20L,
  resolution = seq(0, 1, 0.1),
  save.SNN = TRUE,
  print.output = FALSE)

# Plot tree
clustree(scc)

# Bac a sable
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.1", cols = kel) + ggtitle("0.1")
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.2", cols = kel) + ggtitle("0.2")
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.3", cols = kel) + ggtitle("0.3")
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.4", cols = kel) + ggtitle("0.4")
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.5", cols = kel) + ggtitle("0.5")
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.6", cols = kel) + ggtitle("0.6")
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.7", cols = kel) + ggtitle("0.7")
DimPlot(object = scc, reduction = "umap", label=TRUE, label.size = 5, group.by = "RNA_snn_res.0.8", cols = kel) + ggtitle("0.8") 
```

### Relabel clusters

Relevel clusters
```{r}
in_silico_clusters <- c(
  "C6", #0
  "C4", #1
  "C7", #2
  "C1", #3
  "C0", #4
  "C8", #5
  "C3", #6
  "C2", #7
  "C5", #8
  "C9"  #9
  )

names(in_silico_clusters) <- levels(sce$seurat_clusters)
sce <- RenameIdents(sce, in_silico_clusters)

#relevel
relevel <- c(
  "C0", 
  "C1", 
  "C2", 
  "C3", 
  "C4",  
  "C5", 
  "C6", 
  "C7", 
  "C8", 
  "C9"
  )


Idents(sce) <- factor(Idents(sce), levels = relevel)
sce$in_silico_clusters <- Idents(sce)

DimPlot(object = sce, reduction = "umap", label=TRUE, label.size = 5, cols = pal.cla, pt.size = 1)
```

Define a specific color palettes for clusters C0 to C9 - see intro.
```{r}
DimPlot(object = sce, reduction = "umap", label=TRUE, label.size = 5, cols = pal.cl)
DimPlot(object = sce, reduction = "umap", cols = pal.cl)
```

UMAPS
```{r}
u1  <- DimPlot(object = sce, reduction = "umap", group.by = "sample", label=FALSE, cols = pal.sa)
u2  <- DimPlot(object = sce, reduction = "umap", group.by = "in_silico_clusters", label=TRUE, label.size = 5, cols = pal.cl)

u1/u2
```

### Save / Load

Save after labelling
```{r, eval=FALSE}
if ( file.exists(file.path("r_save/sce_lognorm_regressed_labelled.rds")) ) {
  sce <- readRDS(file = "r_save/sce_lognorm_regressed_labelled.rds")
} else { 
  saveRDS(sce, file =  "r_save/sce_lognorm_regressed_labelled.rds") 
}

# check object
DimPlot(object = sce, reduction = "umap", label=TRUE, label.size = 5, cols = pal.cla)
```

### Figures

Clusters
```{r}
DimPlot(object = sce, reduction = "umap", group.by = "in_silico_clusters", label=TRUE, label.size = 5, cols = pal.cla, pt.size = 0.8)
ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_UMAP_clusters.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = sce, reduction = "umap", group.by = "in_silico_clusters", cols = pal.cla, pt.size = 1) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_UMAP_clusters_nolegend.png"), device = "png", width = 7, height = 5, dpi = 300)
```

Days
```{r}
DimPlot(object = sce, reduction = "umap", group.by = "sample", label=TRUE, label.size = 5, cols = pal.sa.vi, pt.size = 0.8)
ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_UMAP_days.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = sce, reduction = "umap", group.by = "sample", cols = pal.sa.vi, pt.size = 0.8) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_UMAP_days_nolegend.png"), device = "png", width = 7, height = 5, dpi = 300)
```

Clusters evolution, split by day
```{r}
DimPlot(object = sce, reduction = "umap", split.by = "sample", cols = pal.cla) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_UMAP_split_days.png"), device = "png", width = 14, height = 3, dpi = 300)
```

### Cell cycle phases

```{r}
#umap1 after
umap1 <- DimPlot(cc, reduction = "umap", group.by = "Phase", cols=pal.cc, pt.size=0.8)
umap1 <- umap1 + NoLegend() + NoAxes()
ggsave(umap1, filename = file.path(fig.dir, "2_all_cells/Allcells_UMAP_after_ccReg_nolegend.png"), device = "png", width = 6, height = 6, dpi = 300)
```


Cell cycle phase
```{r}
DimPlot(object = sce, reduction = "umap", group.by = "Phase", label=TRUE, label.size = 5, pt.size = 1, cols=pal.cc)
ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_UMAP_cellcycle.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = sce, reduction = "umap", group.by = "Phase", pt.size = 1, cols=pal.cc) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_UMAP_cellcycle_nolegend.png"), device = "png", width = 7, height = 5, dpi = 300)
```

Cell cycle phase per day
```{r}
meta = sce@meta.data

statcc <- 
  meta %>% 
  group_by(sample, Phase) %>% 
  summarise(count=n()) %>% 
  mutate(perc=(count/sum(count)*100))

ggplot(statcc, aes(x = sample, y = perc, fill = Phase, label = count)) +
  geom_bar(stat="identity", width = 0.5) +
  scale_fill_manual(values = pal.cc) +
  labs(x="Time point", y="Percentage", fill="Phase",
       title = "Proportion of cycling cells per days") +
   theme(
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
```

Cell cycle phase per cluster
```{r}
statcc <- 
  sce@meta.data %>% 
  group_by(in_silico_clusters, Phase) %>% 
  summarise(count=n()) %>% 
  mutate(perc=(count/sum(count)*100))
  
ggplot(statcc, aes(x = in_silico_clusters, y = perc, fill = factor(Phase, levels=c("G1", "S", "G2M")), label = count)) +
  geom_bar(stat="identity", width = 0.8) +
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = pal.cc) +
  labs(x="Clusters", y="Percentage", fill="",
       title = "Proportion of cycling cells per clusters") +
   theme(
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()

ggsave(filename = file.path(fig.dir, "3_main_cluster/Meta_ccPhase_per_cluster.png"), device = "png", width = 4, height = 3, dpi = 300)
```

### Proportions per clusters

Clusters by days
```{r}
stat1 <- 
  meta %>% 
  group_by(in_silico_clusters, sample) %>% 
  summarise(count=n()) %>% 
  mutate(perc=(count/sum(count)*100))

g1 <- ggplot(stat1, aes(x = in_silico_clusters, y = perc, fill = sample, label = count)) +
  geom_bar(stat="identity", width = 0.5) +
  #geom_text(size = 4, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = pal.sa) +
  labs(x="Clusters", y="Percentage", fill="Day",
       title = "Proportion of cells per clusters") +
   theme(
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()

g1

ggsave(filename = file.path(fig.dir, "3_main_cluster/Meta_col_per_days.png"), device = "png", width = 4, height = 3, dpi = 300)
```

Days by clusters
```{r}
meta = sce@meta.data

stat2 <- 
  meta %>% 
  group_by(sample, in_silico_clusters) %>% 
  summarise(count=n()) %>% 
  mutate(perc=(count/sum(count)*100))

g2 <- ggplot(stat2, aes(x = sample, y = perc, fill = in_silico_clusters, label = count)) +
  geom_bar(stat="identity", width = 0.5) +
  #geom_text(size = 4, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = pal.cla) +
  labs(x="Time point", y="Percentage", fill="Clusters",
       title = "Proportion of cells per days") +
   theme(
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()

g2

ggsave(filename = file.path(fig.dir, "3_main_cluster/Meta_col_per_clusters.png"), device = "png", width = 4, height = 3, dpi = 300)
```

```{r}
g1/g2
```

### Stacked area

```{r}
stat2 <- 
  meta %>% 
  group_by(sample, in_silico_clusters) %>% 
  summarise(count=n()) %>% 
  mutate(perc=(count/sum(count)*100)) %>% 
  ungroup %>% 
  complete(in_silico_clusters, nesting(sample), fill = list(count = 0, perc = 0)) %>% 
  mutate(Time = ifelse(sample == "D0", 0,
                       ifelse(sample == "D1", 1,
                       ifelse(sample == "D3", 3, 6))))

ggplot(stat2, aes(x=Time, y=perc, fill=in_silico_clusters)) +
  geom_area() +
  scale_fill_manual(values = pal.cla) +
  scale_x_continuous(breaks = c(0,1,3,6)) +
  labs(x="Time point", y="Percentage", fill="Clusters") +
   theme(
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white")
  )

ggsave(filename = file.path(fig.dir, "3_main_cluster/Meta_stacked_area_clusters.png"), device = "png", width = 4, height = 3, dpi = 300)
```

### Sankey

```{r}
library(ggalluvial)

df.allu <- meta %>% 
  tibble::rownames_to_column( "cell_id") %>%
  group_by(in_silico_clusters, sample) %>%
  dplyr::summarize(counts = n()) %>%
  ungroup() %>%
  #complete(in_silico_clusters, nesting(sample), fill = list(count = 0, perc = 0)) %>% 
  dplyr::arrange(desc(counts)) 

# Lobes / geom label
ggplot(df.allu, aes(y = counts, axis1 = sample, axis2 = in_silico_clusters)) +
  geom_alluvium(aes(fill = sample), width = 1/6) +
  geom_stratum(width = 0.25, fill = "white", color = "grey") +
  geom_label(stat = "stratum", infer.label = TRUE, label.size = NA, alpha = 0, color = 'black', fontface = "bold") +
  scale_x_discrete(limits = c("Time Points", "Clusters"), expand = c(.05, .05)) +
  scale_fill_manual(values = pal.sa) +
  theme(
    axis.ticks = element_blank(),
    axis.title.y=element_blank(),
    axis.text.y=element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    strip.background = element_rect(fill="white", colour = "white")
    ) 
```

### Find markers

```{r, eval=FALSE}
if ( file.exists(file.path("r_export/epi.clusters.markers.txt")) ) {
  epi.markers <- read.delim(file = "r_export/epi.clusters.markers.txt", sep = "\t")
  top.epi <- epi.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
} else { 
  epi.markers <- FindAllMarkers(sce, only.pos = TRUE, 
                              min.pct = 0.10, logfc.threshold = 0.25, ## Repeat
                              test.use = "MAST")
  top.epi <- epi.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
  write.table(epi.markers, "r_export/epi.clusters.markers.txt", sep = "\t", row.names = T)
}
```

Heatmap
```{r, eval=FALSE}
DoHeatmap(object = sce, features = as.character(top.epi$gene), group.colors = pal.cla, assay = "RNA", slot = "data", size = 4,
          draw.lines = TRUE, lines.width = 10) +  scale_fill_viridis(option = "inferno",na.value = "white")

ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_heatmap_top10.png"), device = "png", width = 8, height = 12, dpi = 300)
```

Custom heatmap
```{r, eval=FALSE}
genelist <- c(
  "Runx1", "Meis2", "Mif", "Notch1",
  "Krt5", "Krt14", "Trp63", "Nfib", "Apoe",
  "Nkx3-1", 
  "Tacstd2", "Krt4", "Psca", "Nupr1", 
  "Krt8", "Krt19", "Krt7")

DoHeatmap(object = sce, features = as.character(genelist), group.colors = pal.cl, slot = "scale.data", assay = "RNA", size = 4) + scale_fill_viridis()
#ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_heatmap_custom.png"), device = "png", width = 8, height = 12, dpi = 300)
```

### GO - to do

GO enrichment might not be best here...

C0
```{r}
Clus = "C0"

go <- gost(query = epi.markers %>% filter(cluster == Clus & avg_logFC > 0.5) %>% pull(gene), 
          organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = Clus)

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep(pal.cl[1],10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, ".png")), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, "nolegend",".png")), device = "png", width = 4, height = 2.5, dpi = 300)
```

C1=nothing
  
C2
```{r}
Clus = "C2"

go <- gost(query = epi.markers %>% filter(cluster == Clus & avg_logFC > 0.5) %>% pull(gene), 
          organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = Clus)

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep(pal.cl[3],10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, ".png")), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, "nolegend",".png")), device = "png", width = 4, height = 2.5, dpi = 300)
```

C3
```{r}
Clus = "C3"

go <- gost(query = epi.markers %>% filter(cluster == Clus & avg_logFC > 0.5) %>% pull(gene), 
          organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = Clus)

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep(pal.cl[4],10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, ".png")), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, "nolegend",".png")), device = "png", width = 4, height = 2.5, dpi = 300)
```

C4
```{r}
Clus = "C4"

go <- gost(query = epi.markers %>% filter(cluster == Clus & avg_logFC > 0.5) %>% pull(gene), 
          organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = Clus)

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep(pal.cl[5],10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, ".png")), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, "nolegend",".png")), device = "png", width = 4, height = 2.5, dpi = 300)
```

C5
```{r}
Clus = "C5"

go <- gost(query = epi.markers %>% filter(cluster == Clus & avg_logFC > 0.5) %>% pull(gene), 
          organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = Clus)

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep(pal.cl[6],10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, ".png")), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, "nolegend",".png")), device = "png", width = 4, height = 2.5, dpi = 300)
```

C6
```{r}
Clus = "C6"

go <- gost(query = epi.markers %>% filter(cluster == Clus & avg_logFC > 0.5) %>% pull(gene), 
          organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = Clus)

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep(pal.cl[7],10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, ".png")), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, "nolegend",".png")), device = "png", width = 4, height = 2.5, dpi = 300)
```

C7=Nothing
C8=Nothing

C9
```{r}
Clus = "C9"

go <- gost(query = epi.markers %>% filter(cluster == Clus & avg_logFC > 0.5) %>% pull(gene), 
          organism = "mmusculus",
           significant = TRUE)

gostplot(go, capped = FALSE, interactive = TRUE)

go <- go$result %>% 
  filter(source == "GO:BP") %>% 
  dplyr::select(p_value, term_id, source, term_name, parents) %>% 
  arrange(p_value) %>% 
  dplyr::mutate(Cluster = Clus)

#plot
g <- ggplot(go %>% top_n(n=-10, wt=p_value), aes(x=reorder(term_name, -p_value) , y=-log10(p_value), fill=term_name)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(y=-log10(p_value) ,label = term_id, hjust=0)) +
  #scale_fill_viridis_c(alpha = 1, begin = 0.5, end = 1, option = "magma") +
  scale_fill_manual(values = c(rep(pal.cl[10],10))) +
  scale_y_continuous(limits=c(0,25)) +
  geom_hline(yintercept = -log10(0.05), color="grey30", linetype="dashed") +
  theme(
    axis.title.y=element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
    strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()
g

# save with and without legend
ggsave(g, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, ".png")), device = "png", width = 6, height = 2.5, dpi = 300)
t <- theme(axis.title.x=element_blank(), axis.text.y=element_blank())
ggsave(g+t, filename = file.path(fig.dir, paste0("4_main_go_terms/","GO_", Clus, "nolegend",".png")), device = "png", width = 4, height = 2.5, dpi = 300)
```

### Visualise Genes

```{r}
# Extract UMAPs coordinates
sce.UMAP <- as.data.frame(Embeddings(sce[["umap"]]))
sce.UMAP <- sce.UMAP %>% 
  dplyr::mutate(CellBarcode = colnames(sce),
                Clusters = sce$seurat_clusters,
                Sample = sce$sample)
```

Single gene
```{r}
plotUMAP_colByGene(sce, sce.UMAP, "Nkx3-1", ptsize = .5, alpha = 1, legend = "yes")
```

```{r, eval=FALSE}
plotUMAP_colByGene(sce, sce.UMAP, "Runx1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(sce, sce.UMAP, "Nkx3-1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(sce, sce.UMAP, "Trp63", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(sce, sce.UMAP, "Krt5", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(sce, sce.UMAP, "Ly6d", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(sce, sce.UMAP, "Nupr1", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(sce, sce.UMAP, "Krt4", ptsize = .5, alpha = 1, legend = "yes")
plotUMAP_colByGene(sce, sce.UMAP, "Psca", ptsize = .5, alpha = 1, legend = "yes")
```

### Figures / Genes

```{r, eval=FALSE}
gene <- "Runx1"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Nkx3-1"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Krt4"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Psca"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Krt5"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Krt8"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Trp63"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Krt19"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Ly6d"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Nupr1"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Tacstd2"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Krt14"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Mki67"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)

gene <- "Top2a"
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "yes")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, ".png")), device = "png", width = 4, height = 6, dpi = 300)
plotUMAP_colByGene(sce, sce.UMAP, gene, ptsize = .5, alpha = 1, legend = "minimal")
ggsave(filename = file.path(fig.dir, paste0("3_main_cluster/Main_Gene_", gene, "_minimal",".png")), device = "png", width = 6, height = 6, dpi = 300)
```

## D. Diffusion Map - allclusters

Diffusion maps is another way of reducing the dimensions in complex datasets to look at possible trajectories. When calculating the diffusion map, we found that C9 was largely outlying compared to the rest of the data. When may exclude it for downstream analysis. Here we process with or without.
  
### runDiffusionMap

Convert to singlecellexperiment and compute diffusion map / components=5 and k=20
```{r}
sce.all <- sce
SCE.all <- as.SingleCellExperiment(sce.all, assay = "RNA")

# DiffusionMap
set.seed(1)
SCE.all <- runDiffusionMap(SCE.all, name = "DiffusionMap", dimred = "PCA", n_dimred=50, ncomponents=20, k=20)

# Add DiffusionMap to the seurat object
dm <- reducedDim(SCE.all, type = "DiffusionMap")
sce.all[["DiffusionMap"]] <- CreateDimReducObject(embeddings = dm, key = "DC_", assay = DefaultAssay(sce.all))

# Plot in seurat
DimPlot(sce.all, reduction = "DiffusionMap", cols = pal.cla)
```

### Figures

```{r}
# Clusters
DimPlot(object = sce.all, reduction = "DiffusionMap", group.by = "in_silico_clusters", cols = pal.cla, pt.size = 1.5)
ggsave(filename = file.path(fig.dir, "5_diffusion/Main_DiffusionMap_clusters.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = sce.all, reduction = "DiffusionMap", group.by = "in_silico_clusters", cols = pal.cla, pt.size = 1.5) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "5_diffusion/Main_DiffusionMap_clusters_nolegend.png"), device = "png", width = 5, height = 5, dpi = 300)

# Days
DimPlot(object = sce.all, reduction = "DiffusionMap", group.by = "sample", cols = pal.sa, pt.size = 0.5)
ggsave(filename = file.path(fig.dir, "5_diffusion/Main_DiffusionMap_days.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = sce.all, reduction = "DiffusionMap", group.by = "sample", cols = pal.sa, pt.size = 0.5) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "5_diffusion/Main_DiffusionMap_days_nolegend.png"), device = "png", width = 5, height = 5, dpi = 300)
```

### Save

Save SingleCellExperiment object
And subsetted Seurat object with diffmap20
```{r}
# SingleCellExperiment object
if ( file.exists(file.path("r_save/SCE_all-diffmap.rds")) ) {
  SCE.all <- readRDS(file = "r_save/SCE_all-diffmap.rds")
} else { 
  saveRDS(SCE.all, file = "r_save/SCE_all-diffmap.rds")
}

# Seurat object
if ( file.exists(file.path("r_save/sce_all.diffmap.rds")) ) {
  sce.all <- readRDS(file = "r_save/sce_all.diffmap.rds")
} else { 
  saveRDS(sce.all, file = "r_save/sce_all.diffmap.rds")
}
```

### Export for scanpy

Object containing all clusters
```{r}
scl <- sce.all
DefaultAssay(scl) <- "RNA"
scl@project.name <- "mevel_et_al_sc_explant_sce"

# Check diffusion map
DimPlot(scl, reduction = "DiffusionMap", cols = pal.cla)

# export DiffusionMap with 10 components
write.csv(dm, "r_save/diffmap20_all.csv")

# remove useless slots
scl$nCount_HTO <- NULL
scl$nFeature_HTO <- NULL
scl$HTO_maxID <- NULL
scl$HTO_secondID <- NULL
scl$HTO_margin <- NULL
scl$HTO_classification <- NULL
scl$HTO_classification.global <- NULL
scl$hash.ID <- NULL
scl$tag <- NULL
scl$RNA_snn_res.0.5 <- NULL
scl$cluster_label <- NULL
scl$S.Score <- NULL
scl$G2M.Score <- NULL
scl$old.ident <- NULL
scl$orig.ident <- NULL

# add cell id in metadata
df.cell <- data.frame(Cell = colnames(sce.all))
rownames(df.cell) <- df.cell$Cell
scl <- AddMetaData(object = scl, metadata = df.cell, col.name = "Cell")

# as loom
loom.scl <- as.loom(scl)
loom.scl$close_all()
rm(loom.scl)
```

## F. Diffusion Map - excl. C9

Selection: subset object without C9
```{r}
sce.sub <- subset(sce, idents = c("C9"), invert = TRUE)
SCE.sub <- as.SingleCellExperiment(sce.sub, assay = "RNA")
```

### runDiffusionMap

We denoise the graph (suggested in scanpy vignette) usnig a few diffusion components on which we will calculate a Shared Nearest Neighbors Graph that we use to visualise possible trajectories.
```{r}
# DiffusionMap
set.seed(1)
SCE.sub <- runDiffusionMap(SCE.sub, name = "DiffusionMap", dimred = "PCA", n_dimred=50, ncomponents=20, k=10)

# Plot DiffusionMap in scater
plotReducedDim(SCE.sub, dimred = "DiffusionMap", colour_by = "sample") + scale_fill_manual(values=pal.sa)
plotReducedDim(SCE.sub, dimred = "DiffusionMap", colour_by = "in_silico_clusters") + scale_fill_manual(values=pal.cl)
```

### Figures

Plot in Seurat
```{r}
# Add DiffusionMap to the seurat object
diffmap20 <- reducedDim(SCE.sub, type = "DiffusionMap")
sce.sub[["DiffusionMap"]] <- CreateDimReducObject(embeddings = diffmap20, key = "DC_", assay = DefaultAssay(sce.sub))

# Plot in seurat
DimPlot(sce.sub, reduction = "DiffusionMap", cols = pal.cl)
DimPlot(sce.sub, reduction = "DiffusionMap", cols = pal.cl)
```

```{r}
# Clusters
DimPlot(object = sce.sub, reduction = "DiffusionMap", group.by = "in_silico_clusters", cols = pal.cl, pt.size = 1.5)
ggsave(filename = file.path(fig.dir, "5_diffusion/Selection_DiffusionMap_clusters.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = sce.sub, reduction = "DiffusionMap", group.by = "in_silico_clusters", cols = pal.cl, pt.size = 1.5) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "5_diffusion/Selection_DiffusionMap_clusters_nolegend.png"), device = "png", width = 5, height = 5, dpi = 300)

# Days
DimPlot(object = sce.sub, reduction = "DiffusionMap", group.by = "sample", cols = pal.sa, pt.size = 0.5)
ggsave(filename = file.path(fig.dir, "5_diffusion/Selection_DiffusionMap_days.png"), device = "png", width = 6, height = 4, dpi = 300)
DimPlot(object = sce.sub, reduction = "DiffusionMap", group.by = "sample", cols = pal.sa, pt.size = 0.5) + NoLegend() + NoAxes()
ggsave(filename = file.path(fig.dir, "5_diffusion/Selection_DiffusionMap_days_nolegend.png"), device = "png", width = 4, height = 5, dpi = 300)
```

### Save

Save SingleCellExperiment object
And subsetted Seurat object with diffmap20
```{r}
# SingleCellExperiment object
if ( file.exists(file.path("r_save/SCE_final-diffmap.rds")) ) {
  SCE.sub <- readRDS(file = "r_save/SCE_final-diffmap.rds")
} else { 
  saveRDS(SCE.sub, file = "r_save/SCE_final-diffmap.rds")
}

# Seurat object
if ( file.exists(file.path("r_save/sce_final.diffmap.rds")) ) {
  sce.sub <- readRDS(file = "r_save/sce_final.diffmap.rds")
} else { 
  saveRDS(sce.sub, file = "r_save/sce_final.diffmap.rds")
}
```

### Export for scanpy

We use the object that has not got the C2 cluster.  
In scanpy, we will use the diffusion map calculated in R.  
  
Export loom object
```{r}
scl <- sce.sub
DefaultAssay(scl) <- "RNA"
scl@project.name <- "mevel_et_al_sc_explant_sce_final"

write.csv(diffmap20, "r_save/diffmap20_final.csv")

# Check it's ok
DimPlot(scl, reduction = "DiffusionMap", cols = pal.cl)

# remove useless slots
scl$nCount_HTO <- NULL
scl$nFeature_HTO <- NULL
scl$HTO_maxID <- NULL
scl$HTO_secondID <- NULL
scl$HTO_margin <- NULL
scl$HTO_classification <- NULL
scl$HTO_classification.global <- NULL
scl$hash.ID <- NULL
scl$tag <- NULL
scl$RNA_snn_res.0.5 <- NULL
scl$cluster_label <- NULL
scl$S.Score <- NULL
scl$G2M.Score <- NULL
scl$old.ident <- NULL
scl$orig.ident <- NULL

# add cell id to the metadata slot
df.cell <- data.frame(Cell = colnames(sce.sub))
rownames(df.cell) <- df.cell$Cell
scl <- AddMetaData(object = scl, metadata = df.cell, col.name = "Cell")

# as loom
loom.scl <- as.loom(scl)
loom.scl$close_all()
rm(loom.scl)
```

### Markers

```{r, eval=FALSE}
if ( file.exists(file.path("r_export/epi.final.markers.txt")) ) {
  epi.final.markers <- read.delim(file = "r_export/epi.final.markers.txt", sep = "\t")
  top.epi.final <- epi.final.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
} else { 
  epi.final.markers <- FindAllMarkers(sce.sub, only.pos = FALSE, 
                              min.pct = 0.25, logfc.threshold = 0.25, ## Repeat
                              test.use = "MAST")
  top.epi.final <- epi.final.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
  write.table(epi.final.markers, "r_export/epi.final.markers.txt", sep = "\t", row.names = F)
}
```

Top genes heatmap
```{r}
DoHeatmap(object = sce.sub, features = as.character(top.epi.final$gene), group.colors = pal.cl, assay = "RNA", slot = "data", size = 4,
          draw.lines = TRUE, lines.width = 10) +  scale_fill_viridis(option = "inferno", na.value = "white")

ggsave(filename = file.path(fig.dir, "3_main_cluster/Main_Final_heatmap_top10.png"), device = "png", width = 8, height = 12, dpi = 300)
```


Custom heatmap
```{r}
geneList = c( 'Shh', "Foxa1", 'Notch1', 'Sox9', 'Hoxb13', 'Hoxd13', 'Ar', "Nkx3-1",  "Trp63", 'Wnt5a',
              'Krt5', 'Krt14', "Krt8", "Krt18", "Krt19", "Krt7",
              "Dcn", "Apoe", "Ly6a", "Ly6e",
              'Psca', 'Nupr1', 'Krt4', "Tacstd2", "Ly6d",
              'Mki67', 'Top2a')

DoHeatmap(object = sce.sub, features = geneList, group.colors = pal.cl, draw.lines = TRUE, lines.width = 10,
          assay = "RNA", slot = "data", size = 4) + scale_fill_viridis(option = "viridis",na.value = "white")
```


### DE

*Number of upregulated genes per cluster* (min.pct = 0.10, logfc.threshold = 0.25)
```{r}
stat <- 
  epi.final.markers %>% 
  group_by(cluster) %>% 
  summarise(count=n())

# Plot
ggplot(stat, aes(x=cluster, y=count, fill=cluster)) +
  geom_bar(stat = "identity", width = 0.8) +
  #geom_text(aes(y = count + sign(count) * max(abs(count)) * 0.05, label = abs(count)), size = 6, colour = "grey25") +
  geom_text(aes(y=count,label = count, hjust=-0.2)) +
  scale_y_continuous(limits=c(0, max(stat$count) + 0.1*max(stat$count))) +
  scale_fill_manual(values = pal.cl) +
  labs(x="Clusters", y="Number of upregulated genes", 
       title = "") +
   theme(
     axis.title.x=element_blank(),
     axis.title.y=element_blank(),
     axis.text.y=element_blank(),
     legend.position = "none",
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white")
  ) + rotate()

ggsave(filename = file.path(fig.dir, "3_main_cluster/nDE_final.png"), device = "png", width = 3, height = 3, dpi = 300)
```

```{r}
stat <- epi.final.markers %>%
    group_by(cluster) %>%
    summarise(count = n())

ggplot(stat, aes(x = fct_rev(cluster), y = count, fill = cluster)) +
    geom_col() +
    geom_text(aes(y = count + sign(count) * max(abs(count)) * 0.07, label = abs(count)), size = 6, colour = "grey25") +
    coord_flip() +
    scale_fill_manual(values = pal.cl2) +
    ggtitle("Number of upregulated genes") +
    theme_minimal() +
    theme(axis.title = element_blank(),
          axis.line = element_blank(),
          axis.ticks = element_blank(),
          axis.text.x = element_blank(),
          legend.position = "bottom")
```

```{r, eval=FALSE}
epi.final.all.markers <-
  FindAllMarkers(sce.sub, 
                 only.pos = FALSE, 
                 min.pct = 0.10, 
                 logfc.threshold = 0.25,
                 test.use = "MAST")
#write.table(epi.minC2.all.markers, "r_export/epi.minC2.all.markers.txt", sep = "\t", row.names = F)

plot_data <- epi.final.all.markers %>%
    mutate(IsUp = avg_logFC > 0) %>%
    group_by(cluster) %>%
    summarise(Up = sum(IsUp), Down = sum(!IsUp)) %>%
    mutate(Down = -Down) %>%
    gather(key = "Direction", value = "Count", -cluster) %>%
    mutate(Cluster = factor(cluster))

ggplot(plot_data, aes(x = fct_rev(cluster), y = Count, fill = Direction)) +
    geom_col() +
    geom_text(aes(y = Count + sign(Count) * max(abs(Count)) * 0.07,
                  label = abs(Count)),
              size = 6, colour = "grey25") +
    coord_flip() +
    scale_fill_manual(values = c("#377eb8", "#e41a1c")) +
    ggtitle("Number of identified genes") +
    theme_minimal() +
    theme(axis.title = element_blank(),
          axis.line = element_blank(),
          axis.ticks = element_blank(),
          axis.text.x = element_blank(),
          legend.position = "bottom")
```

### Stacked area

```{r}
meta = sce.sub@meta.data

stat2 <- 
  meta %>% 
  mutate(in_silico_clusters = factor(in_silico_clusters, levels=c("C0", "C1","C2", "C3", "C4", "C5", "C6", "C7", "C8"))) %>% 
  group_by(sample, in_silico_clusters) %>% 
  summarise(count=n()) %>% 
  mutate(perc=(count/sum(count)*100)) %>% 
  ungroup %>% 
  complete(in_silico_clusters, nesting(sample), fill = list(count = 0, perc = 0)) %>% 
  mutate(Time = ifelse(sample == "D0", 0,
                       ifelse(sample == "D1", 1,
                       ifelse(sample == "D3", 3, 6))))

gs <- ggplot(stat2, aes(x=Time, y=perc, fill=in_silico_clusters)) +
  geom_area() +
  scale_fill_manual(values = pal.cl) +
  scale_x_continuous(breaks = c(0,1,3,6)) +
  labs(x="Time point", y="Percentage", fill="Clusters") +
   theme(
     panel.grid.minor = element_blank(),
     panel.grid.major = element_blank(),
     panel.background = element_blank(),
     panel.border = element_rect(colour = "grey30", fill=NA, size=0.5),
     strip.background = element_rect(fill="white", colour = "white"))
gs
ggsave(gs, filename = file.path(fig.dir, "3_main_cluster/Meta_stacked_area_clusters_final.png"), device = "png", width = 4, height = 3, dpi = 300)

gs <- gs + theme(
  axis.title = element_blank(),
  axis.line = element_blank(),
  axis.text.x = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),  
  legend.position = "none")
gs
ggsave(gs, filename = file.path(fig.dir, "3_main_cluster/Meta_stacked_area_clusters_final_nolegend.png"), device = "png", width = 3, height = 3, dpi = 300)
```

--------------------------------------------------------------------------------
## Session Information
```{r}
sessionInfo()
```
back to top