Raw File
---
title: "Adult scRNAseq datasets analysis"
subtitle: "Generate Gene Sets"
author: "Renaud Mevel"
output:
  html_document:
    self_contained: yes
    toc: true
    toc_float: true
    df_print: paged
    number_sections: false
editor_options: 
  chunk_output_type: console
---
```{r setup, echo=FALSE, message=FALSE, results='hide'}
library(knitr)
knitr::opts_chunk$set(cache=TRUE, error=FALSE, cache.lazy = TRUE)
```

## Objective

Generate gene sets 'specific' for:  

* Each individual cluster identified previously  
* Lum A/B/C vs Lum D  
* Lum D vs A/B/C  
* Lum E/F vs Lum A/B/C/D  
* Lum A/B/C/D vs Basal  
* Basal vs Lum A/B/C/D  
  
Directories need to be adapted throughout the scripts.
  
## Prepare the environment

```{r , warning=FALSE, message=FALSE}
# Data wrangling
library(plyr)
library(dplyr)
library(tidyverse)
library(data.table)

# Plots
library(gridExtra)
library(ggpubr)

# sc
library(Seurat)
library(sctransform)
library(MAST)
library(org.Mm.eg.db)
library(DoubletFinder)

# GO
library(gprofiler2)

# Palettes
library(pals)
pal25 <- as.character(pals::cols25(n=25))
pal.trt <- c("#a1e186", "#b9006e")
pal.rfp <- c("#ea4749", "#479bea")
pal.runs <- c("#ec0016", "#ffc554", "#20a4ff")
pal.lobe = c("#272873", "#45A5A7", "#cb5155")
pal.pop <- 
  c( 
    "#7a0177", "#dd3497",    # cas AP
    "#f768a1", "#fa9fb5",    # cas DLP
    "#fc9272", "#cb181d",    # cas
    "#CC6677", "#AA4466",    # cas VP
    "#081d58", "#225ea8",    # hn AP
    "#7fcdbb", "#7fb8cd",    # hn DLP
    "#67a9cf", "#b2d3e7"     # hn VP
    )
pal.cl <- c(
  "#00b6ed", "#b6ed00", "#3700ed", "#ed0040",
  "#ff54d7", "#f998bf",
  "#fc9101"
  )

# Directories
setwd(dir = "~/set-directory/")
pdf.dir <- "~/set-directory/"
fig.dir <- "~/set-directory/"

# Functions
source("Adult_functions.R")

# Seed
set.seed(1)
```

## Load data

```{r}
sce <- readRDS(file.path("r_save/sce_integrated.rds"))
```

Check the data
```{r}
DimPlot(sce, pt.size=.5, cols=pal.cl)
```

## Individual clusters

Load if exists already
```{r, eval=FALSE}
epi.markers <- readRDS(file.path("r_save/epi.markers.rds"))
```

*Cutoff used: logFC 0.25*

Lum A
```{r}
LumA <- epi.markers %>% dplyr::filter(cluster == "Lum-A" & avg_logFC > 0) 
write.table(LumA, file = paste0("r_export/custom_gene_sets/", "LumA_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

Lum B
```{r}
LumB <- epi.markers %>% dplyr::filter(cluster == "Lum-B" & avg_logFC > 0) 
write.table(LumB, file = paste0("r_export/custom_gene_sets/", "LumB_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

Lum C
```{r}
LumC <- epi.markers %>% dplyr::filter(cluster == "Lum-C" & avg_logFC > 0) 
write.table(LumC, file = paste0("r_export/custom_gene_sets/", "LumC_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

Lum D
```{r}
LumD <- epi.markers %>% dplyr::filter(cluster == "Lum-D" & avg_logFC > 0) 
write.table(LumD, file = paste0("r_export/custom_gene_sets/", "LumD_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

Lum E 
```{r}
LumE <- epi.markers %>% dplyr::filter(cluster == "Lum-E" & avg_logFC > 0) 
write.table(LumE, file = paste0("r_export/custom_gene_sets/", "LumE_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

Lum F 
```{r}
LumF <- epi.markers %>% dplyr::filter(cluster == "Lum-F" & avg_logFC > 0) 
write.table(LumF, file = paste0("r_export/custom_gene_sets/", "LumF_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

Bas
```{r}
Bas <- epi.markers %>% dplyr::filter(cluster == "Bas" & avg_logFC > 0) 
write.table(Bas, file = paste0("r_export/custom_gene_sets/", "Bas_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

## Lum ABC vs Lum D

```{r}
LumABC_vs_LumD <- FindMarkers(
  sce,
  ident.1 =c ("Lum-A", "Lum-B", "Lum-C"),
  ident.2 = c("Lum-D"), 
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumABC_vs_LumD <- LumABC_vs_LumD %>% tibble::rownames_to_column(var = "gene")

write.table(LumABC_vs_LumD, file = paste0("r_export/custom_gene_sets/", "LumABC_vs_LumD.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

## Lum D vs Lum ABC

```{r}
LumD_vs_LumABC <- FindMarkers(
  sce,
  ident.1 = c("Lum-D"), 
  ident.2 =c ("Lum-A", "Lum-B", "Lum-C"),
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumD_vs_LumABC <- LumD_vs_LumABC %>% tibble::rownames_to_column(var = "gene")

write.table(LumD_vs_LumABC, file = paste0("r_export/custom_gene_sets/", "LumD_vs_LumABC.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

## Lum EF vs Lum ABCD

```{r}
LumEF_vs_LumABCD <- FindMarkers(
  sce,
  ident.1 = c("Lum-E", "Lum-F"), 
  ident.2 =c ("Lum-A", "Lum-B", "Lum-C", "Lum-D"),
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumEF_vs_LumABCD <- LumEF_vs_LumABCD %>% tibble::rownames_to_column(var = "gene")

write.table(LumEF_vs_LumABCD, file = paste0("r_export/custom_gene_sets/", "LumEF_vs_LumABCD.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

## Lum EF vs Lum ABC

```{r}
LumEF_vs_LumABC <- FindMarkers(
  sce,
  ident.1 = c("Lum-E", "Lum-F"), 
  ident.2 =c ("Lum-A", "Lum-B", "Lum-C"),
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumEF_vs_LumABC <- LumEF_vs_LumABC %>% tibble::rownames_to_column(var = "gene")

write.table(LumEF_vs_LumABC, file = paste0("r_export/custom_gene_sets/", "LumEF_vs_LumABC.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

## Lum ABCD vs Basal

```{r}
LumABCD_vs_Bas <- FindMarkers(
  sce,
  ident.1 = c("Lum-A", "Lum-B", "Lum-C", "Lum-D"), 
  ident.2 =c ("Bas"),
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumABCD_vs_Bas <- LumABCD_vs_Bas %>% tibble::rownames_to_column(var = "gene")

write.table(LumABCD_vs_Bas, file = paste0("r_export/custom_gene_sets/", "LumABCD_vs_Bas.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

## Figure

### Basal vs Lum ABCD

```{r}
Bas_vs_LumABCD <- FindMarkers(
  sce,
  ident.1 =c ("Bas"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C", "Lum-D"), 
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

Bas_vs_LumABCD <- Bas_vs_LumABCD %>% tibble::rownames_to_column(var = "gene")

write.table(Bas_vs_LumABCD, file = paste0("r_export/custom_gene_sets/", "Bas_vs_LumABCD.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

### Lum D vs Lum ABC Basal

```{r}
LumD_vs_intact <- FindMarkers(
  sce,
  ident.1 =c ("Lum-D"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C", "Bas"), 
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumD_vs_intact <- LumD_vs_intact %>% tibble::rownames_to_column(var = "gene")

write.table(LumD_vs_intact, file = paste0("r_export/custom_gene_sets/", "LumD_vs_intact.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

### Lum A vs Lum BCD Basal

```{r}
LumA_vs_intact <- FindMarkers(
  sce,
  ident.1 =c ("Lum-A"),
  ident.2 = c("Lum-B", "Lum-C", "Lum-D", "Bas"), 
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumA_vs_intact <- LumA_vs_intact %>% tibble::rownames_to_column(var = "gene")

write.table(LumA_vs_intact, file = paste0("r_export/custom_gene_sets/", "LumA_vs_intact.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

### Lum B vs Lum ACD Basal

```{r}
LumB_vs_intact <- FindMarkers(
  sce,
  ident.1 =c ("Lum-B"),
  ident.2 = c("Lum-A", "Lum-C", "Lum-D", "Bas"), 
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumB_vs_intact <- LumB_vs_intact %>% tibble::rownames_to_column(var = "gene")

write.table(LumB_vs_intact, file = paste0("r_export/custom_gene_sets/", "LumB_vs_intact.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

### Lum C vs Lum ABD Basal

```{r}
LumC_vs_intact <- FindMarkers(
  sce,
  ident.1 =c ("Lum-C"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-D", "Bas"), 
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.25, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumC_vs_intact <- LumC_vs_intact %>% tibble::rownames_to_column(var = "gene")

write.table(LumC_vs_intact, file = paste0("r_export/custom_gene_sets/", "LumC_vs_intact.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

### Lum EF vs Lum ABCD Basal

```{r}
LumEF_vs_all <- FindMarkers(
  sce,
  ident.1 =c ("Lum-E", "Lum-F"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C","Lum-D", "Bas"), 
  test.use = "MAST",
  assay = "RNA",
  min.pct = 0.1, 
  logfc.threshold = 0.25,
  only.pos = TRUE)

LumEF_vs_all <- LumEF_vs_all %>% tibble::rownames_to_column(var = "gene")

write.table(LumEF_vs_all, file = paste0("r_export/custom_gene_sets/", "LumEF_vs_all.txt"), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
```

## Save image

```{r, eval=FALSE}
rm(sce)
save.image("r_save/4_make_genesets.RData")
```


--------------------------------------------------------------------------------
## Session Information
```{r}
sessionInfo()
```
back to top