---
title: "Adult scRNAseq datasets analysis"
subtitle: "Generate Gene Sets"
author: "Renaud Mevel"
output:
  html_document:
    self_contained: yes
    toc: true
    toc_float: true
    df_print: paged
    number_sections: false
editor_options:
  chunk_output_type: console
---

```{r setup, echo=FALSE, message=FALSE, results='hide'}
library(knitr)
knitr::opts_chunk$set(cache=TRUE, error=FALSE, cache.lazy = TRUE)

# knitr restores the working directory at the end of every chunk, so a setwd()
# call made in a later chunk does not carry over to the chunks that follow it.
# Registering the project root here makes all subsequent chunks resolve the
# relative paths ("r_save/...", "r_export/...") against the project directory
# when the document is knitted. Adapt the path together with the directories
# set in the "Prepare the environment" chunk.
knitr::opts_knit$set(root.dir = path.expand("~/set-directory"))
```

## Objective

Generate gene sets 'specific' for:

* Each individual cluster identified previously
* Lum A/B/C vs Lum D
* Lum D vs A/B/C
* Lum E/F vs Lum A/B/C/D
* Lum E/F vs Lum A/B/C
* Lum A/B/C/D vs Basal
* Basal vs Lum A/B/C/D
* Lum A, Lum B, Lum C and Lum D, each vs the other Lum A-D clusters and Basal
* Lum E/F vs Lum A/B/C/D and Basal

Directories need to be adapted throughout the scripts.

## Prepare the environment

```{r , warning=FALSE, message=FALSE}
# Data wrangling
library(plyr)
library(dplyr)
library(tidyverse)
library(data.table)

# Plots
library(gridExtra)
library(ggpubr)

# sc
library(Seurat)
library(sctransform)
library(MAST)
library(org.Mm.eg.db)
library(DoubletFinder)

# GO
library(gprofiler2)

# Palettes
library(pals)
pal25 <- as.character(pals::cols25(n=25))
pal.trt <- c("#a1e186", "#b9006e")
pal.rfp <- c("#ea4749", "#479bea")
pal.runs <- c("#ec0016", "#ffc554", "#20a4ff")
pal.lobe <- c("#272873", "#45A5A7", "#cb5155")
pal.pop <- c(
  "#7a0177", "#dd3497", # cas AP
  "#f768a1", "#fa9fb5", # cas DLP
  "#fc9272", "#cb181d", # cas
  "#CC6677", "#AA4466", # cas VP
  "#081d58", "#225ea8", # hn AP
  "#7fcdbb", "#7fb8cd", # hn DLP
  "#67a9cf", "#b2d3e7"  # hn VP
)
# One colour per cluster, used by the DimPlot check below
pal.cl <- c(
  "#00b6ed",
  "#b6ed00",
  "#3700ed",
  "#ed0040",
  "#ff54d7",
  "#f998bf",
  "#fc9101"
)

# Directories
# setwd() is kept for chunk-by-chunk execution in the console; when knitting,
# the working directory of the other chunks comes from the `root.dir` option
# set in the setup chunk.
setwd(dir = "~/set-directory/")
pdf.dir <- "~/set-directory/"
fig.dir <- "~/set-directory/"

# Functions
source("Adult_functions.R")

# Seed
set.seed(1)
```

## Load data

```{r}
sce <- readRDS(file.path("r_save", "sce_integrated.rds"))
```

Check the data

```{r}
DimPlot(sce, pt.size=.5, cols=pal.cl)
```

## Individual clusters

Load the per-cluster markers. They must have been computed and saved to
`r_save/epi.markers.rds` beforehand; the chunks below read `epi.markers`, so
this chunk has to be evaluated.

```{r}
epi.markers.file <- file.path("r_save", "epi.markers.rds")
if (!file.exists(epi.markers.file)) {
  # Fail here with an explicit message rather than later with
  # "object 'epi.markers' not found".
  stop(
    "Cluster markers file not found: ", epi.markers.file,
    ". Generate and save the cluster markers before running this script.",
    call. = FALSE
  )
}
epi.markers <- readRDS(epi.markers.file)
```

*Cutoff used: logFC 0.25*

Lum A
```{r}
# Helpers shared by all the gene set chunks below ----

# Directory receiving the exported gene set tables (relative to the working
# directory).
gene.set.dir <- file.path("r_export", "custom_gene_sets")

# Write a gene set table as a tab-separated text file.
#   gene.set : data frame to export.
#   name     : file name without extension; the file is <out.dir>/<name>.txt.
#   out.dir  : output directory, created if it does not exist yet.
# Returns `gene.set` invisibly.
export_gene_set <- function(gene.set, name, out.dir = gene.set.dir) {
  # write.table() does not create missing directories
  dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)
  write.table(
    gene.set,
    file = file.path(out.dir, paste0(name, ".txt")),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )
  invisible(gene.set)
}

# Keep the positive markers of one cluster from a marker table.
#   markers      : data frame with a `cluster` column and a log fold change
#                  column named `avg_logFC` or `avg_log2FC`.
#   cluster.name : value of `cluster` to keep.
# Returns the rows of `markers` for that cluster with a log fold change > 0.
cluster_gene_set <- function(markers, cluster.name) {
  # The fold change column is `avg_logFC` in tables produced by older Seurat
  # releases and `avg_log2FC` from Seurat 4 onwards; accept either.
  fc.col <- intersect(c("avg_logFC", "avg_log2FC"), colnames(markers))
  if (length(fc.col) == 0) {
    stop(
      "No 'avg_logFC' or 'avg_log2FC' column found in the marker table.",
      call. = FALSE
    )
  }
  fc.col <- fc.col[1]
  markers %>%
    dplyr::filter(cluster == cluster.name & .data[[fc.col]] > 0)
}

# Positive markers of `ident.1` versus `ident.2` (MAST test on the RNA assay,
# logfc.threshold = 0.25, only.pos = TRUE).
#   object  : object passed to FindMarkers().
#   ident.1 : identities of the group of interest.
#   ident.2 : identities of the reference group.
#   min.pct : `min.pct` argument passed to FindMarkers().
# Returns the FindMarkers() table with row names moved to a `gene` column.
compare_clusters <- function(object, ident.1, ident.2, min.pct = 0.25) {
  FindMarkers(
    object,
    ident.1 = ident.1,
    ident.2 = ident.2,
    test.use = "MAST",
    assay = "RNA",
    min.pct = min.pct,
    logfc.threshold = 0.25,
    only.pos = TRUE
  ) %>%
    tibble::rownames_to_column(var = "gene")
}

# Lum A ----
LumA <- cluster_gene_set(epi.markers, "Lum-A")
export_gene_set(LumA, "LumA_vs_all")
```

Lum B

```{r}
LumB <- cluster_gene_set(epi.markers, "Lum-B")
export_gene_set(LumB, "LumB_vs_all")
```

Lum C

```{r}
LumC <- cluster_gene_set(epi.markers, "Lum-C")
export_gene_set(LumC, "LumC_vs_all")
```

Lum D

```{r}
LumD <- cluster_gene_set(epi.markers, "Lum-D")
export_gene_set(LumD, "LumD_vs_all")
```

Lum E

```{r}
LumE <- cluster_gene_set(epi.markers, "Lum-E")
export_gene_set(LumE, "LumE_vs_all")
```

Lum F

```{r}
LumF <- cluster_gene_set(epi.markers, "Lum-F")
export_gene_set(LumF, "LumF_vs_all")
```

Bas

```{r}
Bas <- cluster_gene_set(epi.markers, "Bas")
export_gene_set(Bas, "Bas_vs_all")
```

## Lum ABC vs Lum D

```{r}
LumABC_vs_LumD <- compare_clusters(
  sce,
  ident.1 = c("Lum-A", "Lum-B", "Lum-C"),
  ident.2 = c("Lum-D")
)
export_gene_set(LumABC_vs_LumD, "LumABC_vs_LumD")
```

## Lum D vs Lum ABC

```{r}
LumD_vs_LumABC <- compare_clusters(
  sce,
  ident.1 = c("Lum-D"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C")
)
export_gene_set(LumD_vs_LumABC, "LumD_vs_LumABC")
```

## Lum EF vs Lum ABCD

```{r}
LumEF_vs_LumABCD <- compare_clusters(
  sce,
  ident.1 = c("Lum-E", "Lum-F"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C", "Lum-D")
)
export_gene_set(LumEF_vs_LumABCD, "LumEF_vs_LumABCD")
```

## Lum EF vs Lum ABC

```{r}
LumEF_vs_LumABC <- compare_clusters(
  sce,
  ident.1 = c("Lum-E", "Lum-F"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C")
)
export_gene_set(LumEF_vs_LumABC, "LumEF_vs_LumABC")
```

## Lum ABCD vs Basal

```{r}
LumABCD_vs_Bas <- compare_clusters(
  sce,
  ident.1 = c("Lum-A", "Lum-B", "Lum-C", "Lum-D"),
  ident.2 = c("Bas")
)
export_gene_set(LumABCD_vs_Bas, "LumABCD_vs_Bas")
```

## Figure

### Basal vs Lum ABCD

```{r}
Bas_vs_LumABCD <- compare_clusters(
  sce,
  ident.1 = c("Bas"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C", "Lum-D")
)
export_gene_set(Bas_vs_LumABCD, "Bas_vs_LumABCD")
```

### Lum D vs Lum ABC Basal

```{r}
LumD_vs_intact <- compare_clusters(
  sce,
  ident.1 = c("Lum-D"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C", "Bas")
)
export_gene_set(LumD_vs_intact, "LumD_vs_intact")
```

### Lum A vs Lum BCD Basal

```{r}
LumA_vs_intact <- compare_clusters(
  sce,
  ident.1 = c("Lum-A"),
  ident.2 = c("Lum-B", "Lum-C", "Lum-D", "Bas")
)
export_gene_set(LumA_vs_intact, "LumA_vs_intact")
```

### Lum B vs Lum ACD Basal

```{r}
LumB_vs_intact <- compare_clusters(
  sce,
  ident.1 = c("Lum-B"),
  ident.2 = c("Lum-A", "Lum-C", "Lum-D", "Bas")
)
export_gene_set(LumB_vs_intact, "LumB_vs_intact")
```

### Lum C vs Lum ABD Basal

```{r}
LumC_vs_intact <- compare_clusters(
  sce,
  ident.1 = c("Lum-C"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-D", "Bas")
)
export_gene_set(LumC_vs_intact, "LumC_vs_intact")
```

### Lum EF vs Lum ABCD Basal

```{r}
# NOTE(review): this comparison uses min.pct = 0.1 whereas every other
# comparison uses 0.25 - value kept as is, confirm it is intentional.
LumEF_vs_all <- compare_clusters(
  sce,
  ident.1 = c("Lum-E", "Lum-F"),
  ident.2 = c("Lum-A", "Lum-B", "Lum-C", "Lum-D", "Bas"),
  min.pct = 0.1
)
export_gene_set(LumEF_vs_all, "LumEF_vs_all")
```

## Save image

```{r, eval=FALSE}
# Drop the Seurat object first so that it is not written to the image
rm(sce)
save.image("r_save/4_make_genesets.RData")
```

--------------------------------------------------------------------------------

## Session Information

```{r}
sessionInfo()
```