Revision f1ad799015c901dad378f6e488dc38f4a19fd703 authored by Yifang Liu on 06 November 2020, 22:49:13 UTC, committed by GitHub on 06 November 2020, 22:49:13 UTC
1 parent 404c7ab
3_2020-02-05_Top_genes_per_cluster_Dotplot_miniViolin_Heatmap.Rmd
---
title: "3 Top genes per cluster Dotplot miniViolin Heatmap"
author: "Yifang Liu"
date: "`r Sys.Date()`"
output:
rmdformats::html_clean:
code_folding: hide
fig_width: 10
fig_height: 10
highlight: kate
thumbnails: false
lightbox: true
gallery: true
---
```{r knitr_init, echo=FALSE, cache=FALSE}
library(knitr)
library(rmdformats)
options(max.print = 200)
opts_chunk$set(echo = TRUE,
cache = FALSE,
prompt = FALSE,
tidy = TRUE,
comment = NA,
message = FALSE,
warning = FALSE,
dev = c('png', 'pdf'),
fig.width = 10,
fig.height = 10,
fig.align = "center",
fig.path = '3_PDF_2020-02-05_Top_genes_per_cluster_Dotplot_miniViolin_Heatmap/',
dpi = 72)
opts_knit$set(width = 75)
```
```{r setup}
set.seed(123)
npc <- 20
# theta1 <- 2
# theta2 <- 5
# theta <- c(theta1, theta2)
resolution <- 0.1
selected_res <- paste0("RNA_snn_res.", resolution)
pt_size <- 1
# alpha <- 0.8
# Suppress loading messages
suppressPackageStartupMessages({
library(Matrix)
library(dplyr)
library(tidyverse)
library(Seurat)
library(cowplot)
library(Rcpp)
library(harmony)
library(SoupX)
})
```
```{r load_data}
EGFP <- readRDS("Data/2020-02-05_EGFP_seurat_obj.Rds")
Idents(EGFP) <- selected_res
```
# Top genes unbiased
```{r markers}
if (file.exists("Markers/2020-02-05_EGFP_markers.Rds")) {
markers_list <- readRDS("Markers/2020-02-05_EGFP_markers.Rds")
} else {
dir.create("Markers")
table_df <- table(EGFP@meta.data[ , selected_res], EGFP@meta.data$LibraryID) %>% as.data.frame() %>% spread(key = Var2, value = Freq)
colnames(table_df)[1] <- "cluster"
# str(table_df)
markers_list <- list()
for(cluster_id in table_df$cluster){
markers <- FindMarkers(EGFP, ident.1 = cluster_id)
# markers$gene <- row.names(markers)
write.csv(markers, file = paste0("Markers/2020-02-05_EGFP_markers_cluster", cluster_id, ".csv"))
markers_list[[paste0("cluster", cluster_id)]] <- markers
}
saveRDS(markers_list,
file = "Markers/2020-02-05_EGFP_markers.Rds")
}
```
# Dotplot Top5 per cluster
```{r Dotplot_top5}
n <- 5
gene_list <- character(length = 0)
for(cluster_n in names(markers_list)){
df <- markers_list[[`cluster_n`]]
df <- subset(df, df$avg_logFC > 0)
df <- df[order(-df$avg_logFC), ]
df <- df[!(row.names(df) %in% c("EGFP")), ]
count <- 0
gene_names <- row.names(df)
for (i in 1:length(gene_names)) {
if (gene_names[i] %in% gene_list) {
# print(cluster_n)
# print(i)
# print(gene_names[i])
} else {
gene_list <- c(gene_names[i], gene_list)
count <- count + 1
}
if (count == n) {
break
}
}
}
DotPlot(EGFP, features = gene_list, dot.scale = 5) + RotatedAxis() + ylab("Cluster") + theme(axis.text.x = element_text(size = 8))
```
# miniViolin Top3 per cluster
```{r miniViolin_top3}
n <- 3
gene_list <- character(length = 0)
for(cluster_n in names(markers_list)){
df <- markers_list[[`cluster_n`]]
df <- subset(df, df$avg_logFC > 0)
df <- df[order(-df$avg_logFC), ]
df <- df[!(row.names(df) %in% c("EGFP")), ]
count <- 0
gene_names <- row.names(df)
for (i in 1:length(gene_names)) {
if (gene_names[i] %in% gene_list) {
# print(cluster_n)
# print(i)
# print(gene_names[i])
} else {
gene_list <- c(gene_names[i], gene_list)
count <- count + 1
}
if (count == n) {
break
}
}
}
# gene_list
for (gene in gene_list) {
plot <- VlnPlot(EGFP, features = gene, pt.size = 0, combine = FALSE)
p <- plot[[1]] +
theme(legend.position = 'none') +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
ylab(gene) +
theme(plot.title = element_blank()) +
theme(axis.text.y = element_text(size = 2)) +
theme(text = element_text(size = 8))
assign(gene, p)
}
x_label <- plot[[1]] +
theme(legend.position = 'none') +
ylab("") +
theme(plot.title = element_blank()) +
theme(axis.text.y = element_text(size = 2)) +
theme(text = element_text(size = 8)) +
theme(axis.text.x = element_text(size = 20))
plot_grid(EbpII, Ebp, Sfp60F, Pif2, whip, soti,
CG33970, EbpIII, Obp56d, CG13618, CG12268, Elo68alpha,
CG4928, Mur18B, CG14292, lcs, CG34212, CG16826,
yip7, CG5107, alphaTry, Cpr49Ab, CG14661, Cpr65Au,
mtgo, CG2765, FASN3, CG13868, LpR2, CG10960, x_label, ncol = 1)
```
# Heatmap Top10 per cluster
```{r Heatmap_top10}
n <- 10
gene_list <- character(length = 0)
for(cluster_n in names(markers_list)){
df <- markers_list[[`cluster_n`]]
df <- subset(df, df$avg_logFC > 0)
df <- df[order(-df$avg_logFC), ]
df <- df[!(row.names(df) %in% c("EGFP")), ]
count <- 0
gene_names <- row.names(df)
for (i in 1:length(gene_names)) {
if (gene_names[i] %in% gene_list) {
# print(cluster_n)
# print(i)
# print(gene_names[i])
} else {
gene_list <- c(gene_names[i], gene_list)
count <- count + 1
}
if (count == n) {
break
}
}
}
gene_list <- rev(gene_list)
DoHeatmap(EGFP, features = gene_list, raster = FALSE)
```
# Hand picked genes from Arpan
```{r gene_list}
gene_list <- c(
"sls", "Mlc2", "Mlp60A", "Mhc", "Mf", "Mp20",
"FASN3", "FASN2", "LpR1", "salm", "ImpL2",
"Pli", "LpR2", "path", "apolpp", "FASN1", "Hnf4"
)
```
# Dotplot
```{r Dotplot_picked}
DotPlot(EGFP, features = gene_list, dot.scale = 5) + RotatedAxis() + ylab("Cluster") + theme(axis.text.x = element_text(size = 8))
```
# miniViolin
```{r miniViolin_picked}
for (gene in gene_list) {
plot <- VlnPlot(EGFP, features = gene, pt.size = 0, combine = FALSE)
p <- plot[[1]] +
theme(legend.position = 'none') +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
ylab(gene) +
theme(plot.title = element_blank()) +
theme(axis.text.y = element_text(size = 3)) +
theme(text = element_text(size = 8))
assign(gene, p)
}
plot <- VlnPlot(EGFP, features = "Lsd-1", pt.size = 0, combine = FALSE)
Lsd1 <- plot[[1]] +
theme(legend.position = 'none') +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
ylab("Lsd-1") +
theme(plot.title = element_blank()) +
theme(axis.text.y = element_text(size = 3)) +
theme(text = element_text(size = 8))
x_label <- plot[[1]] +
theme(legend.position = 'none') +
ylab("") +
theme(plot.title = element_blank()) +
theme(axis.text.y = element_text(size = 3)) +
theme(text = element_text(size = 8)) +
theme(axis.text.x = element_text(size = 20))
plot_grid(sls, Mlc2, Mlp60A, Mhc, Mf, Mp20,
FASN3, FASN2, LpR1, salm, ImpL2,
Pli, LpR2, path, apolpp, FASN1, Hnf4,
x_label, ncol = 1)
```
# Heatmap
```{r Heatmap_picked}
DoHeatmap(EGFP, features = rev(gene_list), raster = FALSE)
```
# Notes
2020-02-05:
* Top genes per cluster Dotplot miniViolin Heatmap only for EGFP.
Tue Dec 10, 2019:
* Violin plot for EGFP and TSC separately for the last set of genes; Combined violin plot for the same; Combined dot plot for top 5 genes by fold change in all clusters (EGFP: Green and TSC: Red).
Wed Dec 5, 2019:
* Removed three genes: Lsd-1, mdy, Fas1.
Wed Dec 4, 2019:
* Use these selected genes for cluster 0, 1 and 2. Cluster 0: Pli, LpR2, path, apolpp, FASN1, Hnf4, Lsd-1, mdy; Cluster 1: sls, Mlc2, Mlp60A, Mhc, Mf, Mp20; Cluster 2: FASN3, FASN2, LpR1, salm, ImpL2, Fas1.
Sun Dec 1, 2019:
* Dot plot and mini violin plots: 3 top expressed marker genes for dot plot; 2 top for mini-violin plot.
Tue Oct 29, 2019:
* Use SoupX fixed 0.45 to remove ambient RNA.
Mon Oct 7, 2019:
* Add more sequence depth.
Mon, Sep 30, 2019:
* Remove genes: EGFP, Tsc1, gig. Then perform integrate analysis of EGFP, TSC1.
Fri, Sep 20, 2019:
* First version for integrate analysis of EGFP, TSC1.
# Session Info
```{r sessioninfo, message=TRUE}
sessionInfo()
```
Computing file changes ...