Revision f1ad799015c901dad378f6e488dc38f4a19fd703 authored by Yifang Liu on 06 November 2020, 22:49:13 UTC, committed by GitHub on 06 November 2020, 22:49:13 UTC
1 parent 404c7ab
Raw File
3_2020-02-05_Top_genes_per_cluster_Dotplot_miniViolin_Heatmap.Rmd
---
title: "3 Top genes per cluster Dotplot miniViolin Heatmap"
author: "Yifang Liu"
date: "`r Sys.Date()`"
output:
  rmdformats::html_clean:
    code_folding: hide
    fig_width: 10
    fig_height: 10
    highlight: kate
    thumbnails: false
    lightbox: true
    gallery: true
---

```{r knitr_init, echo=FALSE, cache=FALSE}
library(knitr)
library(rmdformats)

options(max.print = 200)
opts_chunk$set(echo = TRUE,
               cache = FALSE,
               prompt = FALSE,
               tidy = TRUE,
               comment = NA,
               message = FALSE,
               warning = FALSE,
               dev = c('png', 'pdf'),
               fig.width = 10,
               fig.height = 10,
               fig.align = "center",
               fig.path = '3_PDF_2020-02-05_Top_genes_per_cluster_Dotplot_miniViolin_Heatmap/',
               dpi = 72)
opts_knit$set(width = 75)
```

```{r setup}
set.seed(123)

npc <- 20
# theta1 <- 2
# theta2 <- 5
# theta <- c(theta1, theta2)
resolution <- 0.1
selected_res <- paste0("RNA_snn_res.", resolution)
pt_size <- 1
# alpha <- 0.8

# Suppress loading messages
suppressPackageStartupMessages({
  library(Matrix)
  library(dplyr)
  library(tidyverse)
  library(Seurat)
  library(cowplot)
  library(Rcpp)
  library(harmony)
  library(SoupX)
})
```

```{r load_data}
EGFP <- readRDS("Data/2020-02-05_EGFP_seurat_obj.Rds")
Idents(EGFP) <- selected_res
```

# Top genes unbiased

```{r markers}
if (file.exists("Markers/2020-02-05_EGFP_markers.Rds")) {
    markers_list <- readRDS("Markers/2020-02-05_EGFP_markers.Rds")
} else {
  dir.create("Markers")
  table_df <- table(EGFP@meta.data[ , selected_res], EGFP@meta.data$LibraryID) %>% as.data.frame() %>% spread(key = Var2, value = Freq)
  colnames(table_df)[1] <- "cluster"
  # str(table_df)
  markers_list <- list()
  for(cluster_id in table_df$cluster){
      markers <- FindMarkers(EGFP, ident.1 = cluster_id)
      # markers$gene <- row.names(markers)
      write.csv(markers, file = paste0("Markers/2020-02-05_EGFP_markers_cluster", cluster_id, ".csv"))
      markers_list[[paste0("cluster", cluster_id)]] <- markers
  }
  saveRDS(markers_list,
          file = "Markers/2020-02-05_EGFP_markers.Rds")
}
```

# Dotplot Top5 per cluster

```{r Dotplot_top5}
n <- 5
gene_list <- character(length = 0)
for(cluster_n in names(markers_list)){
  df <- markers_list[[`cluster_n`]]
  df <- subset(df, df$avg_logFC > 0)
  df <- df[order(-df$avg_logFC), ]
  df <- df[!(row.names(df) %in% c("EGFP")), ]
  count <- 0
  gene_names <- row.names(df)
  for (i in 1:length(gene_names)) {
    if (gene_names[i] %in% gene_list) {
      # print(cluster_n)
      # print(i)
      # print(gene_names[i])
    } else {
      gene_list <- c(gene_names[i], gene_list)
      count <- count + 1
    }
    if (count == n) {
      break
    }
  }
}
DotPlot(EGFP, features = gene_list, dot.scale = 5) + RotatedAxis() + ylab("Cluster") + theme(axis.text.x = element_text(size = 8))
```

# miniViolin Top3 per cluster

```{r miniViolin_top3}
n <- 3
gene_list <- character(length = 0)
for(cluster_n in names(markers_list)){
  df <- markers_list[[`cluster_n`]]
  df <- subset(df, df$avg_logFC > 0)
  df <- df[order(-df$avg_logFC), ]
  df <- df[!(row.names(df) %in% c("EGFP")), ]
  count <- 0
  gene_names <- row.names(df)
  for (i in 1:length(gene_names)) {
    if (gene_names[i] %in% gene_list) {
      # print(cluster_n)
      # print(i)
      # print(gene_names[i])
    } else {
      gene_list <- c(gene_names[i], gene_list)
      count <- count + 1
    }
    if (count == n) {
      break
    }
  }
}

# gene_list

for (gene in gene_list) {
  plot <- VlnPlot(EGFP, features = gene, pt.size = 0, combine = FALSE)
  p <- plot[[1]] +
    theme(legend.position = 'none') +
    theme(axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank()) +
    ylab(gene) +
    theme(plot.title = element_blank()) +
    theme(axis.text.y = element_text(size = 2)) +
    theme(text = element_text(size = 8))
  assign(gene, p)
}

x_label <- plot[[1]] +
  theme(legend.position = 'none') +
  ylab("") +
  theme(plot.title = element_blank()) +
  theme(axis.text.y = element_text(size = 2)) +
  theme(text = element_text(size = 8)) +
  theme(axis.text.x = element_text(size = 20))

plot_grid(EbpII, Ebp, Sfp60F, Pif2, whip, soti, 
CG33970, EbpIII, Obp56d, CG13618, CG12268, Elo68alpha,
CG4928, Mur18B, CG14292, lcs, CG34212, CG16826, 
yip7, CG5107, alphaTry, Cpr49Ab, CG14661, Cpr65Au, 
mtgo, CG2765, FASN3, CG13868, LpR2, CG10960, x_label, ncol = 1)
```

# Heatmap Top10 per cluster

```{r Heatmap_top10}
n <- 10
gene_list <- character(length = 0)
for(cluster_n in names(markers_list)){
  df <- markers_list[[`cluster_n`]]
  df <- subset(df, df$avg_logFC > 0)
  df <- df[order(-df$avg_logFC), ]
  df <- df[!(row.names(df) %in% c("EGFP")), ]
  count <- 0
  gene_names <- row.names(df)
  for (i in 1:length(gene_names)) {
    if (gene_names[i] %in% gene_list) {
      # print(cluster_n)
      # print(i)
      # print(gene_names[i])
    } else {
      gene_list <- c(gene_names[i], gene_list)
      count <- count + 1
    }
    if (count == n) {
      break
    }
  }
}
gene_list <- rev(gene_list)
DoHeatmap(EGFP, features = gene_list, raster = FALSE)
```

# Hand picked genes from Arpan

```{r gene_list}
gene_list <- c(
  "sls", "Mlc2", "Mlp60A", "Mhc", "Mf", "Mp20",
  "FASN3", "FASN2", "LpR1", "salm", "ImpL2",
  "Pli", "LpR2", "path", "apolpp", "FASN1", "Hnf4"
)
```

# Dotplot

```{r Dotplot_picked}
DotPlot(EGFP, features = gene_list, dot.scale = 5) + RotatedAxis() + ylab("Cluster") + theme(axis.text.x = element_text(size = 8))
```

# miniViolin

```{r miniViolin_picked}
for (gene in gene_list) {
  plot <- VlnPlot(EGFP, features = gene, pt.size = 0, combine = FALSE)
  p <- plot[[1]] +
    theme(legend.position = 'none') +
    theme(axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank()) +
    ylab(gene) +
    theme(plot.title = element_blank()) +
    theme(axis.text.y = element_text(size = 3)) +
    theme(text = element_text(size = 8))
  assign(gene, p)
}

plot <- VlnPlot(EGFP, features = "Lsd-1", pt.size = 0, combine = FALSE)
Lsd1 <- plot[[1]] +
    theme(legend.position = 'none') +
    theme(axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank()) +
    ylab("Lsd-1") +
    theme(plot.title = element_blank()) +
    theme(axis.text.y = element_text(size = 3)) +
    theme(text = element_text(size = 8))
  
x_label <- plot[[1]] +
  theme(legend.position = 'none') +
  ylab("") +
  theme(plot.title = element_blank()) +
  theme(axis.text.y = element_text(size = 3)) +
  theme(text = element_text(size = 8)) +
  theme(axis.text.x = element_text(size = 20))

plot_grid(sls, Mlc2, Mlp60A, Mhc, Mf, Mp20, 
          FASN3, FASN2, LpR1, salm, ImpL2, 
          Pli, LpR2, path, apolpp, FASN1, Hnf4, 
          x_label, ncol = 1)
```

# Heatmap

```{r Heatmap_picked}
DoHeatmap(EGFP, features = rev(gene_list), raster = FALSE)
```

# Notes

2020-02-05:

  * Top genes per cluster Dotplot miniViolin Heatmap only for EGFP.

Tue Dec 10, 2019:

  * Violin plot for EGFP and TSC separately for the last set of genes; Combined violin plot for the same; Combined dot plot for top 5 genes by fold change in all clusters (EGFP: Green and TSC: Red).

Wed Dec 5, 2019:

  * Removed three genes: Lsd-1, mdy, Fas1.

Wed Dec 4, 2019:

  * Use these selected genes for cluster 0, 1 and 2. Cluster 0: Pli, LpR2, path, apolpp, FASN1, Hnf4, Lsd-1, mdy; Cluster 1: sls, Mlc2, Mlp60A, Mhc, Mf, Mp20; Cluster 2: FASN3, FASN2, LpR1, salm, ImpL2, Fas1.

Sun Dec 1, 2019:

  * Dot plot and mini violin plots: 3 top expressed marker genes for dot plot; 2 top for mini-violin plot.

Tue Oct 29, 2019:

  * Use SoupX fixed 0.45 to remove ambient RNA.

Mon Oct 7, 2019:

  * Add more sequence depth.

Mon, Sep 30, 2019:

  * Remove genes: EGFP, Tsc1, gig. Then perform integrate analysis of EGFP, TSC1.

Fri, Sep 20, 2019:

  * First version for integrate analysis of EGFP, TSC1.

# Session Info

```{r sessioninfo, message=TRUE}
sessionInfo()
```

back to top