---
title: "Adult scRNAseq datasets analysis"
subtitle: "Scanpy PAGA"
author: "Renaud Mevel"
output:
  html_document:
    self_contained: yes
    toc: true
    toc_float: true
    df_print: paged
    number_sections: false
editor_options: 
  chunk_output_type: console
---
```{r setup, echo=FALSE, message=FALSE, results='hide'}
library(knitr)
# Global chunk options: cache chunk results (lazy-loaded) and do not
# continue past a chunk that raises an error.
knitr::opts_chunk$set(
  cache = TRUE,
  cache.lazy = TRUE,
  error = FALSE
)
```

## Objective

Import data generated in Scanpy/PAGA and plot in R.
  
Directory paths are placeholders and need to be adapted throughout the script.
  
## Prepare the environment

```{r , warning=FALSE, message=FALSE}
# Data wrangling
library(plyr)
library(dplyr)
library(tidyverse)
library(data.table)

# Plots
library(gridExtra)
library(ggpubr)
library(viridis) 

# sc
library(Seurat)
library(sctransform)
library(MAST)
library(org.Mm.eg.db)
library(DoubletFinder)

# GO
library(gprofiler2)

# Palettes
library(pals)
# 25-colour qualitative palette from the pals package
pal25 <- as.character(pals::cols25(n = 25))

# Two- and three-colour palettes
pal.trt  <- c("#a1e186", "#b9006e")
pal.rfp  <- c("#ea4749", "#479bea")
pal.runs <- c("#ec0016", "#ffc554", "#20a4ff")
pal.lobe <- c("#272873", "#45A5A7", "#cb5155")

# Population palette, two colours per group (order matters: colours are
# matched to factor levels by position)
pal.pop <- c(
  "#7a0177", "#dd3497",  # cas AP
  "#f768a1", "#fa9fb5",  # cas DLP
  "#fc9272", "#cb181d",  # cas
  "#CC6677", "#AA4466",  # cas VP
  "#081d58", "#225ea8",  # hn AP
  "#7fcdbb", "#7fb8cd",  # hn DLP
  "#67a9cf", "#b2d3e7"   # hn VP
)

# Cluster palette (unnamed on purpose, so manual scales match by position)
pal.cl <- c(
  "#B2B8E0",  # LumA
  "#4A6FE3",  # LumB
  "#1037AA",  # LumC
  "#D33F6A",  # LumD
  "#EF9708",  # LumE
  "#F0B98D",  # LumF
  "#8DD593"   # Basal
)


# Directories
# NOTE(review): all three paths are placeholders ("~/set-directory/") that must
# be edited before running. The working directory is changed here, and the
# relative paths used below ("Adult_functions.R", and "r_save/..." in a later
# chunk) are resolved against it when the chunks are run interactively.
# NOTE(review): knitr normally restores the working directory at the end of
# each chunk, so when knitting the later relative paths may not resolve against
# this directory -- confirm.
setwd(dir = "~/set-directory/")
# Output locations; fig.dir is used by the ggsave() calls further down,
# pdf.dir is not referenced in the visible part of the file.
pdf.dir <- "~/set-directory/"
fig.dir <- "~/set-directory/"

# Functions
# Loads project helpers from the working directory set above; its contents are
# not visible here.
source("Adult_functions.R")

# Seed
# Fix the RNG state so any stochastic step is reproducible.
set.seed(1)
```

## Functions 1

```{r}
#' Plot the PAGA cluster-level graph.
#'
#' Nodes are clusters (sized by `Size`, labelled by `in_silico_clusters`),
#' edges are drawn between clusters whose `Connectivity` exceeds `thresh`.
#'
#' @param embedding Data frame with one row per cluster. Must contain `X`, `Y`,
#'   `Size`, `in_silico_clusters` and the column named by `colour`.
#' @param edges Data frame with one row per edge. Must contain `FromX`,
#'   `FromY`, `ToX`, `ToY` and `Connectivity`.
#' @param thresh Only edges with `Connectivity > thresh` are drawn.
#' @param colour Name (string) of the `embedding` column mapped to node fill.
#'   A factor column uses the `pal.cl` manual palette and coloured edges; any
#'   other column uses a continuous viridis fill and grey edges.
#' @return A ggplot object (legend hidden).
plotPAGAClustGraph <- function(embedding, edges, thresh = 0, colour = "in_silico_clusters") {

    is_discrete <- is.factor(embedding[[colour]])

    # Filter once so the per-edge width vector below has exactly one value per
    # drawn edge (using the unfiltered `edges` here would mismatch the layer
    # data as soon as `thresh` removes an edge).
    kept_edges <- dplyr::filter(edges, Connectivity > thresh)

    gg <- ggplot(embedding, aes(x = X, y = Y))

    if (is_discrete) {
        gg <- gg +
            geom_segment(data = kept_edges,
                         aes(x = FromX, y = FromY, xend = ToX, yend = ToY,
                             colour = Connectivity),
                         size = kept_edges$Connectivity * 10) +
            scale_colour_viridis_c(direction = 1, begin = 0.2, end = 0.6, option = "inferno") +
            scale_fill_manual(values = pal.cl)
    } else {
        gg <- gg +
            geom_segment(data = kept_edges,
                         aes(x = FromX, y = FromY, xend = ToX, yend = ToY,
                             alpha = Connectivity),
                         colour = "grey30") +
            scale_alpha(limits = c(0, 1)) +
            scale_fill_viridis_c()
    }

    # `.data[[colour]]` looks the column up by the *value* of `colour`, so the
    # function also works when the name is passed through a variable.
    gg +
        geom_point(aes(fill = .data[[colour]], size = Size), shape = 21) +
        geom_text(aes(label = in_silico_clusters)) +
        scale_size(range = c(5, 20)) +
        theme_void() +
        theme(legend.position = "none")
}


###  -----------------------------------------

#' Plot the PAGA cell-level graph.
#'
#' Cells are drawn as points at (`X`, `Y`) coloured by `colour`; edges with
#' `Connectivity > thresh` are drawn as thin grey segments underneath.
#'
#' @param embedding Data frame with one row per cell. Must contain `X`, `Y`,
#'   `in_silico_clusters` and the column named by `colour`.
#' @param edges Data frame with one row per edge. Must contain `FromX`,
#'   `FromY`, `ToX`, `ToY` and `Connectivity`.
#' @param thresh Only edges with `Connectivity > thresh` are drawn.
#' @param colour Name (string) of the `embedding` column mapped to point
#'   colour. A factor column uses the `pal.cl` manual palette, any other
#'   column a continuous viridis scale.
#' @param label If `TRUE`, write each cluster's name at the mean position of
#'   its cells.
#' @return A ggplot object (legend hidden).
plotPAGACellGraph <- function(embedding, edges, thresh = 0, colour = "in_silico_clusters", label = FALSE) {

    is_discrete <- is.factor(embedding[[colour]])

    # Points are coloured by the requested column (looked up by value via
    # `.data[[colour]]`), not by a hard-coded cluster column.
    gg <- ggplot(embedding, aes(x = X, y = Y)) +
        geom_segment(data = dplyr::filter(edges, Connectivity > thresh),
                     aes(x = FromX, y = FromY, xend = ToX, yend = ToY),
                     size = 0.1, colour = "grey50") +
        geom_point(aes(colour = .data[[colour]]), size = 0.5) +
        theme_void() +
        theme(legend.position = "none")

    # Exactly one colour scale, chosen to match the type of the mapped column.
    if (is_discrete) {
        gg <- gg + scale_colour_manual(values = pal.cl)
    } else {
        gg <- gg + scale_colour_viridis_c()
    }

    if (label) {
        # Cluster label positions: centroid of the member cells.
        clust_data <- embedding %>%
            group_by(in_silico_clusters) %>%
            summarise(X = mean(X),
                      Y = mean(Y))

        gg <- gg +
            geom_text(data = clust_data, aes(label = in_silico_clusters),
                      colour = "black")
    }

    gg
}

###  -----------------------------------------

#' Show the cluster-level and cell-level PAGA graphs side by side.
#'
#' @param clust_embedding,clust_edges,clust_thresh Passed to
#'   `plotPAGAClustGraph()` as `embedding`, `edges` and `thresh`.
#' @param cell_embedding,cell_edges,cell_thresh Passed to
#'   `plotPAGACellGraph()` as `embedding`, `edges` and `thresh`.
#' @param colour Name (string) of the column used for colouring in both
#'   panels. `NA` (the default) or `NULL` means "in_silico_clusters".
#' @param label Passed to `plotPAGACellGraph()`.
#' @return The combined plot produced by `cowplot::plot_grid()`.
plotPAGACompare <- function(clust_embedding, clust_edges, clust_thresh = 0,
                            cell_embedding, cell_edges, cell_thresh = 0,
                            colour = NA, label = FALSE) {

    # NA is not a usable column name; fall back to the cluster column, which
    # is the default of both plotting helpers.
    if (is.null(colour) || (length(colour) == 1 && is.na(colour))) {
        colour <- "in_silico_clusters"
    }

    # do.call() evaluates `colour` before building the call, so the helpers
    # receive the column name as a literal string even if they capture the
    # argument expression rather than its value.
    clusts <- do.call(plotPAGAClustGraph,
                      list(clust_embedding, clust_edges, clust_thresh, colour))

    cells <- do.call(plotPAGACellGraph,
                     list(cell_embedding, cell_edges, cell_thresh, colour, label))

    cowplot::plot_grid(clusts, cells, nrow = 1)
}
```


## Functions 2 

Functions to plot gene expression in PAGA (cluster graph) space and in ForceAtlas (FA) space.
```{r}

##### In PAGA space ---------------------------------------------------------
#' Plot mean expression of one gene on the PAGA cluster graph.
#'
#' @param scdata Object accepted by `logcounts()`; rows are genes, columns are
#'   cells in the same order as the rows of `sc.embedding`.
#' @param cl.embedding Data frame with one row per cluster (`X`, `Y`, `Size`,
#'   `in_silico_clusters`). An existing column named `gene` is overwritten.
#' @param cl.edges Data frame of cluster edges (`From`, `To`, `FromX`, `FromY`,
#'   `ToX`, `ToY`, `Connectivity`).
#' @param sc.embedding Data frame with one row per cell; only its
#'   `in_silico_clusters` column is used.
#' @param gene Gene name (single string, a row name of `scdata`).
#' @param legend Show the fill legend?
#' @param thresh Edges with `Connectivity < thresh` are not drawn.
#' @param show.clusters Write cluster names on the nodes?
#' @param rm.cl Cluster name(s) to remove from the graph, or `NA` for none.
#' @return A ggplot object.
plotPAGAgene <- function(scdata, cl.embedding, cl.edges, sc.embedding, gene = "Runx1", legend=TRUE, thresh=0, show.clusters=FALSE, rm.cl = NA) {

  stopifnot(is.character(gene), length(gene) == 1)
  if (!gene %in% rownames(scdata)) {
    stop("Gene '", gene, "' not found in `scdata`.", call. = FALSE)
  }

  # remove cluster(s) if asked; %in% makes this work for several clusters
  if (!all(is.na(rm.cl))) {
    cl.embedding <- dplyr::filter(cl.embedding, !(in_silico_clusters %in% rm.cl))
    cl.edges <- dplyr::filter(cl.edges, !(From %in% rm.cl), !(To %in% rm.cl))
  }

  # Mean log-expression per cluster, computed for this gene only so that
  # other columns of the embeddings cannot interfere.
  cluster.means <- tapply(as.numeric(logcounts(scdata)[gene, ]),
                          sc.embedding$in_silico_clusters, mean)

  # Clusters with zero mean expression are shown in the NA colour.
  cluster.means[!is.na(cluster.means) & cluster.means == 0] <- NA

  # Upper limit of the colour scale; NA lets ggplot2 derive it from the data.
  max.scale <- if (all(is.na(cluster.means))) NA_real_ else max(cluster.means, na.rm = TRUE)

  # Attach the means by cluster name. Assigning the column (rather than
  # joining) replaces a gene column that is already present in cl.embedding
  # instead of producing suffixed duplicates.
  paga_embedding <- cl.embedding
  paga_embedding[[gene]] <- unname(cluster.means[as.character(paga_embedding$in_silico_clusters)])

  # Keep only edges at or above the threshold so the width vector below
  # matches the drawn rows.
  cl.edges <- dplyr::filter(cl.edges, Connectivity >= thresh)

  g <- ggplot(paga_embedding, aes(x = X, y = Y)) +
    geom_segment(data = cl.edges, aes(x = FromX, y = FromY, xend = ToX, yend = ToY),
                 size = cl.edges$Connectivity * 10, alpha = 0.5) +
    # .data[[gene]] looks the column up by value, so `gene` may be a variable
    geom_point(aes(fill = .data[[gene]], size = Size), shape = 21, colour = "grey30") +
    scale_fill_viridis(option = "plasma", na.value = "grey80", begin = 0, end = 0.9, limits = c(0, max.scale)) +
    scale_size(range = c(6, 12), guide = "none") +
    labs(fill = gene) +
    theme_void()

  if (!legend) { g <- g + theme(legend.position = "none") }
  if (show.clusters) { g <- g + geom_text(aes(label = in_silico_clusters)) }
  g
}

##### In ForceAtlas space ---------------------------------------------------------

#' Plot per-cell expression of one gene on the cell embedding.
#'
#' @param scdata Object accepted by `logcounts()`; rows are genes, columns are
#'   cells in the same order as the rows of `cell.embedding`.
#' @param cell.embedding Data frame with one row per cell (`X`, `Y`). A column
#'   named `gene` is added or overwritten.
#' @param cell.edges Data frame of cell edges (`FromX`, `FromY`, `ToX`, `ToY`,
#'   `Connectivity`).
#' @param gene Gene name (single string, a row name of `scdata`).
#' @param legend Show the colour legend?
#' @param pt.size Point size.
#' @param show.connections Draw the edges under the points?
#' @param thresh Edges with `Connectivity < thresh` are not drawn.
#' @return A ggplot object.
plotFAgene <- function(scdata, cell.embedding, cell.edges, gene, legend=TRUE, pt.size=0.8, show.connections=TRUE, thresh=0) {

  stopifnot(is.character(gene), length(gene) == 1)
  if (!gene %in% rownames(scdata)) {
    stop("Gene '", gene, "' not found in `scdata`.", call. = FALSE)
  }

  expr <- as.numeric(logcounts(scdata)[gene, ])

  # Replace 0 with NA so non-expressing cells get the NA colour. This is done
  # on the gene's own values, not on a fixed column position, because the
  # embedding may carry additional columns.
  expr[!is.na(expr) & expr == 0] <- NA
  cell.embedding[[gene]] <- expr

  # Colour limits span the observed non-zero expression; NULL (ggplot2's
  # default) when no cell expresses the gene.
  expr.limits <- if (all(is.na(expr))) NULL else range(expr, na.rm = TRUE)

  g <- ggplot(cell.embedding, aes(x = X, y = Y))

  if (show.connections) {
    kept.edges <- dplyr::filter(cell.edges, Connectivity >= thresh)
    g <- g +
      geom_segment(data = kept.edges,
                   aes(x = FromX, y = FromY, xend = ToX, yend = ToY,
                       size = Connectivity, alpha = Connectivity),
                   colour = "grey50") +
      scale_alpha_continuous(limits = c(0, 1), range = c(0.2, 1), guide = "none") +
      scale_size(range = c(0.1, 0.5), guide = "none")
  }

  g <- g +
    # .data[[gene]] looks the column up by value, so `gene` may be a variable
    geom_point(aes(colour = .data[[gene]]), size = pt.size) +
    scale_colour_viridis_c(option = "plasma", na.value = "grey80", end = 1, limits = expr.limits) +
    labs(colour = gene) +
    theme_void()

  if (!legend) { g <- g + theme(legend.position = "none") }

  g
}
```

## Load data

```{r}
# Read the saved object (path is relative to the working directory) and
# convert its RNA assay to a SingleCellExperiment for logcounts()/colData().
seu <- readRDS("r_save/sce.sc.umap.rds")
sce <- as.SingleCellExperiment(x = seu, assay = "RNA")
```

## Load PAGA

```{r}
# Load the four PAGA exports (cluster/cell embeddings and edges) and reshape
# them into the tables the plotting functions expect.
# NOTE(review): all four paths are absolute and hard-coded; adapt them to the
# local layout.

# cluster embeddings--------------------------------------------------------------------------
# One row per cluster, all columns read as numeric.
# NOTE(review): `Size` and the new `in_silico_clusters` labels are taken from
# table(colData(sce)$in_silico_clusters) purely by position, i.e. this assumes
# the CSV rows are in the same order as that table -- confirm.
clust_embedding <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cluster_embedding.csv",
                            col_types = cols(.default = col_double()))  %>%
  mutate(Size = as.numeric(table(colData(sce)$in_silico_clusters))) %>%
  # the CSV's own cluster id is kept temporarily as `Cluster` (join key below)
  dplyr::rename(Cluster = in_silico_clusters) %>% 
  mutate(Cluster = factor(Cluster)) %>% 
  mutate(in_silico_clusters = factor(names(table(colData(sce)$in_silico_clusters)), levels = levels(colData(sce)$in_silico_clusters)))

# cluster edges--------------------------------------------------------------------------------
# One row per edge. `From`/`To` are matched against `Cluster` to attach the
# coordinates and cluster labels of both end points.
clust_edges <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cluster_edges.csv",
                        col_types = cols(.default = col_double()))  %>%
  mutate(To = factor(To, levels = levels(clust_embedding$Cluster)),
         From = factor(From, levels = levels(clust_embedding$Cluster))) %>%
    # first join: start point of the edge
    left_join(clust_embedding, by = c("From" = "Cluster")) %>%
    dplyr::rename(FromX = X, 
                  FromY = Y,
                  From_in_silico_clusters = in_silico_clusters) %>%  
    dplyr::select(-Size) %>%
    # second join: end point of the edge
    left_join(clust_embedding, by = c("To" = "Cluster")) %>%
    dplyr::rename(ToX = X, 
                  ToY = Y, 
                  To_in_silico_clusters = in_silico_clusters) %>%
    dplyr::select(-Size)

# remove 'Cluster'----------------------------------------------------------------------------
# From here on clusters are identified by their `in_silico_clusters` label
# only: drop the temporary key and overwrite From/To with the labels.
clust_embedding <- dplyr::select(clust_embedding, -Cluster)
clust_edges <- clust_edges %>% 
  dplyr::mutate(From = From_in_silico_clusters,
                To = To_in_silico_clusters) %>% 
  dplyr::select(-From_in_silico_clusters, -To_in_silico_clusters)

# cell embeddings----------------------------------------------------------------------------
# One row per cell; `Cell` is read as character, everything else as numeric.
# NOTE(review): cluster labels are attached by position, which assumes the CSV
# rows are in the same order as the columns of `sce` -- confirm.
cell_embedding <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cell_embedding.csv",
                           col_types = cols(.default = col_double(), Cell = col_character())
                           ) %>%
  mutate(in_silico_clusters = colData(sce)$in_silico_clusters)

# cell edges ---------------------------------------------------------------------------------
# One row per cell-cell edge; coordinates of both end points are attached by
# joining on the cell identifier, and the cluster label brought in by each
# join is dropped again.
cell_edges <- read_csv("/sc_adult_dataset/paga_all_epithelial/output_umap/paga/cell_edges.csv",
                        col_types = cols(.default = col_double(), From = col_character(), To = col_character())
                       )  %>%
  dplyr::left_join(cell_embedding, by = c("From" = "Cell")) %>%
  dplyr::rename(FromX = X, FromY = Y) %>%
  dplyr::select(-in_silico_clusters) %>%
  dplyr::left_join(cell_embedding, by = c("To" = "Cell")) %>%
  dplyr::rename(ToX = X, ToY = Y) %>%
  dplyr::select(-in_silico_clusters)
```

## Visualise PAGA


### Cluster graph

```{r}
# Cluster-level graph with every edge of positive connectivity
plotPAGAClustGraph(
    embedding = clust_embedding,
    edges = clust_edges,
    thresh = 0
)
```


### Edges threshold

Number of edges retained at different connectivity thresholds.
```{r}
# For each candidate threshold between 0 and 1 (step 0.01), count the cluster
# edges whose connectivity is strictly above it.
plot_data <- tibble(Threshold = seq(0, 1, 0.01)) %>%
    mutate(Edges = map_int(Threshold, ~ sum(clust_edges$Connectivity > .x)))

# Threshold marked on the plot
con_thresh <- 0.7

ggplot(plot_data, aes(x = Threshold, y = Edges)) +
    geom_point() +
    geom_line() +
    geom_vline(xintercept = con_thresh, colour = "red") +
    labs(x = "Connectivity threshold", y = "Number of edges") +
    theme_minimal()
```

### Cell graph

```{r}
# Cell-level graph, keeping only edges with connectivity above 0.03
plotPAGACellGraph(
    embedding = cell_embedding,
    edges = cell_edges,
    thresh = 0.03
)
```

### Compare

```{r}
# Cluster graph and cell graph side by side, coloured by cluster.
# `colour` is given explicitly: the function's default (NA) is not a usable
# column name.
plotPAGACompare(clust_embedding, clust_edges, clust_thresh = 0,
                cell_embedding, cell_edges, cell_thresh = 0,
                colour = "in_silico_clusters")
```

### Genes

```{r}
known_genes <- c(
    # Luminal
    "Spink1", "Msmb", "Pbsn", "Psca",
    # Basal
    "Krt5", "Krt14", "Trp63", "Apoe",
    # Of interest
    "Runx1", "Nkx3-1", "Tacstd2",
    # Regressed
    "Lpl", "Basp1", "Car2", "Crym"
)

# Fail early with a readable message instead of a subscript error.
missing_genes <- setdiff(known_genes, rownames(sce))
if (length(missing_genes) > 0) {
    stop("Genes not found in `sce`: ", paste(missing_genes, collapse = ", "),
         call. = FALSE)
}

# Add one log-expression column per gene to the cell table.
for (gene in known_genes) {
    cell_embedding[[gene]] <- logcounts(sce)[gene, ]
}

# Mean of every remaining column per cluster.
clust_genes <- cell_embedding %>%
    dplyr::select(-Cell, -X, -Y) %>%
    group_by(in_silico_clusters) %>%
    summarise_all(mean)

# Drop gene columns left over from a previous run of this chunk before
# joining, so re-running it does not create suffixed (.x/.y) duplicates.
gene_cols <- setdiff(names(clust_genes), "in_silico_clusters")
clust_embedding <- clust_embedding[, !(names(clust_embedding) %in% gene_cols), drop = FALSE]

clust_embedding <- left_join(clust_embedding, clust_genes, by = "in_silico_clusters")
```

```{r}
# Side-by-side cluster and cell graphs, coloured by the Runx1 column
plotPAGACompare(
    clust_embedding = clust_embedding,
    clust_edges = clust_edges,
    clust_thresh = 0,
    cell_embedding = cell_embedding,
    cell_edges = cell_edges,
    cell_thresh = 0,
    colour = "Runx1"
)
```


### Gene expression

Sandbox (exploratory plots).

In PAGA space:
```{r}
# Exploratory plots: mean expression of individual genes on the PAGA cluster
# graph, one plot per call. The gene is always given as a literal string.
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Nkx3-1")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Runx1")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Tacstd2")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Mki67")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Psca")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt4")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Nupr1")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Meis2")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Shh")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Ar")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Foxa1")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Etv1")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Etv4")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Gata2")

plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Trp63")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Apoe")

# Keratin panel: two groups of four plots combined with `/` and `|`.
# NOTE(review): `/` and `|` on ggplot objects are presumably the patchwork
# composition operators (stack / side by side); patchwork is not attached by
# the visible library() calls -- confirm it is loaded elsewhere.
(
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt5") /
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt14") /
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt7") /
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt19") 
) | (
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt8")/
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt18")/
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt17")/
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt4")
)

plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Gsto1")

plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Cd24a")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Itga6")

plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Dpp4")
plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Krt20")

plotPAGAgene(sce, clust_embedding, clust_edges, cell_embedding, gene = "Sox2")
```

In ForceAtlas
```{r}
# Per-cell expression on the cell embedding, edges shown, no edge threshold.
# Runx1 and Nkx3-1 are drawn without a legend, Shh with one.
plotFAgene(sce, cell_embedding, cell_edges,
           gene = "Runx1",
           legend = FALSE, pt.size = 1, show.connections = TRUE, thresh = 0)
plotFAgene(sce, cell_embedding, cell_edges,
           gene = "Nkx3-1",
           legend = FALSE, pt.size = 1, show.connections = TRUE, thresh = 0)
plotFAgene(sce, cell_embedding, cell_edges,
           gene = "Shh",
           pt.size = 1, show.connections = TRUE, thresh = 0)
```

### Figures

Coloured connections
```{r}
# Cluster graph with edges coloured (and slightly faded) by connectivity and
# nodes filled by cluster. Built once and shown with and without legend.
# `guide = "none"` replaces the deprecated `guide = FALSE`.
paga_clusters_coloured <- ggplot(clust_embedding, aes(x = X, y = Y)) +
  geom_segment(data = clust_edges,
               aes(x = FromX, y = FromY, xend = ToX, yend = ToY,
                   colour = Connectivity, alpha = Connectivity),
               size = clust_edges$Connectivity * 8) +
  scale_colour_viridis_c(direction = 1, begin = 0.2, end = 0.6, option = "inferno") +
  scale_alpha_continuous(limits = c(0, 1), range = c(0.7, 0.9), guide = "none") +
  geom_point(aes(fill = in_silico_clusters, size = Size), shape = 21, colour = "white") +
  scale_fill_manual(values = pal.cl) +
  scale_size(range = c(5, 10), guide = "none") +
  theme_void()

# With legend
paga_clusters_coloured

#ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters_c.png"), device = "png", width = 7, height = 4, dpi = 300)


# Without legend
paga_clusters_coloured + theme(legend.position = "none")

#ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters_c_nolegend.png"), device = "png", width = 6, height = 4, dpi = 300)


```

Black connections
```{r}
# Cluster graph with black edges whose transparency follows connectivity.
# `guide = "none"` replaces the deprecated `guide = FALSE`.
ggplot(clust_embedding, aes(x = X, y = Y)) +
  geom_segment(data = clust_edges,
               aes(x = FromX, y = FromY, xend = ToX, yend = ToY, alpha = Connectivity),
               size = clust_edges$Connectivity * 8) +
  scale_alpha_continuous(limits = c(0, 1), range = c(0.7, 0.9), guide = "none") +
  geom_point(aes(fill = in_silico_clusters, size = Size), shape = 21, colour = "white") +
  scale_fill_manual(values = pal.cl) +
  scale_size(range = c(5, 10), guide = "none") +
  theme_void()

#ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters.png"), device = "png", width = 7, height = 4, dpi = 300)


# Legend-free version with uniformly semi-transparent edges. The constant
# alpha is set outside aes(): inside aes() it would be treated as data and
# rescaled by an alpha scale instead of being used as 0.5.
paga_clusters_nolegend <- ggplot(clust_embedding, aes(x = X, y = Y)) +
  geom_segment(data = clust_edges,
               aes(x = FromX, y = FromY, xend = ToX, yend = ToY),
               alpha = 0.5, size = clust_edges$Connectivity * 10) +
  geom_point(aes(fill = in_silico_clusters, size = Size), shape = 21, colour = "white") +
  scale_fill_manual(values = pal.cl) +
  scale_size(range = c(6, 12), guide = "none") +
  theme_void() + theme(legend.position = "none")  #+ scale_y_reverse() + coord_flip() 

paga_clusters_nolegend

# Save this specific plot rather than whatever was displayed last.
ggsave(filename = file.path(fig.dir, "4_paga/PAGA_clusters_nolegend.png"), plot = paga_clusters_nolegend,
       device = "png", width = 5, height = 5, dpi = 300)


```

```{r}
# Cell graph: edges in grey with width and transparency following
# connectivity, cells coloured by cluster. Built once and shown with and
# without legend. `guide = "none"` replaces the deprecated `guide = FALSE`.
paga_cells <- ggplot(cell_embedding, aes(x = X, y = Y)) +
    geom_segment(data = cell_edges,
                 aes(x = FromX, y = FromY, xend = ToX, yend = ToY,
                     size = Connectivity, alpha = Connectivity),
                 colour = "grey50") +
    geom_point(aes(colour = in_silico_clusters), size = 0.5) +
    scale_colour_manual(values = pal.cl) +
    scale_alpha_continuous(limits = c(0, 1), range = c(0.05, 0.5), guide = "none") +
    scale_size(range = c(0.1, 0.5), guide = "none") +
    theme_void()

# With legend
paga_cells

#ggsave(filename = file.path(fig.dir, "4_paga/PAGA_cells.png"), device = "png", width = 7, height = 4, dpi = 300)

# Without legend
paga_cells_nolegend <- paga_cells + theme(legend.position = "none")
paga_cells_nolegend

# Save this specific plot rather than whatever was displayed last.
ggsave(filename = file.path(fig.dir, "4_paga/PAGA_cells_nolegend.png"), plot = paga_cells_nolegend,
       device = "png", width = 5, height = 5, dpi = 300)
```















--------------------------------------------------------------------------------
## Session Information
```{r}
# Record the R version and the attached/loaded package versions used for
# this report.
sessionInfo()
```
back to top