https://github.com/Terkild/CITE-seq_optimization
Revision 9a6313fbd3b87c90dff8dd0a1da13ac749c4b141 authored by Terkild on 12 June 2020, 12:28:01 UTC, committed by Terkild on 12 June 2020, 12:28:01 UTC
1 parent 80a60b8
Raw File
Tip revision: 9a6313fbd3b87c90dff8dd0a1da13ac749c4b141 authored by Terkild on 12 June 2020, 12:28:01 UTC
Added FigShare reposititory to README.
Tip revision: 9a6313f
Cell-number-titration.md
CITE-seq optimization - Reducing cell number at staining
================
Terkild Brink Buus
30/3/2020

## Load utilities

Including libraries, plotting and color settings and custom utility
functions

``` r
set.seed(114)
require("Seurat", quietly=T)
require("tidyverse", quietly=T)
library("Matrix", quietly=T)
library("patchwork", quietly=T)

## Load ggplot theme and defaults
source("R/ggplot_settings.R")

## Load helper functions
source("R/Utilities.R")

## Load predefined color schemes
source("R/color.R")

## Load feature_rankplot functions
source("R/feature_rankplot.R")
source("R/feature_rankplot_hist.R")
source("R/feature_rankplot_hist_custom.R")

outdir <- "figures"
data.Seurat <- "data/5P-CITE-seq_Titration.rds"
data.abpanel <- "data/Supplementary_Table_1.xlsx"
data.markerStats <- "data/markerByClusterStats.tsv"

## Make a custom function for formatting the concentration scale
scaleFUNformat <- function(x) sprintf("%.2f", x)
```

## Load Seurat object

Subset to only focus on conditions with 1 mio cells and dilution factor
4 (thus comparing 50µl to 25µl staining volume in PBMCs).

``` r
object <- readRDS(file=data.Seurat)

## Show number of cells from each sample
table(object$group)
```

    ## 
    ## PBMC_50ul_1_1000k PBMC_50ul_4_1000k PBMC_25ul_4_1000k  PBMC_25ul_4_200k 
    ##              1777              1777              1777              1777 
    ##  Lung_50ul_1_500k  Lung_50ul_4_500k           Doublet          Negative 
    ##              1681              1681                 0                 0

``` r
object <- subset(object, subset=volume == "25µl")
object
```

    ## An object of class Seurat 
    ## 33572 features across 3554 samples within 3 assays 
    ## Active assay: RNA.kallisto (33514 features)
    ##  2 other assays present: HTO.kallisto, ADT.kallisto
    ##  3 dimensional reductions calculated: pca, tsne, umap

## Load Ab panel annotation and concentrations

Marker stats is reused in other comparisons and was calculated in the
end of the preprocessing vignette.

``` r
abpanel <- data.frame(readxl::read_excel(data.abpanel))
rownames(abpanel) <- abpanel$Marker

## As we are only working with dilution factor 4 samples here, we want to show labels accordingly
# a bit of a hack...
abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4

markerStats <- read.table(data.markerStats)
markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",]
rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")

## Make a ordering vector ordering markers per concentration and total UMI count
marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)]

head(abpanel)
```

    ##        Marker Category     Alias   Clone Isotype_Mouse Corresponding_gene
    ## CD103   CD103        B      <NA> BerACT8          IgG1              ITGAE
    ## CD107a CD107a        B     LAMP1    H4A3          IgG1              LAMP1
    ## CD117   CD117        E     C-kit   104D2          IgG1                KIT
    ## CD11b   CD11b        B      <NA>  ICRF44          IgG1              ITGAM
    ## CD123   CD123        E      <NA>     6H6          IgG1              IL3RA
    ## CD127   CD127        E IL7Ralpha  A019D5          IgG1               IL7R
    ##        TotalSeqC_Tag BioLegend_Cat Stock_conc_µg_per_mL conc_µg_per_mL
    ## CD103           0145        350233                  500        0.31250
    ## CD107a          0155        328649                  500        0.62500
    ## CD117           0061        313243                  500        0.62500
    ## CD11b           0161        301359                  500        0.15625
    ## CD123           0064        306045                  500        0.12500
    ## CD127           0390        351356                  500        0.31250
    ##        dilution_1x
    ## CD103          400
    ## CD107a         200
    ## CD117          200
    ## CD11b          800
    ## CD123         1000
    ## CD127          400

``` r
head(markerStats)
```

    ##             marker tissue fineCluster nCells UMIsum   nth median   f90 UMItotal
    ## CD103_PBMC   CD103   PBMC           1    638   1740   5.0      2   5.0     5082
    ## CD103_Lung   CD103   Lung          16    132   7084 187.0      5 187.0    60252
    ## CD107a_PBMC CD107a   PBMC           1    638   7757  26.3      8  26.3    13396
    ## CD107a_Lung CD107a   Lung          12    260  11674  99.2     15  99.2    23273
    ## CD117_PBMC   CD117   PBMC           1    638   1318   4.0      2   4.0     3316
    ## CD117_Lung   CD117   Lung          21     32   1695  41.0     41 130.4     5878
    ##             Alias   Clone Isotype_Mouse Corresponding_gene TotalSeqC_Tag
    ## CD103_PBMC   <NA> BerACT8          IgG1              ITGAE           145
    ## CD103_Lung   <NA> BerACT8          IgG1              ITGAE           145
    ## CD107a_PBMC LAMP1    H4A3          IgG1              LAMP1           155
    ## CD107a_Lung LAMP1    H4A3          IgG1              LAMP1           155
    ## CD117_PBMC  C-kit   104D2          IgG1                KIT            61
    ## CD117_Lung  C-kit   104D2          IgG1                KIT            61
    ##             BioLegend_Cat Stock_conc_µg_per_mL conc_µg_per_mL dilution_1x
    ## CD103_PBMC         350233                  500           1.25         400
    ## CD103_Lung         350233                  500           1.25         400
    ## CD107a_PBMC        328649                  500           2.50         200
    ## CD107a_Lung        328649                  500           2.50         200
    ## CD117_PBMC         313243                  500           2.50         200
    ## CD117_Lung         313243                  500           2.50         200
    ##             marker.y DSB.cutoff positive count   pct
    ## CD103_PBMC     CD103          7       14  1777  0.79
    ## CD103_Lung     CD103          7      501  1681 29.80
    ## CD107a_PBMC   CD107a          7      122  1777  6.87
    ## CD107a_Lung   CD107a          7      150  1681  8.92
    ## CD117_PBMC     CD117          7        3  1777  0.17
    ## CD117_Lung     CD117          7       32  1681  1.90

## Cell type and tissue overview

Make tSNE plots colored by cell type, cluster and tissue of origin.

``` r
p.tsne.cellsAtStaining <- DimPlot(object, group.by="cellsAtStaining", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(~"cellsAtStaining") + scale_color_manual(values=color.cellsAtStaining)

p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + scale_color_manual(values=color.supercluster) + facet_wrap(~"Cell types")

p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(  ~"Clusters") + guides(col=F)

p.tsne.cluster + p.tsne.finecluster + p.tsne.cellsAtStaining
```

![](Cell-number-titration_files/figure-gfm/tsnePlots-1.png)<!-- -->

## Overall ADT counts

Extract UMI data and calculate UMI sum per marker within each condition.

``` r
## Get the data
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("cellsAtStaining"))

## Calculate marker sum from each dilution within both tissues
ADT.matrix.agg <- ADT.matrix %>% group_by(cellsAtStaining=cell.annotation[name,"cellsAtStaining"], marker, conc) %>% summarise(sum=sum(value))

## Order markers by concentration
ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order)

## Extract marker annotation
ann.markerConc <- abpanel[marker.order,]
ann.markerConc$Marker <- factor(marker.order, levels=marker.order)

ADT.matrix.agg.total <- ADT.matrix.agg
```

## Plot overall ADT counts by conditions

Samples stained with diluted Ab panel have reduced ADT counts.

``` r
p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total[order(-ADT.matrix.agg$conc, -ADT.matrix.agg$sum),], aes(x=cellsAtStaining, y=sum/10^6, fill=conc)) + 
  geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) + 
  scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
  scale_y_continuous(expand=c(0,0,0,0.05)) + 
  labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) + 
  guides(fill=guide_colourbar(reverse=T)) + 
  theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right")

p.UMIcountsPerCondition
```

![](Cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png)<!-- -->

## Compare total UMI counts per marker

Plot total UMI counts for each marker at the investigated dilution
factors (DF1 vs. DF4). To ease readability, we place dashed lines
between each concentration.

``` r
## Calculate "breaks" where concentration change.
lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5
lines <- data.frame(breaks=lines[-length(lines)])

## Make a marker by concentration "heatmap"
p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) + 
  geom_tile(col=alpha(col="black",alpha=0.2)) + 
  geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) + 
  scale_fill_viridis_c(trans="log2") + 
  labs(fill="µg/mL") + 
  theme_get() + 
  theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) + scale_x_continuous(expand=c(0,0))
  
## Make UMI counts per Marker plot
p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) + 
  geom_line(aes(group=marker), size=1.2, color="#666666") + 
  geom_point(aes(group=cellsAtStaining, fill=cellsAtStaining), pch=21, size=0.7) + 
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
  scale_fill_manual(values=color.cellsAtStaining) + 
  scale_y_continuous(breaks=c(9:17)) + 
  ylab("log2(UMI sum)") + 
  guides(fill=guide_legend(override.aes=list(size=1.5), reverse=TRUE)) + 
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) + 
  coord_flip()

## Combine plot with markerByConc annotation heatmap
plotUMIcountsPerMarker <- p.markerByConc + guides(fill=F) + p.UMIcountsPerMarker + guides(fill=F) + plot_spacer() + guide_area() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

plotUMIcountsPerMarker
```

![](Cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png)<!-- -->

## Compare change in UMI/cell within expressing cluster

Using a specific percentile may be prone to outliers in small clusters
(i.e. the 90th percentile of a cluster of 30 will be the \#3 higest cell
making it prone to outliers). We thus set a threshold of the value to
only be the 90th percentile if cluster contains more than 100 cells. For
smaller clusters, the median is used. Expressing cluster is identified
in the “preprocessing” vignette.

``` r
## Get the data
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("cellsAtStaining", "fineCluster"))

## Calculate marker statistics from each dilution within each cluster
ADT.matrix.agg <- ADT.matrix %>% group_by(cellsAtStaining=cell.annotation[name,"cellsAtStaining"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>% summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value))
ADT.matrix.agg$tissue == "PBMC"
```

    ## logical(0)

``` r
## Use data for the previously determined expressing cluster.
Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")]
Cluster.max$fineCluster <- factor(Cluster.max$fineCluster)

ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg)
ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order)

p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) + 
  geom_line(aes(group=marker), size=1.2, color="#666666") + 
  geom_point(aes(group=cellsAtStaining, fill=cellsAtStaining), pch=21, size=0.7) + 
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
  geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) + 
  scale_fill_manual(values=color.cellsAtStaining) + 
  scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) + 
  ylab("90th percentile UMI of expressing cluster") + 
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) + 
  coord_flip()

## Combine plot with markerByConc annotation heatmap
UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") + p.UMIinExpressingCells + theme(legend.position="none") + plot_spacer() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

UMIinExpressingCells
```

![](Cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png)<!-- -->

## Titration examples

Most markers are largely unaffected by reducing staining
cellsAtStaining. However, some antibodies used at low concentrations and
targeting abundant epitopes are affected, an example of such is CD31:

``` r
## Make helper function for plotting titration plots
titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){
  curMarker.name <- marker
  
  ## Get antibody concentration for legends
  curMarker.DF1conc <- abpanel[curMarker.name, "conc_µg_per_mL"]
  if(show.gate==TRUE){
    ## Load gating percentages from manually set DSB thresholds
    gate <- data.frame(gate=markerStats[markerStats$marker == curMarker.name & markerStats$tissue== "PBMC",c("pct")])
    gate$gate <- 1-(gate$gate/100)
    rownames(gate) <- gate$wrap
    ## Allow manual gating
    if(!is.null(gate.PBMC)) gate <- gate.PBMC
  } else {
    gate <- NULL
  }

  p <- feature_rankplot_hist_custom(data=object, 
                                    marker=paste0("adt_",curMarker.name),      
                                    group="cellsAtStaining",
                                    barcodeGroup="supercluster",
                                    conc=curMarker.DF1conc, 
                                    legend=legend, 
                                    yaxis.text=y.axis, 
                                    gates=gate,
                                    histogram.colors=color.cellsAtStaining, 
                                    title=curMarker.name)
  
  return(p)
}

p.CD31 <- titrationPlot("CD31", legend=TRUE)

p.CD31
```

![](Cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png)<!-- -->

## Final plot

``` r
A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"), 
                                     legend.key.height=unit(0.4,"cm"), 
                                     legend.text=element_text(size=unit(5,"pt")),
                                     plot.margin=unit(c(0.3,0,0.5,0),"cm"))

B1 <- p.markerByConc + theme(text = element_text(size=10), 
                             plot.margin=unit(c(0.3,0,0,0),"cm"),
                             legend.position="none")
B2 <- p.UMIcountsPerMarker + theme(legend.position="none")
C <- p.UMIinExpressingCells + theme(legend.position="none")

BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker + 
                                   guides(fill=guide_legend(reverse=FALSE)) + 
                                   theme(legend.position="bottom", 
                                         legend.direction="horizontal", 
                                         legend.background=element_blank(), 
                                         legend.box.background=element_blank(), legend.key=element_blank()))

D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm"))

AD <- cowplot::plot_grid(A,D,NULL, 
                         ncol=1, 
                         rel_heights = c(13,17,1.5),
                         labels=c("A","D",""), 
                         label_size=panel.label_size, 
                         vjust=panel.label_vjust, 
                         hjust=panel.label_hjust)

BC <- cowplot::plot_grid(B1, B2, C, 
                         nrow=1, 
                         rel_widths=c(2,10,10), 
                         align="h", 
                         axis="tb", 
                         labels=c("B", "", "C"), 
                         label_size=panel.label_size, 
                         vjust=panel.label_vjust, 
                         hjust=panel.label_hjust)

p.figure <- cowplot::ggdraw(plot_grid(AD, BC, 
                                      nrow=1, 
                                      rel_widths=c(1,4), 
                                      align="v", 
                                      axis="l")) + 
    cowplot::draw_plot(BC.legend,0.27,0.020,0.2,0.00001)

png(file=file.path(outdir,"Figure 4.png"), 
    width=figure.width.full, 
    height=4.5, 
    units = figure.unit, 
    res=figure.resolution, 
    antialias=figure.antialias)

  p.figure
  
dev.off()
```

    ## png 
    ##   2

``` r
p.figure
```

![](Cell-number-titration_files/figure-gfm/figure-1.png)<!-- -->

## Individual titration plots

For supplementary information.

``` r
plots.columns = 6
rows.max <- 5

markers <- abpanel[rownames(object[["ADT.kallisto"]]),]
markers <- markers[order(markers$Category, markers$Marker),]

plots <- list()

## Make individual plots for each marker
for(i in 1:nrow(markers)){
  curMarker <- markers[i,]
  curMarker.name <- curMarker$Marker
  y.axis <- ifelse((i-1) %in% c(0,6,12,18,24,30,36,42,48),TRUE,FALSE)
  plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis)
}

# a bit of a hack to make celltype legend
p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster), 
                                           aes(color=supercluster,x=1,y=1)) + 
  geom_point(shape=15, size=1.5) + 
  scale_color_manual(values=color.supercluster) + 
  theme(legend.title=element_blank(), 
        legend.margin=margin(0,0,0,0), 
        legend.key.size = unit(0.15,"cm"),
        legend.position = c(0.98,1.1), 
        legend.justification=c(1,1), 
        legend.direction="horizontal"))

plots.num <- length(plots)
plots.perPage <- plots.columns*rows.max
plots.pages <- ceiling(plots.num/plots.perPage)

## Make a supplementary figure split into pages
for(i in 1:plots.pages){
  start <- (i-1)*plots.perPage+1
  end <- i*plots.perPage
  end <- min(end,plots.num)
  curPlots <- c(start:end)
  plots.rows <- ceiling(length(curPlots)/plots.columns)
  
  curPlots <- cowplot::plot_grid(plotlist=plots[curPlots],ncol=plots.columns, rel_widths=c(1.1,1,1,1,1,1), align="h", axis="tb")
  curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows))
  
  png(file=file.path(outdir,paste0("Supplementary Figure 4",LETTERS[i],".png")), 
      units=figure.unit, 
      res=figure.resolution, 
      width=figure.width.full, 
      height=(2*plots.rows),
      antialias=figure.antialias)

  print(curPlots.layout)
  
  dev.off()
  
  print(curPlots.layout)
}
```

![](Cell-number-titration_files/figure-gfm/suppFig-1.png)<!-- -->![](Cell-number-titration_files/figure-gfm/suppFig-2.png)<!-- -->
back to top