https://github.com/sjackman/giab
Tip revision: 4f57b57128f7e41e58996c9fc781677d400e9da0 authored by Shaun Jackman on 04 August 2017, 18:53:50 UTC
abyss2/Makefile: Change N to 9
abyss2/Makefile: Change N to 9
Tip revision: 4f57b57
abyss-stats.rmd
---
title: Assembly of the Genome in a Bottle HG004 sequencing data
author: Shaun Jackman
output:
  html_document:
    keep_md: true
---
```{r install-packages, eval=FALSE, echo=FALSE}
# One-time setup: install the packages attached by the load-libraries chunk.
# The chunk header sets eval=FALSE, so this is not run when knitting.
install.packages(c(
  "dplyr",
  "ggplot2",
  "ggrepel",
  "knitr",
  "magrittr",
  "readr",
  "tidyr"
))
```
```{r load-libraries, message=FALSE}
library(dplyr)
library(ggplot2)
library(ggrepel)
library(knitr)
library(magrittr)
library(readr)
library(tidyr)
```
```{r read-data}
# Read the two tab-separated input tables (paths are relative).
data_orig <- read_tsv(file = "abyss-stats.tsv")
scaffolds_data <- read_tsv(file = "abyss-samtobreak.tsv")
```
```{r tidy-data}
# Split `name` at "/" into Directory and File, then split File into Species,
# Stage and Ext using separate()'s default separator; Ext is discarded.
# Directory and Stage become factors whose levels follow their order of first
# appearance. Name relabels the four listed k directories for display; any
# other Directory value becomes NA in Name.
data <- data_orig %>%
  separate(col = name, into = c("Directory", "File"), sep = "/") %>%
  separate(col = File, into = c("Species", "Stage", "Ext")) %>%
  select(-Ext) %>%
  mutate(
    Directory = factor(Directory, levels = unique(Directory)),
    Stage = factor(Stage, levels = unique(Stage)),
    Name = factor(
      Directory,
      levels = c("k96", "k128", "k144", "k192"),
      labels = c(
        "ABySS 1.9.0 k=96",
        "ABySS 1.9.0 k=128",
        "ABySS 1.9.0 k=144",
        "ABySS 1.9.0 k=192"
      )
    )
  )
```
# ABySS contigs
```{r plot-contigs}
# NG50 in kbp per stage, excluding stage "8"; one coloured line per Directory.
data %>%
  filter(Stage != "8") %>%
  ggplot(
    mapping = aes(
      x = Stage,
      y = NG50 / 1e3,
      colour = Directory,
      group = Directory
    )
  ) +
  geom_point() +
  geom_line() +
  ylab("NG50 (kbp)")
```
# ABySS scaffolds
```{r plot-scaffolds}
# NG50 in Mbp for every stage; one coloured line per Directory.
data %>%
  ggplot(
    mapping = aes(
      x = Stage,
      y = NG50 / 1e6,
      colour = Directory,
      group = Directory
    )
  ) +
  geom_point() +
  geom_line() +
  ylab("NG50 (Mbp)")
```
# ABySS assembly stats
```{r plot-assembly-stats}
# NG50 for every stage on a log10 axis with breaks labelled from 1 kbp to
# 10 Mbp; one coloured line per Directory.
data %>%
  ggplot(
    mapping = aes(
      x = Stage,
      y = NG50,
      colour = Directory,
      group = Directory
    )
  ) +
  geom_point() +
  geom_line() +
  scale_y_log10(
    breaks = 10^(3:7),
    labels = c("1 kbp", "10 kbp", "100 kbp", "1 Mbp", "10 Mbp")
  )
```
# ABySS assembly NG50 by stage
```{r plot-NG50, fig.width=10.5, fig.height=10, dpi=300}
# Large-format NG50 plot on a log10 axis, coloured and grouped by Name.
# A dashed horizontal line per Name marks its NG50 at stage "1".
# The x-axis labels are positional: they replace the Stage levels in order.
data %>%
  ggplot(
    mapping = aes(x = Stage, y = NG50, colour = Name, group = Name)
  ) +
  geom_point(size = 3) +
  geom_line(size = 1.5) +
  geom_hline(
    mapping = aes(yintercept = NG50, colour = Name),
    data = data %>% filter(Stage == "1"),
    linetype = "dashed"
  ) +
  scale_x_discrete(
    labels = c("preunitigs", "unitigs", "contigs", "scaffolds", "scaftigs")
  ) +
  scale_y_log10(
    breaks = 10^(3:7),
    labels = c("1 kbp", "10 kbp", "100 kbp", "1 Mbp", "10 Mbp")
  ) +
  scale_colour_brewer(name = NULL, palette = "Dark2") +
  # The base theme must come first so the legend placement is not overridden.
  theme_grey(28) +
  theme(legend.position = c(.35, .85))
```
# Plot scaffold NGA50 and breakpoints
```{r plot-scaffold-NGA50-breakpoints}
# Scaffold NGA50 (in Mbp) against scaffold breakpoints, restricted to rows
# whose k is a multiple of 8. Points are joined in row order (geom_path) and
# each is labelled with its k value.
# No colour aesthetic is mapped and no legend is drawn, so this plot sets
# neither a colour scale nor a legend position.
scaffolds_data %>%
  filter(as.integer(k) %% 8 == 0) %>%
  ggplot(
    mapping = aes(
      x = Scaffold_breakpoints,
      y = Scaffold_NGA50 / 1e6,
      label = k
    )
  ) +
  xlab("Scaffold breakpoints") +
  ylab("Scaffold NGA50 (Mbp)") +
  geom_path() +
  geom_point(size = 3) +
  geom_text_repel() +
  theme_grey(28)
```
# Table of ABySS assembly stats
```{r tabulate-assembly-stats}
# Render the tidied assembly statistics as a table.
knitr::kable(x = data)
```
# Table of aligned assembly stats
```{r tabulate-aligned-stats}
# Render the table read from abyss-samtobreak.tsv as-is.
knitr::kable(x = scaffolds_data)
```