https://github.com/sjackman/giab
Raw File
Tip revision: 4f57b57128f7e41e58996c9fc781677d400e9da0 authored by Shaun Jackman on 04 August 2017, 18:53:50 UTC
abyss2/Makefile: Change N to 9
Tip revision: 4f57b57
abyss-stats.rmd
---
title: Assembly of the Genome in a Bottle HG004 sequencing data
author: Shaun Jackman
output:
  html_document:
    keep_md: true
---

```{r install-packages, eval=FALSE, echo=FALSE}
install.packages("dplyr")
install.packages("ggplot2")
install.packages("ggrepel")
install.packages("knitr")
install.packages("magrittr")
install.packages("readr")
install.packages("tidyr")
```

```{r load-libraries, message=FALSE}
library(dplyr)
library(ggplot2)
library(ggrepel)
library(knitr)
library(magrittr)
library(readr)
library(tidyr)
```

```{r read-data}
# Per-file assembly statistics; the `name` column is split into
# Directory/Species/Stage in the tidy-data chunk below.
data_orig <- read_tsv(file = "abyss-stats.tsv")

# Scaffold alignment statistics; the columns k, Scaffold_breakpoints and
# Scaffold_NGA50 are used by the scaffold NGA50 plot.
scaffolds_data <- read_tsv(file = "abyss-samtobreak.tsv")
```

```{r tidy-data}
# Build the tidy table used by the plots and the stats table:
#   * split `name` at "/" into Directory and File,
#   * split File into Species, Stage and Ext (tidyr's default separator),
#     then drop Ext,
#   * turn Directory and Stage into factors whose levels follow their order
#     of first appearance in the data,
#   * add Name, a labelled copy of Directory for the k96/k128/k144/k192
#     directories (any other directory becomes NA).
data <- data_orig %>%
	separate(name, into = c("Directory", "File"), sep = "/") %>%
	separate(File, into = c("Species", "Stage", "Ext")) %>%
	select(-Ext) %>%
	mutate(
		Directory = factor(Directory, levels = unique(Directory)),
		Stage = factor(Stage, levels = unique(Stage)),
		Name = factor(
			Directory,
			levels = c("k96", "k128", "k144", "k192"),
			labels = c(
				"ABySS 1.9.0 k=96",
				"ABySS 1.9.0 k=128",
				"ABySS 1.9.0 k=144",
				"ABySS 1.9.0 k=192")))
```

# ABySS contigs
```{r plot-contigs}
# NG50 in kbp for every stage except stage "8", one coloured line per directory.
data %>%
	filter(Stage != "8") %>%
	ggplot(aes(x = Stage, y = NG50 / 1e3, colour = Directory, group = Directory)) +
	geom_point() +
	geom_line() +
	labs(y = "NG50 (kbp)")
```

# ABySS scaffolds
```{r plot-scaffolds}
# NG50 in Mbp across all stages (no stage filter is applied here), one
# coloured line per directory.
ggplot(
	data,
	aes(x = Stage, y = NG50 / 1e6, colour = Directory, group = Directory)) +
	geom_point() +
	geom_line() +
	labs(y = "NG50 (Mbp)")
```

# ABySS assembly stats
```{r plot-assembly-stats}
# NG50 (in bp) per stage on a log10 axis; ticks at 10^3 .. 10^7 are labelled
# with human-readable sizes.
ggplot(data, aes(x = Stage, y = NG50, colour = Directory, group = Directory)) +
	geom_point() +
	geom_line() +
	scale_y_log10(
		breaks = 10^(3:7),
		labels = c("1 kbp", "10 kbp", "100 kbp", "1 Mbp", "10 Mbp"))
```

# ABySS NG50 by assembly stage
```{r plot-NG50, fig.width=10.5, fig.height=10, dpi=300}
# Large-format figure of NG50 per stage, coloured by the labelled assembly Name.
ggplot(data, aes(x = Stage, y = NG50, colour = Name, group = Name)) +
	geom_point(size = 3) +
	geom_line(size = 1.5) +
	# Dashed reference line per assembly at its stage "1" NG50.
	geom_hline(
		data = data %>% filter(Stage == "1"),
		mapping = aes(yintercept = NG50, colour = Name),
		linetype = "dashed") +
	# Labels are matched to the Stage levels by position.
	scale_x_discrete(
		labels = c("preunitigs", "unitigs", "contigs", "scaffolds", "scaftigs")) +
	scale_y_log10(
		breaks = 10^(3:7),
		labels = c("1 kbp", "10 kbp", "100 kbp", "1 Mbp", "10 Mbp")) +
	scale_colour_brewer(name = NULL, palette = "Dark2") +
	theme_grey(28) +
	# Place the legend inside the plotting area (relative x/y coordinates).
	theme(legend.position = c(.35, .85))
```

# Plot scaffold NGA50 and breakpoints
```{r plot-scaffold-NGA50-breakpoints}
# Scaffold NGA50 (in Mbp) against the number of scaffold breakpoints, for the
# rows whose k is a multiple of 8. Points are joined in data order
# (geom_path) and each point is labelled with its k.
#
# No colour aesthetic is mapped in this plot, so there is no colour scale or
# legend to configure; only the base font size is set.
scaffolds_data %>%
	filter(as.integer(k) %% 8 == 0) %>%
	ggplot(aes(x = Scaffold_breakpoints, y = Scaffold_NGA50 / 1e6, label = k)) +
	xlab("Scaffold breakpoints") +
	ylab("Scaffold NGA50 (Mbp)") +
	geom_path() +
	geom_point(size = 3) +
	geom_text_repel() +
	theme_grey(28)
```

# Table of ABySS assembly stats
```{r tabulate-assembly-stats}
# Render the tidied assembly statistics as a table.
data %>% kable()
```

# Table of aligned assembly stats
```{r tabulate-aligned-stats}
# Render the scaffold alignment statistics as a table.
scaffolds_data %>% kable()
```
back to top