https://github.com/eastmallingresearch/crosslink
Raw File
Tip revision: 5c9ec6b6b27a18ad1687c51f95dc4a2ef894aa1a authored by Robert Vickerstaff on 19 January 2018, 14:40:21 UTC
fixed link to QTL pipelines
Tip revision: 5c9ec6b
make_table_erate_3way.R
#!/usr/bin/Rscript
#Crosslink Copyright (C) 2016 NIAB EMR see included NOTICE file for details

#plot max memory usage, ordering accuracy, map expansion, CPU time

#run these first:
# ~/git_repos/crosslink/compare_progs/get_maxvmem_mdensity.sh
# ~/git_repos/crosslink/compare_progs/recalc_mapping_accuracy_mdensity.sh

library(ggplot2)
library(scales)
library(gridExtra)
library(gtable)
library(grid) # for R 3.2.3 for gpar

setwd("~/rjv_mnt/cluster/crosslink/ploscompbiol_data/erate_simdata/figs")


dat = read.table("erate_4way",col.names=c("algorithm","erate","t_user","t_sys","corr","missing","expansion"))
dat$t_cpu = dat$t_user + dat$t_sys
dat$t_hrs = dat$t_cpu / 3600
dat$err = 1 - dat$corr

rates = levels(as.factor(dat$erate))
progs = levels(as.factor(dat$algorithm))
vals = c('err','t_hrs','expansion')


for ( p in progs )
{
    cat(paste(p))
    
    for ( v in vals )
    {
        for ( r in rates )
        {
            tmp = subset(dat,algorithm==p & erate==r)
        
            cat(paste(',',median(tmp[[v]])))
        }
    }
    
    cat(paste('\n'))
}

#===============ordering
datsum <- summarySE(dat, measurevar="err", groupvars=c("algorithm","erate"))
p2 = ggplot(datsum, aes(x=erate, y=err, colour=algorithm, shape=algorithm)) + 
    scale_shape_manual(values=1:nlevels(datsum$algorithm)) +
    geom_errorbar(aes(ymin=lq, ymax=uq), width=ew, size=lw) +
    geom_line(size=lw) +
    geom_point(size=ps) +
    ylab("Ordering Error (1 - Corr. Coeff)") +
    xlab("Error/Missing Rate") +
    scale_y_log10(breaks=c(1e-3,1e-2,1e-1,1)) +
    scale_x_log10(breaks=c(1e-3,5e-3,1e-2,3e-2,6e-2)) +
    theme(legend.position = "none",
          text = element_text(size=8, family="Arial"),
          title = element_text(size=8, family="Arial"),
          legend.key.size=unit(.1,"in"))


#============>time
datsum <- summarySE(dat, measurevar="t_hrs", groupvars=c("algorithm","erate"))
p3 = ggplot(datsum, aes(x=erate, y=t_hrs, colour=algorithm, shape=algorithm)) + 
    scale_shape_manual(values=1:nlevels(datsum$algorithm)) +
    geom_errorbar(aes(ymin=lq, ymax=uq), width=ew, size=lw) +
    geom_line(size=lw) +
    geom_point(size=ps) +
    ylab("CPU Time (hrs)") +
    xlab("Error/Missing Rate") +
    scale_y_log10(breaks=c(1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1)) +
    scale_x_log10(breaks=c(1e-3,5e-3,1e-2,3e-2,6e-2)) +
    theme(legend.position = "none",
          text = element_text(size=8, family="Arial"),
          title = element_text(size=8, family="Arial"),
          legend.key.size=unit(.1,"in"))


#============>expansion
datsum <- summarySE(dat, measurevar="expansion", groupvars=c("algorithm","erate"))
p4 = ggplot(datsum, aes(x=erate, y=expansion, colour=algorithm, shape=algorithm)) + 
    scale_shape_manual(values=1:nlevels(datsum$algorithm)) +
    geom_errorbar(aes(ymin=lq, ymax=uq), width=ew, size=lw) +
    geom_line(size=lw) +
    geom_point(size=ps) +
    ylab("Map Expansion") +
    xlab("Error/Missing Rate") +
    scale_y_log10(breaks=c(1,2,10,20)) +
    scale_x_log10(breaks=c(1e-3,5e-3,1e-2,3e-2,6e-2)) +
    theme(legend.position = "right",
          text = element_text(size=8, family="Arial"),
          title = element_text(size=8, family="Arial"),
          legend.key.size=unit(.1,"in"))


plots = matrix(list(ggplotGrob(p2),ggplotGrob(p3),ggplotGrob(p4)),nrow=1,byrow=TRUE)
w=unit(c(0.72,0.72,1),"null")
h=unit(c(1),"null")
z=matrix(c(1,2,3),nrow=1)
gp = gpar(lwd = 3, fontsize = 18)
gtab = gtable_matrix("X", grobs=plots, widths=w, heights=h, z=z)
gtab = gtable_add_grob(gtab, textGrob("A", x=0.01, y=0.99, hjust=0, vjust=1, gp=gp), t=1, l=1, b=1, r=2, clip="off", z=100, name="A")
gtab = gtable_add_grob(gtab, textGrob("B", x=0.51, y=0.99, hjust=0, vjust=1, gp=gp), t=1, l=1, b=1, r=2, clip="off", z=101, name="B")
gtab = gtable_add_grob(gtab, textGrob("C", x=1.01, y=0.99, hjust=0, vjust=1, gp=gp), t=1, l=1, b=1, r=2, clip="off", z=102, name="C")

res=300
width=7.5
height=3.25
ptsize=10
tiff("3way_erate.tiff",res=res,width=width*res,height=height*res,compression="lzw",bg="white",pointsize=ptsize)
grid.draw(gtab)
dev.off()

pp = ggplot(dat, aes(x = algorithm, y = err, fill=algorithm)) +
    geom_boxplot(outlier.size=0.1,lwd=0.2) +
    scale_y_log10(breaks=c(.001,.01,.1,1)) +
    scale_x_discrete(position="top") +
    facet_wrap(~factor(erate)) +
    ylab("Ordering Error (1 - Corr. Coeff)") +
    xlab("Error/Missing Rate") + 
    theme(#axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank(),
          axis.text.y = element_text(colour="black"),
          #axis.text.x = element_text(colour="black"),
          text = element_text(size=7, family="Arial"),
          title = element_text(size=8, family="Arial"),
          legend.key.size=unit(.1,"in")
          )
    #xlab("erate") #+
    #theme

res=450
width=3.75
height=3.75
ggsave("erate_err_facet.tiff",width=width,height=height,units="in",dpi=res,compression="lzw")


#============>missing markers
datsum <- summarySE(dat, measurevar="missing", groupvars=c("algorithm","erate"))
p5 = ggplot(datsum, aes(x=erate, y=missing, colour=algorithm)) + 
    geom_errorbar(aes(ymin=lq, ymax=uq), width=.1) +
    geom_line() +
    geom_point() +
    ylab("Filtered Markers") +
    xlab("Error Rate") +
    #scale_y_log10() +
    scale_x_log10()

    #scale_y_log10() +


ggsave("filtered_markers.png")

back to top