### Jinliang Yang ### 04/13/2017 ### cal deleterious per line and complemantation library(data.table) g <- fread("largedata/gerpsnp_v3_345176_del.csv", data.table=FALSE) ## Note col "del" is the major from the alignment, should be the beneficial alleles get_per_line <- function(g){ g$del <- as.character(g$del) out <- data.frame() for(i in 10:ncol(g)){ g[, i] <- as.character(g[, i]) sub <- subset(g, !(g[, i] %in% "N")) b <- subset(sub, g[, "del"] == g[, i]) b <- b[!is.na(b$snpid), ] d <- subset(sub, g[, "del"] != g[, i]) d <- d[!is.na(d$snpid), ] lod <- sum(d$RS) tem <- data.frame(pid=names(g)[i], tot=nrow(sub), ben=nrow(b), del=nrow(d), sumRS=lod) out <- rbind(out, tem) } return(out) } ### deleterious alleles carried by each line res <- get_per_line(g) write.table(res, "cache/del_per_line.csv", sep=",", row.names=FALSE, quote=FALSE) perline <- read.csv("cache/del_per_line.csv") perline$percent <- with(perline, round(del/tot, 3)) with(perline, mean(del)) with(perline, range(del)) comp_two_lines <- function(g){ out <- data.frame() for(i in 10:(ncol(g)-1)){ message(sprintf("###>>> working on [ %s ]", names(g)[i])) for(j in (i+1):ncol(g)){ ### complement c <- subset(g, g[, "del"] == g[, i] | g[, "del"] == g[, j]) ### additive genetic load # ad <- subset(sub, (g[, "del"] != g[, i] & g[, i] != "N") | (g[, "del"] == g[, j] & g[, j] != "N")) ### non-complement dd <- subset(g, (g[, "del"] != g[, i] & g[, i] != "N") & (g[, "del"] != g[, j] & g[, j] != "N")) tem <- data.frame(pid1=names(g)[i], pid2=names(g)[j], comp=nrow(c), load=nrow(dd)) out <- rbind(out, tem) } } return(out) } ### deleterious alleles carried by each line ids <- sort(names(g)[10:ncol(g)]) newg <- g[, c(names(g)[1:9], ids)] res2 <- comp_two_lines(g=newg) write.table(res2, "cache/del_complemenation.csv", sep=",", row.names=FALSE, quote=FALSE) res22 <- subset(res2, pid1 != "B73") write.table(res22, "cache/del_complemenation_noB73.csv", sep=",", row.names=FALSE, quote=FALSE) range(res22$load) #[1] 47219 77210 res22[which.min(res22$load),] res22[which.max(res22$load),] c1 <- read.csv("cache/del_complemenation.csv") c2 <- read.csv("cache/del_complemenation_noB73.csv") ########## comp_trait <- function(res2, pheno, excludeB73=TRUE){ out <- data.frame() t <- as.character(unique(pheno$trait)) if(excludeB73){ res2 <- subset(res2, !(pid1 %in% "B73")) } for(i in 1:length(t)){ subp <- subset(pheno, trait %in% t[i]) subp <- merge(res2[, 3:5], subp, by="Hyb") if(nrow(subp) > 50){ fit1 <- lm(valHyb ~ comp, data=subp) fit2 <- lm(valHyb ~ load, data=subp) fit3 <- lm(BPHmax ~ comp, data=subp) fit4 <- lm(BPHmax ~ load, data=subp) fit5 <- lm(pBPHmax ~ comp, data=subp) fit6 <- lm(pBPHmax ~ load, data=subp) fit7 <- lm(MPH ~ comp, data=subp) fit8 <- lm(MPH ~ load, data=subp) fit9 <- lm(pMPH ~ comp, data=subp) fit10 <- lm(pMPH ~ load, data=subp) tem <- data.frame(trait=t[i], pheno=rep(c("perse", "BPHmax", "pBPHmax", "MPH", "pMPH"), each=2), geno=rep(c("comp", "load"), times=5)) pval <- c(summary(fit1)$coefficients[2,4], summary(fit2)$coefficients[2,4], summary(fit3)$coefficients[2,4], summary(fit4)$coefficients[2,4], summary(fit5)$coefficients[2,4], summary(fit6)$coefficients[2,4], summary(fit7)$coefficients[2,4], summary(fit8)$coefficients[2,4], summary(fit9)$coefficients[2,4], summary(fit10)$coefficients[2,4]) est <- c(summary(fit1)$coefficients[2,1], summary(fit2)$coefficients[2,1], summary(fit3)$coefficients[2,1], summary(fit4)$coefficients[2,1], summary(fit5)$coefficients[2,1], summary(fit6)$coefficients[2,1], summary(fit7)$coefficients[2,1], summary(fit8)$coefficients[2,1], summary(fit9)$coefficients[2,1], summary(fit10)$coefficients[2,1]) tem$pval <- pval tem$est <- est }else{ stop("err! less < 50 ids!") } out <- rbind(out, tem) } return(out) } res2 <- read.csv("cache/del_complemenation.csv") res2$Hyb <- paste(res2$pid1, res2$pid2, sep="_") mean(res2$load) range(res2$load) idx1 <- which.min(res2$load) idx2 <- which.max(res2$load) res2[idx1,] res2[idx2,] pheno <- read.csv("data/hyb_heterosis.csv") pheno$Hyb <- paste(pheno$Par1, pheno$Par2, sep="_") res3 <- comp_trait(res2, pheno, excludeB73 = TRUE) write.table(res3, "cache/complementation_rest_noB73.csv", sep=",", row.names=FALSE, quote=FALSE) res3 <- read.csv("cache/complementation_rest_noB73.csv") subset(res3, pheno %in% "MPH" & geno %in% "load" & pval < 0.05) stable <- subset(res3, pheno %in% c("perse", "MPH") & geno %in% "load") write.table(stable[, -3], "table/Table_hybload_pheno.csv", sep=",", row.names=FALSE, quote=FALSE) res4 <- comp_trait(res2, pheno, excludeB73 = FALSE) write.table(res3, "cache/complementation_rest.csv", sep=",", row.names=FALSE, quote=FALSE) res3 <- read.csv("cache/complementation_rest.csv") subset(res3, pheno %in% "perse" & geno %in% "load" & pval < 0.05) stable2 <- subset(res3, pheno %in% c("perse", "MPH") & geno %in% "load") stable$withB73 <- "no" stable2$withB73 <- "yes" st <- rbind(stable, stable2) write.table(st[, -3], "table/Table_hyb_load_pheno.csv", sep=",", row.names=FALSE, quote=FALSE)