Revision 74628305678f30d5b126745e5c6cd6cec4091c12 authored by BrionChristian on 03 July 2020, 02:17:14 UTC, committed by BrionChristian on 03 July 2020, 02:17:14 UTC
Shell script example for read processing
1 parent e445af8
Raw File
flowC-NEWpopHeri2-gh.r
library(flowCore)
library(reshape2)
library(plyr)
library(data.table)
par(mfrow = c(1, 1))

setwd("YOUR/WORKING/DIRECTORY")
file<-all[1]

#dataGAIN <- read.FCS("sort181203/Specimen_001_Tube_002_002.fcs", transformation=FALSE)

infoplate<-read.csv("181203matSample.txt", sep="\t",header = T)

import_dataO <- function(file){
  x = unlist(strsplit(basename(file), "\\."))[1]
  name = unlist(strsplit(basename(x), "\\_"))[4]
  #print(paste("Reading", condition, id, time, sep=" "))  
  name <- as.numeric(name)
  data <- read.FCS(file, transformation=FALSE)
  print(paste("Reading", x, nrow(data), sep=" "))  
  mdata <- data.frame(exprs(data))
  mdata$gene <- infoplate$gene[infoplate$sample==name]
  mdata$rep <- infoplate$rep[infoplate$sample==name]
  mdata$tec <- infoplate$teck[infoplate$sample==name]
  mdata$gate <- infoplate$gate[infoplate$sample==name]
  mdata$sample <- infoplate$sample[infoplate$sample==name]
  return(mdata)
}

all = list.files(path="sort181203", pattern = "Specimen", full = TRUE)
data_all = ldply(all, import_dataO) #plyr library
colnames(data_all)[colnames(data_all)=="eGFP.A"]<-"GFP.A"

data_all$strain<-paste(data_all$gene,data_all$rep,sep="_")
data_all$name<-paste(data_all$gene,data_all$rep,data_all$tec,data_all$gate,sep="_")

boxplot(log(data_all$FSC.A)~data_all$name)


#sizesort
plotdata<-data_all[data_all$gate %in% c(20,21),]
plot(log(plotdata$FSC.A[plotdata$gate==20 & plotdata$gene=="BMH2"]),log(plotdata$SSC.A[plotdata$gate==20 & plotdata$gene=="BMH2"]),pch=16,cex=0.4,col=rgb(0,0,0,0.1),ylim=c(9,12.5),xlim=c(9.5,12.5))
points(log(plotdata$FSC.A[plotdata$gate==21 & plotdata$gene=="BMH2"]),log(plotdata$SSC.A[plotdata$gate==21 & plotdata$gene=="BMH2"]),pch=16,cex=0.4,col=rgb(0,0,0.5,0.1))

plot(log(plotdata$FSC.A[plotdata$gate==20 & plotdata$gene=="CTS1"]),log(plotdata$SSC.A[plotdata$gate==20 & plotdata$gene=="CTS1"]),pch=16,cex=0.4,col=rgb(0,0,0,0.1),ylim=c(9,12.5),xlim=c(9.5,12.5))
points(log(plotdata$FSC.A[plotdata$gate==21 & plotdata$gene=="CTS1"]),log(plotdata$SSC.A[plotdata$gate==21 & plotdata$gene=="CTS1"]),pch=16,cex=0.4,col=rgb(0,0,0.5,0.1))


d1 <- density(log(plotdata$FSC.A[plotdata$gate==20 & plotdata$gene=="BMH2"]),na.rm = T)
d2 <- density(log(plotdata$FSC.A[plotdata$gate==21 & plotdata$gene=="BMH2"]),na.rm = T)
d3 <- density(log(plotdata$FSC.A[plotdata$gate==20 & plotdata$gene=="CTS1"]),na.rm = T)
d4 <- density(log(plotdata$FSC.A[plotdata$gate==21 & plotdata$gene=="CTS1"]),na.rm = T)
plot(d1, lwd = 2, main="", col = rgb(0,0,0,1), xlim=c(9.7,12.5),xlab="FSC",ylim=c(0,1.1*max(c(d1$y,d2$y,d3$y,d4$y)))) # plots the results 
points(d2,typ = "l", lty =2, lwd = 2, col = rgb(0,0,0,1))
points(d3,typ = "l", lty =1, lwd = 2, col = rgb(0.9,0.3,0.1,1))
points(d4,typ = "l", lty =2, lwd = 2, col = rgb(0.9,0.3,0.1,1))



#only single
randdata40000<-data_all[sample(nrow(data_all),40000,replace = FALSE),]

plot(log(randdata40000$FSC.A),log(randdata40000$SSC.A),pch=16,col=rgb(0,0,1,0.05),cex=0.4,ylim=c(9,12.5),xlim=c(9.5,12.5))
gateTOPx<-c(9.7,10.2,10.8,11.1)
gateTOPy<-c(9.4,10.05,10.6,10.6)
gateBOTx<-c(9.7,10.2,10.8,11.1)
gateBOTy<-c(9.4,9.3,9.7,10.6)
points(c(gateTOPx,rev(gateBOTx)),c(gateTOPy,rev(gateBOTy)),typ="l",col="grey",lwd=2)


subdata<-data_all
for (i in 1:(length(gateTOPx)-1)) {
  coor<-lm(gateTOPy[i:(i+1)]~gateTOPx[i:(i+1)])
  subdata<-subdata[log(subdata$SSC.A) < coor$coefficients[2]*log(subdata$FSC.A)+coor$coefficients[1],]
}
for (i in 1:(length(gateBOTx)-1)) {
  coor<-lm(gateBOTy[i:(i+1)]~gateBOTx[i:(i+1)])
  subdata<-subdata[log(subdata$SSC.A) > coor$coefficients[2]*log(subdata$FSC.A)+coor$coefficients[1],]
}
subdata<-subdata[subdata$FSC.A>0 & subdata$SSC.A>0 & subdata$mCherry.A>0 & subdata$GFP.A>0,]
subdata<-subdata[!is.na(subdata$gene),]

genes<-levels(as.factor(subdata$gene))
genes<-genes[c(1,5,3,7,2,6)]
typetest<-c("whole pop mCH","whole pop GFP")
mat<-matrix(c(1,2,3,1,4,5),nrow = 2,ncol = 3,byrow = T)

subdata$mCHc<-subdata$mCherry.A
subdata$GFPc<-subdata$mCherry.A


strains<-levels(as.factor(subdata$strain))




correl<-c()
r2<-c()
correlc<-c()
r2c<-c()
strains<-levels(as.factor(subdata$strain))
strains <- strains[c(1,4,3,6,2,5)]

pdf("181204_Flow_effectSize.pdf", width = 12, height = 12)
par(mfcol = c(2, 2))
i=1
date()
#subdataG1<-subdata[subdata$gate==1,]
for (i in 1:length(strains)) {
  strain<-strains[i]
  test<-loess(log(subdata$mCherry.A[subdata$strain==strain])~subdata$FSC.A[subdata$strain==strain])
  subdata$mCHc[subdata$strain==strain]<-unlist(test$residuals)+mean(log(subdata$mCherry.A[subdata$strain==strain]))
  test2<-loess(log(subdata$GFP.A[subdata$strain==strain])~subdata$FSC.A[subdata$strain==strain])
  subdata$GFPc[subdata$strain==strain]<-unlist(test2$residuals)+mean(log(subdata$GFP.A[subdata$strain==strain]))
  reg<-lm(log(subdata$mCherry.A[subdata$strain==strain])~log(subdata$GFP.A[subdata$strain==strain]))
  reg2<-lm(unlist(test$residuals)~unlist(test2$residuals))
  
  plot(log(subdata$mCherry.A[subdata$strain==strain])~log(subdata$GFP.A[subdata$strain==strain]),pch=16,cex=0.6,col=rgb(0,0,0,0.12),xlim=c(2,12),ylim=c(4,10),main=paste(strain),xlab = "GFP",ylab = "mCherry")
  reg<-lm(log(subdata$mCherry.A[subdata$strain==strain])~log(subdata$GFP.A[subdata$strain==strain]))
  summary(reg)
  abline(reg,col="red")
  text(x = 3, y = 9, paste("s:",round(reg$coefficients[[2]],3)))
  text(x = 3, y = 8, paste("r2:",round(summary(reg)$r.squared,3)))
  
  plot(subdata$mCHc[subdata$strain==strain]~subdata$GFPc[subdata$strain==strain],pch=16,cex=0.6,col=rgb(0,0,0,0.12),xlim=c(2,12),ylim=c(4,10),main=paste(strain,"corrected"),xlab = "GFP",ylab = "mCherry")
  reg2<-lm(subdata$mCHc[subdata$strain==strain]~subdata$GFPc[subdata$strain==strain])
  summary(reg2)
  abline(reg2,col="red")
  text(x = 3, y = 9, paste("s:",round(reg2$coefficients[[2]],3)))
  text(x = 3, y = 8, paste("r2:",round(summary(reg2)$r.squared,3)))
  
  
  correl[i]<-reg$coefficients[[2]]
  correlc[i]<-reg2$coefficients[[2]]
  r2[i]<-summary(reg)$r.squared
  r2c[i]<-summary(reg2)$r.squared
}
date() #10min

dev.off()

sizecor<-data.frame(strains,correl,r2,correlc,r2c)
write.table(sizecor,"FLOWsizecor_new2.txt",sep="\t",row.names = FALSE, quote = F)

GOI<-as.character(factor(rep(strains,4),levels=strains)[order(factor(rep(strains,4),levels=strains))])

final<-data.frame(GOI,stringsAsFactors = F)
final$chanel<-rep(c("mCH","mCH","GFP","GFP"),length(GOI)/4)
final$pop<-rep(c("high","low"),length(GOI)/4)
final$atlchanel<-rep("all",length(GOI))
final$mCHeffect<-rep(0,length(GOI))
final$mCHpval<-rep(0,length(GOI))
final$GFPeffect<-rep(0,length(GOI))
final$GFPpval<-rep(0,length(GOI))
final$finalPV<-rep(0,length(GOI))
final$heritability<-rep("",length(GOI))
final$synergy<-rep("",length(GOI))
finalc<-final
finalcD<-finalc[finalc$pop=="high",]

pdf("181204_Flow_sortAll.pdf", width = 22, height = 12)
par(mfcol = c(3, 4))

n=1
for (strain in strains) {
  plotdata<-subdata[subdata$strain==strain,]
  gene<-strain
  
  for (i in 1:2) {
    m<-mat[i,]
    plot(log(plotdata$GFP.A[plotdata$gate==m[1]]),log(plotdata$mCherry.A[plotdata$gate==m[1]]),pch=16,col=rgb(0,0,0,0.2),cex=0.4,xlim=c(2,12),ylim=c(2,12),xlab="GFP",ylab="mCherry",main=paste(gene,typetest[i],sep=" "))
    points(log(plotdata$GFP.A[plotdata$gate==m[2]]),log(plotdata$mCherry.A[plotdata$gate==m[2]]),pch=16,col=rgb(0,0,1,0.2),cex=0.4)
    points(log(plotdata$GFP.A[plotdata$gate==m[3]]),log(plotdata$mCherry.A[plotdata$gate==m[3]]),pch=16,col=rgb(0.9,0.3,0.1,0.2),cex=0.4)
    d1 <- density(log(plotdata$GFP.A[plotdata$gate==m[1]]),na.rm = T) 
    d2 <- density(log(plotdata$GFP.A[plotdata$gate==m[2]]),na.rm = T) # returns the density data
    d3 <- density(log(plotdata$GFP.A[plotdata$gate==m[3]]),na.rm = T) # returns the density data
    plot(d1, lwd = 2, main="", col = rgb(0,0,0,1), xlim=c(2,12),xlab="GFP",ylim=c(0,1.1*max(c(d1$y,d2$y,d3$y)))) # plots the results 
    points(d2,typ = "l", lwd = 2, col = rgb(0,0,1,1))
    points(d3,typ = "l", lwd = 2, col = rgb(0.9,0.3,0.1,1))
    testup<-t.test(log(plotdata$GFP.A[plotdata$gate==m[1]]),log(plotdata$GFP.A[plotdata$gate==m[3]]))
    testdown<-t.test(log(plotdata$GFP.A[plotdata$gate==m[1]]),log(plotdata$GFP.A[plotdata$gate==m[2]]))
    legend("topleft",legend = c(paste(typetest[i],"up",round(testup$estimate[2]-testup$estimate[1],digits = 3),round(-log10(testup$p.value),0),sep=" "),
                                paste(typetest[i],"down",round(testdown$estimate[2]-testdown$estimate[1],digits = 3),round(-log10(testdown$p.value),0),sep=" ")),
           lty=1,col=c(rgb(0.9,0.3,0.1,1),rgb(0,0,1,1)))
    final$GFPeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$estimate[2]-testup$estimate[1]
    final$GFPpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$p.value
    final$GFPeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$estimate[2]-testdown$estimate[1]
    final$GFPpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$p.value
    
    d1 <- density(log(plotdata$mCherry.A[plotdata$gate==m[1]]),na.rm = T) 
    d2 <- density(log(plotdata$mCherry.A[plotdata$gate==m[2]]),na.rm = T) # returns the density data
    d3 <- density(log(plotdata$mCherry.A[plotdata$gate==m[3]]),na.rm = T) # returns the density data
    plot(d1, lwd = 2, main="", col = rgb(0,0,0,1), xlim=c(2,12),xlab="mCherry",ylim=c(0,1.1*max(c(d1$y,d2$y,d3$y)))) # plots the results 
    points(d2,typ = "l", lwd = 2, col = rgb(0,0,1,1))
    points(d3,typ = "l", lwd = 2, col = rgb(0.9,0.3,0.1,1))
    testup<-t.test(log(plotdata$mCherry.A[plotdata$gate==m[1]]),log(plotdata$mCherry.A[plotdata$gate==m[3]]))
    testdown<-t.test(log(plotdata$mCherry.A[plotdata$gate==m[1]]),log(plotdata$mCherry.A[plotdata$gate==m[2]]))
    legend("topleft",legend = c(paste(typetest[i],"up",round(testup$estimate[2]-testup$estimate[1],digits = 3),round(-log10(testup$p.value),0),sep=" "),
                                paste(typetest[i],"down",round(testdown$estimate[2]-testdown$estimate[1],digits = 3),round(-log10(testdown$p.value),0),sep=" ")),
           lty=1,col=c(rgb(0.9,0.3,0.1,1),rgb(0,0,1,1)))
    final$mCHeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$estimate[2]-testup$estimate[1]
    final$mCHpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$p.value
    final$mCHeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$estimate[2]-testdown$estimate[1]
    final$mCHpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$p.value

    testup<-t.test(plotdata$GFPc[plotdata$gate==m[1]],plotdata$GFPc[plotdata$gate==m[3]])
    testdown<-t.test(plotdata$GFPc[plotdata$gate==m[1]],plotdata$GFPc[plotdata$gate==m[2]])
    finalc$GFPeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$estimate[2]-testup$estimate[1]
    finalc$GFPpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$p.value
    finalc$GFPeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$estimate[2]-testdown$estimate[1]
    finalc$GFPpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$p.value
    
    
    testup<-t.test(plotdata$mCHc[plotdata$gate==m[1]],plotdata$mCHc[plotdata$gate==m[3]])
    testdown<-t.test(plotdata$mCHc[plotdata$gate==m[1]],plotdata$mCHc[plotdata$gate==m[2]])
    finalc$mCHeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$estimate[2]-testup$estimate[1]
    finalc$mCHpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="high" & final$atlchanel==final$atlchanel[i*2]]<-testup$p.value
    finalc$mCHeffect[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$estimate[2]-testdown$estimate[1]
    finalc$mCHpval[final$GOI==gene & final$chanel==final$chanel[i*2] & final$pop=="low" & final$atlchanel==final$atlchanel[i*2]]<-testdown$p.value
    
    testup<-t.test(plotdata$GFPc[plotdata$gate==m[2]],plotdata$GFPc[plotdata$gate==m[3]])
    finalcD$GFPeffect[finalcD$GOI==gene & finalcD$chanel==final$chanel[i*2] & finalcD$pop=="high" & finalcD$atlchanel==final$atlchanel[i*2]]<-testup$estimate[2]-testup$estimate[1]
    finalcD$GFPpval[finalcD$GOI==gene & finalcD$chanel==final$chanel[i*2] & finalcD$pop=="high" & finalcD$atlchanel==final$atlchanel[i*2]]<-testup$p.value
    
    testup<-t.test(plotdata$mCHc[plotdata$gate==m[2]],plotdata$mCHc[plotdata$gate==m[3]])
    finalcD$mCHeffect[finalcD$GOI==gene & finalcD$chanel==final$chanel[i*2] & finalcD$pop=="high" & finalcD$atlchanel==final$atlchanel[i*2]]<-testup$estimate[2]-testup$estimate[1]
    finalcD$mCHpval[finalcD$GOI==gene & finalcD$chanel==final$chanel[i*2] & finalcD$pop=="high" & finalcD$atlchanel==final$atlchanel[i*2]]<-testup$p.value
    
  }
}


dev.off()

for (i in 1:nrow(final)) {
  if (final$pop[i]=="high") {m=1} else {m=-1}
  if (final$chanel[i]=="mCH") {
    cha<-5
    oth<-7
  } else {
    cha<-7
    oth<-5
  }
  final$finalPV[i]<-final[i,cha+1]
  if (final[i,cha]*m<0 | final[i,cha+1] > 0.01) {final$heritability[i]<-"nul"}
  else if (final[i,cha+1] > 0.001) {final$heritability[i]<-"average"}
  else if (final[i,cha+1] <= 0.001) {final$heritability[i]<-"good"}
  if (final[i,oth+1] > 0.01) {final$synergy[i]<-"none"}
  else if (final[i,oth]*m<0) {final$synergy[i]<-"inverted"}
  else if (final[i,oth]*m>0) {final$synergy[i]<-"similar"}
}

write.table(final,"finalFLOW_new2.txt",sep="\t",row.names = FALSE, quote = F)
write.table(finalc,"finalFLOW_new2-sc.txt",sep="\t",row.names = FALSE, quote = F)
write.table(finalcD,"finalFLOW_new2-scD.txt",sep="\t",row.names = FALSE, quote = F)
#final2<-read.table("finalFLOW_new.txt",sep="\t",stringsAsFactors = F, header = T)
#finalF<-rbind(final,final2)
finalF<-final

plot(final$mCHeffect,finalc$mCHeffect)
abline(a=0,b=1)
abline(v=0,col="grey")
abline(h=0,col="grey")
plot(final$GFPeffect,finalc$GFPeffect)
abline(a=0,b=1)
abline(v=0,col="grey")
abline(h=0,col="grey")
plot(log(-log10(final$mCHpval)),log(-log10(finalc$mCHpval)))
abline(a=0,b=1)
abline(v=log(-log10(0.00001)),col="red")
abline(h=log(-log10(0.00001)),col="red")
plot(log(-log10(final$GFPpval)),log(-log10(finalc$GFPpval)))
abline(a=0,b=1)
abline(v=log(-log10(0.00001)),col="red")
abline(h=log(-log10(0.00001)),col="red")

barplot(as.matrix(dcast(finalF,heritability~chanel+finalF$GOI)[c(3,1,2),-1]))

coltyp<-c(rep(rgb(0,0.5,0.3,1),7),rep(rgb(1,0,0,1),7),rep(rgb(0,0.37,0.25,1),7),rep(rgb(0.6,0,0,1),7))
boxplot(-log10(finalF$finalPV)~finalF$GOI+finalF$chanel,ylim=c(0,300),col=coltyp)
points(c(0,29),c(3,3),typ="l",col="red")
points(c(0,29),c(2,2),typ="l",col="grey")

barplot(as.matrix(dcast(finalF,heritability~atlchanel+chanel)[c(3,1,2),-1]))

coltyp<-c(rep(rgb(0,0.5,0.3,1),2),rep(rgb(1,0,0,1),2),rep(rgb(0,0.37,0.25,1),2),rep(rgb(0.6,0,0,1),2))
boxplot(-log10(finalF$finalPV)~finalF$atlchanel+finalF$chanel,ylim=c(0,300),col=coltyp)
points(c(0.5,8.5),c(3,3),typ="l",col="red")
points(c(0.5,8.5),c(2,2),typ="l",col="grey")

coltyp<-c(rgb(0.75,0.45,0.15,1),rgb(1,1,1,1),rgb(0.2,0.8,1,1))
barplot(as.matrix(dcast(finalF,synergy~atlchanel+finalF$GOI)[,-1]),col=coltyp)

barplot(as.matrix(dcast(finalF,synergy~atlchanel)[,-1]),col=coltyp)

back to top