https://github.com/davolilab/Proteogenomic-Analysis-of-Aneuploidy
Tip revision: 9aa99245ac462b4134976293e52f56650ecb5c00 authored by breezyzhao on 23 August 2022, 23:15:57 UTC
Delete license
Delete license
Tip revision: 9aa9924
FigS3.1B.R
# NOTE(review): machine-specific working directory; the PDFs written at the
# end of the script land here. Kept unchanged so output locations do not move.
setwd("/Users/zhaox12/Dropbox (NYU Langone Health)/Xin_backup/Teresa_lab/project/10.protein/11.12-For paper/Manuscript_2021/figure_2021/figS3/figS3.1/211020/")
library(stringr)
# ggplot() is called before library(ggpubr) is reached further down, so
# ggplot2 has to be attached explicitly here or a fresh session fails with
# "could not find function ggplot".
library(ggplot2)

# Directory holding one Fisher pathway-enrichment table per cohort.
fisher_dir <- "/Users/zhaox12/Dropbox (NYU Langone Health)/Xin_backup/Teresa_lab/project/10.protein/11.12-For paper/Manuscript_2021/figure_2021/fig3/210813.fisher.pathway"

# Read one tab-separated Fisher table from `fisher_dir` and convert its ID
# column to title case, so IDs are spelled identically in every cohort before
# the tables are merged on ID.
#   file_name: file name inside `fisher_dir`.
#   returns:   the table as a data.frame with the ID column title-cased.
read_fisher <- function(file_name) {
  tab <- read.delim(file.path(fisher_dir, file_name), sep = "\t")
  tab$ID <- str_to_title(tab$ID)
  tab
}

colon <- read_fisher("Colon.fisher.txt")
breast <- read_fisher("BREAST.fisher.txt")
ovarian <- read_fisher("OV.fisher.txt")
ccRCC <- read_fisher("ccRCC.fisher.txt")
endometrial <- read_fisher("Endometrial.fisher.txt")
luad <- read_fisher("LUAD.fisher.txt")
hnsc <- read_fisher("HNSC.fisher.txt")
pancancer <- read_fisher("Pan-cancer.fisher.txt")

# `names` holds the tables and `names1` the matching cohort labels, in the
# same order; the code below pairs them up by position.
names <- list(colon, breast, ovarian, ccRCC, endometrial, luad, hnsc, pancancer)
names1 <- c("COAD", "BRCA", "OV", "ccRCC", "UCEC", "LUAD", "HNSC", "Pan-cancer")
stopifnot(length(names) == length(names1))
# ---- Bubble panels: top-10 pathway enrichments per correlation group ----
# Produces g1.bubble .. g4.bubble from the per-cohort Fisher tables in `names`
# (labelled by `names1`). The four panels differ only in the FDR column read,
# the point colour, the title, and a few display switches, so the shared
# steps live in the three helpers below.

# Point colours (col2: HL panel, col4: LH panel, col1: HH panel, col3: LL panel).
col1 <- "#20854e"
col2 <- "#104327"
col3 <- "#4d9d71"
col4 <- "#90c2a7"

# Left-to-right order of the cohorts on the x axis.
cohort_levels <- c("Pan-cancer", "COAD", "BRCA", "OV", "ccRCC", "UCEC",
                   "LUAD", "HNSC")

# Build one table with, for every pathway ID, the -log10 of the value in
# column `fdr_col` and its within-cohort rank for each cohort, plus the mean
# rank across cohorts.
#   tables:  list of data.frames, each with an `ID` column.
#   labels:  character vector of cohort labels, parallel to `tables`.
#   fdr_col: position of the column to score (the original script's comments
#            call it the FDR value -- confirm against the table header).
#   returns: data.frame with columns ID, <label>, <label>.rank for each
#            label, and rank.mean; sorted by rank.mean, highest first.
build_rank_table <- function(tables, labels, fdr_col) {
  stopifnot(length(tables) == length(labels), anyDuplicated(labels) == 0)

  per_cohort <- Map(function(tab, label) {
    score <- -1 * log10(tab[[fdr_col]])
    out <- data.frame(
      score = score,
      # na.last = "keep": the default would hand a missing score the HIGHEST
      # rank, pushing untested pathways to the top. Kept as NA here, it is
      # zeroed below exactly like a pathway absent from the cohort.
      rank = rank(score, na.last = "keep"),
      ID = tab$ID,
      stringsAsFactors = FALSE
    )
    colnames(out)[1:2] <- c(label, paste0(label, ".rank"))
    out
  }, tables, labels)

  # Full outer join on the shared ID column.
  merged <- Reduce(function(x, y) merge(x, y, all = TRUE), per_cohort)
  # A pathway missing from a cohort counts as score 0 / rank 0 there.
  merged[is.na(merged)] <- 0

  # Select rank columns by name rather than by hard-coded position.
  rank_cols <- paste0(labels, ".rank")
  merged$rank.mean <- rowMeans(merged[, rank_cols, drop = FALSE])
  merged[order(merged$rank.mean, decreasing = TRUE), ]
}

# Take the `n_top` best rows of a table from build_rank_table() and reshape
# them to long format (one row per pathway x cohort) for plotting.
#   rank_table: output of build_rank_table() (already sorted, best first).
#   labels:     cohort labels, i.e. the score column names in `rank_table`.
#   levels_x:   factor level order for the `group` column.
#   n_top:      number of pathways to keep.
#   cap:        upper bound applied to the plotted score.
#   returns:    data.frame with columns ID (factor), logp, group (factor),
#               log10FDR (logp capped at `cap`).
top_enrichments_long <- function(rank_table, labels, levels_x,
                                 n_top = 10, cap = 8) {
  # head() instead of [1:n_top, ] so a short table does not yield NA rows.
  top <- head(rank_table, n_top)
  # Ascending mean rank, so the best pathway becomes the last factor level
  # and is drawn at the top of the y axis.
  top <- top[order(top$rank.mean, decreasing = FALSE), ]

  pieces <- lapply(labels, function(label) {
    data.frame(
      ID = top$ID,
      logp = top[[label]],
      group = rep(label, nrow(top)),
      stringsAsFactors = FALSE
    )
  })
  long <- do.call(rbind, pieces)

  long$group <- factor(long$group, levels = levels_x)
  long$log10FDR <- pmin(long$logp, cap)
  long$ID <- factor(long$ID, levels = unique(long$ID))
  long
}

# Draw one bubble panel: cohorts on x, pathways on y (labels on the right),
# point size mapped to log10FDR.
#   long:          output of top_enrichments_long().
#   colour:        point colour.
#   title:         panel title.
#   show_x_labels: TRUE draws 45-degree cohort labels, FALSE blanks them.
#   size_range:    optional c(min, max) passed to scale_size(); NULL keeps
#                  the ggplot2 default size scale.
#   hide_legend:   TRUE sets legend.position = "none" on the panel itself.
#   returns:       a ggplot object.
bubble_panel <- function(long, colour, title, show_x_labels,
                         size_range = NULL, hide_legend = FALSE) {
  x_text <- if (show_x_labels) {
    element_text(angle = 45, size = 12, hjust = 1)
  } else {
    element_blank()
  }

  panel <- ggplot(long, aes(x = group, y = ID)) +
    geom_point(aes(size = log10FDR), color = colour, shape = 16) +
    theme_classic() +
    scale_x_discrete(name = "") +
    ylab("") +
    scale_y_discrete(position = "right") +
    theme(
      text = element_text(size = 12),
      axis.text.x = x_text,
      axis.text.y = element_text(size = 12),
      plot.title = element_text(size = 12, face = "bold")
    ) +
    ggtitle(title)

  if (!is.null(size_range)) {
    panel <- panel + scale_size(range = size_range)
  }
  if (hide_legend) {
    panel <- panel + theme(legend.position = "none")
  }
  panel
}

#### FOR HL
HL <- build_rank_table(names, names1, fdr_col = 9)
data.use <- top_enrichments_long(HL, names1, cohort_levels)
g1.bubble <- bubble_panel(
  data.use,
  colour = col2,
  title = "Top10 enrichments in \n Group1 High DNA-RNA Corr & Low RNA-Pro Corr",
  show_x_labels = FALSE
)

#### FOR LH
LH <- build_rank_table(names, names1, fdr_col = 13)
data.use2 <- top_enrichments_long(LH, names1, cohort_levels)
g2.bubble <- bubble_panel(
  data.use2,
  colour = col4,
  title = "Top10 enrichments in \n Group2:Low DNA-RNA Corr & High RNA-Pro Corr",
  show_x_labels = TRUE
)

#### FOR HH
# NOTE(review): panels 3 and 4 use an explicit size range and hide their own
# legend while panels 1 and 2 do not; preserved as in the original script.
HH <- build_rank_table(names, names1, fdr_col = 5)
data.use3 <- top_enrichments_long(HH, names1, cohort_levels)
g3.bubble <- bubble_panel(
  data.use3,
  colour = col1,
  # Title previously read "... & Low RNA-Pro Corr", copied from Group 1.
  title = "Top10 enrichments in \n Group 3 High DNA-RNA Corr & High RNA-Pro Corr",
  show_x_labels = FALSE,
  size_range = c(0.5, 8),
  hide_legend = TRUE
)

#### FOR LL
LL <- build_rank_table(names, names1, fdr_col = 17)
data.use4 <- top_enrichments_long(LL, names1, cohort_levels)
g4.bubble <- bubble_panel(
  data.use4,
  colour = col3,
  title = "Top10 enrichments in \n Group 4 Low DNA-RNA Corr & Low RNA-Pro Corr",
  show_x_labels = TRUE,
  size_range = c(0.5, 8),
  hide_legend = TRUE
)
library(ggpubr)

# Stack two bubble panels vertically with aligned axes and no legend. The
# lower panel gets slightly more height (1 : 1.2).
stack_panels <- function(upper, lower) {
  ggarrange(
    upper, lower,
    ncol = 1, nrow = 2,
    common.legend = FALSE, legend = "none",
    align = "v", heights = c(1, 1.2)
  )
}

# Print a figure to an 8 x 8 PDF file in the working directory and close the
# device.
write_pdf <- function(figure, file_name) {
  pdf(file_name, width = 8, height = 8)
  print(figure)
  dev.off()
}

g1 <- stack_panels(g1.bubble, g2.bubble)
g2 <- stack_panels(g3.bubble, g4.bubble)

write_pdf(g1, "s3-1.CPTAC.pdf")
write_pdf(g2, "s3-2.CPTAC.pdf")