https://github.com/davolilab/Proteogenomic-Analysis-of-Aneuploidy
Tip revision: 9aa99245ac462b4134976293e52f56650ecb5c00 authored by breezyzhao on 23 August 2022, 23:15:57 UTC
Delete license
Delete license
Tip revision: 9aa9924
FigS3.2.R
`%notin%` <- Negate(`%in%`)
set.seed(1234)
bootstrap<-function(dataset,iterations,index,groups,para1,para2){
s.size.g1 <- length(dataset[,index][dataset[,groups]%in%para1])
s.size.g2 <- length(dataset[,index][dataset[,groups]%in%para2])
pool <- dataset[,index]
obs.diff.b1 <- median (dataset[,index][dataset[,groups]==para1]) - median (dataset[,index][dataset[,groups]==para2])
iterations <- iterations
sampl.dist.b1 <- NULL
for (i in 1 : iterations) {
resample <- sample (c(1:length (pool)), length(pool), replace = TRUE)
g1.perm = pool[resample][1 : s.size.g1]
g2.perm = pool[resample][(s.size.g1+1) : length(pool)]
sampl.dist.b1[i] = median (g1.perm) - median (g2.perm)
}
p.boot1 <- (sum ( abs(sampl.dist.b1) >= abs(obs.diff.b1)) + 1)/ (iterations+1)
return(p.boot1)
}
library(tidyr)
library(data.table)
library(ggplot2)
library(boot)
library(ggpubr)
library(rstatix)
library(clusterProfiler)
library(msigdbr)
library(ggridges)
setwd("/Users/zhaox12/Dropbox (NYU Langone Health)/Xin_backup/Teresa_lab/project/10.protein/11.12-For paper/Manuscript_2021/figure_2021/potential_analysis/210827.sublocation.all/")
corum <- as.data.frame(read.delim("/Users/zhaox12/Desktop/Teresas_lab/project/10.protein/database/coreComplexesv3.0.txt", sep = "\t"))
corum<-corum[corum$Organism=="Human",]
master_list <- strsplit(as.character(corum$subunits.Entrez.IDs.), split = ";")
master_list <- unique(as.numeric(as.character(unlist(master_list))))
# Create master list of CORUM UniProt IDs
master_list_uniprot <- strsplit(as.character(corum$subunits.UniProt.IDs.), split = ";")
master_list_uniprot <- unique(as.character(unlist(master_list_uniprot)))
# Create master list of CORUM gene names (use this)
master_list_names <- strsplit(as.character(corum$subunits.Gene.name.), split = ";")
master_list_names <- unique(as.character(unlist(master_list_names)))
#######################################################################################################
sublocation<-read.delim("/Users/zhaox12/Desktop/Teresas_lab/project/global_database/location_cell/subcellular_location.tsv",sep="\t",header = T)
subloc<-separate_rows(sublocation, Main.location,sep=";")
nucleus.loc<-c("Nucleoplasm","Nuclear speckles","Nuclear bodies","Nuclear membrane")
cytoplasm.loc<-c("Microtubules","Cytosol","Actin filaments","Centrosome","Centriolar satellite","Cytoplasmic bodies","Intermediate filaments",
"Cytokinetic bridge","Mitotic spindle","Microtubule ends")
nucleoli.loc<-c("Nucleoli","Nucleoli fibrillar center","Nucleoli rim")
organelles.loc<-c("Mitochondria","Vesicles","Golgi apparatus","Peroxisomes",
"Endosomes","Lysosomes")
er.loc<-c("Endoplasmic reticulum")
plasma_membrane.loc<-c("Plasma membrane")
mito.loc<-c("Mitochondria")
ves.loc<-c("Vesicles")
golgi.loc<-c("Golgi apparatus")
pero.loc<-c("Peroxisomes")
endo.loc<-c("Endosomes")
lyso.loc<-c("Lysosomes")
ribo.list<-read.delim("/Users/zhaox12/Dropbox (NYU Langone Health)/Xin_backup/Teresa_lab/project/10.protein/11.12-For paper/Manuscript_2021/figure_2021/potential_analysis/210824.ribo/ribosomal.txt",
sep="\t",header = T)
proteasome.list<-read.delim("/Users/zhaox12/Dropbox (NYU Langone Health)/Xin_backup/Teresa_lab/project/10.protein/11.12-For paper/Manuscript_2021/figure_2021/potential_analysis/210824.ribo/proteasome.txt",
sep="\t",header = T)
nucleus.list<-as.data.frame(unique(subloc[subloc$Main.location %in% nucleus.loc,]))
cytoplasm.list<-as.data.frame(unique(subloc[subloc$Main.location %in% cytoplasm.loc,]))
nucleoli.list<-as.data.frame(unique(subloc[subloc$Main.location %in% nucleoli.loc,]))
organelles.list<-as.data.frame(unique(subloc[subloc$Main.location %in% organelles.loc,]))
plasma_membrane.list<-as.data.frame(unique(subloc[subloc$Main.location %in% plasma_membrane.loc,]))
er.list<-as.data.frame(unique(subloc[subloc$Main.location %in% er.loc,]))
mito.list<-as.data.frame(unique(subloc[subloc$Main.location %in% mito.loc,]))
ves.list<-as.data.frame(unique(subloc[subloc$Main.location %in% ves.loc,]))
golgi.list<-as.data.frame(unique(subloc[subloc$Main.location %in% golgi.loc,]))
pero.list<-as.data.frame(unique(subloc[subloc$Main.location %in% pero.loc,]))
endo.list<-as.data.frame(unique(subloc[subloc$Main.location %in% endo.loc,]))
lyso.list<-as.data.frame(unique(subloc[subloc$Main.location %in% lyso.loc,]))
er.ribo.list<-as.data.frame(unique(subloc[subloc$Main.location %in% er.loc & subloc $Gene.name %in%ribo.list$Gene.name,]))
er.other.list<-as.data.frame(unique(subloc[subloc$Main.location %in% er.loc & subloc $Gene.name %notin%er.ribo.list$Gene.name,]))
########################################################################################################
## generate pan-cancer data
###############################3
path1<-c("/Users/zhaox12/Dropbox (NYU Langone Health)/Xin_backup/Teresa_lab/project/10.protein/11.12-For paper/Manuscript_2021/figure_2021/fig2/210812_all_exp_genes/")
colon<-read.delim(paste0(path1,"/Colon.corr.table.txt"),sep="\t")
colnames(colon)[2]<-"colon.cor.rna.dna"
colnames(colon)[5]<-"colon.cor.rna.pro"
breast<-read.delim(paste0(path1,"/BREAST.corr.table.txt"),sep="\t")
colnames(breast)[2]<-"breast.cor.rna.dna"
colnames(breast)[5]<-"breast.cor.rna.pro"
ovarian<-read.delim(paste0(path1,"/OV.corr.table.txt"),sep="\t")
colnames(ovarian)[2]<-"ovarian.cor.rna.dna"
colnames(ovarian)[5]<-"ovarian.cor.rna.pro"
ccRCC<-read.delim(paste0(path1,"/ccRCC.corr.table.txt"),sep = "\t")
colnames(ccRCC)[2]<-"ccRCC.cor.rna.dna"
colnames(ccRCC)[5]<-"ccRCC.cor.rna.pro"
endometrial<-read.delim(paste0(path1,"/Endometrial.corr.table.txt"),sep = "\t")
colnames(endometrial)[2]<-"endometrial.cor.rna.dna"
colnames(endometrial)[5]<-"endometrial.cor.rna.pro"
luad<-read.delim(paste0(path1,"/LUAD.corr.table.txt"),sep = "\t")
colnames(luad)[2]<-"luad.cor.rna.dna"
colnames(luad)[5]<-"luad.cor.rna.pro"
hnsc<-read.delim(paste0(path1,"/HNSC.corr.table.txt"),sep = "\t")
colnames(hnsc)[2]<-"hnsc.cor.rna.dna"
colnames(hnsc)[5]<-"hnsc.cor.rna.pro"
score<-na.omit(read.delim("/Users/zhaox12/Desktop/Teresas_lab/project/36.evolution/hg19.gene.score.txt",sep="\t",header=T))
score$Corum<-ifelse(score$genes%in%master_list_names,"Corum","NoCorum")
merge.data<-merge(merge(merge(merge(merge(merge(colon,breast,by="gene",all = T),ovarian,by="gene",all = T),ccRCC,by="gene",all = T),
endometrial,by="gene",all = T),luad,by="gene",all = T),hnsc,by="gene",all = T)
Pan.dna.rna<-cbind(merge.data$gene, merge.data[,colnames(merge.data) %like% "cor.rna.dna"])
Pan.rna.pro<-cbind(merge.data$gene, merge.data[,colnames(merge.data) %like% "cor.rna.pro"])
Pan.dna.rna$mean.rna.dna<-apply(Pan.dna.rna[,2:8],1,function(x)mean(x,na.rm=T))
Pan.rna.pro$mean.rna.pro<-apply(Pan.rna.pro[,2:8],1,function(x)mean(x,na.rm=T))
Pan.data<-merge(Pan.dna.rna,Pan.rna.pro,by="merge.data$gene",all = T)
colnames(Pan.data)[1]<-"gene"
names<-list(colon,breast,ovarian,ccRCC,endometrial,luad,hnsc,Pan.data)
names1<-c("Colon","Breast","OV","ccRCC","Endometrial","LUAD","HNSC","PAN-Cancer")
# col1<-"#AAA900" ###budddha gold
# col2<-"#6500aa" ## purple
# col3<-"#bfc2c3"
# col4<-"#acafb0"
# col5<-"#999b9c"
# col6<-"#868889"
# col8.nc<-"#777600" ### er
# col7.nc<-"#c4c34d" ### pm
# col8<-"#3d0066" ## er
# col7<-"#a366cc" ###pm
# col9<-"#d7dbdb"
# cancer<-"CPTAC"
for (j in 1:8){
#j<-8
cancer<-"Subloc"
CPTAC<-names1[j]
merge.data<-names[[j]]
if(names1[j]=="PAN-Cancer"){
colnames(merge.data)[1]<-"genes"
score.data<-merge(score,merge.data[,c("genes","mean.rna.dna","mean.rna.pro")],by="genes")
colnames(score.data)[5]<-"corr.rna.dna"
colnames(score.data)[6]<-"corr.rna.pro"
}else{
colnames(merge.data)[1]<-"genes"
score.data<-merge(score,merge.data[,c(1,2,5)],by="genes")
colnames(score.data)[5]<-"corr.rna.dna"
colnames(score.data)[6]<-"corr.rna.pro"
}
corum<-score.data[score.data$Corum=="Corum",]
corum$group<-"Corum:all"
corum.ribo<-score.data[score.data$genes%in%ribo.list$Gene.name & score.data$Corum=="Corum",]
corum.ribo$group<-"Corum:Ribosome"
corum.PR<-score.data[score.data$genes%in%proteasome.list$Gene.name & score.data$Corum=="Corum",]
corum.PR$group<-"Corum:Proteasome"
corum.nuclues<-score.data[score.data$genes%in%nucleus.list$Gene.name & score.data$Corum=="Corum",]
corum.nuclues$group<-"Corum:nucleus"
corum.cytoplasm<-score.data[score.data$genes%in%cytoplasm.list$Gene.name & score.data$Corum=="Corum",]
corum.cytoplasm$group<-"Corum:cytoplasm"
corum.nucleoli<-score.data[score.data$genes%in%nucleoli.list$Gene.name & score.data$Corum=="Corum",]
corum.nucleoli$group<-"Corum:nucleoli"
corum.organelle<-score.data[score.data$genes%in%organelles.list$Gene.name & score.data$Corum=="Corum",]
corum.organelle$group<-"Corum:organelles"
corum.PM<-score.data[score.data$genes%in%plasma_membrane.list$Gene.name & score.data$Corum=="Corum",]
corum.PM$group<-"Corum:plasma_membrane"
corum.ER<-score.data[score.data$genes%in%er.list$Gene.name & score.data$Corum=="Corum",]
corum.ER$group<-"Corum:ER"
corum.ER.ribo<-score.data[score.data$genes%in%er.ribo.list$Gene.name & score.data$Corum=="Corum",]
corum.ER.ribo$group<-"Corum:ER.Ribo"
corum.ER.other<-score.data[score.data$genes%in%er.other.list$Gene.name & score.data$Corum=="Corum",]
corum.ER.other$group<-"Corum:ER.other"
corum.mito<-score.data[score.data$genes%in%mito.list$Gene.name & score.data$Corum=="Corum",]
corum.mito$group<-"Corum:Mitochondria"
corum.ves<-score.data[score.data$genes%in%ves.list$Gene.name & score.data$Corum=="Corum",]
corum.ves$group<-"Corum:Vesicles"
corum.golgi<-score.data[score.data$genes%in%golgi.list$Gene.name & score.data$Corum=="Corum",]
corum.golgi$group<-"Corum:Golgi apparatus"
corum.pero<-score.data[score.data$genes%in%pero.list$Gene.name & score.data$Corum=="Corum",]
corum.pero$group<-"Corum:Peroxisomes"
corum.endo<-score.data[score.data$genes%in%endo.list$Gene.name & score.data$Corum=="Corum",]
corum.endo$group<-"Corum:Endosomes"
corum.lyso<-score.data[score.data$genes%in%lyso.list$Gene.name & score.data$Corum=="Corum",]
corum.lyso$group<-"Corum:Lysosomes"
nocorum<-score.data[score.data$Corum=="NoCorum",]
nocorum$group<-"NoCorum:all"
#nocorum.ribo<-score.data[score.data$genes%in%ribo.list$Gene.name & score.data$Corum=="NoCorum",]
#nocorum.ribo$group<-"NoCorum:Ribosome"
nocorum.PR<-score.data[score.data$genes%in%proteasome.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.PR$group<-"NoCorum:Proteasome"
nocorum.nuclues<-score.data[score.data$genes%in%nucleus.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.nuclues$group<-"NoCorum:nucleus"
nocorum.cytoplasm<-score.data[score.data$genes%in%cytoplasm.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.cytoplasm$group<-"NoCorum:cytoplasm"
nocorum.nucleoli<-score.data[score.data$genes%in%nucleoli.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.nucleoli$group<-"NoCorum:nucleoli"
nocorum.organelle<-score.data[score.data$genes%in%organelles.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.organelle$group<-"NoCorum:organelles"
nocorum.PM<-score.data[score.data$genes%in%plasma_membrane.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.PM$group<-"NoCorum:plasma_membrane"
nocorum.ER<-score.data[score.data$genes%in%er.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.ER$group<-"NoCorum:ER"
#nocorum.ER.ribo<-score.data[score.data$genes%in%er.ribo.list$Gene.name & score.data$Corum=="NoCorum",]
#nocorum.ER.ribo$group<-"NoCorum:ER.Ribo"
nocorum.ER.other<-score.data[score.data$genes%in%er.other.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.ER.other$group<-"NoCorum:ER.other"
nocorum.mito<-score.data[score.data$genes%in%mito.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.mito$group<-"NoCorum:Mitochondria"
nocorum.ves<-score.data[score.data$genes%in%ves.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.ves$group<-"NoCorum:Vesicles"
nocorum.golgi<-score.data[score.data$genes%in%golgi.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.golgi$group<-"NoCorum:Golgi apparatus"
nocorum.pero<-score.data[score.data$genes%in%pero.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.pero$group<-"NoCorum:Peroxisomes"
nocorum.endo<-score.data[score.data$genes%in%endo.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.endo$group<-"NoCorum:Endosomes"
nocorum.lyso<-score.data[score.data$genes%in%lyso.list$Gene.name & score.data$Corum=="NoCorum",]
nocorum.lyso$group<-"NoCorum:Lysosomes"
plot.data<-rbind(corum,corum.cytoplasm,corum.endo,corum.ER,corum.ER.other,corum.ER.ribo,
corum.golgi,corum.lyso,corum.mito,corum.nucleoli,corum.nuclues,corum.organelle,
corum.pero,corum.PM,corum.PR,corum.ribo,corum.ves,
nocorum,nocorum.cytoplasm,nocorum.endo,nocorum.ER,nocorum.ER.other,
nocorum.golgi,nocorum.lyso,nocorum.mito,nocorum.nucleoli,nocorum.nuclues,nocorum.organelle,
nocorum.pero,nocorum.PM,nocorum.PR,nocorum.ves)
plot.data$group<-factor(plot.data$group,levels = c("Corum:Lysosomes","NoCorum:Lysosomes","Corum:Endosomes","NoCorum:Endosomes",
"Corum:Peroxisomes","NoCorum:Peroxisomes","Corum:Golgi apparatus","NoCorum:Golgi apparatus",
"Corum:Vesicles","NoCorum:Vesicles","Corum:Mitochondria","NoCorum:Mitochondria",
"Corum:ER","NoCorum:ER","Corum:ER.Ribo",
"Corum:ER.other","NoCorum:ER.other","Corum:Ribosome",
"Corum:plasma_membrane","NoCorum:plasma_membrane",
"Corum:organelles","NoCorum:organelles",
"Corum:nucleoli","NoCorum:nucleoli",
"Corum:cytoplasm","NoCorum:cytoplasm",
"Corum:nucleus","NoCorum:nucleus",
"Corum:Proteasome","NoCorum:Proteasome",
"Corum:all", "NoCorum:all"))
write.table(plot.data,paste0(names1[j],".subloc.corr.txt"),sep = "\t",row.names = F,quote = F)
g1<-ggplot(plot.data, aes(x = corr.rna.dna, y = group)) +
geom_density_ridges(scale = 2,quantile_lines=TRUE,
quantile_fun=function(x,...)median(x),
aes(fill = group)) +
scale_y_discrete(expand = c(0, 0)) + # will generally have to set the `expand` option
scale_x_continuous(expand = c(0, 0)) + # for both axes to remove unneeded padding
coord_cartesian(clip = "off") +
xlim(0, 1)+
ggtitle(paste0(cancer,"_",CPTAC,sep=""))+
theme_classic()+
geom_text(data=plot.data %>% group_by(group) %>%
summarise(corr.rna.dna=median(corr.rna.dna)),
aes(label=round(corr.rna.dna,3)),
position=position_nudge(y=0.5,x=-0.1), colour="black", size=3.5)+# to avoid clipping of the very top of the top ridgeline
theme_ridges()+
theme(legend.position = "none")
g2<-ggplot(plot.data, aes(x = corr.rna.pro, y = group)) +
geom_density_ridges(scale = 2,quantile_lines=TRUE,
quantile_fun=function(x,...)median(x),
aes(fill = group)) +
scale_y_discrete(expand = c(0, 0)) + # will generally have to set the `expand` option
scale_x_continuous(expand = c(0, 0)) + # for both axes to remove unneeded padding
coord_cartesian(clip = "off") +
xlim(0, 1)+
ggtitle(paste0(cancer,"_",CPTAC,sep=""))+
theme_classic()+
geom_text(data=plot.data %>% group_by(group) %>%
summarise(corr.rna.pro=median(corr.rna.pro)),
aes(label=round(corr.rna.pro,3)),
position=position_nudge(y=0.5,x=-0.1), colour="black", size=3.5)+# to avoid clipping of the very top of the top ridgeline
theme_ridges()+
theme(legend.position = "none")
figure<-ggarrange(g1,g2,nrow=1,ncol=2)
pdf(paste0(names1[j],".sublocation.pdf"),width = 12,height = 16)
print(figure)
dev.off()
}