https://github.com/ChengLiLab/myeloma
Tip revision: 634d6aabda1b3c0bc7ddfe145dfc34b5018b6f63 authored by ChengLiLab on 17 November 2017, 12:16:24 UTC
Add files via upload
Add files via upload
Tip revision: 634d6aa
Interactions between TADs on either side of a CNV.R
# Interactions between TADs on either side of a CNV
# 201170412
# wupz
# 1.1 read CNV data
U266_CNV_40kb <- read.table("/lustre/user/liclab/wupz/DNA_seq/CNV_final/resolution_40kb/U266-merged.sorted.bam.dedup.bam_ratio.txt",
stringsAsFactors = F, header = T)
head(U266_CNV_40kb)
dim(U266_CNV_40kb)
U266_CNV_40kb_block <- FindCNVBlock(freec_ratio = U266_CNV_40kb)
head(U266_CNV_40kb_block )
dim(U266_CNV_40kb_block )
# 1.2 find the gain or loss cnv regions
tmp_dim <- dim(U266_CNV_40kb_block)[1] #
tmp_cnv <- U266_CNV_40kb_block$score
length(tmp_cnv)
cnv_gain_loss_state <- vector() # used for store the state of gain or loss
for (i in 1:(length(tmp_cnv)-2) ) {
if ( (U266_CNV_40kb_block$chrom[i+1] == U266_CNV_40kb_block$chrom[i]) & (U266_CNV_40kb_block$chrom[i+1] == U266_CNV_40kb_block$chrom[i+2]) ) { # the same chrom
if ( (tmp_cnv[i+1] < tmp_cnv[i]) & (tmp_cnv[i+1] < tmp_cnv[i+2]) ) { # loss fragment
cnv_gain_loss_state[i] <- "Loss"
} else if ( (tmp_cnv[i+1] > tmp_cnv[i]) & (tmp_cnv[i+1] > tmp_cnv[i+2]) ) { # gain fragment
cnv_gain_loss_state[i] <- "Gain"
} else {
cnv_gain_loss_state[i] <- "Others"
}
} else {
cnv_gain_loss_state[i] <- "Others" # start or end of a chromo
}
}
length(cnv_gain_loss_state) # [1] 428
cnv_gain_loss_state <- c("Others", cnv_gain_loss_state, "Others")
U266_CNV_40kb_block <- cbind(U266_CNV_40kb_block, cnv_gain_loss_state = cnv_gain_loss_state)
class(U266_CNV_40kb_block)
dim(U266_CNV_40kb_block)
U266_CNV_40kb_block$chrom[U266_CNV_40kb_block$chrom=="X"] <- 23
U266_CNV_40kb_block$chrom[U266_CNV_40kb_block$chrom=="Y"] <- 24
sum(U266_CNV_40kb_block$chromEnd - U266_CNV_40kb_block$chromStart <= 120000)
png(filename = "Length of U266 CNV blocks.png", width = 1024, height = 1024)
par(cex = 3, lwd = 3, mgp = c(2.5,1,0))
plot(density(U266_CNV_40kb_block$chromEnd-U266_CNV_40kb_block$chromStart), log = "x",
xlab = "Length of CNV blocks (bp)", main = "Length of U266 CNV blocks")
# lines(density(RPMI8226_CNV_40kb_block$chromEnd-RPMI8226_CNV_40kb_block$chromStart), col = "red")
dev.off()
write.table(x = U266_CNV_40kb_block, file = "U266_CNV_40kb_block.bed", quote = F, sep = "\t", row.names = F, col.names = F)
# 1.3 For each TAD, calculate region starts and ends and CNVs of this TAD
## use 1...23 as chromosome names
tmp_U266_CNV_40kb <- U266_CNV_40kb
tmp_U266_CNV_40kb$Chromosome[U266_CNV_40kb$Chromosome == "X"] <- 23
tmp_U266_CNV_40kb$Chromosome[U266_CNV_40kb$Chromosome == "Y"] <- 24
# read insulation score data
U266_insulation_score <- list()
U266_insulation_score_tad <- list()
for ( i in 1:23 ) {
tmp_chr <- paste0("chr", i)
U266_insulation_score[[i]] <- read.table(paste( "/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release12.2/resolution_40k/cis/TAD_boundary/",
tmp_chr, "_", tmp_chr, "_40k_normalmatrix.txt.is1000001.ids240001.insulation", sep = ""), header = T)
U266_insulation_score_tad[[i]] <- read.table(paste( "/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release12.2/resolution_40k/cis/TAD_boundary/",
tmp_chr, "_", tmp_chr, "_40k_normalmatrix.txt.is1000001.ids240001.insulation.boundaries", sep = ""), header = T)
}
U266_TAD_filter_bed <- vector()
U266_TAD_bed <- vector()
for ( i in 1:23 ) {
tmp_chr <- paste0("chr", i)
tmp_u266_insulation_score <- U266_insulation_score[[i]]
tmp_u266_insulation_score_tad <- U266_insulation_score_tad[[i]]
# construct TAD file with bed format
tmp_U266_TAD_bed <- cbind(chr = i,
TAD_start = tmp_u266_insulation_score_tad$end[-length(tmp_u266_insulation_score_tad$end)] - 1,
TAD_end = tmp_u266_insulation_score_tad$end[-1] - 40001)
dim(tmp_U266_TAD_bed) # Check numbers
tmp_U266_TAD_bed <- cbind(tmp_U266_TAD_bed, apply(X = tmp_U266_TAD_bed, MARGIN = 1, FUN = GetCNVofRegions, cnv_files = tmp_U266_CNV_40kb))
U266_TAD_bed <- rbind(U266_TAD_bed, tmp_U266_TAD_bed)
# keep TAD which length over 160 kb
tmp_U266_TAD_filter_bed <- tmp_U266_TAD_bed[ as.numeric(tmp_U266_TAD_bed[, 3]) - as.numeric(tmp_U266_TAD_bed[, 2]) >= 160000, ]
dim(tmp_U266_TAD_filter_bed) # Check numbers
# calculate CNV at TAD level
head(tmp_U266_TAD_filter_bed )
U266_TAD_filter_bed <- rbind(U266_TAD_filter_bed, tmp_U266_TAD_filter_bed)
}
# check data
dim( U266_TAD_bed )
dim( U266_TAD_filter_bed )
png(filename = "U266 Length of raw TADs.png", width = 1024, height = 1024)
par(cex = 3, lwd = 3, mgp = c(2.5,1,0))
plot(table(as.numeric(U266_TAD_bed[, 3])-as.numeric(U266_TAD_bed[, 2])), xlim = c(0, 3000000), type = "l",
xlab = "Length of \"raw\" TADs (bp)", main = "U266, distribution of TADs' length", ylab = "Counts")
dev.off()
png(filename = "U266 Length of filtered TADs.png", width = 1024, height = 1024)
par(cex = 3, lwd = 3, mgp = c(2.5,1,0))
plot(table(as.numeric(U266_TAD_filter_bed[, 3])-as.numeric(U266_TAD_filter_bed[, 2])), xlim = c(0, 3000000), type = "l",
xlab = "Length of \"filtered\" TADs (bp)", main = "U266, distribution of TADs' length", ylab = "Counts")
dev.off()
write.table( x = U266_TAD_bed, file = "U266_TAD_raw.bed", quote = F, sep = "\t", col.names = F, row.names = F )
write.table( x = U266_TAD_filter_bed, file = "U266_TAD_filter.bed", quote = F, sep = "\t", col.names = F, row.names = F )
## found local intra-chromosome interactions in each TAD
# read Hi-C data of U266, 40kb
U266_hic_ice_matrix <- list()
U266_hic_raw_matrix <- list()
for (i in 1:23) {
print(paste0("Read raw U266 Hi-C matrix of chr ", i) )
U266_hic_ice_matrix[[i]] <- list()
U266_hic_ice_matrix[[i]][[i]] <- read.table(paste0( "/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release12.2/resolution_40k/cis/ice_normalization/chr",
i, "_40k_normalized_matrix.txt") )
print(paste0("Read raw U266 Hi-C matrix of chr ", i) )
U266_hic_raw_matrix[[i]] <- list()
U266_hic_raw_matrix[[i]][[i]] <- read.table(paste0( "/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release12.2/resolution_40k/cis/raw/chr",
i, "_40k_rawmatrix.txt") )
}
# calculate TAD-TAD level interaction, use median interaction as represent
U266_TAD_filter_HiC_matrix_raw <- list()
U266_TAD_filter_HiC_matrix_ice <- list()
for (i in 1:23) {
tmp_U266_TAD_filter_bed <- U266_TAD_filter_bed[U266_TAD_filter_bed[, 1] == i, ]
U266_TAD_filter_HiC_matrix_raw[[i]] <- matrix(0, nrow = dim(tmp_U266_TAD_filter_bed)[1], ncol = dim(tmp_U266_TAD_filter_bed)[1])
U266_TAD_filter_HiC_matrix_ice[[i]] <- matrix(0, nrow = dim(tmp_U266_TAD_filter_bed)[1], ncol = dim(tmp_U266_TAD_filter_bed)[1])
tmp_u266_hic_raw <- U266_hic_raw_matrix[[i]][[i]]
tmp_u266_hic_ice <- U266_hic_ice_matrix[[i]][[i]]
diag(tmp_u266_hic_raw) <- diag(tmp_u266_hic_raw)/2
diag(tmp_u266_hic_ice) <- diag(tmp_u266_hic_ice)/2
print("Calculate TAD-TAD interaction")
for (j in 1:dim(tmp_U266_TAD_filter_bed)[1] ) {
for (k in i:dim(tmp_U266_TAD_filter_bed)[1] ) {
U266_TAD_filter_HiC_matrix_raw[[i]][j, k] <- median( as.vector(as.numeric( FindLocalHicMatrix(region1 = tmp_U266_TAD_filter_bed[j, ],
region2 = tmp_U266_TAD_filter_bed[k, ],
matrix = tmp_u266_hic_raw,
resolution = 40000) ) ),
na.rm = T)
U266_TAD_filter_HiC_matrix_ice[[i]][j, k] <- median( as.vector(as.numeric( FindLocalHicMatrix(region1 = tmp_U266_TAD_filter_bed[j, ],
region2 = tmp_U266_TAD_filter_bed[k, ],
matrix = tmp_u266_hic_ice,
resolution = 40000) ) ),
na.rm = T)
}
}
}
U266_local_intra_hic_matrix_raw <-
# 1.4 U266, Read up- and down-stream TAD regions and calculate its interactions
# 1.4.1 Read files of the upstream or downstream closest TADs to the CNVs
U266_CNV_40kb_upTAD <- read.table("/lustre/user/liclab/wupz/dosageEffect/201703_nc_revise/FISH_TAD_CNV_interactions/U266_CNV_nearestUpstream_TAD.bed",
stringsAsFactors = F)
U266_CNV_40kb_downTAD <- read.table("/lustre/user/liclab/wupz/dosageEffect/201703_nc_revise/FISH_TAD_CNV_interactions/U266_CNV_nearestDownstream_TAD.bed",
stringsAsFactors = F)
U266_CNV_40kb_nearest_TAD <- cbind(U266_CNV_40kb_upTAD, U266_CNV_40kb_downTAD[, 8:12])
head(U266_CNV_40kb_nearest_TAD)
dim(U266_CNV_40kb_nearest_TAD)
# 1.4.2 calculate the upstreamTAD-downstreamTAD interactions
U266_CNV_40kb_nearest_TAD_interactions <- numeric(length = dim(U266_CNV_40kb_nearest_TAD)[1])
tmp_chr <- "chr1"
tmp_u266_hic <- read.table( paste( "/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release12.2/resolution_40k/cis/ice_normalization/",
tmp_chr, "_40k_normalized_matrix.txt", sep = "") )
for( i in 1:dim(U266_CNV_40kb_nearest_TAD)[1] ) {
# change hi-c data according to chromosomes
if ( tmp_chr != paste0("chr", U266_CNV_40kb_upTAD[i, 1] ) ) {
tmp_chr <- paste0("chr", U266_CNV_40kb_upTAD[i, 1] )
tmp_u266_hic <- read.table( paste( "/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release12.2/resolution_40k/cis/ice_normalization/",
tmp_chr, "_40k_normalized_matrix.txt", sep = "") )
}
if( U266_CNV_40kb_upTAD[i, 8]!= "." & U266_CNV_40kb_downTAD[i, 8] != "." ) { # not include the border
U266_CNV_40kb_nearest_TAD_interactions[i] <- median( as.vector( as.numeric( FindLocalHicMatrix(region1 = U266_CNV_40kb_upTAD[i, 8:10],
region2 = U266_CNV_40kb_downTAD[i, 8:10],
matrix = tmp_u266_hic, resolution = 40000) ) ) )
}
}
## 1.4.3 Generate two random TADs of the same CNV block and find its interactions
### 1.4.3.1 choose large CNV blocks
tmp_index <- U266_CNV_40kb_block$chromEnd - U266_CNV_40kb_block$chromStart >= 5000000
sum( tmp_index ) # [1] 121
head( U266_CNV_40kb_block[tmp_index,] )
### 1.4.3.2 select two TADs from the chose CNV blocks
for( tmp_chr in U266_CNV_40kb_block[tmp_index, 1] ) {
}
## 1.4.3 color calculation and distance calculation
tmp_col <- ifelse(U266_CNV_40kb_upTAD[, 7] == "Gain", "red", ifelse(U266_CNV_40kb_upTAD[, 7] == "Loss", "blue", "green"))
tmp_up_distance <- abs(U266_CNV_40kb_upTAD[, 12])
tmp_down_distance <- abs(U266_CNV_40kb_downTAD[, 12]) # tmp_distance < 260000 & tmp_distance_2 < 260000 &
tmp_index <- U266_CNV_40kb_downTAD[, 8] != "." & U266_CNV_40kb_upTAD[, 8] != "."
tmp_distance_3 <- apply(U266_CNV_40kb_downTAD[tmp_index, c(9,10)], 1, mean) - apply(U266_CNV_40kb_upTAD[tmp_index, c(9,10)], 1, mean)
## 1.4.4 Statistic figures
### Interactions vs. distance
png(filename = "U266_Interactions_upTAD_downTAD_of_CNV_vs_distance.png", width = 1024, height = 1024)
par(cex = 2.5, mgp = c(1, 0.5, 0))
plot(U266_CNV_40kb_nearest_TAD_interactions[tmp_index] ~ tmp_distance_3, col = tmp_col[tmp_index])
dev.off()
### Interactions (all) vs. cnv state
png(filename = "U266_Interactions_upTAD_downTAD_of_CNV_vs_cnvState.png", width = 1024, height = 1024)
par(cex = 2.5, mgp = c(1, 0.5, 0))
boxplot( U266_CNV_40kb_nearest_TAD_interactions[tmp_index] ~ U266_CNV_40kb_upTAD[tmp_index, 7],
col = c("red", "blue", "green"))
dev.off()
### Interactions (non-zero) vs. cnv state
png(filename = "U266_nonZero_Interactions_upTAD_downTAD_of_CNV_vs_cnvState.png", width = 1024, height = 1024)
par(cex = 2.5, mgp = c(1, 0.5, 0))
boxplot( U266_CNV_40kb_nearest_TAD_interactions[tmp_index][U266_CNV_40kb_nearest_TAD_interactions[tmp_index]!=0] ~ U266_CNV_40kb_upTAD[tmp_index, 7][U266_CNV_40kb_nearest_TAD_interactions[tmp_index]!=0],
col = c("red", "blue", "green"))
dev.off()
tmp_interactions <- U266_CNV_40kb_nearest_TAD_interactions[tmp_index][U266_CNV_40kb_nearest_TAD_interactions[tmp_index]!=0]
tmp_cnv_state <- U266_CNV_40kb_upTAD[tmp_index, 7][U266_CNV_40kb_nearest_TAD_interactions[tmp_index]!=0]
t.test(tmp_interactions[tmp_cnv_state=="Loss"], tmp_interactions[tmp_cnv_state=="Others"])
t.test(tmp_interactions[tmp_cnv_state=="Gain"], tmp_interactions[tmp_cnv_state=="Others"])
### Distance vs. cnv state
png(filename = "U266_nonZero_Interactions_upTAD_downTAD_of_CNV_vs_cnvState.png", width = 1024, height = 1024)
par(cex = 2.5, mgp = c(1, 0.5, 0))
boxplot(tmp_distance_3 ~ U266_CNV_40kb_upTAD[tmp_index, 7], col = c("red", "blue", "green"))
dev.off()