https://github.com/ChengLiLab/myeloma
Raw File
Tip revision: 634d6aabda1b3c0bc7ddfe145dfc34b5018b6f63 authored by ChengLiLab on 17 November 2017, 12:16:24 UTC
Add files via upload
Tip revision: 634d6aa
Distance_distribution_of_CNV_Block_boundary_and_TAD_boudary.R
# Distance distribution of CNV Block boundary and TAD boudary
# 20160113
# wupz

# 1.1 read data of insulation score
RPMI8226_is_40kb <- list()
for ( i in 1:23 ) {
  RPMI8226_is_40kb[[i]] <- read.table( paste('/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release10/resolution_40k/cis/TAD_boundary/chr',
                                             i, '_chr', i, '_40k_normalmatrix.txt.is1000001.ids240001.insulation', sep = ''), 
                                       header = T, 
                                       stringsAsFactors = F)
}


# 1.2 read data of TAD boundary
RPMI8226_is_40kb_TAD <- list()
for ( i in 1:23 ) {
  RPMI8226_is_40kb_TAD[[i]] <- read.table( paste( '/lustre/user/liclab/lirf/Project/hic/data.2015.6.24/release10/resolution_40k/cis/TAD_boundary/chr', 
                                                  i, '_chr', i, '_40k_normalmatrix.txt.is1000001.ids240001.insulation.boundaries', sep = '' ), 
                                           header = T, 
                                           stringsAsFactors = F) 
}


# 1.3 read data of cnv
RPMI8226_cnv_40kb <- read.table( '/lustre/user/liclab/wupz/DNA_seq/CNV_final/resolution_40kb/8226-merged.sorted.bam.dedup.bam_ratio.txt', 
                                 header = T, 
                                 stringsAsFactors = F)
# Change chromose X Y to 23, 24
RPMI8226_cnv_40kb$Chromosome[ RPMI8226_cnv_40kb$Chromosome == 'X'] <- 23
RPMI8226_cnv_40kb$Chromosome[ RPMI8226_cnv_40kb$Chromosome == 'Y'] <- 24

# 2 Analysis of the overlaping between CNV block boundary and TAD boundary
# 2.1 call CNV block at 40kb resolution
RPMI8226_cnv_block_40kb <- FindCNVBlock( freec_ratio = RPMI8226_cnv_40kb)

# 2.2 get the distance of each CNV block boudnary
drawDistance <- function(cnv_sites, subject_sites_list, all_sites, ...) {
  CNV_block_nearest_distance <- numeric()
  tamdom_nearest_distance <- numeric()
  for ( i in 1:23) {
    tmp_nearest_distance <- nearestTADDistance( query_Sites = cnv_sites[cnv_sites[,1] == i, ], 
                                                subject_Sites =  subject_sites_list[[i]])
    CNV_block_nearest_distance <- c(CNV_block_nearest_distance, tmp_nearest_distance)
    tmp_random_number <- dim(cnv_sites[cnv_sites[,1] == i, ])[1]  # for each chromosome random select some sites with the same number of TADs
    tmp_random_site <- all_sites[[i]][ sample( x = 1:dim(all_sites[[i]])[1], size = tmp_random_number), ]
    tmp_nearest_distance <- nearestTADDistance( query_Sites = tmp_random_site, subject_Sites = subject_sites_list[[i]])
    tamdom_nearest_distance <- c(tamdom_nearest_distance, tmp_nearest_distance)
  }
  density_tamdom <- density(tamdom_nearest_distance )
  density_query <- density(CNV_block_nearest_distance )
  ylim = c(0, max(density_tamdom$y, density_query$y)) 
  plot(density(tamdom_nearest_distance ), col = 'black', ...)
  lines(density(CNV_block_nearest_distance), col = 'red' )
}

png(filename = 'Distance Distribution of sites from TAD boudary RPMI8226.png', height = 1024, width = 1024)
par(cex = 1.5, mgp = c(1, 0.2, 0), lwd = 3, tcl =-0.1)
drawDistance( RPMI8226_cnv_block_40kb, subject_sites_list = RPMI8226_is_40kb_TAD, RPMI8226_is_40kb, xlim = c(0, 500000),
              main = 'Distance Distribution of sites from TAD boudary, RPMI8226')
legend( 'topright', legend = c('CNV block boudary', 'Random'), col = c('Red', 'black'), lty = 1)
dev.off()

png(filename = 'Distance Distribution of sites from TAD boudary U266', height = 1024, width = 1024)
par(cex = 1.5, mgp = c(1, 0.2, 0), lwd = 3, tcl =-0.1)
drawDistance( U266_cnv_block_40kb, subject_sites_list = U266_is_40kb_TAD, U266_is_40kb, xlim = c(0, 500000),
              main = 'Distance Distribution of sites from TAD boudary, U266')
legend( 'topright', legend = c('CNV block boudary', 'Random'), col = c('Red', 'black'), lty = 1)
dev.off()

png(filename = 'Distance Distribution of sites from TAD boudary GM12878', height = 1024, width = 1024)
par(cex = 1.5, mgp = c(1, 0.2, 0), lwd = 3, tcl =-0.1)
drawDistance( U266_cnv_block_40kb, subject_sites_list = GM12878_is_10kb_TAD, GM12878_is_10kb, xlim = c(0, 500000),
              main = 'Distance Distribution of sites from TAD boudary, GM12878')
legend( 'topright', legend = c('CNV block boudary', 'Random'), col = c('Red', 'black'), lty = 1)
dev.off()



CNV_block_nearest_distance <- numeric()
tamdom_nearest_distance <- numeric()
for ( i in 1:23) {
  tmp_nearest_distance <- nearestTADDistance( query_Sites = U266_cnv_block_40kb[U266_cnv_block_40kb[,1] == i, ], 
                                              subject_Sites =  GM12878_is_10kb_TAD[[i]])
  CNV_block_nearest_distance <- c(CNV_block_nearest_distance, tmp_nearest_distance)
  tmp_random_number <- dim(U266_cnv_block_40kb[U266_cnv_block_40kb[,1] == i, ])[1]  # for each chromosome random select some sites with the same number of TADs
  tmp_random_site <- GM12878_is_10kb[[i]][ sample( x = 1:dim(U266_is_40kb[[i]])[1], size = tmp_random_number), ]
  tmp_nearest_distance <- nearestTADDistance( query_Sites = tmp_random_site, subject_Sites = GM12878_is_10kb_TAD[[i]])
  tamdom_nearest_distance <- c(tamdom_nearest_distance, tmp_nearest_distance)
}

plot(density(tamdom_nearest_distance ), xlim = c(0, 1000000))
lines(density(CNV_block_nearest_distance), col = 'red')
median(tamdom_nearest_distance)
median(CNV_block_nearest_distance)
t.test(tamdom_nearest_distance, CNV_block_nearest_distance)

nearTAD_8226_U266 <- numeric()
for ( i in 1:23 ) {
  tmp_nearest_distance <- nearestTADDistance( query_Sites = RPMI8226_is_40kb_TAD[[i]], 
                                              subject_Sites =  U266_is_40kb_TAD[[i]])
  nearTAD_8226_U266 <- c(nearTAD_8226_U266, tmp_nearest_distance)
}
png(filename = 'The Shifts of TAD Boundaries (kb).png', width = 1024, height = 1024)
par(cex = 3, mgp = c(1.5, 0.5, 0), lwd = 3, tcl =-0.1)
plot(table(nearTAD_8226_U266), xlab = 'The Shifts of TAD Boundaries ( bp)', ylab = 'RPMI-8226 v.s. U266', type = 'l',
     col = 'red', axes = FALSE, main = 'The Shifts of TAD Boundaries\n between RPMI-8226 and U266')
axis(2)
axis(1)
dev.off()
write.table( x = table(nearTAD_8226_U266), file = 'The Shifts of TAD Boundaries.txt')
getwd()
back to top