1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#(5)	EWAS-GWAS overlap
#The following R code is a function to compute the summary statistics for our EWAS-GWAS overlap analaysis based on hypergeometric tests. The parameters in the function are: ORDER is a dummy variable, aar0 is data frame of EWAS summary statistics that lists the variables as described in head(aars), other0 lists the gene P value generated by MAGENTA, and HGNC gene symbol from a test GWAS, other.name0 is the label of the test GWAS, cutoff is 2.5% in our study, n.topcpg0=1000 (the top number of CpGs) in our study, and background.hg is the background based on the genomic regions in our custom Mammalian array, as described in head(background.hg).
#
#R code
#
> head(background.hg)
  HGNC_gene CHR        bp       CGid GeneID
1      A1BG  19  58864846 cg22568540      1
2      A1CF  10  52619664 cg02811585  29974
3      A1CF  10  52573791 cg25496747  29974
4      AACS  12 125578355 cg02591564  65985
6      AAK1   2  69811375 cg05656329  22848
7      AATF  17  35336293 cg04920416  26574
> head(aars)
            CGid   Meta.Z P HGNC_gene CHR        bp GeneID
17511 cg18141557 58.21023 0    POU3F3   2 105472226   5455
8958  cg09227056 55.57959 0      EVX2   2 176940449 344191
12408 cg12879445 55.28574 0     COX8C  14  93897410 341947
12409 cg12879445 55.28574 0  KIAA1409  14  93897410  57578
15207 cg15682828 55.10546 0      ZIC2  13 100635161   7546
5426  cg05551621 54.09696 0     FOXD3   1  63789233  27022
F_enrich<- function(ORDER,aar0,other0,other.name0,cutoff,n.topcpg0,background.hg=background.hg19){
  aar0=subset(aar0,!is.na(aar0$Meta.Z))
  other0=subset(other0,!is.na(other0$p.other))
  other0.gene=other0
  other0=merge(by='HGNC_gene',other0,subset(background.hg,select=c(HGNC_gene,CHR,bp,CGid,HGNC_gene.amin)))
  
  other0$genomic_region=paste(as.character(other0$HGNC_gene),as.character(other0$CGid),sep='-')
  #length(unique(other0$GeneID))
  #
  #
  background=other0
  #very important
  ntot=nrow(background)
  #
  aar0=aar0[order(-abs(aar0$Meta.Z)),]
  aar0$genomic_region=paste(as.character(aar0$HGNC_gene),as.character(aar0$CGid),sep='-')
  aar.top=vector(length=2,mode='list')
  aar.top[[1]]=subset(aar0,Meta.Z>0)
  aar.top[[2]]=subset(aar0,Meta.Z<0)
  aar.top[[1]]=aar.top[[1]][1:n.topcpg0,]
  aar.top[[2]]=aar.top[[2]][1:n.topcpg0,]
  names(aar.top)=c('pos','neg')
  #
  other0=other0[order(other0$p.other),]
  other0.gene=other0.gene[order(other0.gene$p.other),]
  #
  output={}
 
    n2=round(cutoff*dim(other0.gene)[1])#top cutoff genes
    other.gene=other0.gene[1:n2,]
    #
    other=subset(other0,HGNC_gene%in%other.gene$HGNC_gene)
    #
    for(t in 1:2){
      index=is.element(aar.top[[t]]$genomic_region,other$genomic_region)
      x=sum(as.numeric(index))
      m=dim(other)[1]# number white balls/other gwas
      n=ntot-m
      k=dim(aar.top[[t]])[1] # numer of draw
      #
      p.enrich=    phyper(x-1,m=m,n=n,k=k,lower.tail=F)
      logp.enrich  =phyper(x-1,m=m,n=n,k=k,lower.tail=F,log.p=T)
      log10p.enrich= -(logp.enrich/log(10))
      a1=10^(ceiling(log10p.enrich)-log10p.enrich)
      Hyper.P.scientic=ifelse(p.enrich>=0.1,
                              round(p.enrich,digits=2),
                              paste0(round(a1,1),'e-',ceiling(log10p.enrich)))
      #
      #
      OverlapGenes=paste(unique(aar.top[[t]]$HGNC_gene[index]),collapse =';')
      OverlapGenes.CpG=paste(aar.top[[t]]$CGid[index],collapse =';')
      #
      n.overlapgene=length(unique(aar.top[[t]]$HGNC_gene[index]))
      n.annotation=length(unique((other$HGNC_gene)))
      #
      output0=data.frame(index=ORDER,class=names(aar.top)[t],cutoff=cutoff,GWAS=other.name0,
                         n.overlapgene=n.overlapgene,
                         n.annotation=n.annotation,
                         n.overlapGR=x,
                         n.annotationGR=m,
                         Overlap=paste(x,m,sep="/"),
                         P=p.enrich,log10P=log10p.enrich,P.scientic=Hyper.P.scientic,
                         OverlapGenes=OverlapGenes,OverlapGenes.CpG=OverlapGenes.CpG)
      output=rbind(output,output0)
    }
    
  
  return(output)
}