2021-04-07-annota-overlp-heatmap
  • DeepSea brain
In [4]:
cd /storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt
for i in $(seq 8 12) ;
do
   for j in A C G T ;
   do
   bedtools intersect -a $i.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.$i.alt$j.bed ;
   done
done

cd wd
realpath *bed >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • spliceai
In [7]:
cd /storage11_7T/fuy/TADA-A/annotation/spliceai/alt

for i in AG DG ;
do
   for j in A C G T ;
   do
   bedtools intersect -a DS_$i.allele.bed.05.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.DS_$i.allele.bed.05.alt$j.bed ;
   done
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • spidex
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/spidex

for j in A C G T ;
do
bedtools intersect -a spidex_public_noncommercial_v1_0.tab_alt_$j\_lower10pct.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.spidex_alt_$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • CLIPdb
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/CLIPdb

for j in A C G T ;
do
bedtools intersect -a human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.CLIPdb.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • RADAR_RBP
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/RADAR_RBP

for j in A C G T ;
do
bedtools intersect -a $j.top005.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.RADAR_RBP.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • RBP-VarDB
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/RBP-VarDB
for j in A C G T ;
do
bedtools intersect -a RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.alt$j.bed.merge.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.RBP-VarDB.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • ribosnitch
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/ribosnitch
for j in A C G T ;
do
bedtools intersect -a hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.ribosnitch.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • primateAI
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/primateAI
for j in A C G T ;
do
bedtools intersect -a chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.primateAI.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • MVP
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/MVP
for j in A C G T ;
do
bedtools intersect -a chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.MVP.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • MPC
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/MPC_score/v1
for j in A C G T ;
do
bedtools intersect -a mpc2.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.mpc2.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • vep ptv
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv
for j in A C G T ;
do
bedtools intersect -a ptv.0-05.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.ptv.0-05.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • ccr
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/ccr
for j in A C G T ;
do
bedtools intersect -a ccrs.allchrom.gt90.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.ccr.$j.bed &
done



for i in A C G T ; 
do 
bedtools intersect -a uq.wd.ccr.$i.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/uq.87.auto.protein_cd.exon.st.mg.$i.bedgraph > per_base_uq.wd.ccr.$i.bed ; 
done

cd wd
realpath per* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • PPH2
In [85]:
cd /storage11_7T/fuy/TADA-A/annotation/dbNSFP
for i in D.HDIV D.HVAR ;
do
awk '{OFS="\t";if($5 == "A"){print $1,$2,$3}}' $i.bed > PPH2_$i.altA.bed
awk '{OFS="\t";if($5 == "C"){print $1,$2,$3}}' $i.bed > PPH2_$i.altC.bed
awk '{OFS="\t";if($5 == "G"){print $1,$2,$3}}' $i.bed > PPH2_$i.altG.bed
awk '{OFS="\t";if($5 == "T"){print $1,$2,$3}}' $i.bed > PPH2_$i.altT.bed ; 
done


for j in A C G T ;
do
bedtools intersect -a PPH2_D.HVAR.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.PPH2_D.HVAR.$j.bed &
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
  • PPI
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI
bedtools intersect -a hg19.mapped.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/wd.hg19.mapped.bed


for i in A C G T ; 
do 
bedtools intersect -a wd/wd.hg19.mapped.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/uq.87.auto.protein_cd.exon.st.mg.$i.bedgraph > wd/per_base_uq.wd.PPI.$i.bed ; 
done


cd /storage11_7T/fuy/TADA-A/annotation/PPI/wd
realpath per* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
In [11]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI

for i in A C G T ; 
do 
bedtools intersect -a MPC2.hg19.interactome.alt$i.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/wd.MPC-PPI.alt$i.bed
done


cd /storage11_7T/fuy/TADA-A/annotation/PPI/wd
realpath wd* >> /storage11_7T/fuy/TADA-A/annotation/overlp/supple.lst
  • PPI-PPH2
In [66]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI
awk '{OFS="\t";if($5 == "A"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altA.bed
awk '{OFS="\t";if($5 == "C"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altC.bed
awk '{OFS="\t";if($5 == "G"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altG.bed
awk '{OFS="\t";if($5 == "T"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altT.bed


for i in A C G T;
do
bedtools intersect -a PPH2/PPH2_D.HDIV.alt$i.bed -b hg19.mapped.bed | sort | uniq > PPI.PPH2_D.HDIV.alt$i.bed ;
bedtools intersect -a PPH2/PPH2_D.HVAR.alt$i.bed -b hg19.mapped.bed | sort | uniq > PPI.PPH2_D.HVAR.alt$i.bed ;
done
In [95]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI
for j in A C G T ;
do
bedtools intersect -a PPI.PPH2_D.HDIV.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.PPI.PPH2_D.HDIV.$j.bed 
bedtools intersect -a PPI.PPH2_D.HVAR.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.PPI.PPH2_D.HVAR.$j.bed ;
done

cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst

count overlp

In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/overlp
ctj=0
for j in `cat C.lst`;
do
    ctj=`expr $ctj + 1`
    cti=0
    for i in `cat C.lst` ; 
    do 
    cti=`expr $cti + 1`
    echo  -e `wc -l $j`'\t'`wc -l $i`'\toverlp\t'`bedtools intersect -a $j -b $i | sort | uniq |wc -l ` >> all.C.lst ; 
    done
done
In [23]:
setwd("/storage11_7T/fuy/TADA-A/annotation/overlp")
library(tidyr)
library(data.table)
library(stringr)
In [37]:
A.all = fread("all.A.lst",header=F)
A.all = unique(A.all)
A.all2 = separate(A.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
A.all3 = separate(A.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(A.all3)[c(5,6)] = c("ovlp","ct3")
A.all3$ct2 = as.numeric(A.all3$ct2)
A.all3$ct1 = as.numeric(A.all3$ct1)

C.all = fread("all.C.lst",header=F)
C.all = unique(C.all)
C.all2 = separate(C.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
C.all3 = separate(C.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(C.all3)[c(5,6)] = c("ovlp","ct3")
C.all3$ct2 = as.numeric(C.all3$ct2)
C.all3$ct1 = as.numeric(C.all3$ct1)

G.all = fread("all.G.lst",header=F)
G.all = unique(G.all)
G.all2 = separate(G.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
G.all3 = separate(G.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(G.all3)[c(5,6)] = c("ovlp","ct3")
G.all3$ct2 = as.numeric(G.all3$ct2)
G.all3$ct1 = as.numeric(G.all3$ct1)

T.all = fread("all.T.lst",header=F)
T.all = unique(T.all)
T.all2 = separate(T.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
T.all3 = separate(T.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(T.all3)[c(5,6)] = c("ovlp","ct3")
T.all3$ct2 = as.numeric(T.all3$ct2)
T.all3$ct1 = as.numeric(T.all3$ct1)

all = fread("all.A.lst",header=F)
all = unique(all)
all2 = separate(all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
all3 = separate(all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(all3)[c(5,6)] = c("ovlp","ct3")
all3$ct2 = as.numeric(all3$ct2)
all3$ct1 = as.numeric(all3$ct1)

a1 = data.frame(A.all3$ct1,C.all3$ct1,G.all3$ct1,T.all3$ct1)
a2 = data.frame(A.all3$ct2,C.all3$ct2,G.all3$ct2,T.all3$ct2)
a3 = data.frame(A.all3$ct3,C.all3$ct3,G.all3$ct3,T.all3$ct3)

all3$all_ct1 = apply(a1,1,sum)
all3$all_ct2 = apply(a2,1,sum)
all3$all_ct3 = apply(a3,1,sum)
In [38]:
all3$r1 = round(all3$all_ct3 / all3$all_ct1,4)
all3$r2 = round(all3$all_ct3 / all3$all_ct2,4)
all3$idx = 1:nrow(all3)
all3$annota1 = gsub(".[a-zA-Z]*.bed","",all3$annota1)
all3$annota2 = gsub(".[a-zA-Z]*.bed","",all3$annota2)
head(all3)
A data.table: 6 × 12
ct1annota1ct2annota2ovlpct3all_ct1all_ct2all_ct3r1r2idx
<dbl><chr><dbl><chr><chr><int><dbl><dbl><int><dbl><dbl><int>
276633DeepSEA/alt/wd/uq.wd.10276633DeepSEA/alt/wd/uq.wd.10 overlp2766331044321104432110443211.00001.00001
276633DeepSEA/alt/wd/uq.wd.10276756DeepSEA/alt/wd/uq.wd.11 overlp 881310443211044021 305890.02930.02932
276633DeepSEA/alt/wd/uq.wd.10277148DeepSEA/alt/wd/uq.wd.12 overlp 715710443211043497 258580.02480.02483
276633DeepSEA/alt/wd/uq.wd.10278814DeepSEA/alt/wd/uq.wd.8 overlp 1454110443211045817 543890.05210.05204
276633DeepSEA/alt/wd/uq.wd.10281202DeepSEA/alt/wd/uq.wd.9 overlp 1491610443211046969 546100.05230.05225
276633DeepSEA/alt/wd/uq.wd.10 68085spliceai/alt/wd/uq.wd.DS_AG.05overlp 8211044321 224945 22790.00220.01016
In [39]:
saveRDS(all3,"all3.rds")
In [40]:
m = all3[all3$annota1 != all3$annota2,]

# max(m$r1)

# m[m$r1 > .4,]

# m[m$annota1 == "spidex/wd/uq.wd.spidex_alt",]
In [ ]:
m[m$annota1 == "MPC_score/v1/wd/uq.wd.mpc2",]
In [ ]:
library(reshape2)
library(ggplot2)
In [44]:
p <- ggplot(all3, aes(x=annota1,y=annota2)) 
p <- p + geom_tile(aes(fill=r1)) + theme(axis.text.x=element_text(angle=90,hjust=1, vjust=1)) +
   scale_fill_gradient2(low="blue", high="red",midpoint = 0.4,breaks=c(0,0.4,0.8,1.1) )
p