- DeepSea brain
In [4]:
cd /storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt
for i in $(seq 8 12) ;
do
for j in A C G T ;
do
bedtools intersect -a $i.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.$i.alt$j.bed ;
done
done
cd wd
realpath *bed >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- spliceai
In [7]:
cd /storage11_7T/fuy/TADA-A/annotation/spliceai/alt
for i in AG DG ;
do
for j in A C G T ;
do
bedtools intersect -a DS_$i.allele.bed.05.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.DS_$i.allele.bed.05.alt$j.bed ;
done
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- spidex
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/spidex
for j in A C G T ;
do
bedtools intersect -a spidex_public_noncommercial_v1_0.tab_alt_$j\_lower10pct.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.spidex_alt_$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- CLIPdb
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/CLIPdb
for j in A C G T ;
do
bedtools intersect -a human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.CLIPdb.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- RADAR_RBP
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/RADAR_RBP
for j in A C G T ;
do
bedtools intersect -a $j.top005.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.RADAR_RBP.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- RBP-VarDB
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/RBP-VarDB
for j in A C G T ;
do
bedtools intersect -a RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.alt$j.bed.merge.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.RBP-VarDB.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- ribosnitch
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/ribosnitch
for j in A C G T ;
do
bedtools intersect -a hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.ribosnitch.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- primateAI
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/primateAI
for j in A C G T ;
do
bedtools intersect -a chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.primateAI.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- MVP
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/MVP
for j in A C G T ;
do
bedtools intersect -a chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.MVP.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- MPC
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/MPC_score/v1
for j in A C G T ;
do
bedtools intersect -a mpc2.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.mpc2.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- vep ptv
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv
for j in A C G T ;
do
bedtools intersect -a ptv.0-05.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.ptv.0-05.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- ccr
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/ccr
for j in A C G T ;
do
bedtools intersect -a ccrs.allchrom.gt90.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.ccr.$j.bed &
done
for i in A C G T ;
do
bedtools intersect -a uq.wd.ccr.$i.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/uq.87.auto.protein_cd.exon.st.mg.$i.bedgraph > per_base_uq.wd.ccr.$i.bed ;
done
cd wd
realpath per* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- PPH2
In [85]:
cd /storage11_7T/fuy/TADA-A/annotation/dbNSFP
for i in D.HDIV D.HVAR ;
do
awk '{OFS="\t";if($5 == "A"){print $1,$2,$3}}' $i.bed > PPH2_$i.altA.bed
awk '{OFS="\t";if($5 == "C"){print $1,$2,$3}}' $i.bed > PPH2_$i.altC.bed
awk '{OFS="\t";if($5 == "G"){print $1,$2,$3}}' $i.bed > PPH2_$i.altG.bed
awk '{OFS="\t";if($5 == "T"){print $1,$2,$3}}' $i.bed > PPH2_$i.altT.bed ;
done
for j in A C G T ;
do
bedtools intersect -a PPH2_D.HVAR.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.PPH2_D.HVAR.$j.bed &
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
- PPI
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI
bedtools intersect -a hg19.mapped.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/wd.hg19.mapped.bed
for i in A C G T ;
do
bedtools intersect -a wd/wd.hg19.mapped.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/uq.87.auto.protein_cd.exon.st.mg.$i.bedgraph > wd/per_base_uq.wd.PPI.$i.bed ;
done
cd /storage11_7T/fuy/TADA-A/annotation/PPI/wd
realpath per* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
In [11]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI
for i in A C G T ;
do
bedtools intersect -a MPC2.hg19.interactome.alt$i.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/wd.MPC-PPI.alt$i.bed
done
cd /storage11_7T/fuy/TADA-A/annotation/PPI/wd
realpath wd* >> /storage11_7T/fuy/TADA-A/annotation/overlp/supple.lst
- PPI-PPH2
In [66]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI
awk '{OFS="\t";if($5 == "A"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altA.bed
awk '{OFS="\t";if($5 == "C"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altC.bed
awk '{OFS="\t";if($5 == "G"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altG.bed
awk '{OFS="\t";if($5 == "T"){print $1,$2,$3}}' PPH2_D.HVAR.bed > PPH2_D.HVAR.altT.bed
for i in A C G T;
do
bedtools intersect -a PPH2/PPH2_D.HDIV.alt$i.bed -b hg19.mapped.bed | sort | uniq > PPI.PPH2_D.HDIV.alt$i.bed ;
bedtools intersect -a PPH2/PPH2_D.HVAR.alt$i.bed -b hg19.mapped.bed | sort | uniq > PPI.PPH2_D.HVAR.alt$i.bed ;
done
In [95]:
cd /storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI
for j in A C G T ;
do
bedtools intersect -a PPI.PPH2_D.HDIV.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.PPI.PPH2_D.HDIV.$j.bed
bedtools intersect -a PPI.PPH2_D.HVAR.alt$j.bed -b /storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/nohd.17478gene_hg19.87.auto.exon.wd.bed | sort | uniq > wd/uq.wd.PPI.PPH2_D.HVAR.$j.bed ;
done
cd wd
realpath uq* >> /storage11_7T/fuy/TADA-A/annotation/overlp/lst
count overlp¶
In [ ]:
cd /storage11_7T/fuy/TADA-A/annotation/overlp
ctj=0
for j in `cat C.lst`;
do
ctj=`expr $ctj + 1`
cti=0
for i in `cat C.lst` ;
do
cti=`expr $cti + 1`
echo -e `wc -l $j`'\t'`wc -l $i`'\toverlp\t'`bedtools intersect -a $j -b $i | sort | uniq |wc -l ` >> all.C.lst ;
done
done
In [23]:
setwd("/storage11_7T/fuy/TADA-A/annotation/overlp")
library(tidyr)
library(data.table)
library(stringr)
In [37]:
A.all = fread("all.A.lst",header=F)
A.all = unique(A.all)
A.all2 = separate(A.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
A.all3 = separate(A.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(A.all3)[c(5,6)] = c("ovlp","ct3")
A.all3$ct2 = as.numeric(A.all3$ct2)
A.all3$ct1 = as.numeric(A.all3$ct1)
C.all = fread("all.C.lst",header=F)
C.all = unique(C.all)
C.all2 = separate(C.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
C.all3 = separate(C.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(C.all3)[c(5,6)] = c("ovlp","ct3")
C.all3$ct2 = as.numeric(C.all3$ct2)
C.all3$ct1 = as.numeric(C.all3$ct1)
G.all = fread("all.G.lst",header=F)
G.all = unique(G.all)
G.all2 = separate(G.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
G.all3 = separate(G.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(G.all3)[c(5,6)] = c("ovlp","ct3")
G.all3$ct2 = as.numeric(G.all3$ct2)
G.all3$ct1 = as.numeric(G.all3$ct1)
T.all = fread("all.T.lst",header=F)
T.all = unique(T.all)
T.all2 = separate(T.all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
T.all3 = separate(T.all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(T.all3)[c(5,6)] = c("ovlp","ct3")
T.all3$ct2 = as.numeric(T.all3$ct2)
T.all3$ct1 = as.numeric(T.all3$ct1)
all = fread("all.A.lst",header=F)
all = unique(all)
all2 = separate(all,V1,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct1","annota1"))
all3 = separate(all2,V2,sep="/storage11_7T/fuy/TADA-A/annotation/",into=c("ct2","annota2"))
colnames(all3)[c(5,6)] = c("ovlp","ct3")
all3$ct2 = as.numeric(all3$ct2)
all3$ct1 = as.numeric(all3$ct1)
a1 = data.frame(A.all3$ct1,C.all3$ct1,G.all3$ct1,T.all3$ct1)
a2 = data.frame(A.all3$ct2,C.all3$ct2,G.all3$ct2,T.all3$ct2)
a3 = data.frame(A.all3$ct3,C.all3$ct3,G.all3$ct3,T.all3$ct3)
all3$all_ct1 = apply(a1,1,sum)
all3$all_ct2 = apply(a2,1,sum)
all3$all_ct3 = apply(a3,1,sum)
In [38]:
all3$r1 = round(all3$all_ct3 / all3$all_ct1,4)
all3$r2 = round(all3$all_ct3 / all3$all_ct2,4)
all3$idx = 1:nrow(all3)
all3$annota1 = gsub(".[a-zA-Z]*.bed","",all3$annota1)
all3$annota2 = gsub(".[a-zA-Z]*.bed","",all3$annota2)
head(all3)
In [39]:
saveRDS(all3,"all3.rds")
In [40]:
m = all3[all3$annota1 != all3$annota2,]
# max(m$r1)
# m[m$r1 > .4,]
# m[m$annota1 == "spidex/wd/uq.wd.spidex_alt",]
In [ ]:
m[m$annota1 == "MPC_score/v1/wd/uq.wd.mpc2",]
In [ ]:
library(reshape2)
library(ggplot2)
In [44]:
p <- ggplot(all3, aes(x=annota1,y=annota2))
p <- p + geom_tile(aes(fill=r1)) + theme(axis.text.x=element_text(angle=90,hjust=1, vjust=1)) +
scale_fill_gradient2(low="blue", high="red",midpoint = 0.4,breaks=c(0,0.4,0.8,1.1) )
p