In [1]:
library(Rcpp)
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
In [ ]:
library(tadaA)
setwd("/storage11_7T/fuy/TADA-A/annotation")
system.time(compact_data <- TADA_A_read_info_by_chunks(
mut_files = "/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/6788snv.affected.cd.auto.no_pli_rm.allele.bed",
window_file = "/storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/17478gene_hg19.87.auto.exon.wd.bed" ,
mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/sf_1792_d_2058.1_cd_uniform_scaling_factors.txt"),
sample_sizes = 6430,
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt",
nonAS_noncoding_annotations = c("/storage11_7T/fuy/TADA-A/annotation/ccr/ccrs.allchrom.gt90.bed") ,
AS_noncoding_annotations = lst,
report_proportion = 1, #1000/17484,
chunk = 20,
node_n = 2,
mutrate_ref_files = c("/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.A.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.C.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.G.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.T.bw")
))
saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/cell_WES/DNM/",Sys.Date(),"_copy_selected_6788SNV_sf_uni_prior_compact.rds"))
In [17]:
9774/3600
In [2]:
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
library(data.table)
selected_annotations=c(2,3,6,7,8)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]
logP_Zg0 = sumall0(data)
fr_pi <- function(par){
all_rr = par[1:length(selected_annotations)]
gene_prior$prior <- rep(par[length(par)],nrow(gene_prior))
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}
gm.lst = c()
ll.lst = c()
In [50]:
tmm = proc.time()
for(i in seq(0.13,0.2,0.01)){
a = c(rep(.1,length(selected_annotations)),i)
gama = optim(a, fr_pi ,method="BFGS",control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
gm.lst = rbind(gm.lst,gama)
ll = fr_pi(gama)
ll.lst = c(ll.lst,ll)
}
proc.time() - tmm
gm.lst
ll.lst
In [51]:
gm.lst
ll.lst
In [52]:
plot(ll.lst)
In [54]:
saveRDS(gm.lst,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/repo/BFGS_gm.lst.rds")
saveRDS(ll.lst,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/repo/BFGS_001-006-0005_ll.lst.rds")
In [53]:
which.max(ll.lst)
var(ll.lst)
max(ll.lst)
min(ll.lst)
In [26]:
plot(ll.lst)
不估pi¶
In [57]:
source("/storage11_7T/fuy/TADA-A/tadaA/R/fr.R")
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
library(data.table)
selected_annotations=c(2,3,6,7,8)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]
logP_Zg0 = sumall0(data)
tm = proc.time()
df = optim(rep(0.1, length(selected_annotations)), fr ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
proc.time() - tm
In [58]:
tm = proc.time()
bfgs = optim(rep(0.1, length(selected_annotations)), method="BFGS",fr ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
proc.time() - tm
In [60]:
df
bfgs
In [20]:
RR$rr_report
In [61]:
gama
In [15]:
source("/storage11_7T/fuy/TADA-A/tadaA/R/fr.R")
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
library(data.table)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]
logP_Zg0 = sumall0(data)
rr.lst = c()
for(selected_annotations in c(2,3,6,7,8)){
tm = proc.time()
df = optim(rep(0.1, 1), fr, method = "Brent", lower = -1, upper = 10,
control=list("fnscale"=-1, "maxit" = optimization_iteration), hessian = TRUE)$par
rr.lst = c(rr.lst,df)
proc.time() - tm
}
rr.lst
In [ ]:
library(tadaA)
In [ ]:
RR = TADA_A_RR_estimate(data = data, selected_annotations = c(2,3,6,7,8) ,
gene_prior_file = gene_prior_file ,
optimization_iteration = optimization_iteration)
RR
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [3]:
gm.lst = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/repo/BFGS_gm.lst.rds")
In [10]:
gama = gm.lst[1,]
gama
In [11]:
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
gene_prior = fread(gene_prior_file)
gene_prior$prior = rep(gama[6],nrow(gene_prior))
post_fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
post.dt = data.table(genename =u,prior = post )
post.dt
}
In [12]:
g = post_fr(gama[-6])
g$q0 = 1- g$prior
g2 = g[order(g$q0),]
FDR = c()
for (i in 1:nrow(g2)) FDR[i] <- sum(g2$q0[1:i]) / i
g2$FDR = FDR
nrow(g2[g2$FDR<0.1,])
options(scipen=200)
g3 = g2[g2$FDR<0.1,]
In [13]:
gg = fread("/storage11_7T/fuy/TADA-A/cell_WES/DNM/102gene_pval.txt")
g3[g3$genename %in% gg$gene,]
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [12]:
lst = list(
# c("/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altA.bed", ###2
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altA.bed",###3
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altA.bed", ###4
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altA.bed", ###5
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altA.bed",###6
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.A.bed", ###7
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.A.bed", ###8
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.A.bed", ###9
# "/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.A.bed", ###10
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.A.bed", ##### 与MVP大部分重合,MVP rr高
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.A.bed", ###11
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.A.bed", ###12
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.A.bed", ###13
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_A.bed", ##### 与ptv大部分重合
# "/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altA.bed", ###14
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altT.bed"),
#
c("/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altA.bed", ###15
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altT.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.A.bed", ###16
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.A.bed", ###17
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.A.bed", ###18
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.T.bed")
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altT.bed"),
)
In [ ]:
In [ ]: