In [1]:
library(Rcpp)
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
In [ ]:
# library(tadaA)
# setwd("/storage11_7T/fuy/TADA-A/annotation")
# system.time(compact_data <- TADA_A_read_info_by_chunks(
# mut_files = "/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/6788snv.affected.cd.auto.no_pli_rm.allele.bed",
# window_file = "/storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/17478gene_hg19.87.auto.exon.wd.bed" ,
# mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/sf_1792_d_2058.1_cd_uniform_scaling_factors.txt"),
# sample_sizes = 6430,
# gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt",
# nonAS_noncoding_annotations = c("/storage11_7T/fuy/TADA-A/annotation/ccr/ccrs.allchrom.gt90.bed") ,
# AS_noncoding_annotations = lst,
# report_proportion = 1, #1000/17484,
# chunk = 20,
# node_n = 2,
# mutrate_ref_files = c("/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.A.bw",
# "/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.C.bw",
# "/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.G.bw",
# "/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.T.bw")
# ))
# saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/cell_WES/DNM/",Sys.Date(),"_copy_selected_6788SNV_sf_uni_prior_compact.rds"))
In [25]:
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
library(data.table)
# selected_annotations=c(2,3,6,7,8)
selected_annotations=seq(2,8)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]
logP_Zg0 = sumall0(data)
fr_pi <- function(par){
all_rr = par[1:length(selected_annotations)]
gene_prior$prior <- rep(par[length(par)],nrow(gene_prior))
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}
gm.lst = c()
ll.lst = c()
tmm = proc.time()
# for(i in seq(0.13,0.2,0.01)){
a = c(rep(.1,length(selected_annotations)),0.1)
gama = optim(a, fr_pi ,method="BFGS",control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
gm.lst = rbind(gm.lst,gama)
ll = fr_pi(gama)
ll.lst = c(ll.lst,ll)
# }
proc.time() - tmm
gm.lst
ll.lst
In [26]:
pre = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/results/1-8_joint_BFGS_pi0_gm.rds")
pre
In [24]:
saveRDS(gm.lst,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/results/1-8_joint_BFGS_pi0_gm.rds")
In [54]:
# saveRDS(gm.lst,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/repo/BFGS_gm.lst.rds")
# saveRDS(ll.lst,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/repo/BFGS_001-006-0005_ll.lst.rds")
不估pi¶
In [ ]:
source("/storage11_7T/fuy/TADA-A/tadaA/R/fr.R")
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
library(data.table)
selected_annotations=seq(1,8)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]
logP_Zg0 = sumall0(data)
tm = proc.time()
df = optim(rep(0.1, length(selected_annotations)), fr ,method = "BFGS", control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
proc.time() - tm
In [15]:
df
In [17]:
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
gene_prior = fread(gene_prior_file)
gene_prior$prior = rep(.06,nrow(gene_prior))
post_fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
post.dt = data.table(genename =u,prior = post )
post.dt
}
g = post_fr(df)
g$q0 = 1- g$prior
g2 = g[order(g$q0),]
FDR = c()
for (i in 1:nrow(g2)) FDR[i] <- sum(g2$q0[1:i]) / i
g2$FDR = FDR
nrow(g2[g2$FDR<0.1,])
options(scipen=200)
g3 = g2[g2$FDR<0.1,]
In [ ]:
In [ ]:
In [ ]:
In [ ]:
source("/storage11_7T/fuy/TADA-A/tadaA/R/fr.R")
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
library(data.table)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]
logP_Zg0 = sumall0(data)
rr.lst = c()
for(selected_annotations in seq(1,8)){
tm = proc.time()
df = optim(rep(0.1, 1), fr, method = "Brent", lower = -1, upper = 10,
control=list("fnscale"=-1, "maxit" = optimization_iteration), hessian = TRUE)$par
rr.lst = c(rr.lst,df)
proc.time() - tm
}
rr.lst
In [22]:
rr.lst
In [21]:
gama
constrain?
In [32]:
selected_annotations=seq(1,8)
In [33]:
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
gene_prior = fread(gene_prior_file)
gene_prior$prior = rep(gama[length(selected_annotations)+1],nrow(gene_prior))
post_fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
post.dt = data.table(genename =u,prior = post )
post.dt
}
In [34]:
g = post_fr(gama[-(length(selected_annotations)+1)])
g$q0 = 1- g$prior
g2 = g[order(g$q0),]
FDR = c()
for (i in 1:nrow(g2)) FDR[i] <- sum(g2$q0[1:i]) / i
g2$FDR = FDR
nrow(g2[g2$FDR<0.1,])
options(scipen=200)
g3 = g2[g2$FDR<0.1,]
In [10]:
gg = fread("/storage11_7T/fuy/TADA-A/cell_WES/DNM/102gene_pval.txt")
g3[g3$genename %in% gg$gene,]
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [12]:
lst = list(
# c("/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ccr/wd/per_base_uq.wd.ccr.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CLIPdb/wd/uq.wd.CLIPdb.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altA.bed", ###2
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.10.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altA.bed",###3
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.11.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altA.bed", ###4
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.12.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altA.bed", ###5
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.8.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altA.bed",###6
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/alt/wd/uq.wd.9.altT.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc01.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.A.bed", ###7
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc12.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.A.bed", ###8
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/v1/wd/uq.wd.mpc2.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.A.bed", ###9
# "/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/wd/uq.wd.MVP.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.A.bed", ###10
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/per_base_uq.wd.PPI.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.A.bed", ##### 与MVP大部分重合,MVP rr高
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/wd/uq.wd.primateAI.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.A.bed", ###11
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/wd/uq.wd.RADAR_RBP.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.A.bed", ###12
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/wd/uq.wd.RBP-VarDB.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.A.bed", ###13
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/wd/uq.wd.ribosnitch.T.bed"),
# c("/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_A.bed", ##### 与ptv大部分重合
# "/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/spidex/wd/uq.wd.spidex_alt_T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altA.bed", ###14
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_AG.allele.bed.05.altT.bed"),
#
c("/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altA.bed", ###15
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/spliceai/alt/wd/uq.wd.DS_DG.allele.bed.05.altT.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.A.bed", ###16
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.0-05.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.A.bed", ###17
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.05-995.T.bed"),
c("/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.A.bed", ###18
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/vep/new_annota/ptv/wd/uq.wd.ptv.995.T.bed")
# c("/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/PPI/wd/wd.MPC-PPI.altT.bed"),
)
In [ ]:
In [ ]: