In [1]:
# compact_data = readRDS("/storage11_7T/fuy/TADA-A/annotation/spliceai/2021-03-15_spliceai_DS_AG_6788SNV_sf_uni_prior_compact.rds")
compact_data = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-01-19cp6_all_gene_PO_6788SNV_sf_uni_prior_compact.rds")
data=compact_data$base_info
selected_annotations = c(1)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
In [2]:
library(Rcpp)
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
In [70]:
logP_Zg0 = sumall0(data)
fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}
tmm = proc.time()
fr(c(.1))
proc.time() - tmm
In [58]:
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
post_fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
post.dt = data.table(genename =u,prior = post )
post.dt
}
In [36]:
tmm = proc.time()
p = post_fr(2)
proc.time() - tmm
In [ ]:
gm.lst = c()
gama = 0
rd = 0
logP_Zg0 = sumall0(data)
library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]
tmm = proc.time()
repeat{
rd = rd + 1
res = gama
gama = optim(.1,fr,method='Brent',lower=-1,upper=10,control=list('fnscale'=-1, "maxit" = 2000),hessian = TRUE)$par
gene_prior = post_fr(gama)
gm.lst = c(gm.lst,gama)
print(gama)
# if(rd > 600)break
if(abs(res - gama)/res < 1e-5) break
}
proc.time() - tmm
In [69]:
plot(gm.lst,type="b")
In [78]:
which.max(gm.lst)
In [84]:
nrow(gene_prior[gene_prior$prior > 0.9,])
In [76]:
saveRDS(gm.lst,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/tmp/601rd.gm.lst.rds")
fwrite(data.frame(gm.lst),"/storage11_7T/fuy/TADA-A/cell_WES/DNM/tmp/601gm_lst.txt")
In [87]:
gm.lst = c()
gama = 0
rd = 0
logP_Zg0 = sumall0(data)
library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]
gene_prior$risk = rep(.9,nrow(gene_prior))
tmm = proc.time()
repeat{
rd = rd + 1
res = gama
gama = optim(.1,fr,method='Brent',lower=-1,upper=10,control=list('fnscale'=-1, "maxit" = 2000),hessian = TRUE)$par
gene_prior = post_fr(gama)
gm.lst = c(gm.lst,gama)
print(gama)
# if(rd > 600)break
if(abs(res - gama)/res < 1e-5) break
}
proc.time() - tmm
In [89]:
max(gm.lst)
In [88]:
plot(gm.lst)
In [ ]: