In [36]:
# compact_data = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-01-19cp6_all_gene_PO_6788SNV_sf_uni_prior_compact.rds")
# data=compact_data$base_info
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
# selected_annotations = c(1)
selected_annotations=c(2,3,6,7,8)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
###########
library(Rcpp)
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
logP_Zg0 = sumall0(data)
fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}
################
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
post_fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
post.dt = data.table(genename =u,prior = post )
post.dt
}
In [37]:
gm.lst = c()
ll.lst = c()
gama = rep(0,length(selected_annotations))
rd = 0
logP_Zg0 = sumall0(data)
library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]
In [48]:
tmm = proc.time()
repeat{
rd = rd + 1
res = gama
# gama = optim(.1,fr,method='Brent',lower=-1,upper=10,control=list('fnscale'=-1, "maxit" = 2000),hessian = TRUE)$par
gama = optim(rep(.1,5), fr ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
gene_prior = post_fr(gama)
gm.lst = rbind(gm.lst,gama)
ll.lst = c(ll.lst, fr(gama))
# print(gama)
# if(rd > 600)break
if(sum(abs(res - gama)/res < 1e-5) == length(selected_annotations) ) break
}
proc.time() - tmm
In [50]:
rd
In [49]:
gm.lst
In [51]:
plot(ll.lst,type="b")
In [52]:
nrow(gene_prior[gene_prior$prior > 0.9,])