2021-03-25-optim-update-posterior-pi
In [1]:
# compact_data = readRDS("/storage11_7T/fuy/TADA-A/annotation/spliceai/2021-03-15_spliceai_DS_AG_6788SNV_sf_uni_prior_compact.rds")
compact_data = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-01-19cp6_all_gene_PO_6788SNV_sf_uni_prior_compact.rds")
data=compact_data$base_info

selected_annotations = c(1)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
In [2]:
library(Rcpp)

sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")

sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
In [70]:
logP_Zg0 = sumall0(data)

fr <- function(x){
    all_rr = x

    logP_Zg1 = sumall1(data,selected_annotations,all_rr)

    logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
    logP_table <- logP_table[gene_prior, on = "genename"]
    logP_table <- logP_table[complete.cases(logP_table)]
    idx = match(unique(logP_table$genename),logP_table$genename)
    idx2 = c(idx,nrow(logP_table) + 1)
    pr = logP_table[idx,]$prior
    ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
    ll_sum1 
    }

tmm = proc.time()
fr(c(.1))
proc.time() - tmm
-36631.7254103853
   user  system elapsed 
  4.484   0.000   4.076 
In [58]:
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")

post_fr <- function(x){
    all_rr = x
    logP_Zg1 = sumall1(data,selected_annotations,all_rr)   
    logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
    logP_table <- logP_table[gene_prior, on = "genename"]
    logP_table <- logP_table[complete.cases(logP_table)]
    u = unique(logP_table$genename)
    idx = match(u,logP_table$genename)
    idx2 = c(idx,nrow(logP_table) + 1)
    pr = logP_table[idx,]$prior 
    post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
    post.dt = data.table(genename =u,prior = post )   
    post.dt
}
In [36]:
tmm = proc.time()
p = post_fr(2)
proc.time() - tmm
   user  system elapsed 
  4.630   0.004   4.188 
In [ ]:
gm.lst = c()

gama = 0
rd = 0
logP_Zg0 = sumall0(data)

library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]

tmm = proc.time()
repeat{
rd = rd + 1

res = gama   

gama = optim(.1,fr,method='Brent',lower=-1,upper=10,control=list('fnscale'=-1, "maxit" = 2000),hessian = TRUE)$par
    
gene_prior = post_fr(gama)

gm.lst = c(gm.lst,gama)
print(gama)
# if(rd > 600)break
if(abs(res - gama)/res < 1e-5) break
    
}
proc.time() - tmm
In [69]:
plot(gm.lst,type="b")
In [78]:
which.max(gm.lst)
56
In [84]:
nrow(gene_prior[gene_prior$prior > 0.9,])
421
In [76]:
saveRDS(gm.lst,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/tmp/601rd.gm.lst.rds")

fwrite(data.frame(gm.lst),"/storage11_7T/fuy/TADA-A/cell_WES/DNM/tmp/601gm_lst.txt")
In [87]:
gm.lst = c()

gama = 0
rd = 0
logP_Zg0 = sumall0(data)

library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]
gene_prior$risk = rep(.9,nrow(gene_prior))

tmm = proc.time()
repeat{
rd = rd + 1

res = gama   

gama = optim(.1,fr,method='Brent',lower=-1,upper=10,control=list('fnscale'=-1, "maxit" = 2000),hessian = TRUE)$par
    
gene_prior = post_fr(gama)

gm.lst = c(gm.lst,gama)
print(gama)
# if(rd > 600)break
if(abs(res - gama)/res < 1e-5) break
    
}
proc.time() - tmm
In [89]:
max(gm.lst)
2.56285055663014
In [88]:
plot(gm.lst)
In [ ]: