2021-04-11-optim-update-posterior-pi
In [36]:
# compact_data = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-01-19cp6_all_gene_PO_6788SNV_sf_uni_prior_compact.rds")
# data=compact_data$base_info
data=readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-04-08_copy_selected_6788SNV_sf_uni_prior_compact.rds")$base_info
# selected_annotations = c(1)
selected_annotations=c(2,3,6,7,8)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
###########
library(Rcpp)

sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")

sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")

logP_Zg0 = sumall0(data)

fr <- function(x){
    all_rr = x

    logP_Zg1 = sumall1(data,selected_annotations,all_rr)

    logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
    logP_table <- logP_table[gene_prior, on = "genename"]
    logP_table <- logP_table[complete.cases(logP_table)]
    idx = match(unique(logP_table$genename),logP_table$genename)
    idx2 = c(idx,nrow(logP_table) + 1)
    pr = logP_table[idx,]$prior
    ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
    ll_sum1 
    }

################
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
post_fr <- function(x){
    all_rr = x
    logP_Zg1 = sumall1(data,selected_annotations,all_rr)   
    logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
    logP_table <- logP_table[gene_prior, on = "genename"]
    logP_table <- logP_table[complete.cases(logP_table)]
    u = unique(logP_table$genename)
    idx = match(u,logP_table$genename)
    idx2 = c(idx,nrow(logP_table) + 1)
    pr = logP_table[idx,]$prior 
    post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
    post.dt = data.table(genename =u,prior = post )   
    post.dt
}
In [37]:
gm.lst = c()
ll.lst = c()

gama = rep(0,length(selected_annotations))
rd = 0
logP_Zg0 = sumall0(data)

library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]
In [48]:
tmm = proc.time()
repeat{
rd = rd + 1

res = gama   

# gama = optim(.1,fr,method='Brent',lower=-1,upper=10,control=list('fnscale'=-1, "maxit" = 2000),hessian = TRUE)$par
gama = optim(rep(.1,5), fr ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
    
gene_prior = post_fr(gama)

gm.lst = rbind(gm.lst,gama)
ll.lst = c(ll.lst, fr(gama))
# print(gama)
# if(rd > 600)break
if(sum(abs(res - gama)/res < 1e-5) == length(selected_annotations) ) break
    
}
proc.time() - tmm
     user    system   elapsed 
62179.569    22.926 29908.382 
In [50]:
rd
110
In [49]:
gm.lst
A matrix: 107 × 5 of type dbl
gama1.4634462.1710041.864333 1.444445933.329111
gama1.8790282.3191032.944603 -5.162275243.306241
gama2.0167572.3057713.144717 -8.668729363.250513
gama2.0659702.2709723.171423 -4.572957903.212517
gama2.0759352.2402633.180675 -3.530842663.170110
gama2.0758562.2168213.194212 -4.015900003.128911
gama2.0741532.1977123.203891 -1.911672033.095609
gama2.0711432.1784013.216079 -2.969122673.068561
gama2.0656382.1690273.222317 -1.553606893.046136
gama2.0776592.0520663.185217-35.250490943.090876
gama2.0597962.1477923.230478 -2.075876693.011308
gama2.0574082.1409423.236096 -2.712300652.993527
gama2.0541232.1379543.235736 -1.805365772.983923
gama2.0519722.1293593.241289 -6.999247822.956883
gama2.0499832.1255003.243683 -2.969484412.956646
gama2.0468072.1250773.245730 -4.042625892.941251
gama2.0375082.2838062.859680 2.422594352.023666
gama2.0443762.1247063.245110 -1.596990972.923855
gama2.0426262.1205263.247969 -2.300260582.914068
gama2.0413262.1195063.249284 -1.881116582.909568
gama2.0397632.1168213.247607 -1.731219252.905502
gama2.0389992.1130283.250118 -2.485547962.900662
gama2.0380122.1185623.241317 -0.776105912.896981
gama2.0372582.1111123.249792 -1.656804602.892748
gama2.0365732.1115933.250264 -1.652157152.888703
gama2.0354822.1083823.253844 -1.956355482.883269
gama2.0348522.1157423.235264 0.019416162.870545
gama2.0506202.1612143.073527 2.085916742.401350
gama2.0321822.1364763.176327 1.176927692.808493
gama2.0355942.1530643.133110 1.584164502.722693
gama2.0202242.1059993.258163-0.25770482.825820
gama2.0219132.1060553.257674-0.25794402.825951
gama2.0211932.1059123.259901-0.25731012.824403
gama2.0195982.1038813.259981-0.25615572.825903
gama2.0192482.1048253.260031-0.25661052.825434
gama2.0193762.1032883.260227-0.25566532.824607
gama2.0190172.1009523.264459-0.49556842.827039
gama2.0203172.1061953.261188-0.48807812.825485
gama2.0184402.0997923.266129-0.48632992.827289
gama2.0183132.0984763.269379-4.48803822.787282
gama2.0178562.0959813.267244-4.03734252.792816
gama2.0173232.0958773.266636-4.03565602.791446
gama2.0175542.0997753.271404-1.19618592.815042
gama2.0170202.0992453.271425-1.50232432.812457
gama2.0174432.0994383.271008-1.46242492.811651
gama2.0176082.0989043.273309-1.45176602.811630
gama2.0170392.0992193.268733-1.23236352.814850
gama2.0180732.0991443.264073-0.52842122.822425
gama2.0218762.1159253.160632 1.38447852.770713
gama2.0165022.0991413.271668-1.24313932.813465
gama2.0169222.1000303.263497-0.48107952.820962
gama2.0170012.0999943.263651-0.48103542.821155
gama2.0168282.0998663.263654-0.48091922.820888
gama2.0169462.0997033.264396-0.48370872.821631
gama2.0165602.0989123.264139-0.48361302.820264
gama2.0164942.0989493.263627-0.48371472.819727
gama2.0150652.1004333.255823-0.13741992.821475
gama2.0149972.1009763.257201-0.13697862.822651
gama2.0150842.1007673.256148-0.13735812.820326
gama2.0150842.1007673.256148-0.13735812.820326
In [51]:
plot(ll.lst,type="b")
In [52]:
nrow(gene_prior[gene_prior$prior > 0.9,])
1066