1
2
3
4
5
6
# compact_data = readRDS("/storage11_7T/fuy/TADA-A/annotation/spliceai/2021-03-15_spliceai_DS_AG_6788SNV_sf_uni_prior_compact.rds")
compact_data = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-01-19cp6_all_gene_PO_6788SNV_sf_uni_prior_compact.rds")
data=compact_data$base_info

gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000
1
2
3
4
5
library(Rcpp)
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum2.cpp")
1
a = c(0.1,.1)
  1. 1.8937946953163
  2. 0.0505166997981521
   user  system elapsed 
289.189   0.120 251.151 
1
a = c(0.1,.09)
  1. 1.88965458214482
  2. 0.0507358499127291
   user  system elapsed 
503.183   0.176 437.068 
1
a = c(0.1,.08)
  1. 1.89210396615122
  2. 0.0505336486705047
   user  system elapsed 
392.631   0.139 341.112 
1
a = c(0.1,.07)
  1. 0.245983525327687
  2. 0.999998702672526
   user  system elapsed 
378.752   0.151 329.180 
1
a = c(0.1,.06)
  1. 0.246327863634136
  2. 0.999910154526206
   user  system elapsed 
387.356   0.175 336.879 
1
a = c(exp(1),.05)
  1. 1.89123499319297
  2. 0.0504755152875449
   user  system elapsed 
264.286   0.091 229.482 
1
a = c(exp(1),.9)
  1. 1.89459951460619
  2. 0.050258497374786
   user  system elapsed 
291.112   0.080 257.158 
1
gama = optim(a, fr_pi ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
0.01 0.01
  1. 1.89569034280832
  2. 0.0502794671650586
   user  system elapsed 
522.476   0.182 454.060 
1
2
3
4
a1 = runif(1,min=0.1,max=10) 
a2 = runif(1,min=1e-5,max=0.9999999999999)
a = c(a1,a2)
gama = optim(a, fr_pi ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
9.21374 0.1124439
  1. 9.43424405642677
  2. 1.19131638900873e-08
   user  system elapsed 
400.185   0.159 347.496 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]

selected_annotations = c(1)

logP_Zg0 = sumall0(data)

fr_pi <- function(par){

all_rr = par[1:length(selected_annotations)]

logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[complete.cases(logP_table)]
logP_table <- logP_table[order(logP_table$genename),]

idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = par[length(par)]
ll_sum1 <- ll_sum2(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}


a1 = .1
a2 = .01
cat(a1,a2)
a = c(a1,a2)
tmm = proc.time()
gama = optim(a, fr_pi ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
gama
proc.time() - tmm
0.1 0.01
  1. 0.253397948831721
  2. 0.999944137348025
   user  system elapsed 
274.667   0.120 252.777 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]

selected_annotations = c(1,2,3)

logP_Zg0 = sumall0(data)

fr_pi <- function(par){

all_rr = par[1:length(selected_annotations)]
gene_prior$prior <- rep(par[length(par)],nrow(gene_prior))

logP_Zg1 = sumall1(data,selected_annotations,all_rr)

logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}

a = rep(.01,4)

tmm = proc.time()
gama = optim(a, fr_pi ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
gama
proc.time() - tmm
  1. 0.138382339759046
  2. 0.454278795722041
  3. 0.139083933516685
  4. 0.986488015759151
    user   system  elapsed 
1284.302    0.466 1118.847 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
library(data.table)
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]

selected_annotations = c(1,2,3)

logP_Zg0 = sumall0(data)

fr_pi <- function(par){

all_rr = par[1:length(selected_annotations)]
gene_prior$prior <- rep(par[length(par)],nrow(gene_prior))

logP_Zg1 = sumall1(data,selected_annotations,all_rr)

logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}

a = rep(.1,3)

tmm = proc.time()
gama = optim(a, fr_pi ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par
gama
proc.time() - tmm

局部最优?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]
gene_prior$prior = rep(0.586574959329091,nrow(gene_prior))

post_fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
post.dt = data.table(genename =u,prior = post )
post.dt
}
1
2
g = post_fr(0.282147203712872)
head(g)
A data.table: 6 × 2
genenameprior
<chr><dbl>
A1BG 0.5865750
A1CF 0.5855259
A2M 0.5802551
A2ML1 0.5862923
A3GALT20.5862808
A4GALT 0.5865750
1
2
3
min(g$prior)
max(g$prior)
nrow(g[g$prior > .9,])

0.488492768677638

0.923903019519617

2

1
g[g$prior > .9,]
A data.table: 2 × 2
genenameprior
<chr><dbl>
CHD8 0.901968
SCN2A0.923903
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]

fr <- function(x){
all_rr = x

logP_Zg0 = sumall0(data)
logP_Zg1 = sumall1(data,selected_annotations,all_rr)

logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}
1
2
3
4
5
6
7
8
selected_annotations = seq(1,2)
tmm = proc.time()

gama = optim(rep(exp(1), 2), fr ,control=list("fnscale"=-1, "maxit" = optimization_iteration), hessian = TRUE)$par

proc.time() - tmm

gama
   user  system elapsed 
449.122   0.208 412.240 
  1. 1.19516468584344
  2. 1.6678512493307

原始 fr ,多个变量,初始值不重要

1
2
3
4
5
6
7
8
selected_annotations = seq(1,2)
tmm = proc.time()

gama = optim(c(exp(1), 6), fr ,control=list("fnscale"=-1, "maxit" = optimization_iteration), hessian = TRUE)$par

proc.time() - tmm

gama
   user  system elapsed 
550.851   0.327 502.504 
  1. 1.19712309320709
  2. 1.67062419381747
1
2
3
4
5
6
7
8
selected_annotations = seq(1,2)
tmm = proc.time()

gama = optim(c(exp(1),exp(1),.05), fr_pi ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par

proc.time() - tmm

gama
   user  system elapsed 
540.017   0.233 478.671 
  1. 1.09919237592268
  2. 1.49613778568841
  3. 0.076776311502852
1
2
3
4
5
6
7
8
selected_annotations = seq(1,2)
tmm = proc.time()

gama = optim(c(.1,.1,.05), fr_pi ,control=list("fnscale"=-1, "maxit" = optimization_iteration))$par

proc.time() - tmm

gama
   user  system elapsed 
940.195   0.363 832.371 
  1. 1.10186122803905
  2. 1.5013327085674
  3. 0.0764190730631155
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
gene_prior = fread("/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt")
gene_prior = gene_prior[order(gene_prior$genename),]
gene_prior$prior = rep(0.0764190730631155,nrow(gene_prior))

post_fr <- function(x){
all_rr = x
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
post.dt = data.table(genename =u,prior = post )
post.dt
}
1
2
all_rr = c(1.19516468584344,1.6678512493307)
selected_annotations = c(1,2)
1
logP_Zg1 = sumall1(data,selected_annotations,all_rr)
1
2
3
4
5
6
7
8
logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
u = unique(logP_table$genename)
idx = match(u,logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = rep(0.05,length(idx))
post = post(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)