PPI-MPC2

1
2
library(tadaA)
setwd("/storage11_7T/fuy/TADA-A/annotation")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
system.time(compact_data <- TADA_A_read_info_by_chunks(
mut_files = "/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/6788snv.affected.cd.auto.no_pli_rm.allele.bed",

window_file = "/storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/17478gene_hg19.87.auto.exon.wd.bed" ,

mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/sf_1792_d_2058.1_cd_uniform_scaling_factors.txt"),

sample_sizes = 6430,

gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt",

nonAS_noncoding_annotations = NA,

AS_noncoding_annotations = list(

c("/storage11_7T/fuy/TADA-A/annotation/PPI/MPC2.hg19.interactome.altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/MPC2.hg19.interactome.altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/MPC2.hg19.interactome.altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/MPC2.hg19.interactome.altT.bed")

),

report_proportion = 1, #1000/17484,
chunk = 1,
node_n = 1,

mutrate_ref_files = c("/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.A.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.C.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.G.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.T.bw")

))

saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/cell_WES/DNM/",Sys.Date(),"_PPI_6788SNV_sf_uni_prior_compact.rds"))
[1] "Made a tmp folder for tmp files."
[1] "Read in window files."
[1] "Read in mutrate scaling file /storage11_7T/fuy/TADA-A/cell_WES/sf_1792_d_2058.1_cd_uniform_scaling_factors.txt"
[1] "Read in gene prior file."
[1] "Chose the top 100% genes for parameter estimation."
[1] "Patitioned genomic window to base-level coordinates at Round 1. Time consumed: 640.778s."
[1] "Obtained un-calibrated base-level mutation rate for alt alleles. Time consumed: 2078.17.s"
[1] "Added allele specific annotations 1 .Time consumed: 1751.883s."
[1] "Begin collapsing data..."
[1] "Read in mutation file /storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/6788snv.affected.cd.auto.no_pli_rm.allele.bed."
[1] "Collapsed data based on annotations for Study 1 .Time consumed: 1813.595.s"
[1] "Temp files cleaned and data recording finished!"



     user    system   elapsed 
10148.569   275.272  6318.571 
1
2
3
4
5
RR = TADA_A_RR_estimate(data = compact_data$base_info, selected_annotations = c(1), 

gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt",
optimization_iteration = 2000)$rr_report
RR
[1] "Read in gene prior file. Time consumed: 0.022s."
[1] "Finished optimization. Time consumed: 46.834.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
2.9395152.5703413.308688
1
2
df = data.frame(logRR=2.939515,lower_bound=2.570341,upper_bound=3.308688)
saveRDS(df,"/storage11_7T/fuy/TADA-A/annotation/PPI/PPI_rr.rds")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
options(scipen=200)
tm = proc.time()
BF = TADA_A_get_BFs(data = compact_data$base_info,
selected_annotations = c(1),
rr = RR$logRR ,
additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt"),
# additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/cc_only_uniform_cd_BFs.txt"),
# additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/17484_pp_all_BF.txt"),
TADA_p0 = 0.94)

g_BF = BF$gene_BF_table
g_BF = g_BF[order(g_BF$FDR_all),]
g_BF2 = g_BF[g_BF$FDR_all <= 0.1,]
proc.time() - tm
nrow(g_BF2)
[1] "Read in additional BF file /storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt."
[1] "Flagged genes that don't have any bases with any informative annotation."
[1] "Got genenames without bases that have informative rr features."
[1] "Got logBF_noncoding."
[1] "Added coding and non-coding logBF. Time consumed: 2.865s."
[1] "Finished!"



   user  system elapsed 
  3.707   1.133   4.098 

7

  • 2021/4/2

    PPI only

1
2
library(tadaA)
setwd("/storage11_7T/fuy/TADA-A/annotation")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
system.time(compact_data <- TADA_A_read_info_by_chunks(
mut_files = "/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/6788snv.affected.cd.auto.no_pli_rm.allele.bed",

window_file = "/storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/17478gene_hg19.87.auto.exon.wd.bed" ,

mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/sf_1792_d_2058.1_cd_uniform_scaling_factors.txt"),

sample_sizes = 6430,

gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt",

nonAS_noncoding_annotations = c("/storage11_7T/fuy/TADA-A/annotation/PPI/hg19.mapped.bed") ,

AS_noncoding_annotations = NA,

report_proportion = 1, #1000/17484,
chunk = 1,
node_n = 1,

mutrate_ref_files = c("/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.A.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.C.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.G.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.T.bw")

))

saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/cell_WES/DNM/",Sys.Date(),"_PPI_6788SNV_sf_uni_prior_compact.rds"))
[1] "Made a tmp folder for tmp files."
[1] "Read in window files."
[1] "Read in mutrate scaling file /storage11_7T/fuy/TADA-A/cell_WES/sf_1792_d_2058.1_cd_uniform_scaling_factors.txt"
[1] "Read in gene prior file."
[1] "Chose the top 100% genes for parameter estimation."
[1] "Patitioned genomic window to base-level coordinates at Round 1. Time consumed: 642.339s."
[1] "Obtained un-calibrated base-level mutation rate for alt alleles. Time consumed: 2066.244.s"
[1] "Added non-allele specific annotations 1. Time consumed: 576.144s."
[1] "Begin collapsing data..."
[1] "Read in mutation file /storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/6788snv.affected.cd.auto.no_pli_rm.allele.bed."
[1] "Collapsed data based on annotations for Study 1 .Time consumed: 1976.235.s"
[1] "Temp files cleaned and data recording finished!"



    user   system  elapsed 
8261.069  225.750 5295.197 
1
2
3
4
library(Rcpp)
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# compact_data = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-01-19cp6_all_gene_PO_6788SNV_sf_uni_prior_compact.rds")
data=compact_data$base_info
selected_annotations=c(1)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000

gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]

fr <- function(x){
all_rr = x

logP_Zg0 = sumall0(data)
logP_Zg1 = sumall1(data,selected_annotations,all_rr)

logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}

mle <- optim(rep(0.1, 1), fr, method = "Brent", lower = -1, upper = 10,
control=list("fnscale"=-1, "maxit" = optimization_iteration), hessian = TRUE)

fisher_info <- solve(-mle$hessian)
prop_sigma <- sqrt(c(diag(fisher_info)))
rr_estimate <- c(mle$par)
upper<-rr_estimate+1.96*prop_sigma
lower<-rr_estimate-1.96*prop_sigma

rr_report <-data.frame(logRR = rr_estimate, lower_bound = lower, upper_bound = upper)
rr_report
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
0.017097660.011496440.02269889
1
rr_report$annota = "PPI_only"
1
saveRDS(rr_report,"/storage11_7T/fuy/TADA-A/annotation/PPI/PPI_only.rds")

PPH2_PPI-PPH2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
system.time(compact_data <- TADA_A_read_info_by_chunks(
mut_files = "/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/6788snv.affected.cd.auto.no_pli_rm.allele.bed",

window_file = "/storage11_7T/fuy/TADA-A/db/MS_data/windows_partition/17478gene_hg19.87.auto.exon.wd.bed" ,

mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/sf_1792_d_2058.1_cd_uniform_scaling_factors.txt"),

sample_sizes = 6430,

gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt",

nonAS_noncoding_annotations = NA ,

AS_noncoding_annotations = list(
c(
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.A.bed",
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HDIV.T.bed" ) ,

c(
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.A.bed",
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/dbNSFP/wd/uq.wd.PPH2_D.HVAR.T.bed" ) ,

c(
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.A.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HDIV.T.bed") ,

c(
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.A.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.C.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.G.bed",
"/storage11_7T/fuy/TADA-A/annotation/PPI/PPH2-PPI/wd/uq.wd.PPI.PPH2_D.HVAR.T.bed")


),

report_proportion = 1, #1000/17484,
chunk = 20,
node_n = 2,

mutrate_ref_files = c("/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.A.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.C.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.G.bw",
"/storage11_7T/fuy/TADA-A/db/MS_data/mutrate/merge/supple3/plus_supple3_uq.17478gene.st.mg.T.bw")

))

saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/cell_WES/DNM/",Sys.Date(),"_PPH2_PPI-PPH2_6788SNV_sf_uni_prior_compact.rds"))
1
2
3
4
library(Rcpp)
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/ll_sum.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/multi_annota.cpp")
sourceCpp(file="/storage11_7T/fuy/TADA-A/cell_WES/DNM/post.cpp")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# compact_data = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-01-19cp6_all_gene_PO_6788SNV_sf_uni_prior_compact.rds")
data=compact_data$base_info
selected_annotations=c(4)
gene_prior_file = "/storage11_7T/fuy/TADA-A/db/MS_data/prior/new_uniform_gene_prior.txt"
optimization_iteration = 2000

gene_prior = fread(gene_prior_file)
gene_prior = gene_prior[order(gene_prior$genename),]

fr <- function(x){
all_rr = x

logP_Zg0 = sumall0(data)
logP_Zg1 = sumall1(data,selected_annotations,all_rr)

logP_table<-data.table(logP_Zg1 = logP_Zg1, logP_Zg0 = logP_Zg0, genename = names(data))
logP_table <- logP_table[gene_prior, on = "genename"]
logP_table <- logP_table[complete.cases(logP_table)]
idx = match(unique(logP_table$genename),logP_table$genename)
idx2 = c(idx,nrow(logP_table) + 1)
pr = logP_table[idx,]$prior
ll_sum1 <- ll_sum(idx2,logP_table$logP_Zg1,logP_table$logP_Zg0,pr)
ll_sum1
}

mle <- optim(rep(0.1, 1), fr, method = "Brent", lower = -1, upper = 10,
control=list("fnscale"=-1, "maxit" = optimization_iteration), hessian = TRUE)

fisher_info <- solve(-mle$hessian)
prop_sigma <- sqrt(c(diag(fisher_info)))
rr_estimate <- c(mle$par)
upper<-rr_estimate+1.96*prop_sigma
lower<-rr_estimate-1.96*prop_sigma

rr_report <-data.frame(logRR = rr_estimate, lower_bound = lower, upper_bound = upper)
rr_report
1
2
3
rr_report$annota = "PPI.PPH2_D.HVAR"
aaaa = rbind(aaa,rr_report)
aaaa
A data.frame: 4 × 4
logRRlower_boundupper_boundannota
<dbl><dbl><dbl><chr>
1.7690771.6429461.895207PPH2_D.HDIV
1.9024191.7697542.035085PPH2_D.HVAR
2.5514752.3453282.757622PPI.PPH2_D.HDIV
2.6004932.3729152.828070PPI.PPH2_D.HVAR
1
saveRDS(aaaa,"/storage11_7T/fuy/TADA-A/annotation/PPI/2021-04-07_PPH2_PPI-PPH2_sep_RR.rds")