1
2
# Load the tadaA package; the TADA_A_* functions called later in this notebook
# are expected to come from it.
library(tadaA)
# Work from the MS_data directory: the relative paths used further down
# ("windows_partition/...", "prior/...", "mutrate/...") are resolved against it.
setwd("/storage11_7T/fuy/TADA-A/db/MS_data")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Read the mutation file, window file, mutation-rate scaling file, gene prior
# and the annotation sets listed below into `compact_data`, timing the whole
# call with system.time(); the result is then written to a dated .rds file.
# Relative paths (window_file, gene_prior_file, mutrate_ref_files) are resolved
# against the current working directory.
# The recorded run of this cell took ~5750 s elapsed.
system.time(compact_data <- TADA_A_read_info_by_chunks(
mut_files = "/storage11_7T/fuy/TADA-A/cell_DNM_allele.bed",

window_file = "windows_partition/cd_windows_with_div_score.bed",

mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_DNM_cd_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt"),

# One value for the single mut_files entry above
# (presumably the number of individuals in that cohort -- TODO confirm).
sample_sizes = 8609,

gene_prior_file = "prior/uniform_gene_prior.txt",

# No nonAS annotations are supplied in this run (NA); candidate files are kept
# commented out below.
nonAS_noncoding_annotations = NA, # c(
# "/storage11_7T/fuy/TADA-A/annotation/ccr/ccrs.allchrom.gt90.bed"),
#"/storage11_7T/fuy/TADA-A/annotation/ccr/chr_ccr_gt95_syn_rm.bed"),

# Each list element is one annotation set given as four files, one per
# alternate allele (A, C, G, T). Four sets are currently active, in this order:
#   1. gnomAD all LoF   2. pLI_lt05 LoF   3. pLI_nlt_995 LoF   4. MPC > 2
# NOTE(review): the later calls use selected_annotations = seq(1,4); presumably
# those indices refer to the positions of the active sets here -- confirm
# against the tadaA documentation before adding, removing or reordering sets.
# When enabling a commented-out set, make sure every element except the last
# one in the list ends with a comma (several commented blocks below do not).
AS_noncoding_annotations = list(
####################### WES denovo ptv ############################
# c("/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_A.bed"),
# NOTE(review): alt_A is listed twice in the set above; the last entry
# presumably should be ..._alt_T.bed -- fix before re-enabling.

####################### gnomad lof ############################
c("/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

####################### gnomad PTV tiers ############################
# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_T.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

####################### annova missense ############################
# c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_T.bed"),

####################### annova lof ############################
# c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_T.bed"),

####################### annova syn ############################
# # c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_A.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_C.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_G.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_T.bed"),

####################### CADD ############################
# c("/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altA_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altC_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altG_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altT_within_10kb_and_promoter_no_utr.bed"),

####################### RBP ############################
# c("/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altA.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altC.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altG.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altT.bed.merge_in_coding_windows.bed"),

###################### MVP ###########################
# c("/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_T.bed"),

########################### primateAI ################################
# c("/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_T.bed"),

# ########################## spidex #############################
# c("/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_A_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_C_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_G_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_T_lower10pct.bed"),

############## MPC ##################
# Last active set in the list, hence no trailing comma after its closing ")".
c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altA.sorted.merged_in_coding_windows.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altC.sorted.merged_in_coding_windows.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altG.sorted.merged_in_coding_windows.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altT.sorted.merged_in_coding_windows.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC12_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC12_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC12_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC12_alt_T.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC01_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC01_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC01_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/MPC01_alt_T.bed")

#################### ribosnitch ###########################
# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altA.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altC.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altG.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altT.bed.merge.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altA_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altC_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altG_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altT_in_coding_windows.bed"),

###################### CLIPdb ############################
#c("/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altA.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altC.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altG.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altT.bed")

),


# 18665/18665 evaluates to 1 (presumably "use all genes"; the numerator looks
# like a gene count that can be lowered for quick tests -- TODO confirm).
report_proportion = 18665/18665,
#chunk_partition_num =1,
# NOTE(review): exact meaning of chunk / node_n is defined by
# TADA_A_read_info_by_chunks, which is not visible here -- check its docs.
chunk = 6,
node_n = 6,
# Four mutation-rate reference files, one per alternate allele (A, C, G, T).
mutrate_ref_files = c("mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw")
))

# Persist the result; the file name is prefixed with the date on which this
# line runs (Sys.Date()).
saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_cell_DNM_uniform_prior_test_selected_compact.rds"))
 user   system  elapsed 
4.202    1.202 5749.587 
1
2
3
4
5
# Estimate relative risks for annotations 1 to 4 from compact_data$base_info,
# report how long the estimation took, then save the result to a dated .rds
# file.
tm <- proc.time()
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = 1:4,
  gene_prior_file = "prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
# Auto-printed at top level: user / system / elapsed time since `tm`.
proc.time() - tm
saveRDS(
  RR,
  paste0(
    "/storage11_7T/fuy/TADA-A/annotation/",
    Sys.Date(),
    "_cell_DNM_uniform_prior_test_selected_RR.rds"
  )
)
[1] "Read in gene prior file. Time consumed: 0.01s."
[1] "Finished optimization. Time consumed: 2987.369.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"



     user    system   elapsed 
20874.866    24.487  2987.399 
1
# Display the rr_report table of the RR object (auto-printed by the notebook).
RR$rr_report
A data.frame: 4 × 3
relative_risk  lower_bound  upper_bournd
<dbl>          <dbl>        <dbl>
 1.4975016     -0.9215375   3.916541
 0.6822417     -1.7943006   3.158784
-3.4487983      NaN         NaN
 1.8219448      1.5580197   2.085870
1
2
3
4
5
6
7
8
9
10
11
12
13
# Compute Bayes factors from the estimated relative risks, then count the
# genes whose FDR_all is at most 0.1.
#
# scipen = 200 makes R prefer fixed over scientific notation when printing;
# note that this changes a session-wide option.
options(scipen = 200)
tm <- proc.time()
# NOTE(review): the recorded rr_report shows a NaN confidence interval for the
# third annotation; its point estimate is still passed through `rr` here --
# confirm this is intended.
BF <- TADA_A_get_BFs(
  data = compact_data$base_info,
  selected_annotations = seq(1, 4),
  rr = RR$rr_report$relative_risk,
  additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt"),
  TADA_p0 = 0.94
)

# Sort genes by FDR_all, ascending (order() puts missing values last).
g_BF <- BF$gene_BF_table
g_BF <- g_BF[order(g_BF$FDR_all), ]
# which() drops NA/NaN comparisons. A bare logical index would instead add one
# all-NA row per missing FDR_all value and inflate the count below. The result
# is identical when FDR_all has no missing values.
g_BF2 <- g_BF[which(g_BF$FDR_all <= 0.1), ]
nrow(g_BF2)
proc.time() - tm
[1] "Read in additional BF file /storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt."
[1] "Flagged genes that don't have any bases with any informative annotation."
[1] "Got genenames without bases that have informative rr features."
[1] "Got logBF_noncoding."
[1] "Added coding and non-coding logBF. Time consumed: 2.409s."
[1] "Finished!"

9

 user  system elapsed 
7.715   0.000   6.918