1
2
library(tadaA)
setwd("/storage11_7T/fuy/TADA-A/db/MS_data")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
system.time(compact_data <- TADA_A_read_info_by_chunks(
# mut_files = "/storage11_7T/fuy/TADA-A/cell_DNM_SNV_allele.bed",

mut_files = c("DNM/Jiang_cases_DNM_with_allele_info.txt",
"DNM/Kong_cases_DNM_with_allele_info.txt",
"DNM/Michaelson_cases_DNM_with_allele_info.txt",
"DNM/Yuen_NM2015_cases_DNM_with_allele_info.txt",
"DNM/Wu_cases_DNM_with_allele_info.txt"),

window_file = 'windows_partition/cd_windows_with_div_score.bed',

# mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_DNM_SNV_scaling_factors.txt"),

# sample_sizes = 8609,


mutrate_scaling_files = c("results/Jiang_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
"results/Kong_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
"results/Michaelson_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
"results/Yuen_NM2015_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
"results/Wu_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt"),

sample_sizes = c(32,78,10,162,32),


gene_prior_file = "prior/Example_gene_prior.txt",

nonAS_noncoding_annotations = c(
"/storage11_7T/fuy/TADA-A/annotation/ccr/ccrs.allchrom.gt90.bed",
"/storage11_7T/fuy/TADA-A/annotation/ccr/chr_ccr_gt95_syn_rm.bed"),

AS_noncoding_annotations = list(
######################## DeepSEA #######################
c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain_rbp/brain1_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain_rbp/brain1_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain_rbp/brain1_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain_rbp/brain1_altT.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain2_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain2_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain2_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain2_altT.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain3_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain3_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain3_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain3_altT.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain4_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain4_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain4_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain4_altT.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain5_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain5_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain5_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/brain5_altT.bed")

c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced1.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced1.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced1.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced1.top5%_altT.bed") ,

c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced2.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced2.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced2.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced2.top5%_altT.bed") ,

# # c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced3.top5%_altA.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced3.top5%_altC.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced3.top5%_altG.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced3.top5%_altT.bed") ,

c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced4.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced4.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced4.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced4.top5%_altT.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced5.top5%_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced5.top5%_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced5.top5%_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced5.top5%_altT.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced6.top5%_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced6.top5%_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced6.top5%_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced6.top5%_altT.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced7.top5%_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced7.top5%_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced7.top5%_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced7.top5%_altT.bed"),

c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced8.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced8.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced8.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced8.top5%_altT.bed") ,

c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced9.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced9.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced9.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced9.top5%_altT.bed") ,

c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced10.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced10.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced10.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced10.top5%_altT.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced11.top5%_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced11.top5%_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced11.top5%_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced11.top5%_altT.bed") ,


# c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced12.top5%_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced12.top5%_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced12.top5%_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced12.top5%_altT.bed") ,


c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced13.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced13.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced13.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced13.top5%_altT.bed") ,


c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced14.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced14.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced14.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced14.top5%_altT.bed") ,


c("/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced15.top5%_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced15.top5%_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced15.top5%_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/DeepSEA/spliced_rbp/st.true.spliced15.top5%_altT.bed"),

####################### RADAR RBP ############################
# c("/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/A.top005.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/C.top005.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/G.top005.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/T.top005.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/A.top001.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/C.top001.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/G.top001.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/T.top001.bed")

####################### WES denovo ptv ############################
# c("/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_A.bed"),

####################### gnomad lof (PTV SNV: Nonsense, splice acceptor, and splice donor variants) ############################
c("/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

####################### gnomad PTV tiers ############################
# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_T.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

####################### annova missense ############################
# c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_T.bed"),

####################### annova lof ############################
c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_T.bed"),

####################### annova syn ############################
# # c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_A.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_C.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_G.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_T.bed"),

####################### CADD ############################
# c("/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altA_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altC_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altG_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altT_within_10kb_and_promoter_no_utr.bed"),

####################### RBP ############################
# c("/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altA.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altC.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altG.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altT.bed.merge_in_coding_windows.bed"),

###################### MVP ###########################
c("/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_T.bed"),

############################ primateAI ################################
c("/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_A.bed",
"/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_C.bed",
"/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_G.bed",
"/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_T.bed"),

########################## spidex #############################
c("/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_A_lower10pct.bed",
"/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_C_lower10pct.bed",
"/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_G_lower10pct.bed",
"/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_T_lower10pct.bed"),

############## MPC ##################
c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altT.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_T.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_T.bed")

#################### ribosnitch ###########################
# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altA.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altC.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altG.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altT.bed.merge.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altA_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altC_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altG_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altT_in_coding_windows.bed"),

###################### CLIPdb ############################
#c("/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altA.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altC.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altG.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altT.bed")

),


report_proportion = 1000/18665,
#chunk_partition_num =1,
chunk = 2,
node_n = 2,
mutrate_ref_files = c("mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw")
))

saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_tadaA_DNM_previous_prior_all_cd_annota_compact.rds"))
  user   system  elapsed 
11.066    1.162 1225.749 
1
2
3
4
5
6
7
tm = proc.time()
RR = TADA_A_RR_estimate(data = compact_data$base_info, selected_annotations = c(15,18),
gene_prior_file = "prior/Example_gene_prior.txt", optimization_iteration =2000)
proc.time() - tm
saveRDS(RR,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_tadaA_DNM_previous_prior_all_annota_RR.rds"))

RR$rr_report
[1] "Read in gene prior file. Time consumed: 0.09s."
[1] "Finished optimization. Time consumed: 713.881.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"



    user   system  elapsed 
1103.606    4.932  713.976 
A data.frame: 2 × 3
relative_risklower_boundupper_bournd
<dbl><dbl><dbl>
1.127074-0.77280653.026955
0.780406-0.92684132.487653
1
2
3
4
df2 = df[c(15,18),]
df2$annota = c("annovar_lof","spidex")
df2
saveRDS(df2,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_tadaA_DNM_previous_prior_sig_annota_RR.rds"))
A data.frame: 2 × 4
relative_risklower_boundupper_bourndannota
<dbl><dbl><dbl><chr>
151.6550120.261649133.048376annovar_lof
181.2822140.090345972.474083spidex
1
2
3
4
5
6
7
8
9
10
tm = proc.time()
df = c()
for(i in seq(1,19)){
RRs = TADA_A_RR_estimate(data = compact_data$base_info, selected_annotations = c(i),
gene_prior_file = "prior/Example_gene_prior.txt", optimization_iteration =2000)
# saveRDS(RRs,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_",i,".rds"))
df = rbind(df,RRs$rr_report)
}
proc.time() - tm
df
1
df
A data.frame: 19 × 3
relative_risklower_boundupper_bournd
<dbl><dbl><dbl>
0.3880877 -1.55743500 2.3336104
-1.0000000 -7.27369926 5.2736993
-1.0000000 -9.92524847 7.9252485
0.1411465 -2.01497633 2.2972693
-1.0000000-10.07771434 8.0777144
0.3913139 -0.05028354 0.8329113
-1.0000000-10.13921660 8.1392166
0.3637251 -0.14469886 0.8721491
-1.0000000-10.04654968 8.0465498
0.1837599 -1.94466046 2.3121803
-1.0000000-10.09702638 8.0970264
0.2817003 -0.59850294 1.1619036
-1.0000000-18.1081461116.1081462
-1.0000000-48.8067692646.8067693
1.6550123 0.26164913 3.0483755
0.4713340 -1.61846295 2.5611310
0.8503098 -0.69999309 2.4006128
1.2822144 0.09034597 2.4740828
-1.0000000 -6.34139489 4.3413950
1
2
3
compact_data = readRDS("/storage11_7T/fuy/TADA-A/db/MS_data/158_compact_data.rds")
RR = readRDS("/storage11_7T/fuy/TADA-A/db/MS_data/158RR.rds")
RR
1
2
3
4
5
6
7
8
9
10
11
12
13
options(scipen=200)
tm = proc.time()
BF = TADA_A_get_BFs(data = compact_data$base_info,
selected_annotations = c(1,2,3),
rr = RR$rr_report$relative_risk,
additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/Example_gene_coding_BF.txt"),
TADA_p0 = 0.94)

g_BF = BF$gene_BF_table
g_BF = g_BF[order(g_BF$FDR_all),]
g_BF2 = g_BF[g_BF$FDR_all <= 0.1,]
proc.time() - tm
nrow(g_BF2)
[1] "Read in additional BF file /storage11_7T/fuy/TADA-A/annotation/cd_BF/Example_gene_coding_BF.txt."
[1] "Flagged genes that don't have any bases with any informative annotation."
[1] "Got genenames without bases that have informative rr features."
[1] "Got logBF_noncoding."
[1] "Added coding and non-coding logBF. Time consumed: 2.749s."
[1] "Finished!"



   user  system elapsed 
  4.634   0.032   3.948 

59

找到的基因与TADA-A文章中的一致

1
2
z = readRDS("/storage11_7T/fuy/TADA-A/annotation/results/gene-tadaA-previous-all-annota.rds")
head(z,2)
A data.table: 2 × 9
genenamelogBF_noncodinglogBF_codinglogBF_allBF_noncodingBF_codingBF_allFDR_codingFDR_all
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
SCN2A-0.0380686723.5615823.523510.962646817086885922164486366520.00000000091688260.000000000952460
CHD8 -0.0407375322.7135522.672820.9600811 7317632289 70255203730.00000000152891510.000000001591213
1
z[!(z$genename %in% g_BF2$genename),]
A data.table: 0 × 9
genenamelogBF_noncodinglogBF_codinglogBF_allBF_noncodingBF_codingBF_allFDR_codingFDR_all
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>