1
2
library(tadaA)
setwd("/storage11_7T/fuy/TADA-A/db/MS_data")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
system.time(compact_data <- TADA_A_read_info_by_chunks(
mut_files = "/storage11_7T/fuy/TADA-A/cell_DNM_allele.bed",

window_file = "windows_partition/cd_windows_with_div_score.bed",

mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_DNM_cd_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt"),

sample_sizes = 8609,

gene_prior_file = "prior/uniform_gene_prior.txt",

nonAS_noncoding_annotations = NA,#c(
# "/storage11_7T/fuy/TADA-A/annotation/ccr/ccrs.allchrom.gt90.bed"),
#"/storage11_7T/fuy/TADA-A/annotation/ccr/chr_ccr_gt95_syn_rm.bed"),

AS_noncoding_annotations = list(
####################### RADAR RBP ############################
c("/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/A.top005.bed",
"/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/C.top005.bed",
"/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/G.top005.bed",
"/storage11_7T/fuy/TADA-A/annotation/RADAR_RBP/T.top005.bed")

####################### WES denovo ptv ############################
# c("/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/denovo_pLI_snv_alt_A.bed"),

####################### gnomad lof (PTV SNV: Nonsense, splice acceptor, and splice donor variants) ############################
# c("/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/gnomad/gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

####################### gnomad PTV tiers ############################
# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt09_gnomad.v2.1.1.all_lofs_snv_alt_T.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_05_995_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_lt05_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_T.bed"),

####################### annova missense ############################
# c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_missense_alt_T.bed"),

####################### annova lof ############################
# c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_lof_alt_T.bed"),

####################### annova syn ############################
# # c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_A.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_C.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_G.bed",
# # "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/all_cd_syn_alt_T.bed"),

####################### CADD ############################
# c("/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altA_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altC_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altG_within_10kb_and_promoter_no_utr.bed",
# "/storage11_7T/fuy/TADA-A/annotation/CADD/whole_genome_SNVs_gt15_altT_within_10kb_and_promoter_no_utr.bed"),

####################### RBP ############################
# c("/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altA.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altC.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altG.bed.merge_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/RBP-VarDB/RBP.all.bed.merge_overlap_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altT.bed.merge_in_coding_windows.bed"),

###################### MVP ###########################
# c("/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_T.bed"),

############################ primateAI ################################
# c("/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_T.bed"),

########################## spidex #############################
# c("/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_A_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_C_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_G_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_T_lower10pct.bed"),

############## MPC ##################
# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/fordist_constraint_official_mpc_values_v2_MPC_gt2_altT.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC12_alt_T.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/chr.MPC01_alt_T.bed")

#################### ribosnitch ###########################
# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altA.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altC.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altG.bed.merge.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.1.altT.bed.merge.bed"),

# c("/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altA_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altC_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altG_in_coding_windows.bed",
# "/storage11_7T/fuy/TADA-A/annotation/ribosnitch/hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altT_in_coding_windows.bed"),

###################### CLIPdb ############################
#c("/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altA.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altC.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altG.bed",
#"/storage11_7T/fuy/TADA-A/annotation/CLIPdb/human_combine.merged_hg19_refGenes_exons.gtf.lg.transc.fa.RNAsnpM3.bed.abspos.p0.05.merged.altT.bed")

),


report_proportion = 18665/18665,
#chunk_partition_num =1,
chunk = 2,
node_n = 2,
mutrate_ref_files = c("mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw",
"mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw")
))

saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_cell_DNM_uniform_prior_RADAR_compact.rds"))
 user   system  elapsed 
1.730    0.593 1265.852 
1
2
3
4
5
6
7
tm = proc.time()
RR = TADA_A_RR_estimate(data = compact_data$base_info, selected_annotations = c(1),
gene_prior_file = "prior/uniform_gene_prior.txt", optimization_iteration =2000)
proc.time() - tm
saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_cell_DNM_uniform_prior_RADAR_RR.rds"))

RR$rr_report
[1] "Read in gene prior file. Time consumed: 0.006s."
[1] "Finished optimization. Time consumed: 192.671.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"



    user   system  elapsed 
1292.888    1.707  192.742 
A data.frame: 1 × 3
relative_risklower_boundupper_bournd
<dbl><dbl><dbl>
0.1112846-1.2216041.444173
1
2
3
4
5
6
7
8
9
10
tm = proc.time()
df = c()
for(i in c(1,2,3)){
RRs = TADA_A_RR_estimate(data = compact_data$base_info, selected_annotations = c(i),
gene_prior_file = "prior/uniform_gene_prior.txt", optimization_iteration =2000)
# saveRDS(RRs,paste0("/storage11_7T/fuy/TADA-A/annotation/",Sys.Date(),"_",i,".rds"))
df = rbind(df,RRs$rr_report)
}
proc.time() - tm
df
1
saveRDS(df,"/storage11_7T/fuy/TADA-A/annotation/PTV_MPC_separate_rr.rds")
1
2
3
4
5
6
7
8
9
10
11
12
13
options(scipen=200)
tm = proc.time()
BF = TADA_A_get_BFs(data = compact_data$base_info,
selected_annotations = c(1),
rr = RR$rr_report$relative_risk,
additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt"),
TADA_p0 = 0.94)

g_BF = BF$gene_BF_table
g_BF = g_BF[order(g_BF$FDR_all),]
g_BF2 = g_BF[g_BF$FDR_all <= 0.1,]
proc.time() - tm
nrow(g_BF2)
[1] "Read in additional BF file /storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt."
[1] "Flagged genes that don't have any bases with any informative annotation."
[1] "Got genenames without bases that have informative rr features."
[1] "Got logBF_noncoding."
[1] "Added coding and non-coding logBF. Time consumed: 2.787s."
[1] "Finished!"



   user  system elapsed 
  6.398   0.004   5.955 

0

1
g_BF2
A data.table: 10 × 9
genenamelogBF_noncodinglogBF_codinglogBF_allBF_noncodingBF_codingBF_allFDR_codingFDR_all
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
SCN2A 17.516891017.51689140503174.27419140503174.274190.940.0000003868008
SLC6A111.609847011.609847 110177.441081 110177.441080.940.0000712807260
CHD8 6.8247220 6.824722 920.320581 920.320580.940.0056268937700
KCNQ3 5.9551410 5.955141 385.731421 385.731420.940.0139777322879
KDM5B 5.0787200 5.078720 160.568331 160.568330.940.0289614771015
FOXP1 4.8889850 4.888985 132.818751 132.818750.940.0417195303907
DPYSL2 4.4917700 4.491770 89.279311 89.279310.940.0570857626613
DEAF1 4.2498220 4.249822 70.092911 70.092910.940.0727851984294
DHX57 4.2155770 4.215577 67.733211 67.733210.940.0855701766551
STXBP1 4.0887600 4.088760 59.665881 59.665880.940.0978098368745
1
2