6166 DNM

  • 7131 DNM in coding windows
  • ALT should be SNV while REF could be whatever.
  • duplicated

List of the de novo variants called in the 6,430 individuals with ASD and 2,179 individuals without ASD (a total of 15,789 de novo variants were identified, covering 9,345 coding variants and 6,444 non-coding variants).

From the family-based data, we identified 9,345 rare de novo variants in protein-coding exons (allele frequency ≤ 0.1% in our dataset and non-psychiatric subsets of reference databases): 63% of cases and 59% of unaffected siblings carried at least one such variant (4,073 of 6,430 and 1,294 of 2,179, respectively).

1
2
3
4
# Rescale the uniform coding-window scaling factors by 1484/1630 and write the
# rescaled table to a new tab-separated file (the file that is later passed as
# `mutrate_scaling_files`).
rescale_ratio <- 1484/1630
f <- fread("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors.txt")
# Give the second column a fixed name so it can be referenced as `f$z`.
colnames(f)[2] <- "z"
f$scaling_factor <- f$z * rescale_ratio
# Write columns 1 and 3 only.
# NOTE(review): this assumes the input has exactly two columns, so that the new
# `scaling_factor` column lands in position 3 - confirm.
fwrite(
  f[, c(1, 3)],
  "/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors_1484_d_1630.txt",
  sep = "\t"
)
1
2
# Load the TADA-A package that provides the TADA_A_* functions used below.
library(tadaA)
# All relative paths used later in this file (the "db/MS_data/..." inputs) are
# resolved against this working directory, so it must be set before they run.
setwd("/storage11_7T/fuy/TADA-A")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Build `compact_data` with TADA_A_read_info_by_chunks() and report how long the
# call takes via system.time(). `compact_data$base_info` is the object consumed
# by the TADA_A_RR_estimate() / TADA_A_get_BFs() cells further down.
# The commented-out arguments are earlier alternatives kept for reference; only
# the uncommented arguments are passed.
system.time(compact_data <- TADA_A_read_info_by_chunks(
# De novo mutation input: a single BED file.
mut_files = "/storage11_7T/fuy/TADA-A/cell_WES/DNM/12166DNM.affected.allele.bed",

# c("db/MS_data/DNM/Jiang_cases_DNM_with_allele_info.txt",
# "db/MS_data/DNM/Kong_cases_DNM_with_allele_info.txt",
# "db/MS_data/DNM/Michaelson_cases_DNM_with_allele_info.txt",
# "db/MS_data/DNM/Yuen_NM2015_cases_DNM_with_allele_info.txt",
# "db/MS_data/DNM/Wu_cases_DNM_with_allele_info.txt"),

# Relative path: resolved against the working directory set by setwd().
window_file = 'db/MS_data/windows_partition/cd_windows_with_div_score.bed',

# mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors_1441_d_1692.txt"),
# mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors.txt"),
# mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors_1441_d_1068.txt"),
# mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors_1435_d_1052.txt"),
# mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors_1505_d_1692.txt"),
# Active scaling file: the 1484/1630-rescaled table written by the fwrite()
# call earlier in this file.
mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors_1484_d_1630.txt"),

# One sample size for the single mutation file; 6430 is the number of
# individuals with ASD quoted in the notes at the top of this file.
sample_sizes = 6430,
# sample_sizes = 4059,
# sample_sizes = 4000,

# mutrate_scaling_files = c("db/MS_data/results/Jiang_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
# "db/MS_data/results/Kong_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
# "db/MS_data/results/Michaelson_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
# "db/MS_data/results/Yuen_NM2015_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
# "db/MS_data/results/Wu_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt"),

# sample_sizes = c(32,78,10,162,32),


gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",

# No non-allele-specific annotations are used in this run (NA).
nonAS_noncoding_annotations = NA, #c(
# "/storage11_7T/fuy/TADA-A/annotation/ccr/ccrs.allchrom.gt90.bed"),
#"/storage11_7T/fuy/TADA-A/annotation/ccr/chr_ccr_gt95_syn_rm.bed"),

# Allele-specific annotations: each active element is a vector of four BED
# files, one per alternate allele (A, C, G, T). Two sets are active here:
# first the pli995 set, then the MPC2 set.
# NOTE(review): presumably these are what `selected_annotations = c(1)` and
# `c(2)` refer to in the cells below - confirm the index order.
AS_noncoding_annotations = list(

# c("/storage11_7T/fuy/TADA-A/annotation/driverMAPS/autos.cd.window.all_cd_syn_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/autos.cd.window.all_cd_syn_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/autos.cd.window.all_cd_syn_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/autos.cd.window.all_cd_syn_alt_T.bed")

c("/storage11_7T/fuy/TADA-A/annotation/pLI/auto/auto.annovar_cd_window_pli995_altA.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/auto/auto.annovar_cd_window_pli995_altC.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/auto/auto.annovar_cd_window_pli995_altG.bed",
"/storage11_7T/fuy/TADA-A/annotation/pLI/auto/auto.annovar_cd_window_pli995_altT.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli05-995_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli05-995_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli05-995_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli05-995_altT.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli0-05_altA.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli0-05_altC.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli0-05_altG.bed",
# "/storage11_7T/fuy/TADA-A/annotation/pLI/annovar_cd_window_pli0-05_altT.bed") ,

# c("/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_A_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_C_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_G_lower10pct.bed",
# "/storage11_7T/fuy/TADA-A/db/MS_data/annota/spidex_public_noncommercial_v1_0.tab_alt_T_lower10pct.bed") ,

c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/auto/auto.A.cd.MPC2.annovar.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/auto/auto.C.cd.MPC2.annovar.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/auto/auto.G.cd.MPC2.annovar.bed",
"/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/auto/auto.T.cd.MPC2.annovar.bed")

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/A.cd.MPC12.annovar.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/C.cd.MPC12.annovar.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/G.cd.MPC12.annovar.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/T.cd.MPC12.annovar.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/A.cd.MPC01.annovar.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/C.cd.MPC01.annovar.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/G.cd.MPC01.annovar.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MPC_score/annovar/T.cd.MPC01.annovar.bed") ,

##############
# c("/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/primateAI/chr_primateAI_exome_mutation_pathogen_rank_gt_80_alt_T.bed") ,

# c("/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_A.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_C.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_G.bed",
# "/storage11_7T/fuy/TADA-A/annotation/MVP/chr_MVP_all_rare_missense_pathogen_rank_gt_75_alt_T.bed")


),

# 18665/18665 evaluates to 1.
report_proportion = 18665/18665,
#chunk_partition_num =1,
chunk = 2,
node_n = 2,
# Four mutation-rate reference files, one per alternate allele (A, C, G, T).
mutrate_ref_files = c("db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw",
"db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw",
"db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw",
"db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw")

# mutrate_ref_files = c("/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_A.uq.bedGraph.sort.bw" ,
# "/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_C.uq.bedGraph.sort.bw" ,
# "/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_G.uq.bedGraph.sort.bw" ,
# "/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_T.uq.bedGraph.sort.bw" )


))

# Cache the result as an RDS file; the file name is prefixed with today's date
# (Sys.Date()), so re-running on another day writes a new file.
saveRDS(compact_data,paste0("/storage11_7T/fuy/TADA-A/cell_WES/DNM/",Sys.Date(),"_12166DNM_6430_fam_auto_pli995_MPC2_1484_d_1630_compact.rds"))
 user   system  elapsed 
2.296    0.680 1552.936 

MPC2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Estimate the log relative risk for annotation 2 alone, using the uniform gene
# prior, and display the estimate with its confidence bounds.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(2),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR$rr_report

# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 2, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 2, "sum_mut_count")
s #### observed
[1] "Read in gene prior file. Time consumed: 0.004s."
[1] "Finished optimization. Time consumed: 94.947.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
2.3114042.0745342.548275

179.871940314055

280

pli995

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Estimate the log relative risk for annotation 1 alone, using the uniform gene
# prior, and display the estimate with its confidence bounds.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR$rr_report

# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_count")
s #### observed
[1] "Read in gene prior file. Time consumed: 0.004s."
[1] "Finished optimization. Time consumed: 56.247.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
3.1644772.8941443.434811

52.458127723218

153

auto cd syn, calibrated

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Estimate the log relative risk for annotation 1 alone, using the uniform gene
# prior, and display the estimate with its confidence bounds.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR$rr_report

# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_count")
s #### observed
[1] "Read in gene prior file. Time consumed: 0.052s."
[1] "Finished optimization. Time consumed: 343.387.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
0.01541753-2.2733292.304164

1483.81264325268

1484

1
# Exponentiate 0.0154 (presumably the rounded logRR printed just above, taken
# back to the relative-risk scale).
exp(0.0154)

1.01551919106144

auto cd syn, uncalibrated

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Estimate the log relative risk for annotation 1 alone, using the uniform gene
# prior, and display the estimate with its confidence bounds.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR$rr_report

# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_count")
s #### observed
[1] "Read in gene prior file. Time consumed: 0.004s."
[1] "Finished optimization. Time consumed: 668.575.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
-1-2.101210.1012101

1629.79421058078

1484

MPC2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Estimate the log relative risk for annotation 1 alone, using the uniform gene
# prior, and display the estimate with its confidence bounds.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR$rr_report

# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_count")
s #### observed

pli995

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Estimate the log relative risk for annotation 1 alone, using the uniform gene
# prior, and display the estimate with its confidence bounds.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR$rr_report

# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_count")
s #### observed
[1] "Read in gene prior file. Time consumed: 0.004s."
[1] "Finished optimization. Time consumed: 16.582.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
3.1651342.8954373.434831

54.1597245992531

154

calib-syn

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Estimate the log relative risk for annotation 1 alone, using the uniform gene
# prior, and display the estimate with its confidence bounds.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR$rr_report

# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_count")
s #### observed
[1] "Read in gene prior file. Time consumed: 0.073s."
[1] "Finished optimization. Time consumed: 303.606.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
A data.frame: 1 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
0.03115438-2.7348082.797117

1504.76043243318

1505

1
# Exponentiate 0.03 (presumably the rounded logRR printed just above, taken
# back to the relative-risk scale).
exp(3e-2)

1.03045453395352

1
2
3
4
5
# Fit the relative-risk model for annotation 1 with the uniform gene prior, then
# display the entire returned object (not only its rr_report element).
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
RR
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Add up `field` over every entry of the two-level list `base_info` whose
# feature_vector has a 1 at position `feature_idx`.
# seq_along() is used instead of 1:length(): when `base_info` or one of its
# elements is empty, 1:0 would iterate over c(1, 0) and fail with a
# subscript-out-of-bounds error. Entries are visited in the original order, so
# the accumulated total is unchanged.
sum_flagged <- function(base_info, feature_idx, field) {
  total <- 0
  for (j in seq_along(base_info)) {
    for (i in seq_along(base_info[[j]])) {
      entry <- base_info[[j]][[i]]
      if (entry$feature_vector[feature_idx] == 1) {
        total <- total + entry[[field]]
      }
    }
  }
  total
}

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_rate")
s #### background

s <- sum_flagged(compact_data$base_info, 1, "sum_mut_count")
s #### observed

1691.73066556607

1505

1
2


1
2


1
2


1
2


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Fit a separate single-annotation RR model for annotations 9 and 10, stacking
# the rr_report of each fit into `dfn` (one row per annotation, in loop order),
# and report the total elapsed time.
# NOTE(review): annotation indices 9 and 10 imply a `compact_data` built with at
# least ten annotation sets, not the two-set object constructed earlier in this
# file - confirm which object was loaded when this cell was run.
tm <- proc.time()
dfn <- NULL
for (i in 9:10) {
  RR <- TADA_A_RR_estimate(
    data = compact_data$base_info,
    selected_annotations = c(i),
    # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
    gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
    optimization_iteration = 2000
  )

  dfn <- rbind(dfn, RR$rr_report)
}
proc.time() - tm
# saveRDS(RR,paste0("/storage11_7T/fuy/TADA-A/annotation/results/",Sys.Date(),"_wes_deepsea_brain_RR.rds"))

dfn
[1] "Read in gene prior file. Time consumed: 0.081s."
[1] "Finished optimization. Time consumed: 365.757.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
[1] "Read in gene prior file. Time consumed: 0.005s."
[1] "Finished optimization. Time consumed: 349.952.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"



    user   system  elapsed 
3598.031    4.140  715.976 
A data.frame: 2 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
1.5925291.3874011.797657
1.7766471.6236091.929684
1
2
3
4
# Fit annotations 1 and 2 jointly in a single model with the uniform gene
# prior. The resulting `RR$rr_report$logRR` is what the Bayes-factor step
# further down passes as `rr`.
RR <- TADA_A_RR_estimate(
  data = compact_data$base_info,
  selected_annotations = c(1, 2),
  # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
  gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
  optimization_iteration = 2000
)
[1] "Read in gene prior file. Time consumed: 0.017s."
[1] "Finished optimization. Time consumed: 388.177.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"
1
# Display the report table of the most recent RR fit (columns logRR,
# lower_bound, upper_bound in the captured output).
RR$rr_report
A data.frame: 2 × 3
logRRlower_boundupper_bound
<dbl><dbl><dbl>
3.1438142.8788363.408791
2.2785042.0504492.506558
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Compute per-gene Bayes factors from annotations 1 and 2, using the logRR
# values of the current `RR` fit, then list the genes with FDR_all <= 0.1.
# scipen = 200 makes R strongly prefer fixed over scientific notation when
# printing. This is a session-wide option and stays in effect after this cell.
options(scipen = 200)
tm <- proc.time()
BF <- TADA_A_get_BFs(
  data = compact_data$base_info,
  selected_annotations = c(1, 2),
  rr = RR$rr_report$logRR,
  # additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/Example_gene_coding_BF.txt"),
  additional_BF_file = c("/storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt"),
  TADA_p0 = 0.94
)

# Sort the gene table by ascending FDR_all, then keep rows at or below 0.1.
g_BF <- BF$gene_BF_table
g_BF <- g_BF[order(g_BF$FDR_all), ]
g_BF2 <- g_BF[g_BF$FDR_all <= 0.1, ]
proc.time() - tm
nrow(g_BF2)

g_BF2
[1] "Read in additional BF file /storage11_7T/fuy/TADA-A/annotation/cd_BF/uniform_cd_BFs.txt."
[1] "Flagged genes that don't have any bases with any informative annotation."
[1] "Got genenames without bases that have informative rr features."
[1] "Got logBF_noncoding."
[1] "Added coding and non-coding logBF. Time consumed: 2.43s."
[1] "Finished!"



   user  system elapsed 
  4.680   0.000   3.961 

38

A data.table: 38 × 9
genenamelogBF_noncodinglogBF_codinglogBF_allBF_noncodingBF_codingBF_allFDR_codingFDR_all
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
SCN2A 37.101469037.10146912970700178198198.00000112970700178198198.000000.940.000000000000001332268
SYNGAP115.352997015.352997 4652881.022031 4652881.022030.940.000001683539064045725
SLC6A1 15.233880015.233880 4130378.889141 4130378.889140.940.000002386699174961038
CHD8 13.099513013.099513 488704.385881 488704.385880.940.000009804155674308923
FOXP1 12.164281012.164281 191813.918161 191813.918160.940.000024177266648206518
ARID1B 11.302438011.302438 81018.927161 81018.927160.940.000052369900264119064
GRIN2B 10.929415010.929415 55793.629761 55793.629760.940.000084991038410975328
KCNQ3 8.0722570 8.072257 3204.327041 3204.327040.940.000682546401555919613
CHD2 7.8665850 7.866585 2608.642991 2608.642990.940.001270021684717373162
ANK2 7.1637740 7.163774 1291.777531 1291.777530.940.002341286079239701754
PPP2R5D 6.5596650 6.559665 706.035011 706.035010.940.004101892228727416559
ASH1L 5.7646530 5.764653 318.828341 318.828340.940.007663132321820855138
ADNP 5.7645250 5.764525 318.787641 318.787640.940.010676927765345780794
TCF4 5.7060550 5.706055 300.682571 300.682570.940.013451670548019965118
ATP1B1 5.6945720 5.694572 297.249561 297.249560.940.015892669089122110626
DPYSL2 5.6813990 5.681399 293.359581 293.359580.940.018067932326401005272
NOL6 5.4520250 5.452025 233.229911 233.229910.940.020707729431314048085
NAA15 5.3607270 5.360727 212.879701 212.879700.940.023365587900061015464
SETD5 5.3218880 5.321888 204.770061 204.770060.940.025876400936694275018
MYT1L 5.2974650 5.297465 199.829641 199.829640.940.028217600588264175887
STXBP1 5.1664110 5.166411 175.284671 175.284670.940.030780826196886448831
TRIP12 5.1488960 5.148896 172.241221 172.241220.940.033171433409178947838
NRXN1 5.0037620 5.003762 148.972571 148.972570.940.035866482133073071537
NF1 4.8933660 4.893366 133.401801 133.401800.940.038751092047992832801
WDFY3 4.6154040 4.615404 101.028611 101.028610.940.042571160147908564120
DEAF1 4.5150420 4.515042 91.381441 91.381440.940.046562717225392964704
RORB 4.4282530 4.428253 83.784881 83.784880.940.050672640423468987003
KDM5B 4.4167900 4.416790 82.830011 82.830010.940.054543539671785157275
MED13L 4.2732110 4.273211 71.751681 71.751680.940.058842550692057252193
MKX 4.2608440 4.260844 70.869751 70.869750.940.062915843050658101787
GNAI1 4.2139250 4.213925 67.621401 67.621400.940.066954111892309195464
AP2S1 4.1248070 4.124807 61.855861 61.855860.940.071177164827275776759
RFX3 4.0912680 4.091268 59.815691 59.815690.940.075309796636768094880
SRPK2 4.0055890 4.005589 54.904171 54.904170.940.079624189959590058185
GRIA2 3.9182750 3.918275 50.313561 50.313560.940.084133352626445542799
ELAVL3 3.8705460 3.870546 47.968561 47.968560.940.088635061933261274913
CORO1A 3.7070610 3.707061 40.733921 40.733920.940.093746949348747790620
TRAF7 3.6948300 3.694830 40.238741 40.238740.940.098654536725587771429
1
2