1
2
# Load the tadaA package (presumably the source of the TADA_A_* functions
# called below -- confirm against the installed package).
library(tadaA)
# Relative paths used later (e.g. "db/MS_data/...") are resolved against this
# working directory.
# NOTE(review): this is a hard-coded absolute path, so the notebook only runs
# where this directory exists.
setwd("/storage11_7T/fuy/TADA-A")
1
2
3
4
5
6
7
# Adjust the mutation rate for the given window and mutation files.
# According to the messages the call prints, it counts mutations per genomic
# window, fits a mutation-rate model and calculates scaling factors.
# Presumably the scaling factors are written to `scaling_file_name` -- confirm
# against the package documentation.
# The value returned by the call is left visible so the notebook prints it.
TADA_A_adjust_mutrate(
  window_file       = "db/MS_data/windows_partition/cd_windows_with_div_score.bed",
  mut_file          = "/storage11_7T/fuy/TADA-A/TADA-A/lib/2.bed",
  genes             = "all",
  sample_size       = 6430,
  mutrate_mode      = "regular",
  scaling_file_name = "/storage11_7T/fuy/TADA-A/TADA-A/lib/2bed_sf.txt",
  scale_features    = c("GC_content", "div_score")
)
[1] "Made a tmp folder for tmp files."
[1] "Finished calculating mutation count within per genomic window. Time consumed: 1.955s"
[1] "Finished fitting mutation rate model and calculating scaling factors. Time consumed: 3.976s"
[1] "Removed temporary files."
A matrix: 3 × 4 of type dbl
            Estimate      Std. Error  z value     Pr(>|z|)
(Intercept)  0.001843327  0.01345905   0.1369582  0.89106386
GC_content   0.003425012  0.01363202   0.2512475  0.80162276
div_score   -0.022368165  0.01276638  -1.7521145  0.07975413
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Build `compact_data` from the mutation file, window file, mutation-rate
# scaling file, gene prior and annotation files listed below, timing the call
# with system.time(). The result is then saved as a dated .rds file.
# Commented-out arguments are alternative inputs kept from the original cell.
system.time(
  compact_data <- TADA_A_read_info_by_chunks(
    mut_files = "/storage11_7T/fuy/TADA-A/cell_WES/DNM/6166SNV.cd.window.cd_mis_pli_syn.7131DNM.allele.bed",
    # Alternative mut_files:
    # c("db/MS_data/DNM/Jiang_cases_DNM_with_allele_info.txt",
    #   "db/MS_data/DNM/Kong_cases_DNM_with_allele_info.txt",
    #   "db/MS_data/DNM/Michaelson_cases_DNM_with_allele_info.txt",
    #   "db/MS_data/DNM/Yuen_NM2015_cases_DNM_with_allele_info.txt",
    #   "db/MS_data/DNM/Wu_cases_DNM_with_allele_info.txt"),

    window_file = "db/MS_data/windows_partition/cd_windows_with_div_score.bed",

    # mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/TADA-A/lib/3bed_sf.txt"),
    mutrate_scaling_files = c("/storage11_7T/fuy/TADA-A/cell_WES/cd_uniform_scaling_factors.txt"),

    # sample_sizes = 4265,
    sample_sizes = 6430,

    # Alternative per-study scaling files and sample sizes:
    # mutrate_scaling_files = c("db/MS_data/results/Jiang_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
    #                           "db/MS_data/results/Kong_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
    #                           "db/MS_data/results/Michaelson_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
    #                           "db/MS_data/results/Yuen_NM2015_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
    #                           "db/MS_data/results/Wu_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt"),
    # sample_sizes = c(32,78,10,162,32),

    gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",

    nonAS_noncoding_annotations = NA,
    # Alternative nonAS_noncoding_annotations:
    # c("/storage11_7T/fuy/TADA-A/annotation/ccr/ccrs.allchrom.gt90.bed"),
    # c("/storage11_7T/fuy/TADA-A/annotation/ccr/chr_ccr_gt95_syn_rm.bed"),

    AS_noncoding_annotations = list(
      # Alternative annotation set:
      # c("/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_A.bed",
      #   "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_C.bed",
      #   "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_G.bed",
      #   "/storage11_7T/fuy/TADA-A/annotation/pLI/pLI_nlt_995_gnomad.v2.1.1.all_lofs_snv_alt_T.bed")
      c(
        "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/cd.window.all_cd_syn_alt_A.bed",
        "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/cd.window.all_cd_syn_alt_C.bed",
        "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/cd.window.all_cd_syn_alt_G.bed",
        "/storage11_7T/fuy/TADA-A/annotation/driverMAPS/cd.window.all_cd_syn_alt_T.bed"
      )
    ),

    # 18665/18665 evaluates to 1.
    report_proportion = 18665/18665,
    # chunk_partition_num = 1,
    # NOTE(review): if `chunk` is not a formal argument of this function, R
    # will partial-match it to a longer argument name such as
    # `chunk_partition_num` -- confirm and spell the name out in full.
    chunk = 2,
    node_n = 2,

    mutrate_ref_files = c(
      "db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw",
      "db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw",
      "db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw",
      "db/MS_data/mutrate/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw"
    )
    # Alternative mutrate_ref_files:
    # c("/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_A.uq.bedGraph.sort.bw",
    #   "/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_C.uq.bedGraph.sort.bw",
    #   "/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_G.uq.bedGraph.sort.bw",
    #   "/storage11_7T/data.for.yuwen/new_mutrate/window.hg19.genome.tri2.alt_T.uq.bedGraph.sort.bw")
  )
)

# Save the result; the file name is prefixed with today's date.
saveRDS(
  compact_data,
  paste0("/storage11_7T/fuy/TADA-A/TADA-A/lib/", Sys.Date(), "_SNV_6166_syn_compact.rds")
)
 user   system  elapsed 
2.556    0.645 1318.286 

un-calibrated

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Run TADA_A_RR_estimate() once per selected annotation index and stack the
# `rr_report` element of each result into `dfn` (one report per annotation).
# `RR` keeps the result of the last iteration, as in the original loop.
annotation_ids <- 1

tm <- proc.time()

# Collect reports in a preallocated list and bind once after the loop instead
# of growing `dfn` with rbind() on every iteration.
rr_reports <- vector("list", length(annotation_ids))
for (k in seq_along(annotation_ids)) {
  i <- annotation_ids[[k]]

  RR <- TADA_A_RR_estimate(
    data = compact_data$base_info,
    selected_annotations = c(i),
    # gene_prior_file = "db/MS_data/prior/Example_gene_prior.txt",
    gene_prior_file = "db/MS_data/prior/uniform_gene_prior.txt",
    optimization_iteration = 2000
  )

  # `[k] <- list(...)` keeps the slot even if the report is NULL.
  rr_reports[k] <- list(RR$rr_report)
}
dfn <- do.call(rbind, rr_reports)

# Elapsed time for the whole estimation loop.
proc.time() - tm
# saveRDS(RR,paste0("/storage11_7T/fuy/TADA-A/annotation/results/",Sys.Date(),"_wes_deepsea_brain_RR.rds"))

dfn
[1] "Read in gene prior file. Time consumed: 0.006s."
[1] "Finished optimization. Time consumed: 601.394.s"
[1] "Got confidence intervals of RR estimates."
[1] "Finished RR estimation!"



    user   system  elapsed 
4019.175    4.939  601.413 
A data.frame: 1 × 3
logRR  lower_bound  upper_bound
<dbl>  <dbl>        <dbl>
-1     -1.834987    -0.1650126
1
2
3
4
5
6
7
8
# Number of entries in compact_data$base_info.
l <- length(compact_data$base_info)
l

# Total of `sum_mut_rate` taken from the element named `1` of every entry.
# vapply() + sum() replaces the `for (i in 1:l)` accumulator: that loop would
# iterate over c(1, 0) and fail on an empty list, and a missing field would
# silently turn the running total into numeric(0). vapply() instead stops with
# an error if any entry does not yield exactly one number.
# The result may differ from the sequential sum only at rounding level.
s <- sum(vapply(
  compact_data$base_info,
  function(entry) entry$`1`$sum_mut_rate,
  numeric(1)
))
s

67106

1691.73066556607

1
2
3
4
5
6
7
8
# Number of entries in compact_data$base_info.
l <- length(compact_data$base_info)
l

# Total of `sum_mut_count` taken from the element named `1` of every entry.
# vapply() + sum() replaces the `for (i in 1:l)` accumulator: that loop would
# iterate over c(1, 0) and fail on an empty list, and a missing field would
# silently turn the running total into numeric(0). vapply() instead stops with
# an error if any entry does not yield exactly one number.
s <- sum(vapply(
  compact_data$base_info,
  function(entry) entry$`1`$sum_mut_count,
  numeric(1)
))
s

67106

1441

1
# Print the first entry of base_info to inspect its fields.
compact_data$base_info[[1L]]

$1 =


$feature_vector

1

$sum_mut_rate_count

0

$sum_mut_rate

0.0825900456873

$sum_mut_count

0

$log_fcount

0