0507_tadaA
In [16]:
library(tadaA)
In [26]:
setwd("/project/wgs/analysis/TADA/TADA-A")

Adjust mutation rate

In [12]:
TADA_A_adjust_mutrate(window_file = 'test_data/test_windows.txt',
                      mut_file = 'test_data/test_simulated_DNM.bed',
                      genes = "all",sample_size = 5000,
                      mutrate_mode = "regular",
                      scaling_file_name = "test_data/test_simulated_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt",
                      scale_features = c("GC_content", "div_score"))
 * Processing input (1): a
 * Processing input (2): b
   bedtools coverage -a test_data/test_simulated_DNM.bed -b tmp/1855737462_temp_windows.bed 
Warning message in if (!is.na(scale_features)) {:
“the condition has length > 1 and only the first element will be used”
A matrix: 5 × 4 of type dbl
EstimateStd. Errorz valuePr(>|z|)
(Intercept) -0.01781306 0.03297785-0.540152355.890920e-01
coding 0.17063881 0.09523129 1.791835517.315932e-02
promoter-15.57912267160.47707125-0.097080059.226628e-01
GC_content 0.02344618 0.03182928 0.736623084.613516e-01
div_score -0.32024953 0.03596375-8.904786845.348873e-19

Read in DNM data, annotation data and mutation rate data

In [13]:
compact_data <- TADA_A_read_info(
mut_files = c("test_data/test_simulated_DNM_with_allele_info.txt"),
window_file = "test_data/test_windows.txt",
mutrate_scaling_files = c("test_data/test_simulated_windows_mutrate_with_div_score_scaling_file_for_test_DNM.txt"),
sample_sizes = c(5000),
gene_prior_file = "test_data/test_gene_prior.txt",
nonAS_noncoding_annotations = "test_data/test_non_allele_specific_annotation_1.bed",
AS_noncoding_annotations = NA,
report_proportion = 2000/18665,
chunk_partition_num =1,
node_n = 1,
mutrate_ref_files = c("test_data/test_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw", 
                      "test_data/test_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw",
                      "test_data/test_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw" ,
                      "test_data/test_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw"))
 * Processing input (1): a
 * Processing input (2): b
   bedtools coverage -a test_data/test_non_allele_specific_annotation_1.bed -b tmp/18557413781_temp_for_mutrate.bed 
 * Processing input (1): a
 * Processing input (2): b
   bedtools coverage -a tmp/18557413781_temp_mut_allele.bed -b tmp/18557413781_temp_for_mutrate.bed 
 * Processing input (1): a
 * Processing input (2): b
   bedtools coverage -a tmp/18557413781_temp_mut_allele.bed -b tmp/18557413781_temp_for_mutrate.bed 
 * Processing input (1): a
 * Processing input (2): b
   bedtools coverage -a tmp/18557413781_temp_mut_allele.bed -b tmp/18557413781_temp_for_mutrate.bed 
 * Processing input (1): a
 * Processing input (2): b
   bedtools coverage -a tmp/18557413781_temp_mut_allele.bed -b tmp/18557413781_temp_for_mutrate.bed 
[1] "echo \"Temp files cleaned and data recording finished!\""

Estimate relative risks

In [19]:
RR = TADA_A_RR_estimate(data = compact_data$base_info,
                    selected_annotations = c(1), gene_prior_file = "test_data/test_gene_prior.txt",
                    optimization_iteration = 2000)
In [20]:
RR
$mle
$par
2.60243512738338
$value
-301.828299056853
$counts
function
<NA>
gradient
<NA>
$convergence
0
$message
NULL
$hessian
A matrix: 1 × 1 of type dbl
-18.45476
$rr_report
A data.frame: 1 × 3
relative_risklower_boundupper_bournd
<dbl><dbl><dbl>
2.6024352.1461863.058684

Bayesian FDR control

In [18]:
Bayesian_FDR(BF=c(seq(1,10)), pi0=0.05, alpha=0.05)
$FDR
  1. 0.05
  2. 0.0378205128205129
  3. 0.0309608016504568
  4. 0.0264673544845959
  5. 0.02325721692101
  6. 0.0208302894631605
  7. 0.0189206319065896
  8. 0.0173725463823182
  9. 0.0160882582829909
  10. 0.0150029926641159
$ND
1

calculate the Bayes factor for each gene based on informative noncodng annotations

In [28]:
TADA_A_get_BFs(data = compact_data$base_info,
               selected_annotations = c(1,3), 
               rr = RR$rr_report$relative_risk, 
               additional_BF_file = "test_data/test_additional_BF_file", 
               TADA_p0 = 0.94)
Error in data_partition_element_level2[[1]][selected_annotations] %*% : non-conformable arguments
Traceback:

1. TADA_A_get_BFs(data = compact_data$base_info, selected_annotations = c(1, 
 .     3), rr = RR$rr_report$relative_risk, additional_BF_file = "test_data/test_additional_BF_file", 
 .     TADA_p0 = 0.94)
2. sapply(data, cal_logP_Zg1)
3. lapply(X = X, FUN = FUN, ...)
4. FUN(X[[i]], ...)
5. sapply(data_partition_element, cal_logP_Zg1_level2)
6. lapply(X = X, FUN = FUN, ...)
7. FUN(X[[i]], ...)

generate DNM

In [24]:
TADA_A_DNM_generator(window_file = "/project/wgs/analysis/TADA/MS_data/Example_windows.bed",
                     mutrate_scaling_files = c("/project/wgs/analysis/TADA/MS_data/Example_windows_mutrate_scaling_file_for_Yuen_NM2015_cases_DNM.txt","../data/Example_windows_mutrate_scaling_file_for_Kong_cases_DNM.txt", "../data/Example_windows_mutrate_scaling_file_for_Wu_cases_DNM.txt", "../data/Example_windows_mutrate_scaling_file_for_Jiang_cases_DNM.txt", "../data/Example_windows_mutrate_scaling_file_for_Michaelson_cases_DNM.txt"),
                     sample_sizes = c(162, 78, 32, 32, 10),
                     gene_prior_file = "/project/wgs/analysis/TADA/MS_data/Example_gene_prior.txt",
                     nonAS_noncoding_annotations = c("/project/wgs/analysis/TADA/MS_data/Noonan_brain_roadmap_union_within_10kb_and_promoter_no_utr.bed","../data/Epigenome_E081_E082_intersection__within_10kb_and_promoter_no_utr.bed","../data/Encode_DHS_union_within_10kb_and_promoter_no_utr.bed"),
                     AS_noncoding_annotations = NA,
                     report_proportion = 100/18665,
                     chunk_partition_num =1 ,
                     node_n = 6,
                     mutrate_ref_files = c("/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw",
                                           "/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw",
                                           "/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw",
                                           "/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw"),
                     rr = c(1, 0, 0.5),
                     output_allele_info_files = "allele_info",
                     output_bed_files = "allele_bed",
                     output_risk_genes_file = "temp.riskgenes",
                     compact_mut_output = NA,
                     MPI = 1)
Error in fread(window_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE): File '/project/wgs/analysis/TADA/MS_data/Example_windows.bed' does not exist or is non-readable. getwd()=='/project/wgs/analysis/TADA/TADA-A'
Traceback:

1. TADA_A_DNM_generator(window_file = "/project/wgs/analysis/TADA/MS_data/Example_windows.bed", 
 .     mutrate_scaling_files = c("/project/wgs/analysis/TADA/MS_data/Example_windows_mutrate_scaling_file_for_Yuen_NM2015_cases_DNM.txt", 
 .         "../data/Example_windows_mutrate_scaling_file_for_Kong_cases_DNM.txt", 
 .         "../data/Example_windows_mutrate_scaling_file_for_Wu_cases_DNM.txt", 
 .         "../data/Example_windows_mutrate_scaling_file_for_Jiang_cases_DNM.txt", 
 .         "../data/Example_windows_mutrate_scaling_file_for_Michaelson_cases_DNM.txt"), 
 .     sample_sizes = c(162, 78, 32, 32, 10), gene_prior_file = "/project/wgs/analysis/TADA/MS_data/Example_gene_prior.txt", 
 .     nonAS_noncoding_annotations = c("/project/wgs/analysis/TADA/MS_data/Noonan_brain_roadmap_union_within_10kb_and_promoter_no_utr.bed", 
 .         "../data/Epigenome_E081_E082_intersection__within_10kb_and_promoter_no_utr.bed", 
 .         "../data/Encode_DHS_union_within_10kb_and_promoter_no_utr.bed"), 
 .     AS_noncoding_annotations = NA, report_proportion = 100/18665, 
 .     chunk_partition_num = 1, node_n = 6, mutrate_ref_files = c("/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_A.mutrate.bw", 
 .         "/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_C.mutrate.bw", 
 .         "/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_G.mutrate.bw", 
 .         "/project/wgs/analysis/TADA/MS_data/Example_windows_extended_1bp_for_getting_base_level_mutrate.bed.fasta.tri.alt_T.mutrate.bw"), 
 .     rr = c(1, 0, 0.5), output_allele_info_files = "allele_info", 
 .     output_bed_files = "allele_bed", output_risk_genes_file = "temp.riskgenes", 
 .     compact_mut_output = NA, MPI = 1)
2. fread(window_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
3. stop("File '", file, "' does not exist or is non-readable. getwd()=='", 
 .     getwd(), "'")

retrieve_mutation_info

In [ ]:
retrieve_mutation_info(data_partition, genename, noncoding_annotation)