1 | library(data.table) |
Remove non-frameshift (nonfs) variants of unaffected individuals that are not in wd
1 | setwd("/storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected") |
1 | cd /storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected |
1 | notin = fread("/storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected/not_in_wd.44nonframeshift_unaff_unique.bed") |
1 | all4 = unaff.nonfs.wd[,c(1,2,6)] |
42
| chr | end | GENE_NAME | mg |
|---|---|---|---|
| <chr> | <int> | <chr> | <chr> |
| chr4 | 48523229 | FRYL | chr4_48523229 |
| chr12 | 11420400 | PRB3 | chr12_11420400 |
1 | nonfs_count = c() |
42
1 | library(data.table) |
Call:
glm(formula = nonfs_count ~ SNV_2N, family = poisson(link = "log"),
data = df)
Deviance Residuals:
Min 1Q Median
-0.67401072475467482814 -0.07119659925706947612 -0.06765228295833836114
3Q Max
-0.06511511531568234257 3.23975915589543239648
Coefficients:
Estimate Std. Error
(Intercept) -6.2828026906051723799 0.1933407844586273738
SNV_2N 0.7279000218031188574 0.2972449860550154810
z value Pr(>|z|)
(Intercept) -32.496000000000002217 < 0.0000000000000002 ***
SNV_2N 2.448819999999999997 0.014332 *
---
Signif. codes:
0 ‘***’ 0.001000000000000000020817 ‘**’ 0.01000000000000000020817 ‘*’
0.05000000000000000277556 ‘.’ 0.1000000000000000055511 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 506.60640306394611798 on 17477 degrees of freedom
Residual deviance: 503.79217682117155164 on 17476 degrees of freedom
AIC: 591.79217682117155164
Number of Fisher Scoring iterations: 8
| | genename | nonfs_count | nonfs_rate_2N |
|---|---|---|---|
| | <fct> | <dbl> | <dbl> |
| 1 | A1BG | 0 | 0.002328933656623771893124 |
| 2 | A1CF | 0 | 0.002619080598964510127324 |
1 | head(df,2) |
| | genename | GC_content | SNV_rate | nonfs_count | length | SNV_2N | nonfs_rate_2N |
|---|---|---|---|---|---|---|---|
| | <fct> | <dbl> | <dbl> | <dbl> | <int> | <dbl> | <dbl> |
| 1 | A1BG | 0.5706439323514730377340 | 0.0000694971091999999968699 | 0 | 4006 | 0.3028684018935999811006 | 0.002328933656623771893124 |
| 2 | A1CF | 0.3689471682807459740339 | 0.0001065103076999999960400 | 0 | 9603 | 0.4641719209565999815226 | 0.002619080598964510127324 |
1 | mean(df$SNV_2N) |
0.321189451590518
0.2794803710137
1 | fwrite(df,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected/nonfs_unaff_mutrate",sep="\t") |
Remove frameshift variants of unaffected individuals that are not in wd
1 | setwd("/storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected") |
127
| chr | start | end |
|---|---|---|
| <chr> | <dbl> | <int> |
| chr12 | 120135792 | 120135793 |
| chr12 | 51470314 | 51470315 |
1 | cd /storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected |
1 | notin = fread("/storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected/not_in_wd.127frameshift_unaff_unique.bed") |
Remove frameshift variants of affected individuals that are not in wd
1 | setwd("/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/indel") |
458
458
| chr | start | pos |
|---|---|---|
| <chr> | <dbl> | <int> |
| chr1 | 110584429 | 110584430 |
| chr15 | 69080206 | 69080207 |
1 | cd /storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/indel |
2 not_in_wd.458frameshift_aff_unique.bed
1 | notin = fread("/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/indel/not_in_wd.458frameshift_aff_unique.bed") |
2
458
456
| chr | pos | ref | alt | GENE_NAME | start | mg |
|---|---|---|---|---|---|---|
| <chr> | <int> | <chr> | <chr> | <chr> | <dbl> | <chr> |
| chr1 | 110584430 | GC | G | STRIP1 | 110584429 | chr1_110584430 |
| chr15 | 69080207 | CTTCGAGTTTGCCTTCA | C | ANP32A | 69080206 | chr15_69080207 |
EM
1 | df2 = fread("/storage11_7T/fuy/TADA-A/cell_WES/DNM/unaffected/nonfs_unaff_mutrate")[,c(1,4,7)] |
| genename | nonfs_count | nonfs_rate_2N |
|---|---|---|
| <chr> | <int> | <dbl> |
| A1BG | 0 | 0.002328933656623770158400 |
| A1CF | 0 | 0.002619080598964510127324 |
1 | setwd("/storage11_7T/fuy/TADA-A/cell_WES/DNM/indel_rate/") |
17478
1 | sum(mg2$fs_count) |
456
1 | gama= 20 ### initial gamma |
172
22.4947944826454
3.1132839261165
1 | annota = c("baseline","SNV","SNV+Indel") |
| annota | num_risk_genes |
|---|---|
| <fct> | <dbl> |
| baseline | 54 |
| SNV | 81 |
| SNV+Indel | 172 |
1 | fwrite(mg3,"/storage11_7T/fuy/TADA-A/cell_WES/DNM/affected/indel/res/2021-05-24_indel+snv_risk_ASD_genes.txt",sep="\t") |
1 | baseline = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-05-24_selected14_PP_estim_pi_risk_genes.rds") |
54
1 | novel = mg3[!(mg3$genename %in% baseline$genename),] |
126
1 | snv$idx = 1:nrow(snv) |
| genename | prior | q0 | FDR | idx |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <int> |
| GFAP | 0.9448247554042644713590 | 0.05517524459573552864100 | 0.02279162570754494562819 | 30 |
| PTK7 | 0.9447206373370109488974 | 0.05527936266298905110261 | 0.02383961722223669130183 | 31 |
| UGT1A3 | 0.9402129689672902301822 | 0.05978703103270976981776 | 0.02496297390381397446335 | 32 |
| UGT1A4 | 0.9400102711412551270698 | 0.05998972885874487293023 | 0.02602439072063006397784 | 33 |
| UGT1A5 | 0.9397636348002530448653 | 0.06023636519974695513469 | 0.02703062526413349889176 | 34 |
| UGT1A7 | 0.9396900001393618806489 | 0.06030999986063811935111 | 0.02798146453831934499235 | 35 |
| UGT1A10 | 0.9395828949616745751072 | 0.06041710503832542489278 | 0.02888245455220840585131 | 36 |
| UGT1A9 | 0.9395767895866390384541 | 0.06042321041336096154595 | 0.02973490741332063952429 | 37 |
| UGT1A1 | 0.9392521767815901201359 | 0.06074782321840987986405 | 0.03055103677661245853425 | 38 |
| UGT1A6 | 0.9377735025624205356110 | 0.06222649743757946438905 | 0.03136322807561161413181 | 39 |
| PRKAR1B | 0.9299240388216790664799 | 0.07007596117832093352007 | 0.03233104640317933931026 | 40 |
1 |
1 | snv = readRDS("/storage11_7T/fuy/TADA-A/cell_WES/DNM/2021-05-21_selected14_estim_pi_risk_genes.rds") |
81
172
93
1 | snv_pp = snv[!(snv$genename %in% baseline$genename),] |
1 |