-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path_ukbb_h2_process_confidence.Rmd
1231 lines (896 loc) · 60.6 KB
/
_ukbb_h2_process_confidence.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "Defining Confidence Levels for UKB Round 2 LDSR Analyses"
date: "Last updated `r format(Sys.Date())`"
author: "Results from the [Neale Lab](credits.html)"
output:
html_document:
toc: true
toc_float: true
number_sections: false
params:
datfile: "../results/round2_raw/h2_topline_temp.tsv.gz"
fulldatfile: "../results/round2_raw/bothsexes_h2_topline_temp.tsv.gz"
fulldatmfile: "../results/round2_raw/male_h2_topline_temp.tsv.gz"
fulldatffile: "../results/round2_raw/female_h2_topline_temp.tsv.gz"
sexnfile: "../results/round2_raw/h2_topline_sex_ns_temp.tsv.gz"
onesexnfile: "../results/round2_raw/h2_topline_onesex_ns_temp.tsv.gz"
datadict: "../reference/Data_Dictionary_Showcase.csv"
ordwarn: "../reference/ukb_ord_codings_warn.txt"
outdir: "../results/round2_raw"
imagedir: "../external_images"
---
```{r child = '_toc_fix.Rmd'}
```
```{r child = '_code_highlight_fix.Rmd'}
```
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")
# devtools::install_github("ropensci/plotly")
# Use library() rather than require() for hard dependencies: library()
# errors immediately if a package is missing, whereas require() only
# returns FALSE and lets knitting continue to a more confusing failure.
library(plotly)    # interactive figures
library(DT)        # interactive tables
library(crosstalk) # linked-widget support
library(crosstool) # crosstalk helper widgets
library(Rmpfr)     # arbitrary-precision arithmetic (p-values that underflow doubles)
# Default plotly.js color cycle, kept here so manually added traces can be
# colored consistently with plotly's automatic assignment.
# https://stackoverflow.com/questions/40673490/how-to-get-plotly-js-default-colors-list
plotly_colors <- c(
  '#1f77b4', # muted blue
  '#ff7f0e', # safety orange
  '#2ca02c', # cooked asparagus green
  '#d62728', # brick red
  '#9467bd', # muted purple
  '#8c564b', # chestnut brown
  '#e377c2', # raspberry yogurt pink
  '#7f7f7f', # middle gray
  '#bcbd22', # curry yellow-green
  '#17becf'  # blue-teal
)
```
<style type="text/css">
div.main-container {
max-width: 1800px;
margin-left: auto;
margin-right: auto;
}
</style>
```{r data_load2, include=FALSE}
# Load the LDSR topline results produced by the topline-selection step.
# Creates: dat, dat_full, datm_full, datf_full, sex_ns, onesex_ns.
# All inputs are tab-delimited; quote = "" guards against stray quote
# characters inside phenotype descriptions.
# (T/F shorthand replaced with TRUE/FALSE: T and F are reassignable.)
read_h2 <- function(path) {
  read.delim(path, sep = '\t', quote = "", header = TRUE,
             stringsAsFactors = FALSE)
}
dat <- read_h2(params$datfile)           # primary (topline) results
dat_full <- read_h2(params$fulldatfile)  # all both-sex results
datm_full <- read_h2(params$fulldatmfile) # all male results
datf_full <- read_h2(params$fulldatffile) # all female results
sex_ns <- read_h2(params$sexnfile)        # sex-specific sample sizes
onesex_ns <- read_h2(params$onesexnfile)  # single-sex sample sizes
```
```{r plotly_dummy, echo=F, warnings=F, message=F,include=F}
# to catch initial plotly package messages
# Rendering one throwaway plot here absorbs plotly's first-use startup
# messages so they don't leak into the visible chunks below (include=F
# hides this chunk's output entirely).
plot_ly(x=rnorm(2),y=rnorm(2),type="scatter",mode="markers")
```
<br>
***
# Overview
Elsewhere we've described how [we selected a "primary" $h^2_g$ result](select_topline.html) for each of the unique phenotypes in the UKB GWAS. These phenotypes range widely in terms of sample size and polygenicity, among other factors. These features can affect our confidence in the stability of the LD Score Regression results. For example we previously noted concerns about [bias at low effective sample sizes](http://www.nealelab.is/blog/2017/9/20/insights-from-estimates-of-snp-heritability-for-2000-traits-and-disorders-in-uk-biobank).
We document here our evaluation of confidence in each of the LDSR $h^2_g$ results. This is separate from our evaluation of [statistical significance](significance.html) for these results. Here we look at:
* What is the [minimum sample size](#minimum_sample_size) necessary to have statistical power for LDSR $h^2_g$ estimation?
* Are there [signs of bias](#potential_small_sample_biases) in $h^2_g$ estimates as a function of sample size?
* Are there phenotypes with [unexpectedly large sampling variation](#unusually_large_standard_errors) for their $h^2_g$ estimate?
* Are there [phenotyping](#sex-biased_phenotypes) [features](#ordinal_coding_issues) to be aware of in assessing our confidence in interpreting the results?
We conclude with a summary of our [current confidence classifications](#summary_of_confidence_ratings).
***
<br>
# Minimum sample size
After [we've defined a "primary" $h^2_g$ result](select_topline.html) for each of the `r length(unique(dat$phen_stem))` unique phenotypes in the Round 2 GWAS, we know that the sample size (or effective sample size) for many of these phenotypes is quite low. For example, case counts are quite small for most ICD codes and job codes. To avoid unnecessary multiple testing for the significance of the $h^2_g$ results we therefore aim to identify the minimum (effective) sample size required to provide sufficient power to make LDSR analysis viable. [*NB:* We focus first on a bare minimum in terms of power, we'll return to the question of bias later.]
*Goal:* determine the minimum (effective) sample size where we'd expect to have some useful level of power to detect a reasonable $h^2_g$.
<br>
## Question 1: What's the relationship between sample size and precision for LDSR?
<div class="well">
To evaluate the effective sample size required to have power in LDSR analyses, we consider the relationship between $N_{eff}$ and the observed standard error (SE) for the LDSR $h^2_g$ estimate for the `r length(unique(dat$phen_stem))` Round 2 phenotypes.
```{r neff_se, echo=F}
# Scatter of h2 SE vs. effective N, colored by the (clipped) h2 estimate.
# for color scaling
# Clip h2 to [0, 0.5] purely for the color scale; the plotted values and
# hover text still use the unclipped h2_liability.
dat$h2_liab <- dat$h2_liability
dat$h2_liab[dat$h2_liab < 0] <- 0
dat$h2_liab[dat$h2_liab > .5] <- 0.5
# handle p=0
# Intercept p-values that underflow to 0 in double precision are recomputed
# from the intercept Z score with 64-bit mpfr arithmetic (p = 0.5*erfc(z/sqrt(2)))
# so the hover text can show a real (tiny) value instead of "0".
dat$int_p_text <- as.character(signif(dat$intercept_p, 3))
dat$int_p_text[dat$intercept_p==0] <- as.character(format(mpfr(0.5,64)*erfc(mpfr(dat$intercept_z[dat$intercept_p==0],64)/sqrt(mpfr(2,64))),max.digits=3,scientific=T))
pp <- plot_ly(dat,
x=~Neff,
y=~h2_liability_se,
type="scatter",
mode="markers",
color=~h2_liab,
colors=c("blue","darkorange"),
hoverinfo="text",
width=400,height=400,
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff)) %>%
layout(xaxis=list(title="Neff"),
yaxis=list(title="SE of SNP h2 estimate (liability)"),
margin=list(b=65))
htmltools::div( pp, align="center" )
```
*Takeaway:* There's a clear inverse relationship between $N_{eff}$ and the SE of the $h^2_g$ estimate. The SE is also very large (e.g. wider than the 0-1 range for $h^2$) at small effective sample sizes.
<br>
For both visualization and modelling, it's useful to look at this relationship in terms of the inverse variance ($1/SE^2$):
```{r neff_se_inv, echo=F}
# Same relationship as the previous plot but on the inverse-variance scale
# (1/SE^2), which is nearly linear in Neff and easier to model below.
# Reuses dat$h2_liab (clipped color) and dat$int_p_text from the neff_se chunk.
pp <- plot_ly(dat,
x=~Neff,
y=~1/h2_liability_se^2,
type="scatter",
mode="markers",
color=~h2_liab,
colors=c("blue","darkorange"),
hoverinfo="text",
width=400,height=400,
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff)) %>%
layout(xaxis=list(title="Neff"),
yaxis=list(title="1/SE^2 for SNP h2 estimate (liability)"),
margin=list(b=65))
htmltools::div( pp, align="center" )
```
*Takeaway:* There's a nearly linear relationship between $N_{eff}$ and $1/SE^2$. This relationship is highly heteroskedastic, with much greater variability in the inverse variance when $N_{eff}$ is large.
<br>
We can model this observed relationship in order to establish the "expected" inverse variance as a function of $N_{eff}$. Given the observed heteroskedasticity, and that our primary interest is in the error variance at small sample sizes, we consider a weighted regression with weights inversely proportional to sample size. Based on the plot above we also allow for quadratic curvature in the relationship with $N_{eff}$, and also compare regression to a loess fit.
```{r neff_se_inv_fit, echo=F}
# Fit two candidate models of inverse variance (1/SE^2) as a function of Neff:
#  - mod1: weighted quadratic regression through the origin ("-1" drops the
#    intercept). Weights 1/Neff^3 downweight large-N points, addressing the
#    heteroskedasticity noted above and prioritizing fit at small N.
#  - llmod: loess smooth with a narrow span (0.2) for a flexible local fit.
dat$Neff_sq <- (dat$Neff)^2
mod1 <- lm((1/h2_liability_se^2) ~ Neff + Neff_sq -1,data=dat, weights = 1/Neff^3)
llmod <- loess((1/h2_liability_se^2) ~ Neff, data = dat, span=0.2)
# Convert the loess-fitted inverse variance back to a predicted SE per
# phenotype (loess$fitted is in input row order, so this aligns with dat).
# Used later for the power analysis.
dat$pred_h2liab_se <- 1/sqrt(llmod$fitted) # save for later
# summary(mod1)
pp <- plot_ly(dat,
x=~Neff,
y=~1/h2_liability_se^2,
type="scatter",
mode="markers",
hoverinfo="text",
width=400,height=400,
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff)) %>%
add_trace(x=~Neff[order(Neff)],
y=~mod1$fitted.values[order(Neff)],
mode="lines",
showlegend=F,
hoverinfo="text",
text="") %>%
add_trace(x=llmod$x[order(llmod$x)],
y=llmod$fitted[order(llmod$x)],
showlegend=F,
mode="lines",
hoverinfo="text",
text="") %>%
layout(xaxis=list(title="Neff"),
yaxis=list(title="1/SE^2 for SNP h2 estimate (liability)"),
margin=list(b=65))
htmltools::div( pp, align="center" )
```
*Note:* Orange = regression, green = loess.
<br>
Of these, the loess fit appears to better fit the overall trend of the data, especially when focusing on smaller sample sizes (as is visible when zooming the above plot). Note the smaller sample sizes are the range relevant to the current question of the minimum viable sample size for ldsc.
```{r neff_se_lowN_fit, echo=F}
# Compare the two fits on the SE scale (1/sqrt of the fitted inverse
# variance), zoomed to small Neff (x range 0-5000) where the minimum-N
# decision is made. mod1 and llmod come from the previous chunk.
pp <- plot_ly(dat,
x=~Neff,
y=~h2_liability_se,
type="scatter",
mode="markers",
hoverinfo="text",
width=400,height=400,
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff)) %>%
add_trace(x=~Neff[order(Neff)],
y=~1/sqrt(mod1$fitted.values[order(Neff)]),
mode="lines",
showlegend=F,
hoverinfo="text",
text="") %>%
add_trace(x=llmod$x[order(llmod$x)],
y=1/sqrt(llmod$fitted[order(llmod$x)]),
showlegend=F,
mode="lines",
hoverinfo="text",
text="") %>%
layout(xaxis=list(title="Neff",range=c(0,5000)),
yaxis=list(title="SE for SNP h2 estimate (liability)", range=c(0,0.35)),
margin=list(b=65))
htmltools::div( pp, align="center" )
# The loess fit is adopted from here on, so the regression model can go.
rm(mod1)
```
<br>
We adopt the loess model (green) for predicting SEs from sample size for the rest of this section.
</div>
<br>
## Question 2: Power as a function of expected SE
<div class="well">
Given the above model for expected SEs as a function of sample size, and assuming the SEs are well calibrated, we can then turn these modelled SEs into a power analysis.
Specifically, we ask for a given true SNP heritability, sample size and p-value threshold what the probability is of the estimated $h^2_g$ divided by its SE exceeding the corresponding critical value for the Z statistic, assuming the sampling variation of the estimated $h^2_g$ and its estimated SE both match the expected SE based on sample size given by the above model. We focus here on power at $p < .05$ without any multiple testing correction in the interest of providing results relevant to hypotheses that include look-ups of $h^2_g$ for a single phenotype (as opposed to scans across all available UKB phenotypes). We'll return to the question of which results we'd consider significant in the context of looking at all `r length(unique(dat$phen_stem))` phenotypes later.
```{r neff_power_curve_reg, echo=F}
# Estimated power to detect a given true SNP h2 at nominal p < .05, using
# the loess-predicted SE for each effective N.
# Model: h2hat ~ N(h2, se^2), so z = h2hat/se ~ N(h2/se, 1) and
#   power = P(z > z_alpha) = pnorm(z_alpha, mean = h2/se, sd = 1, lower.tail = FALSE)
# est_power: power for true heritability h2 given predicted SE se at level alpha.
est_power <- function(h2, se, alpha = .05) {
  pnorm(qnorm(alpha, lower.tail = FALSE), mean = h2/se, sd = 1, lower.tail = FALSE)
}
# Thin to every 10th phenotype (always keeping the largest N) so the
# interactive curves stay light.
pow_pred_points <- dat[order(dat$Neff), c("Neff","pred_h2liab_se")]
qq <- c(seq(1, nrow(pow_pred_points)-1, 10), nrow(pow_pred_points))
pow_pred_points <- pow_pred_points[qq,]
# Power curves across a range of plausible true h2 values
pp1 <- plot_ly(pow_pred_points,
               x=~Neff,
               y=~est_power(.3, pred_h2liab_se),
               type="scatter",
               mode="lines",
               name="SNP h2 = 0.3",
               showlegend=TRUE,
               hoverinfo="none",
               height=400, width=400
) %>% add_trace(
  x=~Neff,
  y=~est_power(.2, pred_h2liab_se),
  mode="lines",
  name="SNP h2 = 0.2"
) %>% add_trace(
  x=~Neff,
  y=~est_power(.1, pred_h2liab_se),
  mode="lines",
  name="SNP h2 = 0.1"
) %>% add_trace(
  x=~Neff,
  y=~est_power(.05, pred_h2liab_se),
  mode="lines",
  name="SNP h2 = 0.05"
) %>% layout(
  # Fix: margin is a top-level layout attribute; it was previously nested
  # inside the yaxis list, where plotly silently ignored it.
  xaxis = list(title = "Effective N", range=c(0,30000)),
  yaxis = list(title = "Estimated Power", range=c(0,1)),
  margin = list(b=65)
)
htmltools::div( pp1, align="center" )
```
Note that this is a bit more empirical than a formal power analysis. We're relying on the average observed SE reported for the ldsc $h^2_g$ estimates rather than some theoretical expectation for the SE. There's also likely a relationship between the SE and the true $h^2_g$ (and corresponding genetic architecture), as evidenced by the previous plots of precision vs. sample size colored by $h^2_g$ estimate, that we do not account for in this power estimate. Nevertheless, we use this modelled estimate of power as a useful rough benchmark for evaluating what range of sample sizes to include in this $h^2_g$ analysis.
</div>
<br>
## Question 3: What $h^2_g$ is worth detecting?
<div class="well">
Now that we have rough estimates of power as a function of $h^2_g$ and sample size, the question is what $h^2_g$ is relevant to have power to detect.
If we look at the UKB phenotypes with the largest sample sizes (i.e. $N_{eff}$>300k, where there's no worry about power) we find that the vast majority of phenotypes have $h^2_g$ estimates $\leq 0.3$, with a somewhat bimodal distribution (overall mean=`r round(mean(dat$h2_liability[dat$Neff>300000]),3)`, median=`r round(median(dat$h2_liability[dat$Neff>300000]),3)`).
```{r h2_high_neff, echo=F}
# Distribution of liability-scale h2 estimates among phenotypes with
# Neff > 300k (where power is not a concern); used to choose what size of
# h2 is "worth detecting" in the power analysis.
pp <- plot_ly(dat[dat$Neff > 300000,],
              x=~h2_liability,
              type="histogram",
              showlegend=FALSE,
              hoverinfo="none",
              width=400, height=400
) %>% layout(
  # Fix: margin is a top-level layout attribute; it was previously nested
  # inside the xaxis list, where plotly silently ignored it.
  xaxis = list(title="SNP h2 (liability)", range=c(-0.1,0.6)),
  margin = list(b=65)
)
htmltools::div( pp, align="center" )
```
<br>
From the observed $h^2_g$ distribution, it appears the phenotypes with larger estimated SNP heritability tend to have estimates around $h^2_g=0.2$ or slightly larger. We can then orient our decisions on sample size around having power to detect $h^2_g \geq 0.2$.
</div>
<br>
## Conclusion
```{r required_neff, echo=F}
# Predicted power to detect h2 = 0.2 at nominal p < .05, using the
# loess-predicted SE for each phenotype's effective N.
crit_z <- qnorm(.05, lower.tail = FALSE)
predicted_power <- pnorm(crit_z, mean = .2 / dat$pred_h2liab_se, sd = 1,
                         lower.tail = FALSE)
# Smallest effective N whose predicted power clears the 80% target.
req_neff <- min(dat$Neff[predicted_power > 0.8])
# Round threshold actually applied in the text below (rounded down to be
# slightly inclusive).
req_neff_round <- 4500
rm(crit_z, predicted_power)
```
From the above estimated power analysis, based on a loess fit of the observed SE of $h^2_g$ estimates as a function of effective N, we estimate needing $N_{eff}$=`r req_neff` to have at least 80% power to detect $h^2_g \geq 0.2$ at nominal significance ($p < .05$). We take 80% power as a standard goal for having a "well-powered" analysis, and focus on $h^2_g \geq 0.2$ and $p < .05$ for the reasons described above.
For the sake of having a round number, and choosing to err slightly towards being inclusive in the analysis, we take this fitted value and round down to $N_{eff}=`r req_neff_round`$. We exclude `r sum(dat$Neff <= req_neff_round)` phenotypes with $N_{eff} \leq `r req_neff_round`$ from further consideration in the LDSR $h^2_g$ analysis based on this threshold, leaving `r sum(dat$Neff > req_neff_round)` phenotypes.
[*NB:* We're lenient here largely because we know much more stringent criteria on $N_{eff}$ will be applied below in defining "low confidence" results.]
```{r apply_min_n, echo=F}
# Initialize the confidence rating on every results table and apply the
# minimum effective-N (power) filter. Later sections only ever demote
# these ratings, never promote them.
# (Cleanup: consistent `<-` assignment, TRUE/FALSE instead of T/F, and the
# 4x copy-pasted logic factored into one helper.)
#
# mark_confidence: add tracking columns to one results table.
#  - confidence: starts "high"; non-primary rows (when flagged) become
#    "NA_(not_primary)"; underpowered rows still at "high" become "none".
#  - isNotPrimary: TRUE for results not selected as the primary h2 result.
#  - isBadPower: TRUE when Neff is at or below req_neff_round.
mark_confidence <- function(df, flag_nonprimary) {
  df$confidence <- "high"
  df$isNotPrimary <- !df$keep
  if (flag_nonprimary) {
    df$confidence[df$isNotPrimary] <- "NA_(not_primary)"
  }
  df$isBadPower <- df$Neff <= req_neff_round
  df$confidence[df$isBadPower & df$confidence == "high"] <- "none"
  df
}
# dat holds only primary results, so it skips the not-primary marking.
dat <- mark_confidence(dat, flag_nonprimary = FALSE)
dat_full <- mark_confidence(dat_full, flag_nonprimary = TRUE)
datm_full <- mark_confidence(datm_full, flag_nonprimary = TRUE)
datf_full <- mark_confidence(datf_full, flag_nonprimary = TRUE)
# Drop underpowered phenotypes from the primary analysis table only; the
# *_full tables keep all rows with the confidence rating recorded.
dat <- dat[!dat$isBadPower,]
```
<br>
***
# Potential small sample biases
The above set of conditions identifies `r nrow(dat)` phenotypes that avoid redundancy for encoding and sex-specific results and reach a minimum effective sample size ($N_{eff}$) necessary for LDSR analysis to be viable in terms of power. These thresholds may be considered a bare minimum. We now consider instances among these `r nrow(dat)` phenotypes where we may still be pessimistic about the accuracy of the LDSR results.
<br>
<div class="well">
In the first round of UKB GWAS from the Neale lab, we highlighted [possible biases](http://www.nealelab.is/blog/2017/9/20/insights-from-estimates-of-snp-heritability-for-2000-traits-and-disorders-in-uk-biobank) in the LDSR $h^2_g$ estimate at low $N_{eff}$. In particular, we observed signs of attenuated $h^2_g$ estimates for phenotypes with $N_{eff} < 10,000$. We reassess that trend here.
```{r n_bias, echo=F}
# h2 estimate vs. effective N with a wide-span loess trend line, to check
# for the low-N attenuation observed in Round 1.
ll <- loess(h2_liability ~ Neff, data=dat, span = 1)
pp <- plot_ly(dat,
x=~Neff,
y=~h2_liability,
type="scatter",
mode="markers",
width=800,height=300,
hoverinfo="text",
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff)) %>%
add_trace(x=ll$x[order(ll$x)],
y=ll$fitted[order(ll$x)],
showlegend=F,
mode="lines",
hoverinfo="text",
text="") %>%
layout(yaxis=list(title="SNP h2 (liability)", range=c(-.25,.5)),
margin=list(b=65))
htmltools::div( pp, align="center" )
```
*Note*: Plot restricted to `r nrow(dat)` phenotypes passing $N_{eff}$ threshold. Zoom out to see $h^2_g$ outliers.
*Takeaway:* Some mild attenuation in $h^2_g$ is seen below $N_{eff} = 100,000$, but it's not as severe nor as sharply limited to $N_{eff} < 10,000$ as observed in round 1.
</div>
<br>
## Question 1: results changed as a function of mix of questions?
<div class="well">
One possible hypothesis is that the change in the shape of attenuation reflects the changing mix of phenotypes in the Round 2 GWAS release. In particular, there's a large number of additional diet items with $N_{eff} \approx 50,000$ and mental health items at $N_{eff} \approx 120,000$. It's possible these subsets of items are influencing the shape of the attenuation curve by confounding true $h^2_g$ with $N_{eff}$. If we restrict to variables observed in nearly all samples:
```{r n_bias_300k, echo=F}
# Same h2 vs. Neff trend as above, restricted to phenotypes observed in
# nearly all samples (total n > 300,000), to test whether the changing mix
# of phenotypes (diet items, mental health items) drives the trend shape.
ll <- loess(h2_liability ~ Neff, data=dat[dat$n > 300000, ], span = 1)
pp <- plot_ly(dat[dat$n > 300000, ],
x=~Neff,
y=~h2_liability,
type="scatter",
mode="markers",
width=800,height=300,
hoverinfo="text",
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff)) %>%
add_trace(x=ll$x[order(ll$x)],
y=ll$fitted[order(ll$x)],
showlegend=F,
mode="lines",
hoverinfo="text",
text="") %>%
layout(yaxis=list(title="SNP h2 (liability)", range=c(-.1,.5)),
margin=list(b=65))
htmltools::div( pp, align="center" )
```
*Note:* zoom out for $h^2_g$ outliers.
*Takeaway:* the milder attenuation of $h^2_g$ as a function of $N_{eff}$ is still observed when restricting to `r sum(dat$n > 300000)` phenotypes with $N > 300,000$. Similar trends are observed for subsetting on other dimensions (not shown).
**Conclusion:** The mix of phenotypes doesn't appear to explain the weaker attenuation effect.
</div>
<br>
## Question 2: Attenuation evident in downsampling?
<div class="well">
Instead of looking across phenotypes, we can also consider the impact of sample size on $h^2_g$ estimates within a phenotype. We evaluate this by looking at how the LDSR estimates change when subsetting individuals from UKB for phenotypes with high $N$s and strong $h^2_g$ estimates.
Specifically, for a given downsampling experiment we split UKB individuals into 300 chunks, perform a GWAS within each chunk (controlling for the standard GWAS covariates), and then meta-analyze increasing numbers of chunks. We then run LDSR for each of these meta-analyses to assess how the $h^2_g$ estimates change with growing meta-analyses.
```{r downsample, fig.align='center', out.width=800, echo=F}
# Static images from the externally run downsampling experiments (plot +
# legend); the PNGs live under params$imagedir.
knitr::include_graphics(paste0(params$imagedir,"/downsample_height.png"))
knitr::include_graphics(paste0(params$imagedir,"/downsample_leg.png"))
```
*Note:* Above are two specific examples among a broader set of ongoing analyses. Plots by Nikolas Baya.
*Takeaway:* There are signs of attenuation of the LDSR $h^2_g$ estimate at low $N$ in height, but leg impedance shows no such attenuation, and instead may have an upward bias. In both cases, estimates are clearly unstable at low $N$.
**Conclusion:** These results are preliminary, and appear fairly unstable across phenotypes, but suggest that attenuation may occur irregularly across phenotypes and that in some instances there may be upward bias rather than downward attenuation at low sample sizes.
</div>
<br>
## Question 3: Attenuation a function of stratification?
<div class="well">
The downsampling experiments, while inconclusive, do suggest instability of the LDSR $h^2_g$ at low $N$, with variability possibly exceeding the SE estimates. Notably, there were signs of attenuation at low $N$ for height, a phenotype where the GWAS is likely affected by population stratification (intercept = $`r signif(dat$intercept[dat$phenotype=="50_irnt"],4)`$, p = $`r signif(dat$intercept_p[dat$phenotype=="50_irnt"],3)`$, ratio = $`r signif(dat$ratio[dat$phenotype=="50_irnt"],2)`$), but not leg impedance where stratification is weaker (intercept = $`r signif(dat$intercept[dat$phenotype=="23107_irnt"],4)`$, p = $`r signif(dat$intercept_p[dat$phenotype=="23107_irnt"],3)`$, ratio = $`r signif(dat$ratio[dat$phenotype=="23107_irnt"],2)`$).
This leads us to evaluate whether the relationship between sample size and LDSR $h^2_g$ varies as a function of stratification. We start by noting that the LDSR intercept is expected to estimate $1+Na$ where $N$ is sample size and $a$ is an index of population stratification or other confounding that is, under a simple stratification model, proportional to $F_{st}$. It follows that $a=(intercept-1)/N$ should give an estimate of stratification that is invariant to sample size.
On that basis, we look at LDSR $h^2_g$ estimates as a function of $N_{eff}$ split by deciles of $a=(intercept-1)/N$.
```{r alpha_dec, echo=F}
# h2 vs. Neff, with a separate loess trend per decile of
# a = (intercept - 1) / n, a sample-size-invariant index of
# stratification/confounding derived from the LDSR intercept (= 1 + N*a).
dat$alpha <- (dat$intercept-1)/dat$n
pp <- plot_ly(dat,
x=~Neff,
y=~h2_liability,
type="scatter",
mode="markers",
width=800,
hoverinfo="text",
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff)) %>%
layout(
xaxis = list(title="Neff", range=c(0,200000)),
yaxis = list(title="SNP h2 (liability)", range=c(-.5,.5)),
title = "alpha deciles",
margin=list(b=65)
)
# One loess fit per alpha decile, added as a labelled line trace.
# NOTE(review): both decile endpoints are inclusive, so a phenotype sitting
# exactly on a decile boundary contributes to two adjacent fits — confirm
# this is intentional.
qq <- quantile(dat$alpha,probs = seq(0,1,.1))
for(i in 1:10){
ll <- loess(h2_liability ~ Neff, data=dat[(dat$alpha >= qq[i]) & (dat$alpha <= qq[i+1]),],span=0.5)
pp <- pp %>% add_trace(x=ll$x[order(ll$x)],
y=ll$fitted[order(ll$x)],
showlegend=T,
mode="lines",
hoverinfo="text",
text="",
name=paste0("Dec",i," [",signif(qq[i],1),",",signif(qq[i+1],1),"]"))
}
htmltools::div( pp, align="center" )
rm(ll)
```
*Note:* Plot restricted to $N_{eff} < 200,000$ and $|h^2_g|<.5$ for visibility.
From the above plot, we see a strong relationship between the trend in $h^2_g$ at low sample sizes and $a$ from the fitted intercept. These trends also continue in the sample sizes below $N_{eff}=`r req_neff_round`$ excluded above. Specifically, phenotypes at low $N_{eff}$ with higher fitted intercepts (e.g. deciles 8-10) tend to have lower $h^2_g$ estimates (to the point of the average estimate being negative), while phenotypes with lower fitted estimates (including $a < 0$, which corresponds to intercepts below 1) have higher $h^2_g$ estimates. Phenotypes with nearly null intercepts (deciles 3-4, $a \approx 0$, intercept $\approx 1$) show no directional bias on average.
It's worth noting that the trends conditional on estimated $a$ are likely to reflect both:
* Effects of true stratification/confounding in the GWAS, such that the true value of $a$ is non-zero
* Negative correlation of intercept and slope estimate from the LDSR regression when there's insufficient power to differentiate signal from noise
* Unstable trends for the top/bottom decile due to limited phenotypes with low intercept alphas at high sample size, and (to a lesser extent) with high intercepts at low sample size.
```{r neff_alpha_dist, echo=F}
# Violin plots of effective N within each alpha decile, to show how
# phenotypes are (unevenly) distributed across sample sizes by decile.
# The same cut() expression defines the x grouping, the trace split, and
# the color mapping so all three agree.
pp <- plot_ly(dat,
y=~Neff,
x=~cut(alpha,breaks=quantile(alpha,probs = seq(0,1,.1)),include.lowest=TRUE),
split=~cut(alpha,breaks=quantile(alpha,probs = seq(0,1,.1)),include.lowest=TRUE),
type='violin',
box=list(visible=T),
meanline=list(visible=T),
color=~cut(alpha,breaks=quantile(alpha,probs = seq(0,1,.1)),include.lowest=TRUE),
colors=plotly_colors[c(2:10,1)],
height=300,width=800,
hoveron="points",
hoverinfo="text",
text=~description
) %>% layout(
yaxis = list(title="Effective N"),
xaxis = list(title="intercept alpha decile",showticklabels=FALSE),
margin=list(b=65)
)
htmltools::div( pp, align="center" )
```
<br>
The stability of the $h^2_g$ estimates may also be connected to the intercept $a$ even at high sample sizes. E.g. this is evident if we focus on the top 5 deciles of $a$ in effective sample sizes above 100,000.
```{r h2_se_alpha, echo=F}
# SE of the h2 estimate vs. Neff at large sample sizes, with loess trends
# for the top 5 alpha deciles (deciles 6-10) to show that stratification
# relates to estimate stability even at high Neff.
# NOTE(review): the colors= argument below appears vestigial — no color
# aesthetic is mapped in this plot_ly() call; trace colors are set
# explicitly via line= in the loop. Confirm before removing.
pp <- plot_ly(dat,
x=~Neff,
y=~h2_liability_se,
type="scatter",
mode="markers",
hoverinfo="text",
colors=plotly_colors[c(1,7:10,1)],
height=300,width=800,
text = ~paste0(
"Phenotype: ", description,
"<br>Intercept: ", round(intercept,5), " (p=",int_p_text,")",
"<br>Liability SNP h2: ", round(h2_liability,4), " (p=",signif(h2_p, 3),")",
"<br>Effective N: ", Neff))
# Loess trend per decile for deciles 6-10 only; line color is picked from
# the shared plotly palette (index 1+(i %% 10) maps decile 10 back to color 1).
qq <- quantile(dat$alpha,probs = seq(0,1,.1))
for(i in 6:10){
ll <- loess(h2_liability_se ~ Neff, data=dat[(dat$alpha >= qq[i]) & (dat$alpha <= qq[i+1]),],span=0.5)
pp <- pp %>% add_trace(x=ll$x[order(ll$x)],
y=ll$fitted[order(ll$x)],
showlegend=T,
mode="lines",
line=list(color=plotly_colors[1+(i%%10)]),
hoverinfo="text",
text="",
name=paste0("Dec",i," [",signif(qq[i],1),",",signif(qq[i+1],1),"]"))
}
pp <- pp %>% layout(xaxis=list(title="Neff", range=c(100000,max(dat$Neff))),
yaxis=list(title="SE for SNP h2 estimate (liability)", range=c(0,.025)),
margin=list(b=65)
)
htmltools::div( pp, align="center" )
```
<br>
Nevertheless, to the extent the fitted $a$ estimates don't appear centered at zero (e.g. zero is covered by decile 3, mean $a = `r signif(mean(dat$alpha),3)`$, standard deviation $= `r signif(sd(dat$alpha),3)`$) it is likely the $a$ values do reflect some non-zero amounts of confounding/stratification in at least some of the phenotypes. It is unclear however to what extent the directional biases in $h^2_g$ conditional on $a$ reflect effects from the true value of $a$ versus correlated sampling noise in the intercept and slope estimates.
The relationship to $a$ may also explain why the attenuation at low $N_{eff}$ is weaker in the Round 2 results than in Round 1. The Round 2 GWAS increased the number of PCA covariates and used PCs computed within the GWAS sample (i.e. within european ancestry individuals) rather than the PCs from the full UKB data (i.e. with all ancestries). As a result, stratification may be better controlled within the Round 2 results, thus any dependence of the $h^2_g$ results on $a$ at low $N_{eff}$ will have a weaker marginal effect than in Round 1 due to reduced true values of $a$.
*Takeaway:* In either case, it is clear from the first figure above that there is strong dependence between $h^2_g$ and $a$ at low $N_{eff}$, with convergence to more stable $h^2_g$ estimates regardless of $a$ as $N_{eff}$ increases (subject to the instability from sparse representation of the $a$ decile or increased SEs of the $h^2_g$ estimate).
</div>
## Conclusion
From the plot of $h^2_g$ by $a$ decile, it appears LDSR $h^2_g$ estimates converge to stability in the $N_{eff}=20,000-40,000$ range. This is broadly consistent with the downsampling experiments above, though in some instances those show evidence of slightly slower convergence. Based on the above, we therefore denote our confidence in the LDSR $h^2_g$ results as follows:
* $N_{eff} < 4500$: No confidence (see [minimum sample size requirements](#minimum_sample_size))
* $4500 \leq N_{eff} < 20,000$: Low confidence
* $20,000 \leq N_{eff} < 40,000$: Medium confidence
* $N_{eff} \geq 40,000$: High confidence
[*NB:* Of the criteria in this LDSR analysis, this choice is probably the least stable. The downsampling experiments remain unclear about the nature of biases at low $N_{eff}$, suggesting effects beyond what might be anticipated based on the full-sample estimate of $a$. As a result, thoughts about best practices for interpreting $h^2_g$ estimates in this intermediate range of $N_{eff}$ are still very much subject to change.]
```{r set_neff, echo=F}
# Apply the sample-size based confidence ratings described above:
#   4,500 <= Neff < 20,000  -> downgrade "medium"/"high" to "low"
#  20,000 <= Neff < 40,000  -> downgrade "high" to "medium"
# (Phenotypes with Neff < 4,500 were already excluded by the minimum
# sample size filter.)
#
# d: data frame with a numeric `Neff` column and a character `confidence`
#    column. Returns d with logical `isLowNeff`/`isMidNeff` flags added and
#    `confidence` downgraded accordingly. Rows with missing Neff are never
#    flagged (the two-step FALSE-then-subset-assign pattern leaves them FALSE).
flag_neff_confidence <- function(d) {
  d$isLowNeff <- FALSE
  d$isLowNeff[d$Neff >= 4500 & d$Neff < 20000] <- TRUE
  d$confidence[d$isLowNeff & (d$confidence %in% c("medium", "high"))] <- "low"
  d$isMidNeff <- FALSE
  d$isMidNeff[d$Neff >= 20000 & d$Neff < 40000] <- TRUE
  d$confidence[d$isMidNeff & d$confidence == "high"] <- "medium"
  d
}
# Apply identically to the primary results and the full both-sexes /
# male / female tables.
dat <- flag_neff_confidence(dat)
dat_full <- flag_neff_confidence(dat_full)
datm_full <- flag_neff_confidence(datm_full)
datf_full <- flag_neff_confidence(datf_full)
# Drop newly low-confidence phenotypes from the working set used below
dat <- dat[dat$confidence != "low", ]
```
<br>
***
# Unusually large standard errors
In assessing the minimum necessary sample size [above](#minimum_sample_size), we noted the relationship between sample size and the SE of the $h^2_g$ estimate. Although we focused on the average SE by sample size, it may also be observed that there are some clear outliers with larger SEs than would be anticipated based on their sample size.
<div class="well">
We focus here on phenotypes that are at least medium confidence based on $N_{eff}$.
```{r h2_se_2, echo=F}
# Scatter of the liability-scale h2 SE against effective sample size for the
# remaining (at least medium confidence) phenotypes, colored by the h2
# point estimate. Hover text mirrors the other figures in this document.
pp <- plot_ly(
  dat,
  type = "scatter",
  mode = "markers",
  x = ~Neff,
  y = ~h2_liability_se,
  color = ~h2_liab,
  colors = c("blue", "darkorange"),
  width = 400, height = 400,
  hoverinfo = "text",
  text = ~paste0(
    "Phenotype: ", description,
    "<br>Intercept: ", round(intercept, 5), " (p=", int_p_text, ")",
    "<br>Liability SNP h2: ", round(h2_liability, 4), " (p=", signif(h2_p, 3), ")",
    "<br>Effective N: ", Neff
  )
) %>%
  layout(
    xaxis = list(title = "Neff"),
    yaxis = list(title = "SE of SNP h2 estimate (liability)"),
    margin = list(b = 65)
  )
htmltools::div(pp, align = "center")
```
<br>
The clearest outlier here is red hair color (from UKB code 1747). In addition to the disproportionately large SE on the $h^2_g$ estimate for this phenotype, it also has a remarkably low intercept estimate which is also unstable (intercept = `r round(dat$intercept[dat$phenotype=="1747_2"],3)`, SE = `r round(dat$intercept_se[dat$phenotype=="1747_2"],3)`). A similar pattern is observed for the next two outliers, measures of bilirubin from the biomarker data (codes 30660 and 30840). Note that an intercept $<1$ is unexpected under the LDSR model, where variants tagging no signal (LD score of 0) should have a null expectation of 1 (for the 1 df $\chi^2$ statistic).
Looking at the manhattan plot for the red hair phenotype, it becomes clear that the genetic signal is dominated by two loci. The manhattan plots for bilirubin are similarly sparse with even stronger top loci (not shown).
```{r red_hair, fig.align='center', out.width=800, echo=F}
knitr::include_graphics(paste0(params$imagedir,"/1747_2_MF.png"))
```
*Note:* y-axis truncated for *MC1R* locus on chromosome 16.
LDSR is derived assuming a broadly polygenic architecture for each trait. Although sparser polygenicity doesn't fully invalidate the LDSR model (under a moments-based framework), it has previously been observed that LDSR is increasingly unstable for sparse architectures.
Although we hope that the reported SEs will accurately capture the instability of estimates for these phenotypes with sparse architectures, we may have lower confidence in these results. We may be particularly concerned about phenotypes where the SE is large and coupled with a low estimated intercept, which could in turn lead to over-estimation of $h^2_g$.
Using the same loess fit as [above](#minimum_sample_size) to predict the expected SE based on $N_{eff}$, we evaluate the ratio between the observed and expected SE along with the fitted intercept.
```{r h2_pred_se_ratio, echo=F}
# Observed / loess-predicted h2 SE ratio plotted against the LDSR intercept.
# Points with a large ratio and an intercept below 1 flag sparse genetic
# architectures where LDSR estimates are unstable.
pp <- plot_ly(
  dat,
  type = "scatter",
  mode = "markers",
  x = ~intercept,
  y = ~h2_liability_se/pred_h2liab_se,
  color = ~h2_liab,
  colors = c("blue", "darkorange"),
  width = 400, height = 400,
  hoverinfo = "text",
  text = ~paste0(
    "Phenotype: ", description,
    "<br>Intercept: ", round(intercept, 5), " (p=", int_p_text, ")",
    "<br>Liability SNP h2: ", round(h2_liability, 4), " (p=", signif(h2_p, 3), ")",
    "<br>Effective N: ", Neff
  )
) %>%
  layout(
    yaxis = list(title = "Observed/Predicted ratio for SNP h2 SE"),
    xaxis = list(title = "LDSR intercept"),
    margin = list(b = 65)
  )
htmltools::div(pp, align = "center")
```
<br>
The top outliers are observed for pigmentation-related traits, which have architectures similar to red hair as illustrated above, and other biomarkers like bilirubin. Following the biomarkers, the next set of phenotypes with larger-than-expected SEs are haematology measures, such as thrombocyte volume (UKB code 30100).
```{r thrombocyte, fig.align='center', out.width=800, echo=F}
knitr::include_graphics(paste0(params$imagedir,"/30100_raw_MF.png"))
```
*Takeaway:* Thrombocyte volume also has loci of very strong effect, but in the context of a more polygenic architecture.
<br>
Although the unexpectedly large SEs for phenotypes like thrombocyte volume are still worrisome, they are less concerning than the stronger outliers for pigmentation-related traits and biomarkers like bilirubin whose GWAS signals are driven by only a handful of loci.
</div>
## Conclusion
We mark reduced confidence in phenotypes with unexpectedly large SEs as follows:
* Low confidence: All hair color levels (UKB code 1747), plus other items with Observed/Expected SE ratios > 12 and intercepts < 1.1
* Medium confidence: All other items with Observed/Expected SE ratios > 6
All affected measures are pigmentation-related, biomarker, or haematology measures. [*NB:* Height (UKB code 50) is just barely under the 6x ratio of observed vs. expected SE. It's a borderline decision. We choose to give it some leeway given the general trend that higher $h^2_g$ also has higher SE, and height has very strong $h^2_g$.]
<br>
```{r spiky_conf, echo=F}
# Downgrade confidence for phenotypes with unexpectedly large h2 SEs
# relative to the loess prediction from Neff (llmod, fit above):
#   - "extreme": any hair color item (UKB 1747_*), or intercept < 1.1 with
#     observed/predicted SE ratio > 12 -> downgrade medium/high to "low"
#   - "high SE": observed/predicted ratio > 6 (and not extreme)
#     -> downgrade high to "medium"
#
# d:       results data frame with phenotype, intercept, h2_liability_se,
#          and confidence columns
# pred_se: predicted liability h2 SE for each row of d
# Returns d with logical isExtremeSE/isHighSE flags added and confidence
# downgraded accordingly.
flag_se_confidence <- function(d, pred_se) {
  se_ratio <- d$h2_liability_se / pred_se
  d$isExtremeSE <- FALSE
  d$isExtremeSE[startsWith(d$phenotype, "1747_") |
                  (d$intercept < 1.1 & se_ratio > 12)] <- TRUE
  d$confidence[d$isExtremeSE & (d$confidence %in% c("medium", "high"))] <- "low"
  d$isHighSE <- FALSE
  d$isHighSE[se_ratio > 6 & !d$isExtremeSE] <- TRUE
  d$confidence[d$isHighSE & d$confidence == "high"] <- "medium"
  d
}
# dat already carries the predicted SE (pred_h2liab_se); the full tables
# recompute it from llmod. NOTE(review): the 1/sqrt(...) transform implies
# llmod predicts 1/SE^2 as a function of Neff -- confirm against the fit
# in the minimum sample size section.
dat <- flag_se_confidence(dat, dat$pred_h2liab_se)
dat_full <- flag_se_confidence(dat_full, 1/sqrt(predict(llmod, newdata = dat_full$Neff)))
datm_full <- flag_se_confidence(datm_full, 1/sqrt(predict(llmod, newdata = datm_full$Neff)))
datf_full <- flag_se_confidence(datf_full, 1/sqrt(predict(llmod, newdata = datf_full$Neff)))
# Drop newly low-confidence phenotypes from the working set used below
dat <- dat[dat$confidence != "low", ]
```
<br>
***
# Sex-biased phenotypes
We noted [when selecting primary results for each phenotype](select_topline.html#sex-specific_phenotypes) that, although we were designating the both-sex analysis as the primary analysis where possible, we would revisit the question of results for phenotypes with a strong sex bias. We revisit that question here.
<div class="well">
We first look at the sex balance of phenotypes using a `both_sexes` GWAS that passed the minimum sample size filters and have high confidence after the previous checks in this section.
```{r sex_bias_conf, echo=F}
# Keep sex-specific sample sizes only for phenotypes whose both-sexes GWAS
# is still present in dat after the preceding confidence filters.
sex_ns2 <- sex_ns[sex_ns$phenotype %in% dat$phenotype[dat$sex == "both_sexes"], ]
rm(sex_ns)
# Histogram of the female fraction of each GWAS sample; rows missing either
# sex-specific N are dropped.
pp <- plot_ly(
  sex_ns2[!is.na(sex_ns2$n_fem) & !is.na(sex_ns2$n_mal), ],
  x = ~n_fem/(n_fem+n_mal),
  type = "histogram",
  showlegend = FALSE,
  hoverinfo = "none",
  width = 400, height = 400
) %>%
  layout(
    xaxis = list(title = "Proportion of samples who are female", range = c(0, 1)),
    margin = list(b = 65)
  )
htmltools::div(pp, align = "center")
```
*Takeaway:* Sample sizes are nicely balanced between sexes. The small tail of items with >60% female samples primarily consists of depression-related questions.
As before, we can similarly check the balance among cases and among controls for binary phenotypes.
```{r sex_bias_cc_conf, echo=F}
# Female fraction among cases (left) and among controls (right) for binary
# phenotypes, i.e. those with non-missing case counts by sex.
# NOTE(review): both panels subset on the *case* count columns, presumably
# so the two histograms cover the same phenotypes -- confirm control counts
# are never missing when case counts are present.
cc <- sex_ns2[!is.na(sex_ns2$n_case_fem) & !is.na(sex_ns2$n_case_mal), ]
pp1 <- plot_ly(
  cc,
  x = ~n_case_fem/(n_case_fem+n_case_mal),
  type = "histogram",
  showlegend = FALSE,
  hoverinfo = "none",
  width = 400, height = 400
) %>%
  layout(
    xaxis = list(title = "Proportion of cases who are female", range = c(0, 1)),
    title = "cases",
    margin = list(b = 65)
  )
pp2 <- plot_ly(
  cc,
  x = ~n_control_fem/(n_control_fem+n_control_mal),
  type = "histogram",
  showlegend = FALSE,
  hoverinfo = "none",
  width = 400, height = 400
) %>%
  layout(
    xaxis = list(title = "Proportion of controls who are female", range = c(0, 1)),
    title = "controls",
    margin = list(b = 65)
  )
# Render the two histograms side by side
htmltools::div(
  style = "display: flex; flex-wrap: wrap; justify-content: center",
  htmltools::div(pp1),
  htmltools::div(pp2)
)
```
Although these phenotypes are mostly balanced, there is a clear remaining tail of phenotypes with strong sex bias among cases.
We don't have particularly strong reasons to be concerned about LDSR results for phenotypes with strong sex differences. Notably, the GWAS is already conditioned on sex as a covariate (as well as $Sex \times Age$ interactions). On the other hand, we may have some concerns over whether the phenotype has the same meaning in both sexes, or whether there are, for example, differential ascertainment biases affecting each sex in the GWAS sample. In both cases, this concern can be summarized as a worry over the potential for $G \times Sex$ interactions (either in the population or as an artifact within sample). On that basis, we choose to flag phenotypes with strong sex differences in sample size as somewhat lower confidence.
```{r apply_sex_bias, echo=F}
# Flag both-sexes GWAS where >75% of the sample (or, for binary traits, of
# cases or of controls) comes from a single sex, and cap their confidence at
# "medium". Motivation: potential G x Sex interactions or sex-differential
# ascertainment (see text above).

# Phenotypes with separate female/male Ns: flagged if the female fraction of
# the full sample -- or, for binary traits (non-missing n_case_fem), of cases
# or of controls -- falls outside [0.25, 0.75].
sex_drop1 <- sex_ns2[sex_ns2$n_fem/(sex_ns2$n_fem+sex_ns2$n_mal) > .75 |
sex_ns2$n_fem/(sex_ns2$n_fem+sex_ns2$n_mal) < .25 |
((sex_ns2$n_case_fem/(sex_ns2$n_case_fem+sex_ns2$n_case_mal) > .75 |
sex_ns2$n_case_fem/(sex_ns2$n_case_fem+sex_ns2$n_case_mal) < .25 |
sex_ns2$n_control_fem/(sex_ns2$n_control_fem+sex_ns2$n_control_mal) > .75 |
sex_ns2$n_control_fem/(sex_ns2$n_control_fem+sex_ns2$n_control_mal) < .25) &
!is.na(sex_ns2$n_case_fem)),"phenotype"]
# Restrict to phenotypes still present among the both-sexes results in dat
sex_drop1 <- sex_drop1[!is.na(sex_drop1) & (sex_drop1 %in% dat$phenotype[dat$sex=="both_sexes"])]
# Phenotypes tracked in onesex_ns: flagged if one sex contributes >75% of the
# sample, or of cases/controls for binary traits.
# NOTE(review): assumes n_sex / n_case_sex / n_control_sex count a single
# (presumably the dominant) sex -- confirm the upstream definition of onesex_ns.
sex_drop2 <- onesex_ns[onesex_ns$n_sex/onesex_ns$n > 0.75 |
(!is.na(onesex_ns$n_case) &
((onesex_ns$n_case_sex/onesex_ns$n_case > 0.75) |
(onesex_ns$n_control_sex/onesex_ns$n_control > 0.75))), "phenotype"]
sex_drop2 <- sex_drop2[!is.na(sex_drop2) & (sex_drop2 %in% dat$phenotype[dat$sex=="both_sexes"])]
# Mark the flagged both-sexes rows in dat and downgrade "high" -> "medium".
# idx_sexconf is reused for the count reported in the Conclusion below.
idx_sexconf <- (dat$phenotype %in% c(as.character(sex_drop1),as.character(sex_drop2)) & dat$sex=="both_sexes")
dat$isSexBias <- F
dat$isSexBias[idx_sexconf] <- T
dat$confidence[dat$isSexBias & dat$confidence=="high"] <- "medium"
# same checks in dat_full
# (not required in datf, datm since this is specifically a concern about both_sexes analyses)
# Align female-specific counts to dat_full rows by phenotype; match() yields
# NA where a phenotype has no female-specific result, and those rows stay NA
# in the fem_* vectors below.
dat_full_fem_idx <- match(dat_full$phenotype, datf_full$phenotype)
dat_full_fem_idx_na <- is.na(dat_full_fem_idx)
dat_full_fem_n <- rep(NA,nrow(dat_full))
dat_full_fem_ncase <- rep(NA,nrow(dat_full))
dat_full_fem_ncontrol <- rep(NA,nrow(dat_full))
dat_full_fem_n[!dat_full_fem_idx_na] <- datf_full$n[dat_full_fem_idx[!dat_full_fem_idx_na]]
dat_full_fem_ncase[!dat_full_fem_idx_na] <- datf_full$n_cases[dat_full_fem_idx[!dat_full_fem_idx_na]]
dat_full_fem_ncontrol[!dat_full_fem_idx_na] <- datf_full$n_controls[dat_full_fem_idx[!dat_full_fem_idx_na]]
# Apply the same >75% / <25% female-fraction rules to dat_full, using the
# female N over the total N (female fraction) for the sample, cases, controls.
# NOTE(review): the case/control checks guard on !is.na(n_cases) while
# dividing by n_cases or n_controls; this assumes n_controls and the fem
# case/control counts are non-missing whenever those guards pass -- confirm,
# since an NA in a logical subscript assignment is an error in R.
dat_full$isSexBias <- F
dat_full$isSexBias[!is.na(dat_full_fem_n) & dat_full_fem_n/dat_full$n > .75] <- T
dat_full$isSexBias[!is.na(dat_full_fem_n) & dat_full_fem_n/dat_full$n < .25] <- T
dat_full$isSexBias[!is.na(dat_full_fem_n) & !is.na(dat_full$n_cases) & dat_full_fem_ncase/dat_full$n_cases > .75] <- T
dat_full$isSexBias[!is.na(dat_full_fem_n) & !is.na(dat_full$n_cases) & dat_full_fem_ncase/dat_full$n_cases < .25] <- T
dat_full$isSexBias[!is.na(dat_full_fem_n) & !is.na(dat_full$n_cases) & dat_full_fem_ncontrol/dat_full$n_controls > .75] <- T
dat_full$isSexBias[!is.na(dat_full_fem_n) & !is.na(dat_full$n_cases) & dat_full_fem_ncontrol/dat_full$n_controls < .25] <- T
dat_full$confidence[dat_full$isSexBias & dat_full$confidence=="high"] <- "medium"
# The flag is defined but never set for the sex-specific tables (the check
# applies only to both_sexes analyses, per the comment above)
datf_full$isSexBias <- F
datm_full$isSexBias <- F
# Free the sample-size tables; no longer needed
rm(onesex_ns)
rm(sex_ns2)
```
</div>
## Conclusion
We denote as "medium" confidence `r sum(idx_sexconf & !dat$isMidNeff)` phenotypes where >75% of cases are from a single sex.
<div class="well">
```{r sexbias_list, echo=F, comment=NA}
# Table of both-sexes phenotypes flagged for sex bias, excluding those
# already downgraded to "medium" by the Neff criterion above.
sexbias_rows <- dat[dat$isSexBias & !dat$isMidNeff, c("phenotype","description")]
dt <- datatable(
  sexbias_rows,
  rownames = FALSE,
  colnames = c("Code","Phenotype"),
  selection = "none",
  style = "bootstrap",
  class = "nowrap display",
  escape = FALSE,
  options = list(autowidth = FALSE, scrollY = "400px", scrollX = '400px',
                 pageLength = 50, dom = 't')
)
dt
```