File prepration in R

Download PLINK software fromhttps://www.cog-genomics.org/plink2

Change working directory to the directory of your PLINK data

Count number of individuals in the study

FAM<-read.table(file="Transferrin.fam",sep=" ", header=FALSE,na="NA")
head(FAM)
##     V1  V2  V3    V4 V5 V6
## 1  355 883 681 10680  2 -9
## 2  355 884 681 10680  1 -9
## 3 3004 885 682 10681  2 -9
## 4 3155 886 683 10682  1 -9
## 5 1629 887 684 10683  2 -9
## 6 1747 888 685 10684  2 -9
dim(FAM)
## [1] 4861    6

Map information on number of SNPS count number of genotyped SNPs

map<-read.table(file="Transferrin.bim",sep="\t", header=FALSE,na="NA")
head(map)
##   V1         V2 V3      V4 V5 V6
## 1  1  rs3934834  0  995669  T  C
## 2  1  rs3737728  0 1011278  A  G
## 3  1  rs6687776  0 1020428  T  C
## 4  1  rs9651273  0 1021403  A  G
## 5  1  rs4970405  0 1038818  G  A
## 6  1 rs12726255  0 1039813  G  A
dim(map)
## [1] 281313      6

Obtain some basic info on serum transferrin phenotype

TPHENO<-read.table(file="Tr.pheno", header=FALSE)
head(TPHENO)
##     V1  V2     V3
## 1  355 883 -0.815
## 2  355 884     NA
## 3 3004 885     NA
## 4 3155 886 -0.299
## 5 1629 887     NA
## 6 1747 888  1.182
dim(TPHENO)
## [1] 4861    3

Give names to TPHENO variables

names(TPHENO)=c("FAMID", "ID", "Transferrin")
head(TPHENO)
##   FAMID  ID Transferrin
## 1   355 883      -0.815
## 2   355 884          NA
## 3  3004 885          NA
## 4  3155 886      -0.299
## 5  1629 887          NA
## 6  1747 888       1.182

Summary information and histrogram of transferrin pheno

summary(TPHENO$Transferrin)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -3.4960 -0.6195 -0.0450  0.0335  0.5818  4.5150    2499
table(is.na(TPHENO$Transferrin))
## 
## FALSE  TRUE 
##  2362  2499
hist(TPHENO$Transferrin, xlab="Transferrin",main="Histogram of Transferrin")

Get Information on height phenotype (standardized and adjusted for relevant covariates)

HPHENO = read.table(file="Ht.pheno", header=FALSE)
head(HPHENO)
##     V1  V2          V3
## 1  355 883          NA
## 2  355 884 -1.01219122
## 3 3004 885 -1.11122366
## 4 3155 886          NA
## 5 1629 887 -0.04663134
## 6 1747 888  1.59969343
dim(HPHENO)
## [1] 4861    3
names(HPHENO)=c("FAMID", "ID", "Height")
head(HPHENO)
##   FAMID  ID      Height
## 1   355 883          NA
## 2   355 884 -1.01219122
## 3  3004 885 -1.11122366
## 4  3155 886          NA
## 5  1629 887 -0.04663134
## 6  1747 888  1.59969343

Summary information and histrogram of height

summary(HPHENO$Height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -4.0274 -0.7340 -0.0299 -0.0204  0.6232  4.5077    2025
table(is.na(HPHENO$Height))
## 
## FALSE  TRUE 
##  2836  2025
hist(HPHENO$Height, xlab="Height",main="Histogram of Height")

Visualize the association results in R

After you run the PLINK association analysis, switch to R

If the R package GWASTools has not yet been installed on your computer, install the R package using the commands below

source("http://bioconductor.org/biocLite.R")
## Bioconductor version 3.6 (BiocInstaller 1.28.0), ?biocLite for help
## A new version of Bioconductor is available after installing the most
##   recent version of R; see http://bioconductor.org/install
biocLite("GWASTools")
## BioC_mirror: https://bioconductor.org
## Using Bioconductor 3.6 (BiocInstaller 1.28.0), R 3.4.3 (2017-11-30).
## Installing package(s) 'GWASTools'
## 
## The downloaded binary packages are in
##  /var/folders/3k/q_ys6r2955j8wrmzl2w5r_lh0000gp/T//RtmpCfDdLR/downloaded_packages
## Old packages: 'ade4', 'agridat', 'ALL', 'annotate', 'AnnotationHub',
##   'apcluster', 'ape', 'argparse', 'arm', 'assertthat', 'backports',
##   'BDgraph', 'BH', 'bindr', 'bindrcpp', 'BiocParallel', 'biomaRt',
##   'Biostrings', 'bit', 'blob', 'bnstruct', 'broom', 'car', 'caTools',
##   'checkmate', 'chron', 'circlize', 'class', 'cli', 'cluster',
##   'clusterProfiler', 'coda', 'codetools', 'colorspace', 'config',
##   'cowplot', 'curl', 'CVST', 'data.table', 'DBI', 'ddalpha', 'Deriv',
##   'DescTools', 'DESeq', 'DESeq2', 'devtools', 'diffusionMap', 'digest',
##   'dimRed', 'doBy', 'doParallel', 'DOSE', 'dotCall64', 'dplyr', 'dtw',
##   'e1071', 'energy', 'evaluate', 'expm', 'FactoMineR', 'fgsea',
##   'findpython', 'flexmix', 'FNN', 'forcats', 'foreign', 'Formula', 'fpc',
##   'futile.options', 'genefilter', 'geneplotter', 'GenomeInfoDbData',
##   'GenomicAlignments', 'GenomicFeatures', 'getopt', 'GGally', 'ggplot2',
##   'ggridges', 'git2r', 'glasso', 'globaltest', 'glue', 'GO.db',
##   'GOSemSim', 'gower', 'gplots', 'graph', 'grpreg', 'gsubfn', 'gtable',
##   'haven', 'highr', 'Hmisc', 'htmlTable', 'htmlwidgets', 'httpuv', 'httr',
##   'huge', 'iClusterPlus', 'igraph', 'impute', 'interactiveDisplayBase',
##   'ipred', 'irlba', 'iterators', 'JGL', 'jsonlite', 'kableExtra', 'keras',
##   'kernlab', 'knitr', 'labelled', 'laeken', 'lambda.r', 'LaplacesDemon',
##   'lattice', 'lava', 'lavaan', 'lazyeval', 'lme4', 'lmtest', 'lubridate',
##   'maps', 'markdown', 'MASS', 'Matrix', 'matrixStats', 'mclust', 'metap',
##   'mgcv', 'mice', 'mime', 'missMDA', 'modeest', 'ModelMetrics',
##   'modeltools', 'msm', 'munsell', 'mygene', 'network', 'nlme', 'nloptr',
##   'NLP', 'openssl', 'openxlsx', 'org.Hs.eg.db', 'pbapply', 'pillar',
##   'pkgconfig', 'pkgmaker', 'plogr', 'plotly', 'pls', 'prabclus',
##   'preprocessCore', 'pROC', 'processx', 'prodlim', 'progress', 'proxy',
##   'purrr', 'qgraph', 'quantmod', 'quantreg', 'qvalue', 'R.matlab', 'R.oo',
##   'R.utils', 'R6', 'RamiGO', 'ranger', 'RCircos', 'Rcpp', 'RcppArmadillo',
##   'RcppEigen', 'RcppProgress', 'RcppRoll', 'RCurl', 'RCytoscape', 'readr',
##   'readxl', 'recipes', 'registry', 'reshape', 'reticulate', 'rgl',
##   'Rgraphviz', 'rJava', 'rjson', 'rlang', 'rmarkdown', 'rngtools',
##   'robCompositions', 'robustbase', 'rpart', 'rrcov', 'Rsamtools',
##   'RSpectra', 'RSQLite', 'rstudioapi', 'rTensor', 'rtracklayer', 'Rtsne',
##   'rvcheck', 'scales', 'scatterplot3d', 'selectr', 'seqLogo', 'Seurat',
##   'sfsmisc', 'shiny', 'shinydashboard', 'slam', 'sn', 'snow',
##   'sourcetools', 'sp', 'spam', 'spdep', 'stringi', 'stringr', 'survival',
##   'tclust', 'tensorflow', 'tfruns', 'tibble', 'tidyr', 'tidyselect', 'tm',
##   'topGO', 'trimcluster', 'TTR', 'utf8', 'VGAM', 'VIM', 'withr',
##   'wordcloud', 'XLConnect', 'XLConnectJars', 'xlsx', 'XML', 'xtable',
##   'xts', 'yaml', 'zlibbioc', 'zoo'

Load the GWASTools package to your R session so that we can use the plotting functions for this package

library(GWASTools)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

Read in the Association Results from PLINK for Height

Height.Assoc=read.table("GWAS_H_add.qassoc",header=TRUE)    
head(Height.Assoc)
##   CHR        SNP      BP NMISS      BETA      SE        R2         T
## 1   1  rs3934834  995669  2812  0.014000 0.03857 4.690e-05  0.363100
## 2   1  rs3737728 1011278  2833  0.000238 0.03023 2.190e-08  0.007873
## 3   1  rs6687776 1020428  2834  0.086810 0.03742 1.897e-03  2.320000
## 4   1  rs9651273 1021403  2836 -0.024630 0.03090 2.242e-04 -0.797100
## 5   1  rs4970405 1038818  2832  0.083190 0.04372 1.278e-03  1.903000
## 6   1 rs12726255 1039813  2829  0.056490 0.03973 7.145e-04  1.422000
##         P
## 1 0.71660
## 2 0.99370
## 3 0.02043
## 4 0.42550
## 5 0.05714
## 6 0.15520

Obtain a Manhattan plot using GWAS tools

manhattanPlot(p=Height.Assoc$P,chromosome=Height.Assoc$CHR, main="Association Results for Height")

Create a Manhattan plot for just the autosomes

AHeight.Assoc=subset(Height.Assoc,CHR<=22)
manhattanPlot(p=AHeight.Assoc$P,chromosome=AHeight.Assoc$CHR, main="Association Results for Autosomes for Height")

Obtain a Q-Q plot using GWAS tools

qqPlot(pval=Height.Assoc$P)

QQplot deviates from the diagnal line; better to use linear mixed-effects model (LMM) with polygenic effects.

Can also use the qqman package to obtain a manhattan plot

if(!require("qqman")){
  install.packages("qqman")
  stopifnot(require("qqman"))
}
## Loading required package: qqman
## 
## For example usage please run: vignette('qqman')
## 
## Citation appreciated but not required:
## Turner, S.D. qqman: an R package for visualizing GWAS results using Q-Q and manhattan plots. biorXiv DOI: 10.1101/005165 (2014).
## 
library("qqman")


Trans.Assoc=read.table("GWAS_T_add.qassoc",header=TRUE)  

manhattan(Trans.Assoc,main="Association Results for Transferrin")

qq(Trans.Assoc$P)

Identify top 10 SNPS for Height

head(Trans.Assoc)
##   CHR        SNP      BP NMISS     BETA      SE        R2       T       P
## 1   1  rs3934834  995669  2349  0.02497 0.04037 1.630e-04  0.6185 0.53630
## 2   1  rs3737728 1011278  2361  0.03136 0.03164 4.164e-04  0.9913 0.32170
## 3   1  rs6687776 1020428  2361 -0.01126 0.03851 3.621e-05 -0.2923 0.77010
## 4   1  rs9651273 1021403  2362  0.05405 0.03199 1.208e-03  1.6900 0.09122
## 5   1  rs4970405 1038818  2361  0.01116 0.04710 2.378e-05  0.2369 0.81280
## 6   1 rs12726255 1039813  2361 -0.02646 0.04064 1.796e-04 -0.6510 0.51510
dim(Trans.Assoc)        ## number of SNPS analyzed
## [1] 273201      9
TOP <- Trans.Assoc[order(Trans.Assoc$P),] 
head(TOP,10,)
##       CHR        SNP        BP NMISS    BETA      SE      R2       T
## 55047   3  rs3811647 134966719  2362  0.3832 0.02889 0.06936  13.260
## 55051   3  rs6794945 135001153  2362  0.3652 0.02940 0.06136  12.420
## 97790   6  rs1800562  26201120  2361 -0.5884 0.04988 0.05572 -11.800
## 97960   6 rs13214703  28049366  2361 -0.4378 0.04886 0.03292  -8.961
## 55048   3  rs1358024 134966878  2361  0.3290 0.03745 0.03168   8.785
## 97709   6  rs2274089  25596562  2362 -0.3791 0.04551 0.02856  -8.330
## 55042   3  rs4525863 134918826  2362  0.2399 0.03017 0.02609   7.951
## 55038   3  rs1867503 134893338  2362  0.2039 0.02864 0.02103   7.120
## 55039   3  rs1867504 134893351  2362  0.2039 0.02864 0.02103   7.120
## 55052   3  rs9853615 135002671  2362 -0.2083 0.02929 0.02098  -7.111
##               P
## 55047 8.965e-39
## 55051 2.324e-34
## 97790 2.968e-31
## 97960 6.390e-19
## 55048 2.941e-18
## 97709 1.352e-16
## 55042 2.845e-15
## 55038 1.428e-12
## 55039 1.428e-12
## 55052 1.523e-12