When available on CRAN
install.packages("rsnps")
Or install from GitHub
install.packages("devtools")
devtools::install_github("ropensci/rsnps")
library(rsnps)
Get genotype data for all users at a particular SNP
allgensnp(snp='rs7412')[1:3]
#> [[1]]
#> [[1]]$snp
#> [[1]]$snp$name
#> [1] "rs7412"
#>
#> [[1]]$snp$chromosome
#> [1] "19"
#>
#> [[1]]$snp$position
#> [1] "44908822"
#>
#>
#> [[1]]$user
#> [[1]]$user$name
#> [1] "R.M. Holston"
#>
#> [[1]]$user$id
#> [1] 22
#>
#> [[1]]$user$genotypes
#> [[1]]$user$genotypes[[1]]
#> [[1]]$user$genotypes[[1]]$genotype_id
#> [1] 8
#>
#> [[1]]$user$genotypes[[1]]$local_genotype
#> [1] "CC"
#>
#>
#>
#>
#>
#> [[2]]
#> [[2]]$snp
#> [[2]]$snp$name
#> [1] "rs7412"
#>
#> [[2]]$snp$chromosome
#> [1] "19"
#>
#> [[2]]$snp$position
#> [1] "44908822"
#>
#>
#> [[2]]$user
#> [[2]]$user$name
#> [1] "Mom to AG"
#>
#> [[2]]$user$id
#> [1] 387
#>
#> [[2]]$user$genotypes
#> [[2]]$user$genotypes[[1]]
#> [[2]]$user$genotypes[[1]]$genotype_id
#> [1] 173
#>
#> [[2]]$user$genotypes[[1]]$local_genotype
#> [1] "CC"
#>
#>
#>
#>
#>
#> [[3]]
#> [[3]]$snp
#> [[3]]$snp$name
#> [1] "rs7412"
#>
#> [[3]]$snp$chromosome
#> [1] "19"
#>
#> [[3]]$snp$position
#> [1] "44908822"
#>
#>
#> [[3]]$user
#> [[3]]$user$name
#> [1] "Dan Bolser"
#>
#> [[3]]$user$id
#> [1] 254
#>
#> [[3]]$user$genotypes
#> list()
allgensnp('rs7412', df=TRUE)[1:10,]
#> snp_name snp_chromosome snp_position user_name user_id genotype_id genotype NA NA NA
#> 1 rs7412 19 44908822 R.M. Holston 22 8 CC <NA> <NA> <NA>
#> 2 rs7412 19 44908822 Mom to AG 387 173 CC <NA> <NA> <NA>
#> 3 rs7412 19 44908822 Dan Bolser 254 <NA> <NA> <NA> <NA> <NA>
#> 4 rs7412 19 44908822 Lb 14 6 CC <NA> <NA> <NA>
#> 5 rs7412 19 44908822 Glenn Allen Nolen 19 7 CC <NA> <NA> <NA>
#> 6 rs7412 19 44908822 kevinmcc 285 118 CC <NA> <NA> <NA>
#> 7 rs7412 19 44908822 Sigrid 569 260 CC <NA> <NA> <NA>
#> 8 rs7412 19 44908822 Razib Khan 33 12 CT <NA> <NA> <NA>
#> 9 rs7412 19 44908822 sagan 13 4 CC <NA> <NA> <NA>
#> 10 rs7412 19 44908822 William Vencill 581 266 CC <NA> <NA> <NA>
#> NA NA NA NA NA NA NA NA
#> 1 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 2 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 3 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 4 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 5 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 6 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 7 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 8 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 9 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 10 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
Get all phenotypes, their variations, and how many users have data available for a given phenotype
Get all data
allphenotypes(df = TRUE)[1:10,]
#> id characteristic known_variations number_of_users
#> 1 1 Eye color Brown 672
#> 2 1 Eye color Brown-green 672
#> 3 1 Eye color Blue-green 672
#> 4 1 Eye color Blue-grey 672
#> 5 1 Eye color Green 672
#> 6 1 Eye color Blue 672
#> 7 1 Eye color Hazel 672
#> 8 1 Eye color Mixed 672
#> 9 1 Eye color Gray-blue 672
#> 10 1 Eye color Blue-grey; broken amber collarette 672
Output a list, then call the characteristic of interest by 'id' or 'characteristic'
datalist <- allphenotypes()
Get a list of all characteristics you can call
names(datalist)[1:10]
#> [1] "Eye color" "Handedness" "Height" "Sex"
#> [5] "Hair Color" "Tongue roller" "Colour Blindness" "Hair Type"
#> [9] "Lactose intolerance" "Astigmatism"
Get data.frame for ADHD
datalist[["ADHD"]]
#> id characteristic known_variations number_of_users
#> 1 29 ADHD False 167
#> 2 29 ADHD True 167
#> 3 29 ADHD Undiagnosed, but probably true 167
#> 4 29 ADHD No 167
#> 5 29 ADHD Yes 167
#> 6 29 ADHD Not diagnosed 167
#> 7 29 ADHD Diagnosed as not having but with some signs 167
#> 8 29 ADHD Mthfr c677t 167
#> 9 29 ADHD Rs1801260 167
Get data.frame for mouth size and SAT Writing
datalist[c("mouth size","SAT Writing")]
#> $`mouth size`
#> id characteristic known_variations number_of_users
#> 1 120 mouth size Medium 98
#> 2 120 mouth size Small 98
#> 3 120 mouth size Large 98
#>
#> $`SAT Writing`
#> id characteristic known_variations number_of_users
#> 1 41 SAT Writing 750 66
#> 2 41 SAT Writing Tested before 2005 66
#> 3 41 SAT Writing 800 66
#> 4 41 SAT Writing Country with no sat 66
#> 5 41 SAT Writing N/a 66
#> 6 41 SAT Writing Never & have ba & above 66
#> 7 41 SAT Writing 720 66
#> 8 41 SAT Writing Did well - don't remember score 66
#> 9 41 SAT Writing 511 66
#> 10 41 SAT Writing 700 66
#> 11 41 SAT Writing 760 66
#> 12 41 SAT Writing 780 66
#> 13 41 SAT Writing Not part of sat when i took test in august 1967 at uiuc 66
Get just the metadata
annotations(snp = 'rs7903146', output = 'metadata')
#> .id V1
#> 1 name rs7903146
#> 2 chromosome 10
#> 3 position 112998590
Just from PLOS journals
annotations(snp = 'rs7903146', output = 'plos')[c(1:2),]
#> author
#> 1 Marguerite R. Irvin
#> 2 Huixiao Hong
#> title
#> 1 Genome-Wide Detection of Allele Specific Copy Number Variation Associated with Insulin Resistance in African Americans from the HyperGEN Study
#> 2 Technical Reproducibility of Genotyping SNP Arrays Used in Genome-Wide Association Studies
#> publication_date number_of_readers url
#> 1 2011-08-25T00:00:00Z 2509 http://dx.doi.org/10.1371/journal.pone.0024052
#> 2 2012-09-07T00:00:00Z 3052 http://dx.doi.org/10.1371/journal.pone.0044483
#> doi
#> 1 10.1371/journal.pone.0024052
#> 2 10.1371/journal.pone.0044483
Just from SNPedia
annotations(snp = 'rs7903146', output = 'snpedia')
#> url
#> 1 http://www.snpedia.com/index.php/Rs7903146(C;C)
#> 2 http://www.snpedia.com/index.php/Rs7903146(C;T)
#> 3 http://www.snpedia.com/index.php/Rs7903146(T;T)
#> summary
#> 1 Normal (lower) risk of Type 2 Diabetes and Gestational Diabetes.
#> 2 1.4x increased risk for diabetes (and perhaps colon cancer).
#> 3 2x increased risk for Type-2 diabetes
Get all annotations
annotations(snp = 'rs7903146', output = 'all')[1:5,]
#> .id author
#> 1 mendeley Dhanasekaran Bodhini
#> 2 mendeley Ludmila Alves Sanches Dutra
#> 3 mendeley Thomas Hansen
#> 4 mendeley Laura J Rasmussen-Torvik
#> 5 mendeley Yu Yan
#> title
#> 1 The rs12255372(G/T) and rs7903146(C/T) polymorphisms of the TCF7L2 gene are associated with type 2 diabetes mellitus in Asian Indians.
#> 2 Allele-specific PCR assay to genotype SNP rs7903146 in TCF7L2 gene for rapid screening of diabetes susceptibility.
#> 3 At-Risk Variant in TCF7L2 for Type II Diabetes Increases Risk of Schizophrenia.
#> 4 Preliminary report: No association between TCF7L2 rs7903146 and euglycemic-clamp-derived insulin sensitivity in a mixed-age cohort.
#> 5 The transcription factor 7-like 2 (TCF7L2) polymorphism may be associated with focal arteriolar narrowing in Caucasians with hypertension or without diabetes: the ARIC Study
#> publication_year number_of_readers open_access
#> 1 2007 8 FALSE
#> 2 2008 5 FALSE
#> 3 2011 1 FALSE
#> 4 2009 3 FALSE
#> 5 2010 5 TRUE
#> url
#> 1 http://www.mendeley.com/research/rs12255372-g-t-rs7903146-c-t-polymorphisms-tcf7l2-gene-associated-type-2-diabetes-mellitus-asian-ind-1/
#> 2 http://www.mendeley.com/research/allelespecific-pcr-assay-to-genotype-snp-rs7903146-in-tcf7l2-gene-for-rapid-screening-of-diabetes-susceptibility/
#> 3 http://www.mendeley.com/research/atrisk-variant-tcf7l2-type-ii-diabetes-increases-risk-schizophrenia/
#> 4 http://www.mendeley.com/research/preliminary-report-association-between-tcf7l2-rs7903146-euglycemicclampderived-insulin-sensitivity-mixedage-cohort/
#> 5 http://www.mendeley.com/research/transcription-factor-7like-2-tcf7l2-polymorphism-associated-focal-arteriolar-narrowing-caucasians-hypertension-diabetes-aric-study-7/
#> doi publication_date summary first_author pubmed_link journal trait pvalue
#> 1 none <NA> <NA> <NA> <NA> <NA> <NA> NA
#> 2 none <NA> <NA> <NA> <NA> <NA> <NA> NA
#> 3 10.1016/j.biopsych.2011.01.031 <NA> <NA> <NA> <NA> <NA> <NA> NA
#> 4 none <NA> <NA> <NA> <NA> <NA> <NA> NA
#> 5 10.1186/1472-6823-10-9 <NA> <NA> <NA> <NA> <NA> <NA> NA
#> pvalue_description confidence_interval
#> 1 <NA> <NA>
#> 2 <NA> <NA>
#> 3 <NA> <NA>
#> 4 <NA> <NA>
#> 5 <NA> <NA>
Download genotype data for a user from 23andMe or another repository (not evaluated in this example).
data <- users(df=TRUE)
head( data[[1]] )
fetch_genotypes(url = data[[1]][1,"genotypes.download_url"], rows=15)
Genotype data for one or multiple users
genotypes(snp='rs9939609', userid=1)
#> $snp
#> $snp$name
#> [1] "rs9939609"
#>
#> $snp$chromosome
#> [1] "16"
#>
#> $snp$position
#> [1] "53786615"
#>
#>
#> $user
#> $user$name
#> [1] "Bastian Greshake"
#>
#> $user$id
#> [1] 1
#>
#> $user$genotypes
#> $user$genotypes[[1]]
#> $user$genotypes[[1]]$genotype_id
#> [1] 9
#>
#> $user$genotypes[[1]]$local_genotype
#> [1] "AT"
genotypes('rs9939609', userid='1,6,8', df=TRUE)
#> snp_name snp_chromosome snp_position user_name user_id genotype_id genotype
#> 1 rs9939609 16 53786615 Bastian Greshake 1 9 AT
#> 2 rs9939609 16 53786615 Nash Parovoz 6 5 AT
#> 3 rs9939609 16 53786615 Samantha B. Clark 8 2 TT
genotypes('rs9939609', userid='1-2', df=FALSE)
#> [[1]]
#> [[1]]$snp
#> [[1]]$snp$name
#> [1] "rs9939609"
#>
#> [[1]]$snp$chromosome
#> [1] "16"
#>
#> [[1]]$snp$position
#> [1] "53786615"
#>
#>
#> [[1]]$user
#> [[1]]$user$name
#> [1] "Bastian Greshake"
#>
#> [[1]]$user$id
#> [1] 1
#>
#> [[1]]$user$genotypes
#> [[1]]$user$genotypes[[1]]
#> [[1]]$user$genotypes[[1]]$genotype_id
#> [1] 9
#>
#> [[1]]$user$genotypes[[1]]$local_genotype
#> [1] "AT"
#>
#>
#>
#>
#>
#> [[2]]
#> [[2]]$snp
#> [[2]]$snp$name
#> [1] "rs9939609"
#>
#> [[2]]$snp$chromosome
#> [1] "16"
#>
#> [[2]]$snp$position
#> [1] "53786615"
#>
#>
#> [[2]]$user
#> [[2]]$user$name
#> [1] "Senficon"
#>
#> [[2]]$user$id
#> [1] 2
#>
#> [[2]]$user$genotypes
#> list()
Get phenotype data for one or multiple users
phenotypes(userid=1)$phenotypes[1:3]
#> $`white skin`
#> $`white skin`$phenotype_id
#> [1] 4
#>
#> $`white skin`$variation
#> [1] "Caucasian"
#>
#>
#> $`Lactose intolerance`
#> $`Lactose intolerance`$phenotype_id
#> [1] 2
#>
#> $`Lactose intolerance`$variation
#> [1] "lactose-tolerant"
#>
#>
#> $`Eye color`
#> $`Eye color`$phenotype_id
#> [1] 1
#>
#> $`Eye color`$variation
#> [1] "blue-green"
phenotypes(userid='1,6,8', df=TRUE)[[1]][1:10,]
#> phenotype phenotypeID variation
#> 1 white skin 4 Caucasian
#> 2 Lactose intolerance 2 lactose-tolerant
#> 3 Eye color 1 blue-green
#> 4 Hair Type 16 straight
#> 5 Height 15 Tall ( >180cm )
#> 6 Ability to Tan 14 Yes
#> 7 Short-sightedness (Myopia) 21 low
#> 8 Beard Color 12 Blonde
#> 9 Colour Blindness 25 False
#> 10 Strabismus 23 False
out <- phenotypes(userid='1-8', df=TRUE)
lapply(out, head)
#> $`Bastian Greshake`
#> phenotype phenotypeID variation
#> 1 white skin 4 Caucasian
#> 2 Lactose intolerance 2 lactose-tolerant
#> 3 Eye color 1 blue-green
#> 4 Hair Type 16 straight
#> 5 Height 15 Tall ( >180cm )
#> 6 Ability to Tan 14 Yes
#>
#> $Senficon
#> phenotype phenotypeID variation
#> 1 no data no data no data
#>
#> $`no info on user_3`
#> phenotype phenotypeID variation
#> 1 no data no data no data
#>
#> $`no info on user_4`
#> phenotype phenotypeID variation
#> 1 no data no data no data
#>
#> $`no info on user_5`
#> phenotype phenotypeID variation
#> 1 no data no data no data
#>
#> $`Nash Parovoz`
#> phenotype phenotypeID variation
#> 1 Handedness 3 right-handed
#> 2 Eye color 1 brown
#> 3 white skin 4 Caucasian
#> 4 Lactose intolerance 2 lactose-tolerant
#> 5 Ability to find a bug in openSNP 5 extremely high
#> 6 Number of wisdom teeth 57 4
#>
#> $`no info on user_7`
#> phenotype phenotypeID variation
#> 1 no data no data no data
#>
#> $`Samantha B. Clark`
#> phenotype phenotypeID variation
#> 1 Handedness 3 left-handed
#> 2 Lactose intolerance 2 lactose-intolerant
#> 3 Eye color 1 Brown
#> 4 Ability to Tan 14 Yes
#> 5 Nicotine dependence 20 ex-smoker, 7 cigarettes/day
#> 6 Hair Color 13 brown
Get all known variations and all users sharing that phenotype for one phenotype(-ID).
phenotypes_byid(phenotypeid=12, return_ = 'desc')
#> $id
#> [1] 12
#>
#> $characteristic
#> [1] "Beard Color"
#>
#> $description
#> [1] "coloration of facial hair"
phenotypes_byid(phenotypeid=12, return_ = 'knownvars')
#> $known_variations
#> $known_variations[[1]]
#> [1] "Red"
#>
#> $known_variations[[2]]
#> [1] "Blonde"
#>
#> $known_variations[[3]]
#> [1] "Red-brown"
#>
#> $known_variations[[4]]
#> [1] "Red-blonde-brown-black(in diferent parts i have different color,for example near the lips blond-red"
#>
#> $known_variations[[5]]
#> [1] "No beard-female"
#>
#> $known_variations[[6]]
#> [1] "Brown-black"
#>
#> $known_variations[[7]]
#> [1] "Blonde-brown"
#>
#> $known_variations[[8]]
#> [1] "Black"
#>
#> $known_variations[[9]]
#> [1] "Dark brown with minor blondish-red"
#>
#> $known_variations[[10]]
#> [1] "Brown-grey"
#>
#> $known_variations[[11]]
#> [1] "Red-blonde-brown-black"
#>
#> $known_variations[[12]]
#> [1] "Blond-brown"
#>
#> $known_variations[[13]]
#> [1] "Brown, some red"
#>
#> $known_variations[[14]]
#> [1] "Brown"
#>
#> $known_variations[[15]]
#> [1] "Brown-gray"
#>
#> $known_variations[[16]]
#> [1] "Never had a beard"
#>
#> $known_variations[[17]]
#> [1] "I'm a woman"
#>
#> $known_variations[[18]]
#> [1] "Black-brown-blonde"
#>
#> $known_variations[[19]]
#> [1] "Was red-brown now mixed with gray,"
#>
#> $known_variations[[20]]
#> [1] "Red-blonde-brown"
#>
#> $known_variations[[21]]
#> [1] "Dark brown w/few blonde & red hairs"
#>
#> $known_variations[[22]]
#> [1] "Dark blonde with red and light blonde on goatee area."
#>
#> $known_variations[[23]]
#> [1] "Black with few red hairs"
phenotypes_byid(phenotypeid=12, return_ = 'users')[1:10,]
#> user_id
#> 1 22
#> 2 1
#> 3 26
#> 4 10
#> 5 14
#> 6 42
#> 7 45
#> 8 16
#> 9 8
#> 10 661
#> variation
#> 1 Red
#> 2 Blonde
#> 3 red-brown
#> 4 Red-Blonde-Brown-Black(in diferent parts i have different color,for example near the lips blond-red
#> 5 No beard-female
#> 6 Brown-black
#> 7 Red-Blonde-Brown-Black(in diferent parts i have different color,for example near the lips blond-red
#> 8 blonde-brown
#> 9 No beard-female
#> 10 Brown-black
data <- users(df=FALSE)
data[1:2]
#> [[1]]
#> [[1]]$name
#> [1] "gigatwo"
#>
#> [[1]]$id
#> [1] 31
#>
#> [[1]]$genotypes
#> list()
#>
#>
#> [[2]]
#> [[2]]$name
#> [1] "Anu Acharya"
#>
#> [[2]]$id
#> [1] 385
#>
#> [[2]]$genotypes
#> list()
Search for SNPs in Linkage Disequilibrium with a set of SNPs
LDSearch("rs420358")
#> Querying SNAP...
#> Querying NCBI for up-to-date SNP annotation information...
#> Done!
#> $rs420358
#> Proxy SNP Distance RSquared DPrime GeneVariant GeneName GeneDescription Major Minor MAF
#> 4 rs420358 rs420358 0 1.000 1.000 INTERGENIC N/A N/A C A 0.167
#> 5 rs442418 rs420358 122 1.000 1.000 INTERGENIC N/A N/A C T 0.167
#> 8 rs718223 rs420358 1168 1.000 1.000 INTERGENIC N/A N/A A G 0.167
#> 6 rs453604 rs420358 2947 1.000 1.000 INTERGENIC N/A N/A A G 0.167
#> 3 rs372946 rs420358 -70 0.943 1.000 INTERGENIC N/A N/A G C 0.175
#> 1 rs10889290 rs420358 3987 0.800 1.000 INTERGENIC N/A N/A G A 0.200
#> 2 rs10889291 rs420358 4334 0.800 1.000 INTERGENIC N/A N/A C T 0.200
#> 7 rs4660403 rs420358 7021 0.800 1.000 INTERGENIC N/A N/A A G 0.200
#> NObserved Chromosome_NCBI Marker_NCBI Class_NCBI Gene_NCBI Alleles_NCBI Major_NCBI Minor_NCBI MAF_NCBI
#> 4 120 1 rs420358 snp <NA> G,T G T NA
#> 5 120 1 rs442418 snp <NA> A/G A G 0.0723
#> 8 120 1 rs718223 snp <NA> A/G A G 0.0723
#> 6 120 1 rs453604 snp <NA> A/G A G 0.0727
#> 3 120 1 rs372946 snp <NA> C,G C G NA
#> 1 120 1 rs10889290 snp <NA> A/G G A 0.0841
#> 2 120 1 rs10889291 snp <NA> C/T C T 0.0839
#> 7 120 1 rs4660403 snp <NA> A/G A G 0.0827
#> BP_NCBI
#> 4 40341238
#> 5 40341360
#> 8 40342406
#> 6 40344185
#> 3 40341168
#> 1 40345225
#> 2 40345572
#> 7 40348259
Query NCBI's dbSNP for information on a set of SNPs
An example with merged SNPs, non-SNV SNPs, regular SNPs, SNPs not found, and a microsatellite
snps <- c("rs332", "rs420358", "rs1837253", "rs1209415715", "rs111068718")
NCBI_snp_query(snps)
#> Query Chromosome Marker Class Gene Alleles Major Minor MAF BP
#> 1 rs332 7 rs121909001 in-del CFTR -/TTT <NA> <NA> NA 117559592
#> 2 rs420358 1 rs420358 snp <NA> G,T G T NA 40341238
#> 3 rs1837253 5 rs1837253 snp <NA> C/T C T 0.3822 111066173
#> 4 rs111068718 <NA> rs111068718 microsatellite <NA> (GT)21/24 <NA> <NA> NA NA