bold
accesses BOLD barcode data.
Documentation for the BOLD API.
Stable CRAN version
install.packages("bold")
Development version from GitHub
Install sangerseqR
first
source("http://bioconductor.org/biocLite.R")
biocLite("sangerseqR")
Then bold
devtools::install_github("ropensci/bold")
library("bold")
The default is to get a list back
bold_seq(taxon='Coelioxys')[[1]]
#> $id
#> [1] "FBAPB481-09"
#>
#> $name
#> [1] "Coelioxys afra"
#>
#> $gene
#> [1] "FBAPB481-09"
#>
#> $sequence
#> [1] "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTTCCACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGTGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCATTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
You can optionally get back the httr
response object
res <- bold_seq(taxon='Coelioxys', response=TRUE)
res$headers
#> $date
#> [1] "Mon, 28 Mar 2016 20:35:02 GMT"
#>
#> $server
#> [1] "Apache/2.2.15 (Red Hat)"
#>
#> $`x-powered-by`
#> [1] "PHP/5.3.15"
#>
#> $`content-disposition`
#> [1] "attachment; filename=fasta.fas"
#>
#> $connection
#> [1] "close"
#>
#> $`transfer-encoding`
#> [1] "chunked"
#>
#> $`content-type`
#> [1] "application/x-download"
#>
#> attr(,"class")
#> [1] "insensitive" "list"
By default you download tsv
format data, which is given back to you as a data.frame
res <- bold_specimens(taxon='Osmia')
head(res[,1:8])
#> processid sampleid recordID catalognum fieldnum
#> 1 ASGCB255-13 BIOUG07489-F04 3955532 BIOUG07489-F04
#> 2 BCHYM1493-13 BC ZSM HYM 19353 4005342 BC ZSM HYM 19353 BC ZSM HYM 19353
#> 3 CHUBE002-06 CHU05-BEE-002 516711 CHU05-BEE-002 CHU05-BEE-002
#> 4 FBAPB679-09 BC ZSM HYM 02154 1289040 BC ZSM HYM 02154 BC ZSM HYM 02154
#> 5 FBAPB730-09 BC ZSM HYM 02205 1289091 BC ZSM HYM 02205 BC ZSM HYM 02205
#> 6 FBAPB743-09 BC ZSM HYM 02218 1289104 BC ZSM HYM 02218 BC ZSM HYM 02218
#> institution_storing
#> 1 Biodiversity Institute of Ontario
#> 2 SNSB, Zoologische Staatssammlung Muenchen
#> 3 University of Manitoba, Wallis Roughley Museum of Entomology
#> 4 SNSB, Zoologische Staatssammlung Muenchen
#> 5 SNSB, Zoologische Staatssammlung Muenchen
#> 6 SNSB, Zoologische Staatssammlung Muenchen
#> bin_uri phylum_taxID
#> 1 BOLD:ABZ2181 20
#> 2 BOLD:AAK6070 20
#> 3 BOLD:AAD4181 20
#> 4 BOLD:AAI1788 20
#> 5 BOLD:AAK5820 20
#> 6 20
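By default you download tsv
format data, which is given back to you as a data.frame. With sepfasta=TRUE, as in the example below, the sequences are returned separately, in the fasta element of the result.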
res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE)
res$fasta[1:2]
#> $`ASGCB255-13`
#> [1] "-------------------------------GGAATAATTGGTTCTGCTATAAGTATTATTATTCGAATAGAATTAAGAATTCCTGGATCATTCATTTCTAATGATCAAACTTATAATTCTTTAGTAACAGCTCATGCTTTTTTAATAATTTTTTTTCTTGTAATACCATTTTTAATTGGTGGATTTGGAAATTGATTAATTCCATTAATATTAGGAATCCCAGATATAGCATTTCCTCGAATAAATAATATTAGATTTTGACTTTTACCCCCATCCTTAATAATTTTACTTTTAAGAAATTTCTTAAATCCAAGTCCAGGAACAGGTTGAACTGTATATCCCCCCCTTTCTTCTTATTTATTTCATTCTTCCCCTTCTGTTGATTTAGCTATTTTTTCTCTTCATATTTCTGGTTTATCTTCCATCATAGGTTCTTTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCATTAAAACATATTCAATTACCTTTATTTCCTTGATCCGTTTTTATTACAACTATTTTACTATTATTTTCTTTACCTGTTCTAGCAGGAGCTATTACTATATTATTATTTGATCGAAACTTTAATACTTCATTTTTTGATCCAACTGGAGGAGGAGATCCAATTTTATATCAACATTTATTC"
#>
#> $`BCHYM1493-13`
#> [1] "AATTTTATATATAATTTTTGCTTTATGATCTGGAATAATTGGTTCATCAATAAGAATTTTAATTCGAATAGAATTAAGAATTCCTGGATCATGAATTTCTAATGATCAAGTTTATAATTCTTTAGTAACTGCTCATGCTTTTTTAATAATTTTTTTTCTTGTAATACCATTTTTAATTGGGGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCTGATATAGCTTTTCCTCGAATAAATAATATTAGATTTTGACTTTTACCTCCATCTTTAATATTATTATTGTTAAGAAATTTTTTAAATCCAAGTCCAGGAACAGGATGAACTGTTTATCCTCCTCTTTCTTCAAATTTATTTCACTCTTCTCCTTCAGTAGATTTAGCAATTTTTTCATTACATATTTCAGGATTATCATCTATTATAGGATCATTAAATTTTATTGTTACAATTATTTTAATAAAAAATATTTCTTTAAAACATATTCAATTACCTTTATTTCCATGATCTGTTTTTATTACTACAATTCTTTTATTATTATCATTACCAGTTTTAGCAGGAGCTATTACTATACTTTTATTTGATCGAAATTTTAATACTTCTTTTTTTGACCCTATAGGAGGAGGAGATCCAATTCTTTATCAACATTTATTT"
Or you can extract a specific sequence by its name, like
res$fasta['GBAH0293-06']
#> $`GBAH0293-06`
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
bold_trace() downloads trace files to your machine - it does not load them into your R session - but prints out where the files are for your information. Use read_trace() to read a downloaded file into R.
x <- bold_trace(ids = 'ACRJP618-11', progress = FALSE)
read_trace(x$ab1)
#> Number of datapoints: 8877
#> Number of basecalls: 685
#>
#> Primary Basecalls: NNNNNNNNNNNNNNNNNNGNNNTTGAGCAGGNATAGTAGGANCTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTTTGATTTTTTGGACNTCNNNNAAGTTTAAN
#>
#> Secondary Basecalls:
Get citation information for bold
in R by running citation(package = 'bold')