wikitaxa
- Taxonomy data from Wikipedia
The goal of wikitaxa is to allow search and taxonomic data retrieval from across many Wikimedia sites, including Wikipedia, Wikicommons, and Wikispecies.
The package API has a low-level part and a high-level part:
The low-level API is meant for power users: it gives you more control, but requires more knowledge.
wt_wiki_page()
wt_wiki_page_parse()
wt_wiki_url_build()
wt_wiki_url_parse()
wt_wikispecies_parse()
wt_wikicommons_parse()
wt_wikipedia_parse()
The high-level API is meant to be easier and faster to use.
wt_data()
wt_data_id()
wt_wikispecies()
wt_wikicommons()
wt_wikipedia()
Search functions:
wt_wikicommons_search()
wt_wikispecies_search()
wt_wikipedia_search()
Install the CRAN version:
install.packages("wikitaxa")
Install the development version:
devtools::install_github("ropensci/wikitaxa")
library("wikitaxa")
wt_data("Poa annua")
Get a Wikidata ID
wt_data_id("Mimulus foliatus")
#> [1] "Q6495130"
#> attr(,"class")
#> [1] "wiki_id"
Wikipedia: low-level API
pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
res <- wt_wiki_page_parse(pg)
res$iwlinks
#> [1] "https://commons.wikimedia.org/wiki/Category:apples"
#> [2] "https://commons.wikimedia.org/wiki/Category:Apple_cultivars"
#> [3] "https://www.wikidata.org/wiki/Q158657"
#> [4] "https://www.wikidata.org/wiki/Q18674606"
#> [5] "https://species.wikimedia.org/wiki/Malus_pumila"
#> [6] "https://species.wikimedia.org/wiki/Malus_domestica"
Wikipedia: high-level API
res <- wt_wikipedia("Malus domestica")
res$common_names
#> # A tibble: 1 x 2
#> name language
#> <chr> <chr>
#> 1 Apple en
res$classification
#> # A tibble: 3 x 2
#> rank name
#> <chr> <chr>
#> 1 plainlinks ""
#> 2 species M. pumila
#> 3 binomial Malus pumila
Choose a Wikipedia language
# French
wt_wikipedia(name = "Malus domestica", wiki = "fr")
# Slovak
wt_wikipedia(name = "Malus domestica", wiki = "sk")
# Vietnamese
wt_wikipedia(name = "Malus domestica", wiki = "vi")
Wikipedia: search
wt_wikipedia_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 3144
#>
#>
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount snippet timestamp
#> * <int> <chr> <int> <int> <int> <chr> <chr>
#> 1 0 Pine 3.94e4 28676 3298 "A pine is any coni… 2018-10-13…
#> 2 0 Pinus po… 5.33e5 28882 2778 "misidentified it a… 2018-08-16…
#> 3 0 Pinus st… 4.64e5 28962 3578 "3 ft) tall & w… 2018-10-10…
#> 4 0 Pinus co… 5.08e5 21733 2529 "all pines (member … 2018-09-05…
#> 5 0 Pinus lo… 6.50e5 12244 1379 "sometimes form den… 2018-09-04…
#> 6 0 List of … 4.49e5 18644 1659 "<span class=\"sear… 2018-10-06…
#> 7 0 Bristlec… 2.16e5 16057 1660 "bad soils. One of … 2018-09-14…
#> 8 0 Pinus fl… 4.64e5 13615 1447 "and soft. <span cl… 2018-09-05…
#> 9 0 Pinus la… 4.59e5 10226 1197 "Fire affected this… 2018-09-05…
#> 10 0 Pinus ge… 1.38e6 6391 638 "kilogram. <span cl… 2018-08-17…
Search also supports other languages via the `wiki` argument
wt_wikipedia_search(query = "Pinus", wiki = "fr")
Wikicommons: low-level API
pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
res <- wt_wikicommons_parse(pg)
res$common_names[1:3]
#> [[1]]
#> [[1]]$name
#> [1] "okra"
#>
#> [[1]]$language
#> [1] "en"
#>
#>
#> [[2]]
#> [[2]]$name
#> [1] "مسكي"
#>
#> [[2]]$language
#> [1] "ar"
#>
#>
#> [[3]]
#> [[3]]$name
#> [1] "Abelmoş"
#>
#> [[3]]$language
#> [1] "az"
Wikicommons: high-level API
res <- wt_wikicommons("Abelmoschus")
res$classification
#> # A tibble: 15 x 2
#> rank name
#> <chr> <chr>
#> 1 Domain Eukaryota
#> 2 unranked Archaeplastida
#> 3 Regnum Plantae
#> 4 Cladus angiosperms
#> 5 Cladus eudicots
#> 6 Cladus core eudicots
#> 7 Cladus superrosids
#> 8 Cladus rosids
#> 9 Cladus eurosids II
#> 10 Ordo Malvales
#> 11 Familia Malvaceae
#> 12 Subfamilia Malvoideae
#> 13 Tribus Hibisceae
#> 14 Genus Abelmoschus
#> 15 Authority " Medik. (1787)"
res$common_names
#> # A tibble: 19 x 2
#> name language
#> <chr> <chr>
#> 1 okra en
#> 2 مسكي ar
#> 3 Abelmoş az
#> 4 Ibiškovec cs
#> 5 Bisameibisch de
#> 6 Okrat fi
#> 7 Abelmosco gl
#> 8 Abelmošus hr
#> 9 Ybiškė lt
#> 10 അബെൽമോസ്കസ് ml
#> 11 Абельмош mrj
#> 12 Abelmoskusslekta nn
#> 13 Piżmian pl
#> 14 Абельмош ru
#> 15 موري sd
#> 16 Okrasläktet sv
#> 17 Абельмош udm
#> 18 Chi Vông vang vi
#> 19 黄葵属 zh
Wikicommons: search
wt_wikicommons_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 264
#>
#>
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount snippet timestamp
#> * <int> <chr> <int> <int> <int> <chr> <chr>
#> 1 0 Pinus 8.21e4 4154 336 "Ordo: Pinales • … 2017-05-2…
#> 2 0 Pinus × sc… 1.19e7 634 67 "Genus: <span cla… 2015-01-2…
#> 3 0 Pinus nigra 6.47e4 7743 501 "Ordo: Pinales • … 2018-03-0…
#> 4 0 Spinus pin… 7.03e5 1560 243 "Genus: Spinus • … 2017-07-3…
#> 5 0 Pinus coop… 8.85e6 564 64 "Familia: Pinacea… 2014-12-1…
#> 6 0 Pinus dist… 2.95e7 25972 92 "pines, soft pine… 2018-01-1…
#> 7 0 Pinus herr… 3.00e7 206 28 "Classis: Pinopsi… 2015-01-2…
#> 8 0 Setophaga … 3.53e5 1867 213 "Species: Setopha… 2018-04-2…
#> 9 0 Pinus tabu… 2.36e5 1739 145 "Familia: Pinacea… 2014-11-2…
#> 10 0 Pinus maxi… 2.04e7 485 60 "Familia: Pinacea… 2012-07-2…
Wikispecies: low-level API
pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
res <- wt_wikispecies_parse(pg, types = "common_names")
res$common_names[1:3]
#> [[1]]
#> [[1]]$name
#> [1] "Ябълка"
#>
#> [[1]]$language
#> [1] "български"
#>
#>
#> [[2]]
#> [[2]]$name
#> [1] "Poma, pomera"
#>
#> [[2]]$language
#> [1] "català"
#>
#>
#> [[3]]
#> [[3]]$name
#> [1] "jabloň domácí"
#>
#> [[3]]$language
#> [1] "čeština"
Wikispecies: high-level API
res <- wt_wikispecies("Malus domestica")
res$classification
#> # A tibble: 8 x 2
#> rank name
#> <chr> <chr>
#> 1 Superregnum Eukaryota
#> 2 Regnum Plantae
#> 3 Cladus Angiosperms
#> 4 Cladus Eudicots
#> 5 Cladus Core eudicots
#> 6 Cladus Rosids
#> 7 Cladus Eurosids I
#> 8 Ordo Rosales
res$common_names
#> # A tibble: 22 x 2
#> name language
#> <chr> <chr>
#> 1 Ябълка български
#> 2 Poma, pomera català
#> 3 jabloň domácí čeština
#> 4 Apfel Deutsch
#> 5 Aed-õunapuu eesti
#> 6 Μηλιά Ελληνικά
#> 7 Apple English
#> 8 Manzano español
#> 9 Pomme français
#> 10 Melâr furlan
#> # ... with 12 more rows
Wikispecies: search
wt_wikispecies_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 417
#>
#>
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount snippet timestamp
#> * <int> <chr> <int> <int> <int> <chr> <chr>
#> 1 0 Pinus 17362 1570 281 "Familia: Pinaceae … 2017-02-08…
#> 2 0 Pinus ni… 327138 1412 127 "Familia: Pinaceae … 2016-03-13…
#> 3 0 Pinus su… 300923 318 27 "Pinaceae Genus: <… 2017-02-08…
#> 4 0 Pinus cl… 45047 1520 208 "Pinaceae Genus: <s… 2017-08-16…
#> 5 0 Pinus re… 45082 1195 165 "Pinaceae Genus: <… 2016-03-13…
#> 6 0 Pinus se… 300935 623 68 "Pinaceae Genus: <… 2017-03-12…
#> 7 0 Pinus go… 260795 594 61 "Pinaceae Genus: <… 2016-08-13…
#> 8 0 Pinus th… 73542 999 140 "Genus: <span class… 2017-07-06…
#> 9 0 Pinus su… 300938 718 94 "Familia: Pinaceae … 2017-03-12…
#> 10 0 Pinus sa… 45084 644 80 "Genus: <span class… 2015-10-31…