wikitaxa
- Taxonomy data from Wikipedia
The goal of wikitaxa is to allow search and taxonomic data retrieval across many Wikimedia sites, including Wikipedia, Wikicommons, and Wikispecies.
The package API has a low level part and a high level part:
The low level API is meant for power users: it gives you more control, but requires more knowledge of how the wiki pages are fetched and parsed.
wt_wiki_page()
wt_wiki_page_parse()
wt_wiki_url_build()
wt_wiki_url_parse()
wt_wikispecies_parse()
wt_wikicommons_parse()
wt_wikipedia_parse()
The high level API is meant to be easier and faster to use.
wt_data()
wt_data_id()
wt_wikispecies()
wt_wikicommons()
wt_wikipedia()
Search functions:
wt_wikicommons_search()
wt_wikispecies_search()
wt_wikipedia_search()
CRAN version
install.packages("wikitaxa")
Dev version
devtools::install_github("ropensci/wikitaxa")
library("wikitaxa")
wt_data("Poa annua")
Get a Wikidata ID
wt_data_id("Mimulus foliatus")
#> [1] "Q6495130"
#> attr(,"class")
#> [1] "wiki_id"
Wikipedia: lower level
pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
res <- wt_wiki_page_parse(pg)
res$iwlinks
#> [1] "https://en.wiktionary.org/wiki/apple"
#> [2] "https://commons.wikimedia.org/wiki/Special:Search/Apple"
#> [3] "https://en.wikiquote.org/wiki/Apples"
#> [4] "https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Apple"
#> [5] "https://en.wikibooks.org/wiki/Apples"
#> [6] "https://species.wikimedia.org/wiki/Malus_domestica"
#> [7] "https://commons.wikimedia.org/wiki/Category:Apple_cultivars"
higher level
res <- wt_wikipedia("Malus domestica")
res$common_names
#> # A tibble: 3 × 2
#> name language
#> <chr> <chr>
#> 1 apple tree en
#> 2 apple en
#> 3 Apple en
res$classification
#> # A tibble: 9 × 2
#> rank name
#> <chr> <chr>
#> 1 kingdom Plantae
#> 2 unranked Angiosperms
#> 3 unranked Eudicots
#> 4 unranked Rosids
#> 5 order Rosales
#> 6 family Rosaceae
#> 7 genus Malus
#> 8 species M. pumila
#> 9 binomial Malus pumila
Choose a Wikipedia language edition with the wiki argument
# French
wt_wikipedia(name = "Malus domestica", wiki = "fr")
# Slovak
wt_wikipedia(name = "Malus domestica", wiki = "sk")
# Vietnamese
wt_wikipedia(name = "Malus domestica", wiki = "vi")
search
wt_wikipedia_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 2804
#>
#>
#> $query$search
#> # A tibble: 10 × 6
#> ns title size wordcount
#> * <int> <chr> <int> <int>
#> 1 0 Pine 19915 2372
#> 2 0 List of Pinus species 13999 995
#> 3 0 Pinus luchuensis 2903 166
#> 4 0 Pinus wallichiana 4295 433
#> 5 0 Pinus nigra 11468 1352
#> 6 0 Pinus kesiya 5281 512
#> 7 0 Pinus devoniana 3801 397
#> 8 0 Pinus × sondereggeri 3485 347
#> 9 0 Pinus mugo 10884 795
#> 10 0 Pinus heldreichii 6482 707
#> # ... with 2 more variables: snippet <chr>, timestamp <chr>
Search also supports other language editions via the wiki argument
wt_wikipedia_search(query = "Pinus", wiki = "fr")
Wikicommons: lower level
pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
res <- wt_wikicommons_parse(pg)
res$common_names[1:3]
#> [[1]]
#> [[1]]$name
#> [1] "okra"
#>
#> [[1]]$language
#> [1] "en"
#>
#>
#> [[2]]
#> [[2]]$name
#> [1] "مسكي"
#>
#> [[2]]$language
#> [1] "ar"
#>
#>
#> [[3]]
#> [[3]]$name
#> [1] "Abelmoş"
#>
#> [[3]]$language
#> [1] "az"
higher level
res <- wt_wikicommons("Abelmoschus")
res$classification
#> # A tibble: 15 × 2
#> rank name
#> <chr> <chr>
#> 1 Domain Eukaryota
#> 2 • unranked Archaeplastida
#> 3 • Regnum Plantae
#> 4 • Cladus angiosperms
#> 5 • Cladus eudicots
#> 6 • Cladus core eudicots
#> 7 • Cladus superrosids
#> 8 • Cladus rosids
#> 9 • Cladus eurosids II
#> 10 • Ordo Malvales
#> 11 • Familia Malvaceae
#> 12 • Subfamilia Malvoideae
#> 13 • Tribus Hibisceae
#> 14 • Abelmoschus
#> 15 Medik. (1787)
res$common_names
#> # A tibble: 18 × 2
#> name language
#> <chr> <chr>
#> 1 okra en
#> 2 مسكي ar
#> 3 Abelmoş az
#> 4 Ibiškovec cs
#> 5 Bisameibisch de
#> 6 Okrat fi
#> 7 Abelmosco gl
#> 8 Abelmošus hr
#> 9 Ybiškė lt
#> 10 അബെൽമോസ്കസ് ml
#> 11 Абельмош mrj
#> 12 Piżmian pl
#> 13 Абельмош ru
#> 14 موري sd
#> 15 Okrasläktet sv
#> 16 Абельмош udm
#> 17 Chi Vông vang vi
#> 18 黄葵属 zh
search
wt_wikicommons_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 257
#>
#>
#> $query$search
#> # A tibble: 10 × 6
#> ns title size wordcount
#> * <int> <chr> <int> <int>
#> 1 0 Pinus 4160 303
#> 2 0 Pinus nigra 7449 486
#> 3 0 Pinus × schwerinii 634 67
#> 4 0 Pinus mugo 7157 573
#> 5 0 Spinus pinus 1563 242
#> 6 0 Pinus tabuliformis 1739 136
#> 7 0 Setophaga pinus 1735 198
#> 8 0 Pinus sabiniana 2799 217
#> 9 0 Pinus distribution maps of North America 25971 92
#> 10 0 Pinus cooperi 564 64
#> # ... with 2 more variables: snippet <chr>, timestamp <chr>
Wikispecies: lower level
pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
res <- wt_wikispecies_parse(pg, types = "common_names")
res$common_names[1:3]
#> [[1]]
#> [[1]]$name
#> [1] "Ябълка"
#>
#> [[1]]$language
#> [1] "български"
#>
#>
#> [[2]]
#> [[2]]$name
#> [1] "Poma, pomera"
#>
#> [[2]]$language
#> [1] "català"
#>
#>
#> [[3]]
#> [[3]]$name
#> [1] "Apfel"
#>
#> [[3]]$language
#> [1] "Deutsch"
higher level
res <- wt_wikispecies("Malus domestica")
res$classification
#> # A tibble: 8 × 2
#> rank name
#> <chr> <chr>
#> 1 Superregnum Eukaryota
#> 2 Regnum Plantae
#> 3 Cladus Angiosperms
#> 4 Cladus Eudicots
#> 5 Cladus Core eudicots
#> 6 Cladus Rosids
#> 7 Cladus Eurosids I
#> 8 Ordo Rosales
res$common_names
#> # A tibble: 19 × 2
#> name language
#> <chr> <chr>
#> 1 Ябълка български
#> 2 Poma, pomera català
#> 3 Apfel Deutsch
#> 4 Aed-õunapuu eesti
#> 5 Μηλιά Ελληνικά
#> 6 Apple English
#> 7 Manzano español
#> 8 Pomme français
#> 9 Melâr furlan
#> 10 사과나무 한국어
#> 11 ‘Āpala Hawaiʻi
#> 12 Melo italiano
#> 13 Aapel Nordfriisk
#> 14 Maçã, Macieira português
#> 15 Яблоня домашняя русский
#> 16 Tarhaomenapuu suomi
#> 17 Elma Türkçe
#> 18 Яблуня домашня українська
#> 19 Pomaro vèneto
search
wt_wikispecies_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 396
#>
#>
#> $query$search
#> # A tibble: 10 × 6
#> ns title size wordcount
#> * <int> <chr> <int> <int>
#> 1 0 Pinus 1570 282
#> 2 0 Pinus subg. Pinus 318 27
#> 3 0 Pinus clausa 1183 211
#> 4 0 Pinus sect. Pinus 623 68
#> 5 0 Pinus resinosa 1195 166
#> 6 0 Pinus nigra subsp. nigra 1412 127
#> 7 0 Pinus cooperi 680 89
#> 8 0 Pinus thunbergii 873 122
#> 9 0 Pinus gordoniana 594 61
#> 10 0 Pinus subsect. Pinus 718 94
#> # ... with 2 more variables: snippet <chr>, timestamp <chr>