rdatacite

Build Status codecov.io rstudio mirror downloads cran version

rdatacite provides programmatic access to DataCite metadata

Installation

Stable CRAN version

install.packages("rdatacite")

Development version from GitHub

devtools::install_github("ropensci/rdatacite")
library('rdatacite')

OAI-PMH

Ping the service

dc_oai_identify()
#>   repositoryName                     baseURL protocolVersion
#> 1   DataCite MDS http://oai.datacite.org/oai             2.0
#>           adminEmail    earliestDatestamp deletedRecord
#> 1 admin@datacite.org 2011-01-01T00:00:00Z    persistent
#>            granularity compression compression.1
#> 1 YYYY-MM-DDThh:mm:ssZ        gzip       deflate
#>                                      description
#> 1 oaioai.datacite.org:oai:oai.datacite.org:12425

List metadata formats

dc_oai_listmetadataformats(id = "56225")
#> $`56225`
#> NULL

List identifiers

dc_oai_listidentifiers(from = '2011-06-01T', until = '2011-07-01T')
#> # A tibble: 5 x 4
#>                   identifier            datestamp setSpec setSpec.1
#>                        <chr>                <chr>   <chr>     <chr>
#> 1 oai:oai.datacite.org:32153 2011-06-08T08:57:11Z     TIB  TIB.WDCC
#> 2 oai:oai.datacite.org:32200 2011-06-20T08:12:41Z     TIB TIB.DAGST
#> 3 oai:oai.datacite.org:32220 2011-06-28T14:11:08Z     TIB TIB.DAGST
#> 4 oai:oai.datacite.org:32241 2011-06-30T13:24:45Z     TIB TIB.DAGST
#> 5 oai:oai.datacite.org:32255 2011-07-01T12:09:24Z     TIB TIB.DAGST

Get records

dc_oai_getrecord(id = "56225")
#> $`oai:oai.datacite.org:56225`
#> $`oai:oai.datacite.org:56225`$header
#> # A tibble: 1 x 3
#>                   identifier            datestamp   setSpec
#>                        <chr>                <chr>     <chr>
#> 1 oai:oai.datacite.org:56225 2014-06-03T12:03:28Z BL;BL.ADS
#> 
#> $`oai:oai.datacite.org:56225`$metadata
#> # A tibble: 1 x 12
#>                                                                         title
#>                                                                         <chr>
#> 1 Land at Hill Barton, Clyst St Mary, Devon (NGR SY 0002 9083) Hill Barton In
#> # ... with 11 more variables: creator <chr>, publisher <chr>, date <chr>,
#> #   identifier <chr>, relation <chr>, subject <chr>, language <chr>,
#> #   type <chr>, format <chr>, rights <chr>, coverage <chr>

You can pass more than one identifier to the id parameter.

The search functions below are the API for the same query interface you’d use on the DataCite website.

Search for the term laser

dc_search(q = "laser", fl = c('doi','publicationYear'), rows = 5)
#> # A tibble: 5 x 2
#>                          doi publicationYear
#>                        <chr>           <chr>
#> 1 10.17035/D.2016.0008119129            2016
#> 2             10.7283/R3ZW26            2016
#> 3             10.7283/R3V30K            2016
#> 4      10.2314/GBV:377452467            2002
#> 5      10.2314/GBV:499941845            2004

Another search: published between 2000 and 2005

dc_search(q = "publicationYear:[2000 TO 2005]", fl = c('doi', 'publicationYear'), rows = 5)
#> # A tibble: 5 x 2
#>                       doi publicationYear
#>                     <chr>           <chr>
#> 1 10.5279/DK-SA-DDA-10032            2003
#> 2  10.5279/DK-SA-DDA-1606            2002
#> 3  10.5279/DK-SA-DDA-1149            2001
#> 4  10.5279/DK-SA-DDA-9704            2004
#> 5  10.5279/DK-SA-DDA-5909            2001

Facet

dc_facet(q = "wind", facet.field = 'publisher_facet', facet.limit = 5)
#> $facet_queries
#> NULL
#> 
#> $facet_fields
#> $facet_fields$publisher_facet
#> # A tibble: 5 x 2
#>                                                         term value
#>                                                        <chr> <chr>
#> 1 PANGAEA - Data Publisher for Earth & Environmental Science 32456
#> 2                       Defense Technical Information Center  2382
#> 3                                                   Figshare  1545
#> 4                                                Unpublished  1193
#> 5                                                     Zenodo   609
#> 
#> 
#> $facet_pivot
#> NULL
#> 
#> $facet_dates
#> NULL
#> 
#> $facet_ranges
#> NULL

Stats

dc_stats(q = "ecology", stats.field = 'date')
#> $data
#>                          min                max  count missing
#> date 01-Jan-2007/29-Nov-2010 September 30, 2015 154991    5409
#> 
#> $facet
#> NULL

More-like-this

dc_mlt(q = "ecology", mlt.fl = 'title', mlt.count = 2, fl = 'doi')
#> $docs
#> # A tibble: 10 x 1
#>                            doi
#>                          <chr>
#>  1         10.7892/BORIS.16589
#>  2         10.7892/BORIS.79067
#>  3         10.7892/BORIS.16333
#>  4      10.13140/2.1.3940.8968
#>  5      10.1594/PANGAEA.745038
#>  6      10.1594/PANGAEA.745039
#>  7      10.1594/PANGAEA.745049
#>  8 10.6084/M9.FIGSHARE.1540730
#>  9 10.6084/M9.FIGSHARE.1541138
#> 10 10.6084/M9.FIGSHARE.1178080
#> 
#> $mlt
#> $mlt$`7245706`
#> # A tibble: 2 x 3
#>   numFound start                      doi
#>      <int> <int>                    <chr>
#> 1      604     0 10.11588/DIGLIT.27812.12
#> 2      604     0         10.4224/23001521
#> 
#> $mlt$`8103779`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`5704684`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`4647415`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`536251`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`536252`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`536260`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`6743504`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`6746350`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0
#> 
#> $mlt$`4013379`
#> # A tibble: 1 x 2
#>   numFound start
#>      <int> <int>
#> 1        0     0

REST API

Get a work

dc_work(doi = "10.5438/0012")
#> $data
#> $data$id
#> [1] "10.5438/0012"
#> 
#> $data$type
#> [1] "works"
#> 
#> $data$attributes
#> $data$attributes$doi
#> [1] "10.5438/0012"
#> 
#> $data$attributes$identifier
#> [1] "https://doi.org/10.5438/0012"
#> 
#> $data$attributes$url
#> [1] "https://schema.datacite.org/meta/kernel-4.0/index.html"
#> 
#> $data$attributes$author
#>                           literal
#> 1 DataCite Metadata Working Group
#> 
#> $data$attributes$title
#> [1] "DataCite Metadata Schema Documentation for the Publication and Citation of Research Data v4.0"
#> 
#> $data$attributes$`container-title`
#> [1] "DataCite e.V."
#> 
#> $data$attributes$description
#> [1] "1 Introduction\n1.1 The DataCite Consortium\n1.2 DataCite Community Participation\n1.3 The Metadata Schema\n1.4 Version 4.0 Update\n2 DataCite Metadata Properties\n2.1 Overview\n2.2 Citation\n2.3 DataCite Properties\n3 XML Example\n4 XML Schema\n5 Other DataCite Services\nAppendices\nAppendix 1: Controlled List Definitions\nAppendix 2: Earlier Version Update Notes"
#> 
#> $data$attributes$`resource-type-subtype`
#> [1] "Documentation"
#> 
#> $data$attributes$`data-center-id`
#> [1] "datacite.datacite"
#> 
#> $data$attributes$`member-id`
#> [1] "datacite"
#> 
#> $data$attributes$`resource-type-id`
#> [1] "text"
#> 
#> $data$attributes$version
#> [1] "4.0"
#> 
#> $data$attributes$license
#> NULL
#> 
#> $data$attributes$`schema-version`
#> [1] "4"
#> 
#> $data$attributes$results
#>               id             title count
#> 1      Documents         Documents     1
#> 2 IsNewVersionOf Is new version of     1
#> 
#> $data$attributes$`related-identifiers`
#>   relation-type-id           related-identifier
#> 1        Documents https://doi.org/10.5438/0013
#> 2   IsNewVersionOf https://doi.org/10.5438/0010
#> 
#> $data$attributes$published
#> [1] "2016"
#> 
#> $data$attributes$registered
#> [1] "2016-09-19T21:53:56Z"
#> 
#> $data$attributes$updated
#> [1] "2016-09-19T22:16:45Z"
#> 
#> $data$attributes$media
#> NULL
#> 
#> $data$attributes$xml
#> [1] "<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
	<identifier identifierType="DOI">10.5438/0012</identifier>
	<creators>
		<creator>
			<creatorName>DataCite Metadata Working Group</creatorName>
		</creator>
	</creators>
	<titles>
		<title>DataCite Metadata Schema Documentation for the Publication and Citation of Research Data v4.0</title>
	</titles>
	<publisher>DataCite e.V.</publisher>
	<publicationYear>2016</publicationYear>
	<contributors>
		<contributor contributorType="ProjectLeader">
			<contributorName>Starr, Joan</contributorName>
			<givenName>Joan</givenName>
			<familyName>Starr</familyName>
			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">0000-0002-7285-027X</nameIdentifier>
			<affiliation>California Digital Library</affiliation>
		</contributor>
		<contributor contributorType="ProjectLeader">
			<contributorName>Smaele, Madeleine de</contributorName>
			<givenName>Madeleine de</givenName>
			<familyName>Smaele</familyName>
			<affiliation>TU Delft</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Ashton, Jan</contributorName>
			<givenName>Jan</givenName>
			<familyName>Ashton</familyName>
			<affiliation>British Library</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Barton, Amy</contributorName>
			<givenName>Amy</givenName>
			<familyName>Barton</familyName>
			<affiliation>Purdue University Library</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Bradford, Tina</contributorName>
			<givenName>Tina</givenName>
			<familyName>Bradford</familyName>
			<affiliation>NRC/CISTI</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Ciolek‐Figiel, Anne</contributorName>
			<givenName>Anne</givenName>
			<familyName>Ciolek-Figiel</familyName>
			<affiliation>Inist‐CNRS</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Dietiker, Stefanie</contributorName>
			<givenName>Stefanie</givenName>
			<familyName>Dietiker</familyName>
			<affiliation>ETH Zürich</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Elliott, Jannean</contributorName>
			<givenName>Jannean</givenName>
			<familyName>Elliot</familyName>
			<affiliation>DOE/OSTI</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Genat, Berrit</contributorName>
			<givenName>Berrit</givenName>
			<familyName>Genat</familyName>
			<affiliation>TIB</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Harzenetter, Karoline</contributorName>
			<givenName>Karoline</givenName>
			<familyName>Harzenetter</familyName>
			<affiliation>GESIS</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Hirschmann, Barbara</contributorName>
			<givenName>Barbara</givenName>
			<familyName>Hirschmann</familyName>
			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">0000-0003-0289-0345</nameIdentifier>
			<affiliation>ETH Zürich</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Jakobsson, Stefan</contributorName>
			<givenName>Stefan</givenName>
			<familyName>Jakobsson</familyName>
			<affiliation>SND</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Mailloux, Jean‐Yves</contributorName>
			<givenName>Jean-Yves</givenName>
			<familyName>Mailloux</familyName>
			<affiliation>NRC/CISTI</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Newbold, Elizabeth</contributorName>
			<givenName>Elizabeth</givenName>
			<familyName>Newbold</familyName>
			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">0000-0002-8255-9013</nameIdentifier>
			<affiliation>British Library</affiliation>
		</contributor>
				<contributor contributorType="Editor">
			<contributorName>Nielsen, Lars Holm </contributorName>
			<givenName>Lars Holm</givenName>
			<familyName>Nielsen</familyName>
			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">0000-0001-8135-3489</nameIdentifier>
			<affiliation>CERN</affiliation>
		</contributor>
		<contributor contributorType="Editor">
			<contributorName>Yahia, Mohamed</contributorName>
			<givenName>Mohamed</givenName>
			<familyName>Yahia</familyName>
			<affiliation>Inist-CNRS</affiliation>
		</contributor>
		<contributor contributorType="Supervisor">
			<contributorName>Ziedorn, Frauke</contributorName>
			<givenName>Frauke</givenName>
			<familyName>Ziedorn</familyName>
			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">0000-0002-1143-781X</nameIdentifier>
			<affiliation>TIB</affiliation>
		</contributor>
	</contributors>
	<language>eng</language>
	<resourceType resourceTypeGeneral="Text">Documentation</resourceType>
	<relatedIdentifiers>
		<relatedIdentifier relatedIdentifierType="DOI" relationType="Documents">10.5438/0013</relatedIdentifier>
		<relatedIdentifier relatedIdentifierType="DOI" relationType="IsNewVersionOf">10.5438/0010</relatedIdentifier>
	</relatedIdentifiers>
	<sizes>
		<size>45 pages</size>
	</sizes>
	<formats>
		<format>application/pdf</format>
	</formats>
	<version>4.0</version>
	<descriptions>
		<description descriptionType="TableOfContents">1 Introduction<br/>
1.1 The DataCite Consortium<br/>
1.2 DataCite Community Participation<br/>
1.3 The Metadata Schema<br/>
1.4 Version 4.0 Update<br/>
2 DataCite Metadata Properties<br/>
2.1 Overview<br/>
2.2 Citation<br/>
2.3 DataCite Properties<br/>
3 XML Example<br/>
4 XML Schema<br/>
5 Other DataCite Services<br/>
Appendices<br/>
Appendix 1: Controlled List Definitions<br/>
Appendix 2: Earlier Version Update Notes</description>
	</descriptions>
</resource>
"
#> 
#> 
#> $data$relationships
#> $data$relationships$`data-center`
#> $data$relationships$`data-center`$meta
#> named list()
#> 
#> 
#> $data$relationships$member
#> $data$relationships$member$meta
#> named list()
#> 
#> 
#> $data$relationships$`resource-type`
#> $data$relationships$`resource-type`$meta
#> named list()

Meta