rmetadata, v0.1

rmetadata does all things metadata in R. rmetadata hits many web APIs for scholarly metadata -
if we don't have it, just let us know on our issues page.


About the package

We are developing rmetadata as a package to allow users to search across as many scholarly metadata providers as possible.

Some functions in this package have a md_ prefix - this is needed as some of our other packages use the same function names without the prefix.




Installing rmetadata

A stable version will be available on CRAN soon.

# Install the development version from our GitHub repo.
install.packages('devtools')
library(devtools)
install_github('rmetadata', 'ropensci')
library(rmetadata)



Count OAI-PMH identifiers for a data provider.

 # For DataCite.
count_identifiers("datacite")

   provider   count
 1 datacite 1215706





Lookup article info via CrossRef with DOI and get a citation.

As Bibtex

 print(crossref_citation("10.3998/3336451.0009.101"), style = "Bibtex")

 @Article{,
   title = {In Google We Trust?},
   author = {Geoffrey Bilder},
   journal = {The Journal of Electronic Publishing},
   year = {2006},
   month = {01},
   volume = {9},
   doi = {10.3998/3336451.0009.101},
 }

As regular text

 print(crossref_citation("10.3998/3336451.0009.101"), style = "text")

 Bilder G (2006). "In Google We Trust?" _The Journal of Electronic
 Publishing_, *9*. <URL:
 http://dx.doi.org/10.3998/3336451.0009.101>.



Search the CrossRef Metadata for DOIs using free form references.

Search with title, author, year, and journal

 crossref_search_free(query = "Piwowar Sharing Detailed Research Data Is Associated with Increased Citation Rate PLOS one 2007")

                                                                                              text
 1 Piwowar Sharing Detailed Research Data Is Associated with Increased Citation Rate PLOS one 2007
   match                   doi score
 1  TRUE 10.1038/npre.2007.361 4.909

Get a DOI and get the citation using crossref_search

 # Get a DOI for a paper
doi <- crossref_search_free(query = "Piwowar sharing data PLOS one")$doi

# Get the metadata
crossref_search(doi = doi)[, 1:3]

                            doi score normalizedScore
 1 10.1371/journal.pone.0000308 18.09             100



Get a random set of DOIs through CrossRef.


# Default search gets 20 random DOIs
crossref_r()

  [1] "10.1038/171775d0"                   
  [2] "10.1017/CBO9780511707346.011"       
  [3] "10.1111/j.1749-6632.1972.tb16320.x" 
  [4] "10.1111/apha.1935.71.issue-1"       
  [5] "10.1007/s11431-008-0243-1"          
  [6] "10.1088/0305-4470/19/8/025"         
  [7] "10.1364/OL.35.002879"               
  [8] "10.1051/forest:198905ART0188"       
  [9] "10.1371/journal.pone.0056230.g002"  
 [10] "10.1079/9780851994437.0231"         
 [11] "10.1074/jbc.M313969200"             
 [12] "10.1016/j.freeradbiomed.2010.10.208"
 [13] "10.1007/978-1-4612-3660-3_21"       
 [14] "10.1016/j.msea.2010.06.064"         
 [15] "10.1016/B978-0-12-415795-8.00009-X" 
 [16] "10.1016/S0140-6736(01)57920-9"      
 [17] "10.1139/z77-087"                    
 [18] "10.2307/316344"                     
 [19] "10.1111/j.1365-2044.2012.07118.x"   
 [20] "10.1007/bf00692798"

# limit to certain dates
crossref_r(from = 1990, to = 1999)

  [1] "10.1016/j.jebdp.2008.09.011"      "10.1002/maco.19940450602"        
  [3] "10.1016/j.jdeveco.2013.01.009"    "10.1190/1.1436988"               
  [5] "10.1097/00008877-199000160-00009" "10.1016/S0300-7073(05)71561-9"   
  [7] "10.1108/09565690710833053"        "10.1039/c39910000431"            
  [9] "10.1016/S0167-6393(00)00068-6"    "10.1103/PhysRevD.84.095014"      
 [11] "10.1051/water/19922302129"        "10.1007/BF00501943"              
 [13] "10.1017/S0305741000013308"        "10.1186/1472-6963-11-32"         
 [15] "10.1134/S0965544108060169"        "10.1162/016228803322427965"      
 [17] "10.1007/978-3-540-76435-9_6055"   "10.1016/s0006-291x(05)80985-4"   
 [19] "10.1109/ICIS.2011.1"              "10.4147/HTV-101913"

# Specify you want journal articles only
crossref_r(type = "journal_article")

  [1] "10.1007/bf00547846"                                             
  [2] "10.1002/1097-0142(197907)44:1<52::AID-CNCR2820440110>3.0.CO;2-I"
  [3] "10.1007/bf02522750"                                             
  [4] "10.1142/s021953051350019x"                                      
  [5] "10.1007/bf03279203"                                             
  [6] "10.1007/s00204-011-0674-5"                                      
  [7] "10.1007/BF02388254"                                             
  [8] "10.1053/gast.2002.1230655b"                                     
  [9] "10.1353/jowh.2005.0050"                                         
 [10] "10.1016/0003-9861(67)90282-2"                                   
 [11] "10.1007/bf02525738"                                             
 [12] "10.1007/PL00000873"                                             
 [13] "10.1016/j.applthermaleng.2012.11.020"                           
 [14] "10.1097/00003072-200503000-00004"                               
 [15] "10.1186/1756-0500-4-302"                                        
 [16] "10.1107/S0108270194001460"                                      
 [17] "10.1016/S0016-5107(05)00777-7"                                  
 [18] "10.2307/1220756"                                                
 [19] "10.2307/779955"                                                 
 [20] "10.1007/BF02987452"



Search the CrossRef Metadata API.

# Search for two different query terms
crossref_search(query = c("renear", "palmer"), rows = 4)[, 1:3]

                             doi score normalizedScore
 1       10.1126/science.1157784 3.236             100
 2  10.1002/meet.2009.1450460141 2.157              66
 3 10.4242/BalisageVol3.Renear01 2.084              64
 4 10.4242/BalisageVol5.Renear01 2.084              64

# Get results for a certain year
crossref_search(query = c("renear", "palmer"), year = 2010)[, 1:3]

                                doi  score normalizedScore
 1        10.5270/OceanObs09.cwp.68 1.0512             100
 2         10.1002/meet.14504701218 1.0419              99
 3         10.1002/meet.14504701240 1.0419              99
 4            10.1353/mpq.2010.0003 0.6936              65
 5               10.1353/mpq.0.0041 0.6936              65
 6               10.1353/mpq.0.0044 0.6936              65
 7               10.1353/mpq.0.0057 0.6936              65
 8                 10.1386/fm.1.1.2 0.6936              65
 9                 10.1386/fm.1.2.2 0.6936              65
 10                10.1386/fm.1.3.2 0.6936              65
 11            10.1117/2.4201001.04 0.6131              58
 12    10.1097/ALN.0b013e3181f09404 0.6131              58
 13 10.4067/S0717-69962010000100001 0.6131              58
 14 10.4067/S0717-69962010000200001 0.6131              58
 15           10.1353/ect.2010.0025 0.6131              58
 16   10.1016/j.urology.2010.02.033 0.6131              58
 17        10.2105/AJPH.2009.191098 0.6069              57
 18           10.1353/mpq.2010.0004 0.5202              49
 19              10.1353/mpq.0.0048 0.5202              49
 20              10.1353/mpq.0.0053 0.5202              49



Get a short DOI from shortdoi.org

 # Get a short DOI, just the short DOI returned
short_doi(doi = "10.1371/journal.pone.0042793")

 [1] "10/f2bfz9"

# Get a short DOI, all data returned
short_doi(doi = "10.1371/journal.pone.0042793", justshort = FALSE)

 $DOI
 [1] "10.1371/journal.pone.0042793"
 
 $ShortDOI
 [1] "10/f2bfz9"
 
 $IsNew
 [1] FALSE



Get a record from an OAI-PMH data provider

 # Single provider, one identifier
md_getrecord(provider = "pensoft", identifier = "10.3897/zookeys.1.10")

                                                                                                 title
 1 A new candidate for a Gondwanaland distribution in the Zodariidae (Araneae): Australutica in Africa
       creator date             type
 1 Jocqué,Rudy 2008 Research Article
 # Single provider, multiple identifiers
md_getrecord(provider = "pensoft", identifier = c("10.3897/zookeys.1.10", "10.3897/zookeys.4.57"))

                                                                                                    title
 1    A new candidate for a Gondwanaland distribution in the Zodariidae (Araneae): Australutica in Africa
 2 Studies of Tiger Beetles. CLXXVIII. A new Lophyra (Lophyra) from Somaliland (Coleoptera, Cicindelidae)
         creator date             type
 1   Jocqué,Rudy 2008 Research Article
 2 Cassola,Fabio 2008 Research Article



List available metadata formats from various providers

  
# List metadata formats for a provider
md_listmetadataformats(provider = "dryad")

   metadataPrefix
 1         oai_dc
 2            rdf
 3            ore
 4           mets
                                                        schema
 1              http://www.openarchives.org/OAI/2.0/oai_dc.xsd
 2                 http://www.openarchives.org/OAI/2.0/rdf.xsd
 3 http://tweety.lanl.gov/public/schemas/2008-06/atom-tron.sch
 4                  http://www.loc.gov/standards/mets/mets.xsd
                             metadataNamespace
 1 http://www.openarchives.org/OAI/2.0/oai_dc/
 2    http://www.openarchives.org/OAI/2.0/rdf/
 3                 http://www.w3.org/2005/Atom
 4                    http://www.loc.gov/METS/
# List metadata formats for a specific identifier for a provider
md_listmetadataformats(provider = "pensoft", identifier = "10.3897/zookeys.1.10")
             identifier metadataPrefix
 1 10.3897/zookeys.1.10         oai_dc
 2 10.3897/zookeys.1.10           mods
                                              schema
 1    http://www.openarchives.org/OAI/2.0/oai_dc.xsd
 2 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd
                             metadataNamespace
 1 http://www.openarchives.org/OAI/2.0/oai_dc/
 2                  http://www.loc.gov/mods/v3



Support and Bugs

For bugs, feature requests and other issues, please submit an issue via GitHub.


For general comments, email scott at ropensci.gmail.com . This package is part of the rOpenSci suite of R tools. For similar packages visit ropensci.org



License

Creative Commons License
This work is licensed under a Creative Commons Attribution 3.0 Unported License.