Commit 4882d0fc authored by Michael Wagner's avatar Michael Wagner
Browse files

successful harvest of a single ORCID

parent 8f92b389
# https://gist.github.com/MOOOWOOO/3cf91616c9f3bbc3d1339adfc707b08a#file-py-gitignore
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/workspace.xml
.idea/tasks.xml
.idea/dictionaries
.idea/vcs.xml
.idea/jsLibraryMappings.xml
# Sensitive or high-churn files:
.idea/dataSources.ids
.idea/dataSources.xml
.idea/dataSources.local.xml
.idea/sqlDataSources.xml
.idea/dynamic.xml
.idea/uiDesigner.xml
# Gradle:
.idea/gradle.xml
.idea/libraries
# Mongo Explorer plugin:
.idea/mongoSettings.xml
.idea/
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
/out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
##################################################
## {Accesses Zenodo.org in order to supply CRIS with the location of research data and/or papers, etc. on Zenodo}
##################################################
## {License_info}
##################################################
## Author: {Michael Wagner}
## Credits: [{credit_list}]
## License: {license}
## Version: {0}.{0}.{2}
## Maintainer: {cris.support@fau.de}
## Email: {michael.wm.wagner@fau.de}
## Status: {in development}
##################################################
# Resumption tokens are valid for 2 min; an expired token -> 422 Unprocessable Entity error
# Rate limit exceeded -> 429 Too Many Requests (120 requests per minute - since 2016: no rate limit)
import requests
import json
# import pandas as pd
import os
# -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
# HELPER
# -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
def save_hits_locally(orcid, hits):
    """
    Write every hit to its own JSON file below ``./json-results/<orcid>/``.

    Files are named after the position of the hit in ``hits`` (``0.json``,
    ``1.json``, ...). Existing files with the same name are overwritten.
    Nothing is created on disk when ``hits`` is empty.

    :param orcid: ORCID string, used as the name of the result sub-directory
    :param hits: iterable of JSON-serialisable objects
    :return: None
    """
    # The target directory does not depend on the hit, so build it only once.
    path = './json-results/' + orcid + '/'
    for index, hit in enumerate(hits):
        if index == 0:
            # Create the directory right before the first write. exist_ok
            # avoids the check-then-create race of os.path.exists().
            os.makedirs(path, exist_ok=True)
        with open(path + str(index) + '.json', 'w') as f:
            json.dump(hit, f)
# -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
# CORE ZENODO API ACCESSES
# -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
def harvest_entirity():
    """
    Send a GET request to the Zenodo OAI-PMH ``ListRecords`` endpoint
    (``oai_datacite`` metadata) and print the HTTP status code.

    The response body is not processed yet.
    ToDo: harvest ALL the data -> nice XML, but slow and possibly a lot of it.

    :return: None
    """
    response = requests.get(
        'https://zenodo.org/oai2d?verb=ListRecords&metadataPrefix=oai_datacite'
    )
    status = response.status_code
    print(status)
def harvest_community():
    """
    Placeholder for harvesting a Zenodo community; currently only prints "WIP".

    URL noted for the future implementation (presumably the endpoint to query):
    https://zenodo.org/oai2d?verb=ListRecords&metadataPrefix=oai_datacite&set=user-fau

    :return: None
    """
    placeholder = "WIP"
    print(placeholder)
def harvest_zenodo(orcids, timeout=30):
    """
    Query the Zenodo records API for each ORCID and collect the returned hits.

    For every ORCID the search ``creators.orcid:"<orcid>"`` is sent to
    ``https://zenodo.org/api/records``. The hits of the response are written
    to disk via ``save_hits_locally`` and gathered in the returned dict.
    Only the hits contained in that single response are collected; further
    result pages are not requested.

    References:
    https://developers.zenodo.org/?python#changes
    https://zenodo.org/search?page=1&size=20&q=creators.orcid:%220000-0001-7430-3694%22
    https://zenodo.org/oai2d

    :param orcids: iterable of ORCID strings; a single ORCID string is
        accepted as well
    :param timeout: seconds to wait for the server, passed on to
        ``requests.get`` so a stalled connection cannot block forever
    :return: dict mapping each ORCID to the list of hits found for it
    :raises requests.HTTPError: if Zenodo answers with a 4xx/5xx status
        (e.g. 422 or 429)
    :raises requests.Timeout: if the server does not answer within ``timeout``
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(orcids, str):
        orcids = [orcids]

    orcid_to_hits_dicts = {}
    for orcid in orcids:
        query = 'creators.orcid:"' + orcid + '"'
        response = requests.get(
            'https://zenodo.org/api/records',
            params={'q': query},
            timeout=timeout,
        )
        # Fail loudly on HTTP errors instead of tripping over a missing
        # 'hits' key in an error payload further down.
        response.raise_for_status()
        hits = response.json()['hits']['hits']
        # saves hits to nested folder structure
        save_hits_locally(orcid, hits)
        orcid_to_hits_dicts[orcid] = hits
    return orcid_to_hits_dicts
if __name__ == '__main__':
    # Smoke run: harvest the Zenodo records of one hard-coded ORCID.
    sample_orcids = ['0000-0001-7430-3694']
    harvested = harvest_zenodo(sample_orcids)
    print("wow")
    # harvest_entirity()  # full OAI-PMH harvest, currently disabled
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment