Commit 09d74aee authored by Michael Wagner
Browse files

initial work on a meta-data checker (meta-data errors are rejected by zenodo)

parent 0842092c
# Official Zenodo documentation: https://developers.zenodo.org/
# check_meta_data tries to mimic the logic and checks described in that documentation.
# value restricted fields
# Accepted values of the value-restricted metadata fields. Each set is used
# for membership tests only, so the order of the entries is irrelevant.

# Accepted 'upload_type' values. Per the Zenodo REST API vocabulary the value
# for "Video/Audio" uploads is 'video'; 'video/Audio' is only the display label.
acc_upload_type = {
    'publication', 'poster', 'presentation', 'dataset', 'image', 'video', 'software', 'lesson', 'physicalobject',
    'other',
}

# Accepted 'publication_type' values.
acc_publication_type = {
    'annotationcollection', 'book', 'section', 'conferencepaper', 'datamanagementplan', 'article', 'patent', 'preprint',
    'deliverable', 'milestone', 'proposal', 'report', 'softwaredocumentation', 'taxonomictreatment', 'technicalnote',
    'thesis', 'workingpaper', 'other',
}

# Accepted 'image_type' values.
acc_image_type = {'figure', 'plot', 'drawing', 'diagram', 'photo', 'other'}

# Accepted 'access_right' values.
acc_access_right = {'open', 'embargoed', 'restricted', 'closed'}
# acc_publication_date has to be in ISO8601 format (YYYY-MM-DD). defaults to current date
# Some fields must not be empty:
#   title must not be empty
#   creators must be given, at least one {'name': 'Smith, Jane'} - affiliation, orcid and gnd are optional
#   description must be there
# Depending on the value of "access_right" there are additional fields that must not be empty:
#   'open', 'embargoed': license
#   'embargoed': embargo_date
#   'restricted': access_conditions
def check_meta_data(meta_data):
    """Check a metadata dictionary against the rules of the Zenodo upload schema.

    The following checks are performed:

    * 'upload_type' must be one of ``acc_upload_type``.
    * If 'upload_type' is 'image', 'image_type' must be one of ``acc_image_type``.
    * If 'upload_type' is 'publication', 'publication_type' must be one of
      ``acc_publication_type``.
    * 'access_right' must be one of ``acc_access_right`` (a missing value is
      rejected by this checker).
    * Depending on 'access_right', additional fields must be non-empty:
      'open' and 'embargoed' need 'license', 'embargoed' also needs
      'embargo_date', 'restricted' needs 'access_conditions'.
    * 'title', 'creators' and 'description' must be non-empty.

    meta_data: a dictionary containing metadata in the Zenodo upload schema
    returns: True if meta_data passes all the checks, False otherwise
    """
    upload_type = meta_data.get('upload_type')
    access_right = meta_data.get('access_right')

    # -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    # Value-restricted fields.
    # Strings are compared with '==' / 'in': 'is' tests object identity and is
    # only accidentally true for equal strings.
    if upload_type not in acc_upload_type:
        return False
    # The sub-type is only relevant for the upload type it belongs to.
    if upload_type == 'image' and meta_data.get('image_type') not in acc_image_type:
        return False
    if upload_type == 'publication' and meta_data.get('publication_type') not in acc_publication_type:
        return False
    if access_right not in acc_access_right:
        return False

    # -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    # access_right brings additional constraints: the dependent fields have
    # to be present and non-empty.
    if access_right in ('open', 'embargoed') and not meta_data.get('license'):
        return False
    if access_right == 'embargoed' and not meta_data.get('embargo_date'):
        return False
    if access_right == 'restricted' and not meta_data.get('access_conditions'):
        return False

    # -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    # Fields that must neither be missing nor empty.
    return all(meta_data.get(field) for field in ('title', 'creators', 'description'))
if __name__ == '__main__':
    # Small self-check: run the checker on an example record and print the
    # verdict.
    meta_data = {
        'title': 'My 2nd upload',
        'upload_type': 'publication',
        'publication_type': 'book',
        'description': 'This is my first upload',
        'access_right': 'open',
        'license': 'cc-by',
        'creators': [{'name': 'Doe, Michi', 'affiliation': 'Zenodo'}],
    }
    succ_check = check_meta_data(meta_data)
    print("Meta-Data passed the check?:", succ_check)
