diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..d5a35b5dbd45cf6f7cec14ed84b75ea397b6a25d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,151 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a Python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other info into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+### VirtualEnv template
+# Virtualenv
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+.Python
+[Bb]in
+[Ii]nclude
+[Ll]ib
+[Ll]ib64
+[Ll]ocal
+[Ss]cripts
+pyvenv.cfg
+.venv
+pip-selfcheck.json
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff:
+.idea/workspace.xml
+.idea/tasks.xml
+.idea/dictionaries
+.idea/vcs.xml
+.idea/jsLibraryMappings.xml
+
+# Sensitive or high-churn files:
+.idea/dataSources.ids
+.idea/dataSources.xml
+.idea/dataSources.local.xml
+.idea/sqlDataSources.xml
+.idea/dynamic.xml
+.idea/uiDesigner.xml
+
+# Gradle:
+.idea/gradle.xml
+.idea/libraries
+
+# Mongo Explorer plugin:
+.idea/mongoSettings.xml
+
+.idea/
+
+## File-based project format:
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+/out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
\ No newline at end of file
diff --git a/define-dataset.py b/define-dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..f983e7659303d80d50901dab2aa7676718d8ac3a
--- /dev/null
+++ b/define-dataset.py
@@ -0,0 +1,64 @@
+"""
+What we need:
+- Checksum, file size and file name of each selected file
+- Check the file names and, where needed, ask the user to change them
+- Support both single-select and multi-select (files only, no folders)
+- Output format is undecided (one JSON? CSV?); CRIS can run Python, so any works
+"""
+
+import os
+import tkinter as tk
+from tkinter import filedialog
+import re
+
+
+def check_file_name(file):
+    """Report whether `file` is an acceptable file name.
+
+    A name is accepted when it is non-empty and made up solely of word
+    characters (letters, digits, '_'), '-' and '.'; otherwise the offending
+    characters are printed. Returns True if accepted, False if not.
+    """
+    # '-' is last in the class so it is a literal, not a range operator.
+    forbidden = re.sub(r'[\w.-]+', '', file)
+    if file and not forbidden:
+        print('looks ok', file)
+        return True
+    print('no: forbidden chars:', forbidden)
+    return False
+
+
+
+def get_file_size(file_path):
+    """Print and return the size, in bytes, of the file at `file_path`."""
+    # Query the file's metadata once and reuse the value for both outputs.
+    stat_result = os.stat(file_path)
+    size_in_bytes = stat_result.st_size
+    print(size_in_bytes)
+    return size_in_bytes
+
+
+def get_sha256_checksum(file_path):
+    """Print and return the SHA-256 hex digest of the file at `file_path`."""
+    import hashlib
+    hasher = hashlib.sha256()
+    with open(file_path, "rb") as stream:
+        chunk = stream.read(4096)  # 4 KiB at a time keeps memory use bounded
+        while chunk:
+            hasher.update(chunk)
+            chunk = stream.read(4096)
+    print(hasher.hexdigest())
+    return hasher.hexdigest()
+
+
+if __name__ == '__main__':
+    root = tk.Tk()  # hidden root window that hosts the OS-specific file dialog
+    root.withdraw()
+    file_path = filedialog.askopenfilename()
+    root.destroy()  # dialog finished, so release the Tk window
+    if not file_path:  # an empty result means the user cancelled the dialog
+        raise SystemExit('no file selected')
+    print(file_path)
+    if check_file_name(os.path.basename(file_path)):
+        get_file_size(file_path)
+        get_sha256_checksum(file_path)