diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..951bbc2192dc911ec3fbc133d7a5db0031f799b4 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,6 @@ +root=true + +[*.py] +max_line_length=99 +indent_style=space +indent_size=4 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..2bc5ca41308af25fc8793aa8d1a559c2258195a2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# If you follow the instructions in the README, these will be sensitive files. +*.password + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +.static_storage/ +.media/ +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +*.sqlite3 + +# PyCharm +.idea diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..b7f46ed4612ccfe78d592ecb2da816b7f569f433 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 University of Cambridge Information Services + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index c61c731171291922e94b4bc6e8f4769e48af619c..2e9e3061f319ca2d509d8614f8b5e8a3b4aa4e3a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,37 @@ -# lecture-capture-csv-scheduler -A repository for opencast csv sheduler code +# Schedule Opencast Events from CSV -# oc_sched.py -Takes a csv file as input and attempts to schedule opencast events. +This repository contains a small utility to schedule Opencast events from an +input CSV. + +**This utility requires at least version 1.1 of the Opencast API. This version +was first released in Opencast 6.** + +## Installation + +```console +$ pip install git+https://github.com/uisautomation/lecture-capture-csv-scheduler +$ opencast_csv_schedule --help # print usage summary +``` + +## Usage + +```console +$ echo "super-secret-password" > opencast.password +$ opencast_csv_schedule \ + --input=schedule.csv --base-url=https://opencast.invalid/ \ + --user=some-opencast-user --password-file=opencast.password +``` + +## Development + +When developing this script, it is useful to combine a virtualenv with pip's +concept of an "editable" install which uses symlinks instead of copies when +installing: + +```console +$ git clone git@github.com:uisautomation/lecture-capture-csv-scheduler.git +$ cd lecture-capture-csv-scheduler +$ python3 -m virtualenv ./venv +$ source ./venv/bin/activate +$ pip install -e . 
+``` diff --git a/oc_sched.py b/oc_sched.py deleted file mode 100755 index 3ec8d00e0363b0fc6570d5b2d11359a815d3f45d..0000000000000000000000000000000000000000 --- a/oc_sched.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python3 -""" -Schedule opencast events from csv input (file) - -Usage: - oc_sched.py (-h|--help) - oc_sched.py [--input=PATH] - -Options: - - -h, --help Show a brief usage summary. - - -i, --input=PATH Path (including filename) of the csv file to ingest - [default: ./lecture-schedule.csv] - -Environment Variables: - - OCUSER Username of opencast user for event scheduling (default: admin) - OCPASSWORD Password for opencast user for event scheduling (no default) - OCURL URL for the opencast instance - -""" # noqa:E501 - -import os -import csv -import sys -import json -import requests -import logging -import docopt -import pytz -from requests.auth import HTTPBasicAuth -from requests_toolbelt import MultipartEncoder -from dateutil.parser import parse - -logging.basicConfig(filename='lecture-schedule.log', - level=logging.DEBUG, - format='%(asctime)s %(message)s') - -inputfile = 'lecture-schedule.csv' -url = os.environ.get('OCURL', 'https://admin.lecturecapture.uis.cam.ac.uk') -user = os.environ.get('OCUSER', "admin") -password = os.environ.get("OCPASSWORD", 'password') -fieldnames = ["location", - "title", - "series", - "startTime", - "stopTime", - "workflow", - "courseDescription", - "vleUri", - "sequenceUri", - "sequenceIndex"] - - -def _parse_date(s): - """ - Parse date from a date string as defined in the CSV. 
- - """ - return parse(s).astimezone(pytz.utc) - - -def oc_create_event(m): - """opencast request for event creation""" - event_url = url + '/api/events' - try: - request = requests.post(event_url, data=m, - headers={'Content-Type': m.content_type}, - auth=HTTPBasicAuth(user, password)) - except requests.exceptions.RequestException as e: - logging.error(e) - sys.exit(1) - logging.info("status: %s" % str(request.status_code)) - logging.info(request.text) - return request - - -def oc_acl(): - return [ - {'role': 'ROLE_ADMIN', 'action': 'write', 'allow': True}, - {'role': 'ROLE_USER', 'action': 'read', 'allow': True}, - ] - - -def oc_metadata(row): - """Create opencast metadata for an event""" - t = _parse_date(row['startTime']) - - def _make_field(id_, value): - return {'id': id_, 'value': value} - - return [ - { - 'flavor': 'dublincore/episode', - 'fields': [ - _make_field('title', row['title']), - _make_field('description', row['courseDescription']), - _make_field('startDate', t.strftime("%Y-%m-%d")), - _make_field('startTime', t.strftime("%H:%M:%SZ")), - ], - } - ] - - -def oc_sched(row): - """Create opencast schedule for an event""" - duration = _parse_date(row["stopTime"]) - _parse_date(row["startTime"]) - sched = {"agent_id": row["location"], - "start": row["startTime"], - "duration": 1000 * int(duration.total_seconds()), - "inputs": ["default"]} - return sched - - -def oc_process(row): - """Create opencast processing details for an event""" - conf = {"flagForCutting": "false", - "flagForReview": "false", - "publishToEngage": "true", - "publishToHarvesting": "true", - "straightToPublishing": "true"} - process = {"workflow": row["workflow"], "configuration": conf} - return process - - -def oc_lecture_sched(inputfile): - """Read in csv file row by row, assemble multipart form fields and create events""" - with open(inputfile) as csv_file: - header = next(csv.reader(csv_file)) - if header[:len(fieldnames)] != fieldnames: - logging.error("Bad header in csv file: 
%s", inputfile) - logging.error(header) - sys.exit(1) - csv_reader = csv.DictReader(csv_file, fieldnames) - logging.info("Loaded file: %s" % inputfile) - for row in csv_reader: - m = MultipartEncoder( - fields={'acl': json.dumps(oc_acl()), - 'metadata': json.dumps(oc_metadata(row)), - 'scheduling': json.dumps(oc_sched(row)), - 'processing': json.dumps(oc_process(row))}) - oc_create_event(m) - - -if __name__ == "__main__": - opts = docopt.docopt(__doc__, options_first=True) - if os.environ.get("OCPASSWORD") is None: - print("No opencast password defined - please set OCPASSWORD environment variable") - logging.error("No opencast password defined") - sys.exit(1) - if opts['--input']: - oc_lecture_sched(opts['--input']) - else: - oc_lecture_sched(inputfile) diff --git a/opencastcsvschedule/__init__.py b/opencastcsvschedule/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0ef4750cc499cd5b58ea48d2e21be316c8862c20 --- /dev/null +++ b/opencastcsvschedule/__init__.py @@ -0,0 +1,189 @@ +""" +Schedule opencast events from csv input (file) + +Usage: + oc_sched.py (-h|--help) + oc_sched.py --base-url=URL [--user=USER] [--password-file=PATH] [--quiet] + [--input=PATH] + +Options: + + -h, --help Show a brief usage summary. + -q, --quiet Decrease verbosity of logging. + + --base-url=URL URL of opencast instance. + + --user=USER Username of Opencast user [default: admin] + --password-file=PATH Path to file containing Opencast user password. + (Leading/trailing whitespace is stripped.) + + -i, --input=PATH Path (including filename) of the csv file to ingest. If + omitted, standard input is used. + +""" +import csv +import json +import logging +import urllib.parse +import sys + +import docopt +from dateutil.parser import parse +import pytz +import requests +from requests.auth import HTTPBasicAuth +from requests_toolbelt import MultipartEncoder + + +# Expected header for input CSV. 
EXPECTED_CSV_HEADER = [
    "location", "title", "series", "startTime", "stopTime", "workflow", "courseDescription",
    "vleUri", "sequenceUri", "sequenceIndex"
]


class ProcessingError(RuntimeError):
    """Raised when the input CSV cannot be processed (it is empty or has a bad header)."""


def main():
    """
    Command line entry point.

    Parse the command line, load the Opencast password from the password file and schedule
    the events found in the input CSV (a file, or standard input when --input is omitted).

    Exits with status 1 if the password cannot be loaded or the CSV is rejected with a
    ProcessingError, and with status 0 otherwise.

    """
    # Parse command line options
    opts = docopt.docopt(__doc__, options_first=True)

    # Configure logging
    logging.basicConfig(
        level=logging.WARN if opts['--quiet'] else logging.INFO,
        format='%(asctime)s %(message)s'
    )

    # Load password. The usage pattern marks --password-file as optional, so the value may be
    # None here; report that explicitly rather than letting open(None) raise a TypeError.
    password_path = opts['--password-file']
    if password_path is None:
        logging.error('No password file specified; use --password-file=PATH')
        sys.exit(1)
    try:
        with open(password_path) as fobj:
            oc_password = fobj.read().strip()
    except OSError as e:
        logging.error('Could not read password file: %s', e)
        sys.exit(1)

    # Form arguments to schedule_events()
    schedule_event_args = {
        'base_url': opts['--base-url'],
        'user': opts['--user'],
        'password': oc_password,
    }

    # Attempt to schedule events catching processing errors.
    try:
        if opts['--input'] is not None:
            with open(opts['--input']) as fobj:
                schedule_events(input_fobj=fobj, **schedule_event_args)
        else:
            schedule_events(input_fobj=sys.stdin, **schedule_event_args)
    except ProcessingError:
        # Log error and exit with error status
        logging.error('Aborting processing due to error')
        sys.exit(1)

    # Signal normal exit
    sys.exit(0)


def _events_api_url(base_url):
    """
    Return the URL of the events API endpoint below *base_url*.

    The base URL is normalised to end in exactly one "/" first. Without that, urljoin()
    replaces the last path segment of a base URL such as "https://host/opencast" instead of
    appending to it.

    """
    return urllib.parse.urljoin(base_url.rstrip('/') + '/', 'api/events')


def _format_row(row):
    """
    Return the values of *row* for the expected columns as a comma-separated string.

    Only the expected columns are read, so the extra-column list which csv.DictReader stores
    under the key None is ignored. Missing values (None) are rendered as empty strings.

    """
    return ','.join(str(row.get(name) or '') for name in EXPECTED_CSV_HEADER)


def schedule_events(input_fobj, base_url, user, password):
    """
    Read events from CSV and schedule them in Opencast.

    :param input_fobj: iterable of CSV lines (for example an open text file) whose first row
        is the header.
    :param base_url: base URL of the Opencast instance; the events API URL is formed from it.
    :param user: user name sent via HTTP basic authentication.
    :param password: password sent via HTTP basic authentication.
    :raises ProcessingError: if the input is empty or the header row does not start with
        EXPECTED_CSV_HEADER.

    A row which cannot be parsed or posted is logged and skipped; the remaining rows are still
    processed.

    """
    # Read header from CSV and check that it conforms to our expectation. An empty input has
    # no header row at all, which is reported in the same way as a wrong header.
    header = next(csv.reader(input_fobj), None)
    if header is None or header[:len(EXPECTED_CSV_HEADER)] != EXPECTED_CSV_HEADER:
        logging.error('Bad header in csv file')
        logging.error('Header was: %s', ','.join(header or []))
        logging.error('Expected: %s', ','.join(EXPECTED_CSV_HEADER))
        raise ProcessingError()

    logging.info('Loading CSV')
    csv_reader = csv.DictReader(input_fobj, EXPECTED_CSV_HEADER)

    # Form the events API URL from the base URL.
    events_api_url = _events_api_url(base_url)

    # For each input row, form each of the multipart form fields required by the opencast API.
    for row in csv_reader:
        # Parsing the row is inside the try block as well as the request so that a malformed
        # row is handled like a failed request: logged and skipped.
        try:
            logging.info(
                'Scheduling event "%s" at %s',
                row['title'], _parse_date(row['startTime']).isoformat()
            )

            # Create multipart form encoding for event
            body_data = MultipartEncoder(fields={
                'acl': json.dumps(oc_acl()),
                'metadata': json.dumps(oc_metadata(row)),
                'scheduling': json.dumps(oc_sched(row)),
                'processing': json.dumps(oc_process(row)),
            })

            # Attempt to schedule it
            response = requests.post(
                events_api_url, data=body_data,
                headers={'Content-Type': body_data.content_type},
                auth=HTTPBasicAuth(user, password)
            )
            response.raise_for_status()
        except Exception as e:
            logging.error('Error scheduling event')
            logging.error('Row was: %s', _format_row(row))
            logging.exception(e)


def oc_acl():
    """Return the access control list sent with every event."""
    return [
        {'role': 'ROLE_ADMIN', 'action': 'write', 'allow': True},
        {'role': 'ROLE_USER', 'action': 'read', 'allow': True},
    ]


def oc_metadata(row):
    """
    Create opencast metadata for an event.

    Returns a single "dublincore/episode" catalogue whose title and description come from the
    "title" and "courseDescription" columns and whose start date and time are derived from
    the "startTime" column after conversion to UTC.

    """
    t = _parse_date(row['startTime'])

    def _make_field(id_, value):
        return {'id': id_, 'value': value}

    return [
        {
            'flavor': 'dublincore/episode',
            'fields': [
                _make_field('title', row['title']),
                _make_field('description', row['courseDescription']),
                _make_field('startDate', t.strftime("%Y-%m-%d")),
                _make_field('startTime', t.strftime("%H:%M:%SZ")),
            ],
        }
    ]


def oc_sched(row):
    """
    Create opencast schedule for an event.

    The capture agent is taken from the "location" column and the duration is the difference
    between "stopTime" and "startTime" in milliseconds (truncated to whole seconds). The start
    is passed through exactly as written in the CSV.

    """
    duration = _parse_date(row["stopTime"]) - _parse_date(row["startTime"])
    sched = {"agent_id": row["location"],
             "start": row["startTime"],
             "duration": 1000 * int(duration.total_seconds()),
             "inputs": ["default"]}
    return sched


def oc_process(row):
    """
    Create opencast processing details for an event.

    The workflow is taken from the "workflow" column; the workflow configuration is fixed.

    """
    conf = {"flagForCutting": "false",
            "flagForReview": "false",
            "publishToEngage": "true",
            "publishToHarvesting": "true",
            "straightToPublishing": "true"}
    process = {"workflow": row["workflow"], "configuration": conf}
    return process


def _parse_date(s):
    """
    Parse date from a date string as defined in the CSV and convert it to UTC.

    NOTE(review): a string without a UTC offset presumably parses to a naive datetime, which
    astimezone() treats as system local time - confirm that this is what the CSV intends.

    """
    return parse(s).astimezone(pytz.utc)


if __name__ == "__main__":
    main()