From 11b397e23185c3d905b89f1c22fd29242581ba12 Mon Sep 17 00:00:00 2001 From: Rich Wareham <rjw57@cam.ac.uk> Date: Tue, 27 Nov 2018 11:00:08 +0000 Subject: [PATCH 1/4] add LICENSE file Add a license specifying the terms under which this software is released. --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b7f46ed --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 University of Cambridge Information Services + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. -- GitLab From a5132db9cb45b2929cac5c581663964e30dfac05 Mon Sep 17 00:00:00 2001 From: Rich Wareham <rjw57@cam.ac.uk> Date: Tue, 27 Nov 2018 11:10:24 +0000 Subject: [PATCH 2/4] some finessing of the script This commit finesses the packaging and UX of the script a little. Add a README which indicates the purpose of the script, how to install it and a usage example. 
Add basic packaging so that the script may be installed via pip. Add .editorconfig files so that people with appropriately configured editors get the correct indentation/line length settings. Add .gitignore to avoid accidentally committing files we don't want. Modify the script itself: 1. Logging is now to standard error by default. Redirection and/or ``tee`` can be used to log to a file. 2. Logging verbosity can be reduced via a ``--quiet`` flag. 3. Be explicit in what Opencast server and credentials we use rather than loading them from the environment. With multiple Opencast instances running it would be all too easy to accidentally schedule events in the wrong one. 4. Use urljoin() to form the API URL rather than string concatenation. This behaves correctly in the case where the API URL is specified with a trailing slash. 5. Default to standard input for CSV if no path is specified rather than having a "magic" filename. 6. Global constants converted to use SHOUTING_CASE which is the convention we adopt in our projects. 7. Use slightly more descriptive variable names and increase logging in places to show troublesome data in the log. 8. Requests returns a "response" and not a "request" so re-name the variable appropriately. 9. Rather than load passwords from the environment, which is leaky since every program that is run sees that variable, load passwords from a file. This is also a bit more compatible with, e.g., Docker secrets. 
--- .editorconfig | 4 + .gitignore | 115 +++++++++++++++++++ README.md | 40 ++++++- oc_sched.py | 155 -------------------------- opencastcsvschedule/__init__.py | 189 ++++++++++++++++++++++++++++++++ setup.py | 19 ++++ 6 files changed, 363 insertions(+), 159 deletions(-) create mode 100644 .editorconfig create mode 100644 .gitignore delete mode 100755 oc_sched.py create mode 100644 opencastcsvschedule/__init__.py create mode 100644 setup.py diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..8ae3a90 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,4 @@ +root=true + +[*.py] +max_line_length=99 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3839b3f --- /dev/null +++ b/.gitignore @@ -0,0 +1,115 @@ +# If you follow the instructions in the README, these will be sensitive files. +*.password + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +.static_storage/ +.media/ +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +*.sqlite3 + +# Local configuration +mediawebapp/settings/local.py +secrets.env + +# PyCharm +.idea diff --git a/README.md b/README.md index c61c731..2e9e306 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,37 @@ -# lecture-capture-csv-scheduler -A repository for opencast csv sheduler code +# Schedule Opencast Events from CSV -# oc_sched.py -Takes a csv file as input and attempts to schedule opencast events. +This repository contains a small utility to schedule Opencast events from an +input CSV. + +**This utility requires at least version 1.1 of the Opencast API. 
This version +was first released in Opencast 6.** + +## Installation + +```console +$ pip install git+https://github.com/uisautomation/lecture-capture-csv-scheduler +$ opencast_csv_schedule --help # print usage summary +``` + +## Usage + +```console +$ echo "super-secret-password" > opencast.password +$ opencast_csv_schedule \ + --input=schedule.csv --base-url=https://opencast.invalid/ \ + --user=some-opencast-user --password-file=opencast.password +``` + +## Development + +When developing this script, it is useful to combine a virtualenv with pip's +concept of an "editable" install which uses symlinks instead of copies when +installing: + +```console +$ git clone git@github.com:uisautomation/lecture-capture-csv-scheduler.git +$ cd lecture-capture-csv-scheduler +$ python3 -m virtualenv ./venv +$ source ./venv/bin/activate +$ pip install -e . +``` diff --git a/oc_sched.py b/oc_sched.py deleted file mode 100755 index 3ec8d00..0000000 --- a/oc_sched.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python3 -""" -Schedule opencast events from csv input (file) - -Usage: - oc_sched.py (-h|--help) - oc_sched.py [--input=PATH] - -Options: - - -h, --help Show a brief usage summary. 
- - -i, --input=PATH Path (including filename) of the csv file to ingest - [default: ./lecture-schedule.csv] - -Environment Variables: - - OCUSER Username of opencast user for event scheduling (default: admin) - OCPASSWORD Password for opencast user for event scheduling (no default) - OCURL URL for the opencast instance - -""" # noqa:E501 - -import os -import csv -import sys -import json -import requests -import logging -import docopt -import pytz -from requests.auth import HTTPBasicAuth -from requests_toolbelt import MultipartEncoder -from dateutil.parser import parse - -logging.basicConfig(filename='lecture-schedule.log', - level=logging.DEBUG, - format='%(asctime)s %(message)s') - -inputfile = 'lecture-schedule.csv' -url = os.environ.get('OCURL', 'https://admin.lecturecapture.uis.cam.ac.uk') -user = os.environ.get('OCUSER', "admin") -password = os.environ.get("OCPASSWORD", 'password') -fieldnames = ["location", - "title", - "series", - "startTime", - "stopTime", - "workflow", - "courseDescription", - "vleUri", - "sequenceUri", - "sequenceIndex"] - - -def _parse_date(s): - """ - Parse date from a date string as defined in the CSV. 
- - """ - return parse(s).astimezone(pytz.utc) - - -def oc_create_event(m): - """opencast request for event creation""" - event_url = url + '/api/events' - try: - request = requests.post(event_url, data=m, - headers={'Content-Type': m.content_type}, - auth=HTTPBasicAuth(user, password)) - except requests.exceptions.RequestException as e: - logging.error(e) - sys.exit(1) - logging.info("status: %s" % str(request.status_code)) - logging.info(request.text) - return request - - -def oc_acl(): - return [ - {'role': 'ROLE_ADMIN', 'action': 'write', 'allow': True}, - {'role': 'ROLE_USER', 'action': 'read', 'allow': True}, - ] - - -def oc_metadata(row): - """Create opencast metadata for an event""" - t = _parse_date(row['startTime']) - - def _make_field(id_, value): - return {'id': id_, 'value': value} - - return [ - { - 'flavor': 'dublincore/episode', - 'fields': [ - _make_field('title', row['title']), - _make_field('description', row['courseDescription']), - _make_field('startDate', t.strftime("%Y-%m-%d")), - _make_field('startTime', t.strftime("%H:%M:%SZ")), - ], - } - ] - - -def oc_sched(row): - """Create opencast schedule for an event""" - duration = _parse_date(row["stopTime"]) - _parse_date(row["startTime"]) - sched = {"agent_id": row["location"], - "start": row["startTime"], - "duration": 1000 * int(duration.total_seconds()), - "inputs": ["default"]} - return sched - - -def oc_process(row): - """Create opencast processing details for an event""" - conf = {"flagForCutting": "false", - "flagForReview": "false", - "publishToEngage": "true", - "publishToHarvesting": "true", - "straightToPublishing": "true"} - process = {"workflow": row["workflow"], "configuration": conf} - return process - - -def oc_lecture_sched(inputfile): - """Read in csv file row by row, assemble multipart form fields and create events""" - with open(inputfile) as csv_file: - header = next(csv.reader(csv_file)) - if header[:len(fieldnames)] != fieldnames: - logging.error("Bad header in csv file: 
%s", inputfile) - logging.error(header) - sys.exit(1) - csv_reader = csv.DictReader(csv_file, fieldnames) - logging.info("Loaded file: %s" % inputfile) - for row in csv_reader: - m = MultipartEncoder( - fields={'acl': json.dumps(oc_acl()), - 'metadata': json.dumps(oc_metadata(row)), - 'scheduling': json.dumps(oc_sched(row)), - 'processing': json.dumps(oc_process(row))}) - oc_create_event(m) - - -if __name__ == "__main__": - opts = docopt.docopt(__doc__, options_first=True) - if os.environ.get("OCPASSWORD") is None: - print("No opencast password defined - please set OCPASSWORD environment variable") - logging.error("No opencast password defined") - sys.exit(1) - if opts['--input']: - oc_lecture_sched(opts['--input']) - else: - oc_lecture_sched(inputfile) diff --git a/opencastcsvschedule/__init__.py b/opencastcsvschedule/__init__.py new file mode 100644 index 0000000..0ef4750 --- /dev/null +++ b/opencastcsvschedule/__init__.py @@ -0,0 +1,189 @@ +""" +Schedule opencast events from csv input (file) + +Usage: + oc_sched.py (-h|--help) + oc_sched.py --base-url=URL [--user=USER] [--password-file=PATH] [--quiet] + [--input=PATH] + +Options: + + -h, --help Show a brief usage summary. + -q, --quiet Decrease verbosity of logging. + + --base-url=URL URL of opencast instance. + + --user=USER Username of Opencast user [default: admin] + --password-file=PATH Path to file containing Opencast user password. + (Leading/trailing whitespace is stripped.) + + -i, --input=PATH Path (including filename) of the csv file to ingest. If + omitted, standard input is used. + +""" +import csv +import json +import logging +import urllib.parse +import sys + +import docopt +from dateutil.parser import parse +import pytz +import requests +from requests.auth import HTTPBasicAuth +from requests_toolbelt import MultipartEncoder + + +# Expected header for input CSV. 
+EXPECTED_CSV_HEADER = [ + "location", "title", "series", "startTime", "stopTime", "workflow", "courseDescription", + "vleUri", "sequenceUri", "sequenceIndex" +] + + +class ProcessingError(RuntimeError): + pass + + +def main(): + # Parse command line options + opts = docopt.docopt(__doc__, options_first=True) + + # Configure logging + logging.basicConfig( + level=logging.WARN if opts['--quiet'] else logging.INFO, + format='%(asctime)s %(message)s' + ) + + # Load password + with open(opts['--password-file']) as fobj: + oc_password = fobj.read().strip() + + # Form arguments to schedule_events() + schedule_event_args = { + 'base_url': opts['--base-url'], + 'user': opts['--user'], + 'password': oc_password, + } + + # Attempt to schedule events catching processing errors. + try: + if opts['--input'] is not None: + with open(opts['--input']) as fobj: + schedule_events(input_fobj=fobj, **schedule_event_args) + else: + schedule_events(input_fobj=sys.stdin, **schedule_event_args) + except ProcessingError: + # Log error and exit with error status + logging.error('Aborting processing due to error') + sys.exit(1) + + # Signal normal exit + sys.exit(0) + + +def schedule_events(input_fobj, base_url, user, password): + """ + Read events from CSV and schedule them in Opencast. + + """ + # Read header from CSV and check that it conforms to our expectation. + header = next(csv.reader(input_fobj)) + if header[:len(EXPECTED_CSV_HEADER)] != EXPECTED_CSV_HEADER: + logging.error('Bad header in csv file') + logging.error('Header was: %s', ','.join(header)) + logging.error('Expected: %s', ','.join(EXPECTED_CSV_HEADER)) + raise ProcessingError() + + logging.info('Loading CSV') + csv_reader = csv.DictReader(input_fobj, EXPECTED_CSV_HEADER) + + # Form the events API URL from the base URL. + events_api_url = urllib.parse.urljoin(base_url, 'api/events') + + # For each input row, form each of the mulitpart form fields required by the opencast API. 
+ for row in csv_reader: + logging.info( + 'Scheduling event "%s" at %s', row['title'], _parse_date(row['startTime']).isoformat() + ) + + # Create multipart form encoding for event + body_data = MultipartEncoder(fields={ + 'acl': json.dumps(oc_acl()), + 'metadata': json.dumps(oc_metadata(row)), + 'scheduling': json.dumps(oc_sched(row)), + 'processing': json.dumps(oc_process(row)), + }) + + # Attempt to schedule it + try: + response = requests.post( + events_api_url, data=body_data, headers={'Content-Type': body_data.content_type}, + auth=HTTPBasicAuth(user, password) + ) + response.raise_for_status() + except Exception as e: + logging.error('Error posting event') + logging.error('Row was: %s', ','.join(row)) + logging.exception(e) + + +def oc_acl(): + return [ + {'role': 'ROLE_ADMIN', 'action': 'write', 'allow': True}, + {'role': 'ROLE_USER', 'action': 'read', 'allow': True}, + ] + + +def oc_metadata(row): + """Create opencast metadata for an event""" + t = _parse_date(row['startTime']) + + def _make_field(id_, value): + return {'id': id_, 'value': value} + + return [ + { + 'flavor': 'dublincore/episode', + 'fields': [ + _make_field('title', row['title']), + _make_field('description', row['courseDescription']), + _make_field('startDate', t.strftime("%Y-%m-%d")), + _make_field('startTime', t.strftime("%H:%M:%SZ")), + ], + } + ] + + +def oc_sched(row): + """Create opencast schedule for an event""" + duration = _parse_date(row["stopTime"]) - _parse_date(row["startTime"]) + sched = {"agent_id": row["location"], + "start": row["startTime"], + "duration": 1000 * int(duration.total_seconds()), + "inputs": ["default"]} + return sched + + +def oc_process(row): + """Create opencast processing details for an event""" + conf = {"flagForCutting": "false", + "flagForReview": "false", + "publishToEngage": "true", + "publishToHarvesting": "true", + "straightToPublishing": "true"} + process = {"workflow": row["workflow"], "configuration": conf} + return process + + +def 
_parse_date(s): + """ + Parse date from a date string as defined in the CSV. + + """ + return parse(s).astimezone(pytz.utc) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..05f99bb --- /dev/null +++ b/setup.py @@ -0,0 +1,19 @@ +from setuptools import setup, find_packages + +setup( + name='opencastcsvschedule', + author='UIS DevOps', + packages=find_packages(), + install_requires=[ + 'docopt', + 'python-dateutil', + 'pytz', + 'requests', + 'requests-toolbelt', + ], + entry_points={ + 'console_scripts': [ + 'opencast_csv_schedule=opencastcsvschedule:main' + ] + }, +) -- GitLab From 3c32496c450b7112fb408af3eef9c5dddc21b053 Mon Sep 17 00:00:00 2001 From: Rich Wareham <rjw57@cam.ac.uk> Date: Tue, 27 Nov 2018 11:17:22 +0000 Subject: [PATCH 3/4] gitignore: remove some unnecessary entries The problem with copy-pasting gitignore files is that sometimes cruft is left in them. --- .gitignore | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitignore b/.gitignore index 3839b3f..2bc5ca4 100644 --- a/.gitignore +++ b/.gitignore @@ -107,9 +107,5 @@ venv.bak/ .mypy_cache/ *.sqlite3 -# Local configuration -mediawebapp/settings/local.py -secrets.env - # PyCharm .idea -- GitLab From 88ebe5f17addb7f4cf283dbc6fad4575cf03931f Mon Sep 17 00:00:00 2001 From: Rich Wareham <rjw57@cam.ac.uk> Date: Tue, 27 Nov 2018 11:17:42 +0000 Subject: [PATCH 4/4] editorconfig: add indent settings for Python --- .editorconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.editorconfig b/.editorconfig index 8ae3a90..951bbc2 100644 --- a/.editorconfig +++ b/.editorconfig @@ -2,3 +2,5 @@ root=true [*.py] max_line_length=99 +indent_style=space +indent_size=4 -- GitLab