From a36aa68fd76cdf9369973e99c5aa58d6e499e2bc Mon Sep 17 00:00:00 2001 From: Brian Wilson <brian@edx.org> Date: Wed, 18 May 2016 12:43:12 -0400 Subject: [PATCH] Add new export_olx management command. This moves the functionality of the 'export_course' management command from lms/djangoapps/courseware over to the Studio codebase. This reflects its use going forward to be run with cms settings, to export the content of the Studio modulestore instead of the LMS modulestore. The management command is used by an analytics workflow to output course content for researchers. --- .../management/commands/export_olx.py | 111 ++++++++++++++++++ .../commands/tests/test_export_olx.py | 94 +++++++++++++++ .../management/commands/export_course.py | 8 ++ 3 files changed, 213 insertions(+) create mode 100644 cms/djangoapps/contentstore/management/commands/export_olx.py create mode 100644 cms/djangoapps/contentstore/management/commands/tests/test_export_olx.py diff --git a/cms/djangoapps/contentstore/management/commands/export_olx.py b/cms/djangoapps/contentstore/management/commands/export_olx.py new file mode 100644 index 00000000000..67a14fcad24 --- /dev/null +++ b/cms/djangoapps/contentstore/management/commands/export_olx.py @@ -0,0 +1,111 @@ +""" +A Django command that exports a course to a tar.gz file. + +If --output is not given, it pipes the file to stdout. + +This is used by Analytics research exports to provide researchers +with course content. + +At present, it differs from Studio exports in several ways: + +* It does not include static content. +* The top-level directory in the resulting tarball is a "safe" + (i.e. ascii) version of the course_key, rather than the word "course". +* It only supports the export of courses. It does not export libraries. 
+ +""" + +import os +import re +import shutil +import tarfile +from tempfile import mktemp, mkdtemp +from textwrap import dedent + +from path import Path as path + +from django.core.management.base import BaseCommand, CommandError + +from xmodule.modulestore.django import modulestore +from xmodule.modulestore.xml_exporter import export_course_to_xml +from opaque_keys import InvalidKeyError +from opaque_keys.edx.keys import CourseKey + + +class Command(BaseCommand): + """ + Export a course to XML. The output is compressed as a tar.gz file. + + """ + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + parser.add_argument('course_id') + parser.add_argument('--output', default=None) + + def handle(self, *args, **options): + """Export the course to --output, or stream the archive to stdout when --output is omitted.""" + course_id = options['course_id'] + try: + course_key = CourseKey.from_string(course_id) + except InvalidKeyError: + raise CommandError("Unparsable course_id") + except IndexError: + raise CommandError("Insufficient arguments") + + filename = options['output'] + pipe_results = False + if filename is None: + filename = mktemp() + pipe_results = True + + export_course_to_tarfile(course_key, filename) + + results = self._get_results(filename) if pipe_results else '' + + self.stdout.write(results, ending="") + + def _get_results(self, filename): + """Read the archive back as raw bytes, then delete the file""" + with open(filename, 'rb') as f: + results = f.read() + os.remove(filename) + return results + + +def export_course_to_tarfile(course_key, filename): + """Export a course into a tar.gz file at filename, staging the XML in a temporary directory that is always removed""" + tmp_dir = mkdtemp() + try: + course_dir = export_course_to_directory(course_key, tmp_dir) + compress_directory(course_dir, filename) + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + +def export_course_to_directory(course_key, root_dir): + """Export the course XML under root_dir and return the export directory; raise CommandError if the course is not found""" + store = modulestore() + course = store.get_course(course_key) + if course is None: + raise CommandError("Invalid course_id") + + # The safest characters are A-Z, a-z, 0-9, <underscore>, 
<period> and <hyphen>. + # We represent the first four with \w. + # TODO: Once we support courses with unicode characters, we will need to revisit this. + replacement_char = u'-' + course_dir = replacement_char.join([course.id.org, course.id.course, course.id.run]) + course_dir = re.sub(r'[^\w\.\-]', replacement_char, course_dir) + + export_course_to_xml(store, None, course.id, root_dir, course_dir) + + export_dir = path(root_dir) / course_dir + return export_dir + + +def compress_directory(directory, filename): + """Compress a directory into a tar.gz file, using the directory's base name as the archive's top-level entry""" + mode = 'w:gz' + name = path(directory).name + with tarfile.open(filename, mode) as tar_file: + tar_file.add(directory, arcname=name) diff --git a/cms/djangoapps/contentstore/management/commands/tests/test_export_olx.py b/cms/djangoapps/contentstore/management/commands/tests/test_export_olx.py new file mode 100644 index 00000000000..c2be823d484 --- /dev/null +++ b/cms/djangoapps/contentstore/management/commands/tests/test_export_olx.py @@ -0,0 +1,94 @@ +""" +Tests for exporting OLX content. +""" + +import ddt +from path import Path as path +import shutil +from StringIO import StringIO +import tarfile +from tempfile import mkdtemp +import unittest + +from django.core.management import CommandError, call_command + +from xmodule.modulestore.tests.factories import CourseFactory +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase +from xmodule.modulestore.django import modulestore + + +class TestArgParsingCourseExportOlx(unittest.TestCase): + """ + Tests for parsing arguments for the `export_olx` management command + """ + def test_no_args(self): + """ + Test export command with no arguments + """ + errstring = "Error: too few arguments" + with self.assertRaisesRegexp(CommandError, errstring): + call_command('export_olx') + + +@ddt.ddt +class TestCourseExportOlx(ModuleStoreTestCase): + """ + Test exporting OLX content from a course. 
+ """ + + def test_invalid_course_key(self): + """ + Test export command with an invalid course key. + """ + errstring = "Unparsable course_id" + with self.assertRaisesRegexp(CommandError, errstring): + call_command('export_olx', 'InvalidCourseID') + + def test_course_key_not_found(self): + """ + Test export command with a valid course key that doesn't exist. + """ + errstring = "Invalid course_id" + with self.assertRaisesRegexp(CommandError, errstring): + call_command('export_olx', 'x/y/z') + + def create_dummy_course(self, store_type): + """Create a small course in the given store type, assert the modulestore has it, and return course.id.""" + course = CourseFactory.create(default_store=store_type) + self.assertTrue( + modulestore().has_course(course.id), + "Could not find course in {}".format(store_type) + ) + return course.id + + def check_export_file(self, tar_file, course_key): + """Assert the tarball contains an "<org>-<course>-<run>" directory holding the expected files.""" + names = tar_file.getnames() + dirname = "{0.org}-{0.course}-{0.run}".format(course_key) + self.assertIn(dirname, names) + # Check if some of the files are present, without being exhaustive. 
+ self.assertIn("{}/about".format(dirname), names) + self.assertIn("{}/about/overview.html".format(dirname), names) + self.assertIn("{}/assets/assets.xml".format(dirname), names) + self.assertIn("{}/policies".format(dirname), names) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_export_course(self, store_type): + test_course_key = self.create_dummy_course(store_type) + tmp_dir = path(mkdtemp()) + self.addCleanup(shutil.rmtree, tmp_dir) + filename = tmp_dir / 'test.tar.gz' + call_command('export_olx', '--output', filename, unicode(test_course_key)) + with tarfile.open(filename) as tar_file: + self.check_export_file(tar_file, test_course_key) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_export_course_stdout(self, store_type): + test_course_key = self.create_dummy_course(store_type) + out = StringIO() + call_command('export_olx', unicode(test_course_key), stdout=out) + out.seek(0) + output = out.read() + with tarfile.open(fileobj=StringIO(output)) as tar_file: + self.check_export_file(tar_file, test_course_key) diff --git a/lms/djangoapps/courseware/management/commands/export_course.py b/lms/djangoapps/courseware/management/commands/export_course.py index 1b6bb0b594c..27a6c4497e4 100644 --- a/lms/djangoapps/courseware/management/commands/export_course.py +++ b/lms/djangoapps/courseware/management/commands/export_course.py @@ -3,6 +3,14 @@ A Django command that exports a course to a tar.gz file. If <filename> is '-', it pipes the file to stdout +NOTE: This was formerly used by Analytics research exports to provide +researchers with course content. It is now DEPRECATED, and its +functionality has moved to export_olx.py in +cms/djangoapps/contentstore/management/commands. + +NOTE: when removing this file, also remove references to it +from test_dump_course. + """ import os -- GitLab