metadata and file format cleanups

* course.xml is special--has org and course attributes in addition to url_name * strip data_dir from metadata on export * more asserts * work on roundtrip import-export test

metadata and file format cleanups
* course.xml is special--has org and course attributes in addition to url_name * strip data_dir from metadata on export * more asserts * work on roundtrip import-export test
b091dcab · Victor Shnayder · Calen Pennington · 6ed90527 · b091dcab · b091dcab
Commit b091dcab authored 12 years ago by Victor Shnayder Committed by Calen Pennington 12 years ago
--- a/common/lib/xmodule/xmodule/course_module.py
+++ b/common/lib/xmodule/xmodule/course_module.py
@@ -13,7 +13,6 @@ log = logging.getLogger(__name__)

 class CourseDescriptor(SequenceDescriptor):
    module_class = SequenceModule
-    metadata_attributes = SequenceDescriptor.metadata_attributes + ('org', 'course')

    def __init__(self, system, definition=None, **kwargs):
        super(CourseDescriptor, self).__init__(system, definition, **kwargs)

--- a/common/lib/xmodule/xmodule/tests/test_export.py
+++ b/common/lib/xmodule/xmodule/tests/test_export.py
-from xmodule.modulestore.xml import XMLModuleStore
-from nose.tools import assert_equals
+from fs.osfs import OSFS
+from nose.tools import assert_equals, assert_true
 from nose import SkipTest
+from path import path
 from tempfile import mkdtemp
-from fs.osfs import OSFS

+from xmodule.modulestore.xml import XMLModuleStore
+
+# from ~/mitx_all/mitx/common/lib/xmodule/xmodule/tests/
+# to   ~/mitx_all/mitx/common/test
+TEST_DIR = path(__file__).abspath().dirname()
+for i in range(4):
+    TEST_DIR = TEST_DIR.dirname()
+TEST_DIR = TEST_DIR / 'test'
+
+DATA_DIR = TEST_DIR / 'data'
+
+
+def strip_metadata(descriptor, key):
+    """
+    HACK: data_dir metadata tags break equality because they aren't real metadata
+    remove them.

-def check_export_roundtrip(data_dir):
+    Recursively strips same tag from all children.
+    """
+    print "strip {key} from {desc}".format(key=key, desc=descriptor.location.url())
+    descriptor.metadata.pop(key, None)
+    for d in descriptor.get_children():
+        strip_metadata(d, key)
+
+def check_gone(descriptor, key):
+    '''Make sure that the metadata of this descriptor or any
+    descendants does not include key'''
+    assert_true(key not in descriptor.metadata)
+    for d in descriptor.get_children():
+        check_gone(d, key)
+
+def check_export_roundtrip(data_dir, course_dir):
    print "Starting import"
-    initial_import = XMLModuleStore('org', 'course', data_dir, eager=True)
-    initial_course = initial_import.course
+    initial_import = XMLModuleStore(data_dir, eager=True, course_dirs=[course_dir])
+
+    courses = initial_import.get_courses()
+    assert_equals(len(courses), 1)
+    initial_course = courses[0]

    print "Starting export"
    export_dir = mkdtemp()
+    print "export_dir: {0}".format(export_dir)
    fs = OSFS(export_dir)
-    xml = initial_course.export_to_xml(fs)
-    with fs.open('course.xml', 'w') as course_xml:
+    export_course_dir = 'export'
+    export_fs = fs.makeopendir(export_course_dir)
+
+    xml = initial_course.export_to_xml(export_fs)
+    with export_fs.open('course.xml', 'w') as course_xml:
        course_xml.write(xml)

    print "Starting second import"
-    second_import = XMLModuleStore('org', 'course', export_dir, eager=True)
+    second_import = XMLModuleStore(export_dir, eager=True,
+                                   course_dirs=[export_course_dir])
+
+    courses2 = second_import.get_courses()
+    assert_equals(len(courses2), 1)
+    exported_course = courses2[0]
+
+    print "Checking course equality"
+    strip_metadata(initial_course, 'data_dir')
+    strip_metadata(exported_course, 'data_dir')
+
+    assert_equals(initial_course, exported_course)

    print "Checking key equality"
    assert_equals(initial_import.modules.keys(), second_import.modules.keys())

--- a/common/lib/xmodule/xmodule/tests/test_import.py
+++ b/common/lib/xmodule/xmodule/tests/test_import.py
@@ -5,6 +5,7 @@ from fs.memoryfs import MemoryFS
 from lxml import etree

 from xmodule.x_module import XMLParsingSystem, XModuleDescriptor
+from xmodule.xml_module import is_pointer_tag
 from xmodule.errortracker import make_error_tracker
 from xmodule.modulestore import Location
 from xmodule.modulestore.exceptions import ItemNotFoundError
@@ -117,15 +118,19 @@ class ImportTestCase(unittest.TestCase):
            - inherited metadata doesn't leak to children.
        """
        system = self.get_system()
-        v = "1 hour"
+        v = '1 hour'
+        org = 'foo'
+        course = 'bbhh'
+        url_name = 'test1'
        start_xml = '''
-        <course graceperiod="{grace}" url_name="test1" unicorn="purple">
+        <course org="{org}" course="{course}"
+                graceperiod="{grace}" url_name="{url_name}" unicorn="purple">
            <chapter url="hi" url_name="ch" display_name="CH">
                <html url_name="h" display_name="H">Two houses, ...</html>
            </chapter>
-        </course>'''.format(grace=v)
+        </course>'''.format(grace=v, org=org, course=course, url_name=url_name)
        descriptor = XModuleDescriptor.load_from_xml(start_xml, system,
-                                                     'org', 'course')
+                                                     org, course)

        print descriptor, descriptor.metadata
        self.assertEqual(descriptor.metadata['graceperiod'], v)
@@ -141,15 +146,25 @@ class ImportTestCase(unittest.TestCase):
        # Now export and check things
        resource_fs = MemoryFS()
        exported_xml = descriptor.export_to_xml(resource_fs)
+
+        # Check that the exported xml is just a pointer
        print "Exported xml:", exported_xml
+        pointer = etree.fromstring(exported_xml)
+        self.assertTrue(is_pointer_tag(pointer))
+        # but it's a special case course pointer
+        self.assertEqual(pointer.attrib['course'], course)
+        self.assertEqual(pointer.attrib['org'], org)

        # Does the course still have unicorns?
-        # hardcoded path to course
-        with resource_fs.open('course/test1.xml') as f:
+        with resource_fs.open('course/{url_name}.xml'.format(url_name=url_name)) as f:
            course_xml = etree.fromstring(f.read())

        self.assertEqual(course_xml.attrib['unicorn'], 'purple')

+        # the course and org tags should be _only_ in the pointer
+        self.assertTrue('course' not in course_xml.attrib)
+        self.assertTrue('org' not in course_xml.attrib)
+
        # did we successfully strip the url_name from the definition contents?
        self.assertTrue('url_name' not in course_xml.attrib)


--- a/common/lib/xmodule/xmodule/xml_module.py
+++ b/common/lib/xmodule/xmodule/xml_module.py
@@ -11,6 +11,29 @@ import sys

 log = logging.getLogger(__name__)

+
+def is_pointer_tag(xml_obj):
+    """
+    Check if xml_obj is a pointer tag: <blah url_name="something" />.
+    No children, one attribute named url_name.
+
+    Special case for course roots: the pointer is
+      <course url_name="something" org="myorg"  course="course">
+
+    xml_obj: an etree Element
+
+    Returns a bool.
+    """
+    if xml_obj.tag != "course":
+        expected_attr = set(['url_name'])
+    else:
+        expected_attr = set(['url_name', 'course', 'org'])
+
+    actual_attr = set(xml_obj.attrib.keys())
+    return len(xml_obj) == 0 and actual_attr == expected_attr
+
+
+
 _AttrMapBase = namedtuple('_AttrMap', 'from_xml to_xml')

 class AttrMap(_AttrMapBase):
@@ -41,16 +64,19 @@ class XmlDescriptor(XModuleDescriptor):

    # Note -- url_name isn't in this list because it's handled specially on
    # import and export.
+
+    # TODO (vshnayder): Do we need a list of metadata we actually
+    # understand?  And if we do, is this the place?
+    # Related: What's the right behavior for clean_metadata?
    metadata_attributes = ('format', 'graceperiod', 'showanswer', 'rerandomize',
        'start', 'due', 'graded', 'display_name', 'url_name', 'hide_from_toc',
        'ispublic', 	# if True, then course is listed for all users; see 
        # VS[compat] Remove once unused.
        'name', 'slug')

-    # VS[compat] -- remove once everything is in the CMS
-    # We don't want url_name in the metadata--it's in the location, so avoid
-    # confusion and duplication.
-    metadata_to_strip = ('url_name', )
+    metadata_to_strip = ('data_dir',
+           # VS[compat] -- remove the below attrs once everything is in the CMS
+           'course', 'org', 'url_name')

    # A dictionary mapping xml attribute names AttrMaps that describe how
    # to import and export them
@@ -130,6 +156,8 @@ class XmlDescriptor(XModuleDescriptor):
        Subclasses should not need to override this except in special
        cases (e.g. html module)'''

+        # VS[compat] -- the filename tag should go away once everything is
+        # converted.  (note: make sure html files still work once this goes away)
        filename = xml_object.get('filename')
        if filename is None:
            definition_xml = copy.deepcopy(xml_object)
@@ -198,13 +226,13 @@ class XmlDescriptor(XModuleDescriptor):
            url identifiers
        """
        xml_object = etree.fromstring(xml_data)
-        # VS[compat] -- just have the url_name lookup once translation is done
+        # VS[compat] -- just have the url_name lookup, once translation is done
        url_name = xml_object.get('url_name', xml_object.get('slug'))
        location = Location('i4x', org, course, xml_object.tag, url_name)

        # VS[compat] -- detect new-style each-in-a-file mode
-        if len(xml_object.attrib.keys()) == 1 and len(xml_object) == 0:
-            # new style: this is just a pointer.
+        if is_pointer_tag(xml_object):
+            # new style:
            # read the actual defition file--named using url_name
            filepath = cls._format_filepath(xml_object.tag, url_name)
            definition_xml = cls.load_file(filepath, system.resources_fs, location)
@@ -258,12 +286,13 @@ class XmlDescriptor(XModuleDescriptor):

        # Add the non-inherited metadata
        for attr in self.own_metadata:
-            xml_object.set(attr, val_for_xml(attr))
+            # don't want e.g. data_dir
+            if attr not in self.metadata_to_strip:
+                xml_object.set(attr, val_for_xml(attr))

-        # Write the actual contents to a file
+        # Write the definition to a file
        filepath = self.__class__._format_filepath(self.category, self.url_name)
        resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True)
-
        with resource_fs.open(filepath, 'w') as file:
            file.write(etree.tostring(xml_object, pretty_print=True))

@@ -271,6 +300,12 @@ class XmlDescriptor(XModuleDescriptor):
        record_object = etree.Element(self.category)
        record_object.set('url_name', self.url_name)

+        # Special case for course pointers:
+        if self.category == 'course':
+            # add org and course attributes on the pointer tag
+            record_object.set('org', self.location.org)
+            record_object.set('course', self.location.course)
+
        return etree.tostring(record_object, pretty_print=True)

    def definition_to_xml(self, resource_fs):