From da2964421e37eb7da51f85375be1cee52d566974 Mon Sep 17 00:00:00 2001 From: Calen Pennington <cale@edx.org> Date: Fri, 27 Mar 2015 14:19:01 -0400 Subject: [PATCH] Extract a pure-XBlock version of XmlDescriptor --- common/lib/xmodule/xmodule/xml_module.py | 250 ++++++++++++++++------- 1 file changed, 173 insertions(+), 77 deletions(-) diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index 6e18effd83a..b53d0550af9 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -13,12 +13,16 @@ from xmodule.modulestore import EdxJSONEncoder import dogstats_wrapper as dog_stats_api +from lxml.etree import ( # pylint: disable=no-name-in-module + Element, ElementTree, XMLParser, +) + log = logging.getLogger(__name__) # assume all XML files are persisted as utf-8. -edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, - remove_comments=True, remove_blank_text=True, - encoding='utf-8') +EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False, + remove_comments=True, remove_blank_text=True, + encoding='utf-8') def name_to_pathname(name): @@ -53,16 +57,6 @@ def is_pointer_tag(xml_obj): return len(xml_obj) == 0 and actual_attr == expected_attr and not has_text -def get_metadata_from_xml(xml_object, remove=True): - meta = xml_object.find('meta') - if meta is None: - return '' - dmdata = meta.text - if remove: - xml_object.remove(meta) - return dmdata - - def serialize_field(value): """ Return a string version of the value (where value is the JSON-formatted, internally stored value). @@ -108,16 +102,30 @@ def deserialize_field(field, value): return value -class XmlDescriptor(XModuleDescriptor): +class XmlParserMixin(object): """ - Mixin class for standardized parsing of from xml + Class containing XML parsing functionality shared between XBlock and XModuleDescriptor. """ + # Extension to append to filename paths + filename_extension = 'xml' xml_attributes = Dict(help="Map of unhandled xml attributes, used only for storage between import and export", default={}, scope=Scope.settings) - # Extension to append to filename paths - filename_extension = 'xml' + # VS[compat]. Backwards compatibility code that can go away after + # importing 2012 courses. + # A set of metadata key conversions that we want to make + metadata_translations = { + 'slug': 'url_name', + 'name': 'display_name', + } + + @classmethod + def _translate(cls, key): + """ + VS[compat] + """ + return cls.metadata_translations.get(key, key) # The attributes will be removed from the definition xml passed # to definition_from_xml, and from the xml returned by definition_to_xml @@ -135,6 +143,19 @@ class XmlDescriptor(XModuleDescriptor): metadata_to_export_to_policy = ('discussion_topics', 'checklists') + @staticmethod + def _get_metadata_from_xml(xml_object, remove=True): + """ + Extract the metadata from the XML. + """ + meta = xml_object.find('meta') + if meta is None: + return '' + dmdata = meta.text + if remove: + xml_object.remove(meta) + return dmdata + @classmethod def definition_from_xml(cls, xml_object, system): """ @@ -163,16 +184,16 @@ class XmlDescriptor(XModuleDescriptor): Returns an lxml Element """ - return etree.parse(file_object, parser=edx_xml_parser).getroot() + return etree.parse(file_object, parser=EDX_XML_PARSER).getroot() # pylint: disable=no-member @classmethod def load_file(cls, filepath, fs, def_id): # pylint: disable=invalid-name - ''' + """ Open the specified file in fs, and call cls.file_to_xml on it, returning the lxml object. Add details and reraise on error. - ''' + """ try: with fs.open(filepath) as xml_file: return cls.file_to_xml(xml_file) @@ -184,7 +205,7 @@ class XmlDescriptor(XModuleDescriptor): @classmethod def load_definition(cls, xml_object, system, def_id, id_generator): - ''' + """ Load a descriptor definition from the specified xml_object. Subclasses should not need to override this except in special cases (e.g. html module) @@ -194,7 +215,7 @@ class XmlDescriptor(XModuleDescriptor): system: the modulestore system (aka, runtime) which accesses data and provides access to services def_id: the definition id for the block--used to compute the usage id and asides ids id_generator: used to generate the usage_id - ''' + """ # VS[compat] -- the filename attr should go away once everything is # converted. (note: make sure html files still work once this goes away) @@ -234,7 +255,7 @@ class XmlDescriptor(XModuleDescriptor): # Add the attributes from the pointer node definition_xml.attrib.update(xml_object.attrib) - definition_metadata = get_metadata_from_xml(definition_xml) + definition_metadata = cls._get_metadata_from_xml(definition_xml) cls.clean_metadata_from_xml(definition_xml) definition, children = cls.definition_from_xml(definition_xml, system) if definition_metadata: @@ -289,42 +310,51 @@ class XmlDescriptor(XModuleDescriptor): metadata[attr] = value @classmethod - def from_xml(cls, xml_data, system, id_generator): + def parse_xml(cls, node, runtime, keys, id_generator): # pylint: disable=unused-argument """ - Creates an instance of this descriptor from the supplied xml_data. - This may be overridden by subclasses + Use `node` to construct a new block. - xml_data: A string of xml that will be translated into data and children for - this module - system: A DescriptorSystem for interacting with external resources - """ + Arguments: + node (etree.Element): The xml node to parse into an xblock. + + runtime (:class:`.Runtime`): The runtime to use while parsing. + + keys (:class:`.ScopeIds`): The keys identifying where this block + will store its data. + + id_generator (:class:`.IdGenerator`): An object that will allow the + runtime to generate correct definition and usage ids for + children of this block. + + Returns (XBlock): The newly parsed XBlock - xml_object = etree.fromstring(xml_data) + """ # VS[compat] -- just have the url_name lookup, once translation is done - url_name = xml_object.get('url_name', xml_object.get('slug')) - def_id = id_generator.create_definition(xml_object.tag, url_name) + url_name = node.get('url_name', node.get('slug')) + def_id = id_generator.create_definition(node.tag, url_name) usage_id = id_generator.create_usage(def_id) # VS[compat] -- detect new-style each-in-a-file mode - if is_pointer_tag(xml_object): + if is_pointer_tag(node): # new style: # read the actual definition file--named using url_name.replace(':','/') - filepath = cls._format_filepath(xml_object.tag, name_to_pathname(url_name)) - definition_xml = cls.load_file(filepath, system.resources_fs, def_id) - system.parse_asides(definition_xml, def_id, usage_id, id_generator) + filepath = cls._format_filepath(node.tag, name_to_pathname(url_name)) + definition_xml = cls.load_file(filepath, runtime.resources_fs, def_id) + runtime.parse_asides(definition_xml, def_id, usage_id, id_generator) else: filepath = None - definition_xml = xml_object + definition_xml = node dog_stats_api.increment( DEPRECATION_VSCOMPAT_EVENT, tags=["location:xmlparser_util_mixin_parse_xml"] ) - definition, children = cls.load_definition(definition_xml, system, def_id, id_generator) # note this removes metadata + # Note: removes metadata. + definition, children = cls.load_definition(definition_xml, runtime, def_id, id_generator) # VS[compat] -- make Ike's github preview links work in both old and # new file layouts - if is_pointer_tag(xml_object): + if is_pointer_tag(node): # new style -- contents actually at filepath definition['filename'] = [filepath, filepath] @@ -341,7 +371,7 @@ class XmlDescriptor(XModuleDescriptor): metadata['definition_metadata_err'] = str(err) # Set/override any metadata specified by policy - cls.apply_policy(metadata, system.get_policy(usage_id)) + cls.apply_policy(metadata, runtime.get_policy(usage_id)) field_data = {} field_data.update(metadata) @@ -352,10 +382,10 @@ class XmlDescriptor(XModuleDescriptor): kvs = InheritanceKeyValueStore(initial_values=field_data) field_data = KvsFieldData(kvs) - return system.construct_xblock_from_class( + return runtime.construct_xblock_from_class( cls, # We're loading a descriptor, so student_id is meaningless - ScopeIds(None, xml_object.tag, def_id, usage_id), + ScopeIds(None, node.tag, def_id, usage_id), field_data, ) @@ -374,32 +404,17 @@ class XmlDescriptor(XModuleDescriptor): """ return True - def export_to_xml(self, resource_fs): + def add_xml_to_node(self, node): """ - Returns an xml string representing this module, and all modules - underneath it. May also write required resources out to resource_fs - - Assumes that modules have single parentage (that no module appears twice - in the same course), and that it is thus safe to nest modules as xml - children as appropriate. - - The returned XML should be able to be parsed back into an identical - XModuleDescriptor using the from_xml method with the same system, org, - and course - - resource_fs is a pyfilesystem object (from the fs package) + For exporting, set data on `node` from ourselves. """ - - # Set up runtime.export_fs so that it's available through future - # uses of the pure xblock add_xml_to_node api - self.runtime.export_fs = resource_fs - # Get the definition - xml_object = self.definition_to_xml(resource_fs) + xml_object = self.definition_to_xml(self.runtime.export_fs) self.clean_metadata_from_xml(xml_object) - # Set the tag so we get the file path right + # Set the tag on both nodes so we get the file path right. xml_object.tag = self.category + node.tag = self.category # Add the non-inherited metadata for attr in sorted(own_metadata(self)): @@ -422,24 +437,25 @@ class XmlDescriptor(XModuleDescriptor): # Write the definition to a file url_path = name_to_pathname(self.url_name) filepath = self._format_filepath(self.category, url_path) - resource_fs.makedir(os.path.dirname(filepath), recursive=True, allow_recreate=True) - with resource_fs.open(filepath, 'w') as fileobj: - fileobj.write(etree.tostring(xml_object, pretty_print=True, encoding='utf-8')) - - # And return just a pointer with the category and filename. - record_object = etree.Element(self.category) + self.runtime.export_fs.makedir(os.path.dirname(filepath), recursive=True, allow_recreate=True) + with self.runtime.export_fs.open(filepath, 'w') as fileobj: + ElementTree(xml_object).write(fileobj, pretty_print=True, encoding='utf-8') else: - record_object = xml_object + # Write all attributes from xml_object onto node + node.clear() + node.tag = xml_object.tag + node.text = xml_object.text + node.tail = xml_object.tail + node.attrib = xml_object.attrib + node.extend(xml_object) - record_object.set('url_name', self.url_name) + node.set('url_name', self.url_name) # Special case for course pointers: if self.category == 'course': # add org and course attributes on the pointer tag - record_object.set('org', self.location.org) - record_object.set('course', self.location.course) - - return etree.tostring(record_object, pretty_print=True, encoding='utf-8') + node.set('org', self.location.org) + node.set('course', self.location.course) def definition_to_xml(self, resource_fs): """ @@ -450,6 +466,86 @@ class XmlDescriptor(XModuleDescriptor): @property def non_editable_metadata_fields(self): - non_editable_fields = super(XmlDescriptor, self).non_editable_metadata_fields - non_editable_fields.append(XmlDescriptor.xml_attributes) + """ + Return a list of all metadata fields that cannot be edited. + """ + non_editable_fields = super(XmlParserMixin, self).non_editable_metadata_fields + non_editable_fields.append(XmlParserMixin.xml_attributes) return non_editable_fields + + +class XmlDescriptor(XmlParserMixin, XModuleDescriptor): # pylint: disable=abstract-method + """ + Mixin class for standardized parsing of XModule xml. + """ + @classmethod + def from_xml(cls, xml_data, system, id_generator): + """ + Creates an instance of this descriptor from the supplied xml_data. + This may be overridden by subclasses. + + Args: + xml_data (str): A string of xml that will be translated into data and children + for this module + + system (:class:`.XMLParsingSystem): + + id_generator (:class:`xblock.runtime.IdGenerator`): Used to generate the + usage_ids and definition_ids when loading this xml + + """ + # Shim from from_xml to the parse_xml defined in XmlParserMixin. + # This only exists to satisfy subclasses that both: + # a) define from_xml themselves + # b) call super(..).from_xml(..) + return super(XmlDescriptor, cls).parse_xml( + etree.fromstring(xml_data), # pylint: disable=no-member + system, + None, # This is ignored by XmlParserMixin + id_generator, + ) + + @classmethod + def parse_xml(cls, node, runtime, keys, id_generator): + """ + Interpret the parsed XML in `node`, creating an XModuleDescriptor. + """ + if cls.from_xml != XmlDescriptor.from_xml: + # Skip the parse_xml from XmlParserMixin to get the shim parse_xml + # from XModuleDescriptor, which actually calls `from_xml`. + return super(XmlParserMixin, cls).parse_xml(node, runtime, keys, id_generator) # pylint: disable=bad-super-call + else: + return super(XmlDescriptor, cls).parse_xml(node, runtime, keys, id_generator) + + def export_to_xml(self, resource_fs): # pylint: disable=unused-argument + """ + Returns an xml string representing this module, and all modules + underneath it. May also write required resources out to resource_fs. + + Assumes that modules have single parentage (that no module appears twice + in the same course), and that it is thus safe to nest modules as xml + children as appropriate. + + The returned XML should be able to be parsed back into an identical + XModuleDescriptor using the from_xml method with the same system, org, + and course + """ + # Shim from export_to_xml to the add_xml_to_node defined in XmlParserMixin. + # This only exists to satisfy subclasses that both: + # a) define export_to_xml themselves + # b) call super(..).export_to_xml(..) + node = Element(self.category) + super(XmlDescriptor, self).add_xml_to_node(node) + return etree.tostring(node) # pylint: disable=no-member + + def add_xml_to_node(self, node): + """ + Export this :class:`XModuleDescriptor` as XML, by setting attributes on the provided + `node`. + """ + if self.export_to_xml != XmlDescriptor.export_to_xml: + # Skip the add_xml_to_node from XmlParserMixin to get the shim add_xml_to_node + # from XModuleDescriptor, which actually calls `export_to_xml`. + super(XmlParserMixin, self).add_xml_to_node(node) # pylint: disable=bad-super-call + else: + super(XmlDescriptor, self).add_xml_to_node(node) -- GitLab