From f9476ab4ff189d35e4dd55629fa5796695b01428 Mon Sep 17 00:00:00 2001 From: Qubad786 <muhammadrehan69@gmail.com> Date: Mon, 21 May 2018 18:35:38 +0500 Subject: [PATCH] Update old transcripts metadata present in video xml with the new transcripts for backward compatibility. --- .../lib/xmodule/xmodule/tests/test_video.py | 20 +++++- .../xmodule/video_module/video_module.py | 47 +++++++++---- common/lib/xmodule/xmodule/xml_module.py | 21 ++++-- .../courseware/tests/test_video_mongo.py | 70 ++++++++++++++++++- requirements/edx/base.in | 2 +- requirements/edx/base.txt | 2 +- requirements/edx/development.txt | 2 +- requirements/edx/github.in | 2 + requirements/edx/testing.txt | 5 +- 9 files changed, 144 insertions(+), 27 deletions(-) diff --git a/common/lib/xmodule/xmodule/tests/test_video.py b/common/lib/xmodule/xmodule/tests/test_video.py index 0e2db392a2d..1addb314513 100644 --- a/common/lib/xmodule/xmodule/tests/test_video.py +++ b/common/lib/xmodule/xmodule/tests/test_video.py @@ -716,7 +716,12 @@ class VideoExportTestCase(VideoDescriptorTestBase): Test that we write the correct XML on export. """ edx_video_id = u'test_edx_video_id' - mock_val_api.export_to_xml = Mock(return_value=etree.Element('video_asset')) + mock_val_api.export_to_xml = Mock( + return_value=dict( + xml=etree.Element('video_asset'), + transcripts={} + ) + ) self.descriptor.youtube_id_0_75 = 'izygArpw-Qo' self.descriptor.youtube_id_1_0 = 'p2Q6BrNhdh8' self.descriptor.youtube_id_1_25 = '1EeWXzPdhSA' @@ -736,14 +741,23 @@ class VideoExportTestCase(VideoDescriptorTestBase): xml = self.descriptor.definition_to_xml(self.file_system) parser = etree.XMLParser(remove_blank_text=True) xml_string = '''\ - <video url_name="SampleProblem" start_time="0:00:01" youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" show_captions="false" end_time="0:01:00" download_video="true" download_track="true"> + <video + url_name="SampleProblem" + start_time="0:00:01" + show_captions="false" + end_time="0:01:00" + download_video="true" + download_track="true" + youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" + transcripts='{"ge": "german_translation.srt", "ua": "ukrainian_translation.srt"}' + > <source src="http://www.example.com/source.mp4"/> <source src="http://www.example.com/source1.ogg"/> <track src="http://www.example.com/track"/> <handout src="http://www.example.com/handout"/> + <video_asset /> <transcript language="ge" src="german_translation.srt" /> <transcript language="ua" src="ukrainian_translation.srt" /> - <video_asset /> </video> ''' expected = etree.XML(xml_string, parser=parser) diff --git a/common/lib/xmodule/xmodule/video_module/video_module.py b/common/lib/xmodule/xmodule/video_module/video_module.py index 34dc33938da..ee5e29be1e6 100644 --- a/common/lib/xmodule/xmodule/video_module/video_module.py +++ b/common/lib/xmodule/xmodule/video_module/video_module.py @@ -739,13 +739,9 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler ele.set('src', self.handout) xml.append(ele) + transcripts = {} if self.transcripts is not None: - # sorting for easy testing of resulting xml - for transcript_language in sorted(self.transcripts.keys()): - ele = etree.Element('transcript') - ele.set('language', transcript_language) - ele.set('src', self.transcripts[transcript_language]) - xml.append(ele) + transcripts.update(self.transcripts) edx_video_id = clean_video_id(self.edx_video_id) if edxval_api and edx_video_id: @@ -753,17 +749,42 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler # Create static dir if not created earlier. resource_fs.makedirs(EXPORT_IMPORT_STATIC_DIR, recreate=True) - xml.append( - edxval_api.export_to_xml( - video_id=edx_video_id, - resource_fs=resource_fs, - static_dir=EXPORT_IMPORT_STATIC_DIR, - course_id=unicode(self.runtime.course_id.for_branch(None)) - ) + # Backward compatible exports + # edxval exports new transcripts into the course OLX and returns a transcript + # files map so that it can also be rewritten in old transcript metadata fields + # (i.e. `self.transcripts`) on import and older open-releases (<= ginkgo), + # who do not have deprecated contentstore yet, can also import and use new-style + # transcripts into their openedX instances. + + exported_metadata = edxval_api.export_to_xml( + video_id=edx_video_id, + resource_fs=resource_fs, + static_dir=EXPORT_IMPORT_STATIC_DIR, + course_id=unicode(self.runtime.course_id.for_branch(None)) ) + # Update xml with edxval metadata + xml.append(exported_metadata['xml']) + + # we don't need sub if english transcript + # is also in new transcripts. + new_transcripts = exported_metadata['transcripts'] + transcripts.update(new_transcripts) + if new_transcripts.get('en'): + xml.set('sub', '') + + # Update `transcripts` attribute in the xml + xml.set('transcripts', json.dumps(transcripts)) + except edxval_api.ValVideoNotFoundError: pass + # Sorting transcripts for easy testing of resulting xml + for transcript_language in sorted(transcripts.keys()): + ele = etree.Element('transcript') + ele.set('language', transcript_language) + ele.set('src', transcripts[transcript_language]) + xml.append(ele) + # handle license specifically self.add_license_to_xml(xml) diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index b56847532f4..1c6e16fe018 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -139,6 +139,14 @@ class XmlParserMixin(object): # Used for storing xml attributes between import and export, for roundtrips 'xml_attributes') + # This is a categories to fields map that contains the block category specific fields which should not be + # cleaned and/or override while adding xml to node. + metadata_to_not_to_clean = { + # A category `video` having `sub` and `transcripts` fields + # which should not be cleaned/override in an xml object. + 'video': ('sub', 'transcripts') + } + metadata_to_export_to_policy = ('discussion_topics',) @staticmethod @@ -165,13 +173,15 @@ class XmlParserMixin(object): raise NotImplementedError("%s does not implement definition_from_xml" % cls.__name__) @classmethod - def clean_metadata_from_xml(cls, xml_object): + def clean_metadata_from_xml(cls, xml_object, excluded_fields=()): """ Remove any attribute named for a field with scope Scope.settings from the supplied xml_object """ for field_name, field in cls.fields.items(): - if field.scope == Scope.settings and xml_object.get(field_name) is not None: + if (field.scope == Scope.settings + and field_name not in excluded_fields + and xml_object.get(field_name) is not None): del xml_object.attrib[field_name] @classmethod @@ -448,7 +458,8 @@ class XmlParserMixin(object): aside.add_xml_to_node(aside_node) xml_object.append(aside_node) - self.clean_metadata_from_xml(xml_object) + not_to_clean_fields = self.metadata_to_not_to_clean.get(self.category, ()) + self.clean_metadata_from_xml(xml_object, excluded_fields=not_to_clean_fields) # Set the tag on both nodes so we get the file path right. xml_object.tag = self.category @@ -457,7 +468,9 @@ class XmlParserMixin(object): # Add the non-inherited metadata for attr in sorted(own_metadata(self)): # don't want e.g. data_dir - if attr not in self.metadata_to_strip and attr not in self.metadata_to_export_to_policy: + if (attr not in self.metadata_to_strip + and attr not in self.metadata_to_export_to_policy + and attr not in not_to_clean_fields): val = serialize_field(self._field_data.get(self, attr)) try: xml_object.set(attr, val) diff --git a/lms/djangoapps/courseware/tests/test_video_mongo.py b/lms/djangoapps/courseware/tests/test_video_mongo.py index c799fb219c5..ca6450e1a90 100644 --- a/lms/djangoapps/courseware/tests/test_video_mongo.py +++ b/lms/djangoapps/courseware/tests/test_video_mongo.py @@ -20,6 +20,7 @@ from fs.path import combine from edxval.api import ( ValCannotCreateError, ValVideoNotFoundError, + create_video_transcript, create_or_update_video_transcript, create_profile, create_video, @@ -54,7 +55,7 @@ MODULESTORES = { ModuleStoreEnum.Type.split: TEST_DATA_SPLIT_MODULESTORE, } -TRANSCRIPT_FILE_SRT_DATA = """ +TRANSCRIPT_FILE_SRT_DATA = u""" 1 00:00:14,370 --> 00:00:16,530 I am overwatch. @@ -1589,16 +1590,19 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase): actual = self.descriptor.definition_to_xml(resource_fs=self.file_system) expected_str = """ - <video download_video="false" url_name="SampleProblem"> + <video download_video="false" url_name="SampleProblem" transcripts='{transcripts}'> <video_asset client_video_id="test_client_video_id" duration="111.0" image=""> <encoded_video profile="mobile" url="http://example.com/video" file_size="222" bitrate="333"/> <transcripts> <transcript file_format="srt" language_code="{language_code}" provider="Cielo24"/> </transcripts> </video_asset> + <transcript language="{language_code}" src="{transcript_file}"/> </video> """.format( - language_code=language_code + language_code=language_code, + transcript_file=transcript_file_name, + transcripts=json.dumps({language_code: transcript_file_name}) ) parser = etree.XMLParser(remove_blank_text=True) expected = etree.XML(expected_str, parser=parser) @@ -1612,6 +1616,66 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase): transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language_code) self.assertEqual(transcript['content'], expected_transcript_content) + @ddt.data( + (['en', 'da'], 'test_sub', ''), + (['da'], 'test_sub', 'test_sub') + ) + @ddt.unpack + def test_export_val_transcripts_backward_compatibility(self, languages, sub, expected_sub): + """ + Tests new transcripts export for backward compatibility. + """ + self.descriptor.edx_video_id = 'test_video_id' + self.descriptor.sub = sub + + # Setup VAL encode profile, video and transcripts + create_profile('mobile') + create_video({ + 'edx_video_id': self.descriptor.edx_video_id, + 'client_video_id': 'test_client_video_id', + 'duration': 111.0, + 'status': 'dummy', + 'encoded_videos': [{ + 'profile': 'mobile', + 'url': 'http://example.com/video', + 'file_size': 222, + 'bitrate': 333, + }], + }) + + for language in languages: + create_video_transcript( + video_id=self.descriptor.edx_video_id, + language_code=language, + file_format=Transcript.SRT, + content=ContentFile(TRANSCRIPT_FILE_SRT_DATA) + ) + + # Export the video module into xml + video_xml = self.descriptor.definition_to_xml(resource_fs=self.file_system) + + # Assert `sub` and `transcripts` attribute in the xml + self.assertEqual(video_xml.get('sub'), expected_sub) + + expected_transcripts = { + language: "{edx_video_id}-{language}.srt".format( + edx_video_id=self.descriptor.edx_video_id, + language=language + ) + for language in languages + } + self.assertDictEqual(json.loads(video_xml.get('transcripts')), expected_transcripts) + + # Assert transcript content from course OLX + for language in languages: + expected_transcript_path = combine( + combine(self.temp_dir, EXPORT_IMPORT_COURSE_DIR), + combine(EXPORT_IMPORT_STATIC_DIR, expected_transcripts[language]) + ) + expected_transcript_content = File(open(expected_transcript_path)).read() + transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language) + self.assertEqual(transcript['content'], expected_transcript_content) + def test_export_val_data_not_found(self): """ Tests that external video export works as expected. diff --git a/requirements/edx/base.in b/requirements/edx/base.in index 9b5e6593d0e..eb2374cb820 100644 --- a/requirements/edx/base.in +++ b/requirements/edx/base.in @@ -82,7 +82,7 @@ edx-rest-api-client edx-search edx-submissions edx-user-state-client -edxval +#edxval enum34==1.1.6 # Backport of Enum from Python 3.4+ event-tracking feedparser==5.1.3 diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index f862b540049..fa178fcd2cd 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -127,7 +127,7 @@ edx-rest-api-client==1.7.1 edx-search==1.1.0 edx-submissions==2.0.12 edx-user-state-client==1.0.4 -edxval==0.1.14 +#edxval==0.1.14 elasticsearch==1.9.0 # via edx-search enum34==1.1.6 event-tracking==0.2.4 diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index d70b10c8544..27bb82080da 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -149,7 +149,7 @@ edx-search==1.1.0 edx-sphinx-theme==1.3.0 edx-submissions==2.0.12 edx-user-state-client==1.0.4 -edxval==0.1.14 +#edxval==0.1.14 elasticsearch==1.9.0 enum34==1.1.6 event-tracking==0.2.4 diff --git a/requirements/edx/github.in b/requirements/edx/github.in index 798bec4791c..6bda38b3214 100644 --- a/requirements/edx/github.in +++ b/requirements/edx/github.in @@ -98,6 +98,8 @@ -e git+https://github.com/edx-solutions/xblock-google-drive.git@138e6fa0bf3a2013e904a085b9fed77dab7f3f21#egg=xblock-google-drive -e git+https://github.com/edx/xblock-lti-consumer.git@v1.1.8#egg=lti_consumer-xblock==1.1.8 +-e git+https://github.com/edx/edx-val.git@mrehan/backward-transcript-export#egg=edxval + # Third Party XBlocks -e git+https://github.com/mitodl/edx-sga.git@6b2f7aa2a18206023c8407e2c46f86d4b4c3ac96#egg=edx-sga==0.8.0 diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 7df6c40b912..ecccd576780 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -41,6 +41,9 @@ git+https://github.com/edx-solutions/xblock-drag-and-drop-v2@v2.1.6#egg=xblock-d git+https://github.com/open-craft/xblock-poll@7ba819b968fe8faddb78bb22e1fe7637005eb414#egg=xblock-poll==1.2.7 git+https://github.com/edx/xblock-utils.git@v1.1.1#egg=xblock-utils==1.1.1 -e common/lib/xmodule + +-e git+https://github.com/edx/edx-val.git@mrehan/backward-transcript-export#egg=edxval + amqp==1.4.9 analytics-python==1.1.0 anyjson==0.3.3 @@ -143,7 +146,7 @@ edx-rest-api-client==1.7.1 edx-search==1.1.0 edx-submissions==2.0.12 edx-user-state-client==1.0.4 -edxval==0.1.14 +#edxval==0.1.14 elasticsearch==1.9.0 enum34==1.1.6 event-tracking==0.2.4 -- GitLab