From f9476ab4ff189d35e4dd55629fa5796695b01428 Mon Sep 17 00:00:00 2001
From: Qubad786 <muhammadrehan69@gmail.com>
Date: Mon, 21 May 2018 18:35:38 +0500
Subject: [PATCH] Update old transcripts metadata present in video xml with the
 new transcripts for backward compatibility.

---
 .../lib/xmodule/xmodule/tests/test_video.py   | 20 +++++-
 .../xmodule/video_module/video_module.py      | 47 +++++++++----
 common/lib/xmodule/xmodule/xml_module.py      | 21 ++++--
 .../courseware/tests/test_video_mongo.py      | 70 ++++++++++++++++++-
 requirements/edx/base.in                      |  2 +-
 requirements/edx/base.txt                     |  2 +-
 requirements/edx/development.txt              |  2 +-
 requirements/edx/github.in                    |  2 +
 requirements/edx/testing.txt                  |  5 +-
 9 files changed, 144 insertions(+), 27 deletions(-)

diff --git a/common/lib/xmodule/xmodule/tests/test_video.py b/common/lib/xmodule/xmodule/tests/test_video.py
index 0e2db392a2d..1addb314513 100644
--- a/common/lib/xmodule/xmodule/tests/test_video.py
+++ b/common/lib/xmodule/xmodule/tests/test_video.py
@@ -716,7 +716,12 @@ class VideoExportTestCase(VideoDescriptorTestBase):
         Test that we write the correct XML on export.
         """
         edx_video_id = u'test_edx_video_id'
-        mock_val_api.export_to_xml = Mock(return_value=etree.Element('video_asset'))
+        mock_val_api.export_to_xml = Mock(
+            return_value=dict(
+                xml=etree.Element('video_asset'),
+                transcripts={}
+            )
+        )
         self.descriptor.youtube_id_0_75 = 'izygArpw-Qo'
         self.descriptor.youtube_id_1_0 = 'p2Q6BrNhdh8'
         self.descriptor.youtube_id_1_25 = '1EeWXzPdhSA'
@@ -736,14 +741,23 @@ class VideoExportTestCase(VideoDescriptorTestBase):
         xml = self.descriptor.definition_to_xml(self.file_system)
         parser = etree.XMLParser(remove_blank_text=True)
         xml_string = '''\
-         <video url_name="SampleProblem" start_time="0:00:01" youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" show_captions="false" end_time="0:01:00" download_video="true" download_track="true">
+         <video
+            url_name="SampleProblem"
+            start_time="0:00:01"
+            show_captions="false"
+            end_time="0:01:00"
+            download_video="true"
+            download_track="true"
+            youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8"
+            transcripts='{"ge": "german_translation.srt", "ua": "ukrainian_translation.srt"}'
+         >
            <source src="http://www.example.com/source.mp4"/>
            <source src="http://www.example.com/source1.ogg"/>
            <track src="http://www.example.com/track"/>
            <handout src="http://www.example.com/handout"/>
+           <video_asset />
            <transcript language="ge" src="german_translation.srt" />
            <transcript language="ua" src="ukrainian_translation.srt" />
-           <video_asset />
          </video>
         '''
         expected = etree.XML(xml_string, parser=parser)
diff --git a/common/lib/xmodule/xmodule/video_module/video_module.py b/common/lib/xmodule/xmodule/video_module/video_module.py
index 34dc33938da..ee5e29be1e6 100644
--- a/common/lib/xmodule/xmodule/video_module/video_module.py
+++ b/common/lib/xmodule/xmodule/video_module/video_module.py
@@ -739,13 +739,9 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler
             ele.set('src', self.handout)
             xml.append(ele)
 
+        transcripts = {}
         if self.transcripts is not None:
-            # sorting for easy testing of resulting xml
-            for transcript_language in sorted(self.transcripts.keys()):
-                ele = etree.Element('transcript')
-                ele.set('language', transcript_language)
-                ele.set('src', self.transcripts[transcript_language])
-                xml.append(ele)
+            transcripts.update(self.transcripts)
 
         edx_video_id = clean_video_id(self.edx_video_id)
         if edxval_api and edx_video_id:
@@ -753,17 +749,42 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler
                 # Create static dir if not created earlier.
                 resource_fs.makedirs(EXPORT_IMPORT_STATIC_DIR, recreate=True)
 
-                xml.append(
-                    edxval_api.export_to_xml(
-                        video_id=edx_video_id,
-                        resource_fs=resource_fs,
-                        static_dir=EXPORT_IMPORT_STATIC_DIR,
-                        course_id=unicode(self.runtime.course_id.for_branch(None))
-                    )
+                # Backward compatible exports
+                # edxval exports new transcripts into the course OLX and returns a transcript
+                # files map so that it can also be rewritten in old transcript metadata fields
+                # (i.e. `self.transcripts`) on import and older open-releases (<= ginkgo),
+                # who do not have deprecated contentstore yet, can also import and use new-style
+                # transcripts into their openedX instances.
+
+                exported_metadata = edxval_api.export_to_xml(
+                    video_id=edx_video_id,
+                    resource_fs=resource_fs,
+                    static_dir=EXPORT_IMPORT_STATIC_DIR,
+                    course_id=unicode(self.runtime.course_id.for_branch(None))
                 )
+                # Update xml with edxval metadata
+                xml.append(exported_metadata['xml'])
+
+                # we don't need sub if english transcript
+                # is also in new transcripts.
+                new_transcripts = exported_metadata['transcripts']
+                transcripts.update(new_transcripts)
+                if new_transcripts.get('en'):
+                    xml.set('sub', '')
+
+                # Update `transcripts` attribute in the xml
+                xml.set('transcripts', json.dumps(transcripts))
+
             except edxval_api.ValVideoNotFoundError:
                 pass
 
+            # Sorting transcripts for easy testing of resulting xml
+            for transcript_language in sorted(transcripts.keys()):
+                ele = etree.Element('transcript')
+                ele.set('language', transcript_language)
+                ele.set('src', transcripts[transcript_language])
+                xml.append(ele)
+
         # handle license specifically
         self.add_license_to_xml(xml)
 
diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py
index b56847532f4..1c6e16fe018 100644
--- a/common/lib/xmodule/xmodule/xml_module.py
+++ b/common/lib/xmodule/xmodule/xml_module.py
@@ -139,6 +139,14 @@ class XmlParserMixin(object):
                          # Used for storing xml attributes between import and export, for roundtrips
                          'xml_attributes')
 
+    # This is a categories to fields map that contains the block category specific fields which should not be
+    # cleaned and/or override while adding xml to node.
+    metadata_to_not_to_clean = {
+        # A category `video` having `sub` and `transcripts` fields
+        # which should not be cleaned/override in an xml object.
+        'video': ('sub', 'transcripts')
+    }
+
     metadata_to_export_to_policy = ('discussion_topics',)
 
     @staticmethod
@@ -165,13 +173,15 @@ class XmlParserMixin(object):
         raise NotImplementedError("%s does not implement definition_from_xml" % cls.__name__)
 
     @classmethod
-    def clean_metadata_from_xml(cls, xml_object):
+    def clean_metadata_from_xml(cls, xml_object, excluded_fields=()):
         """
         Remove any attribute named for a field with scope Scope.settings from the supplied
         xml_object
         """
         for field_name, field in cls.fields.items():
-            if field.scope == Scope.settings and xml_object.get(field_name) is not None:
+            if (field.scope == Scope.settings
+                    and field_name not in excluded_fields
+                    and xml_object.get(field_name) is not None):
                 del xml_object.attrib[field_name]
 
     @classmethod
@@ -448,7 +458,8 @@ class XmlParserMixin(object):
                 aside.add_xml_to_node(aside_node)
                 xml_object.append(aside_node)
 
-        self.clean_metadata_from_xml(xml_object)
+        not_to_clean_fields = self.metadata_to_not_to_clean.get(self.category, ())
+        self.clean_metadata_from_xml(xml_object, excluded_fields=not_to_clean_fields)
 
         # Set the tag on both nodes so we get the file path right.
         xml_object.tag = self.category
@@ -457,7 +468,9 @@ class XmlParserMixin(object):
         # Add the non-inherited metadata
         for attr in sorted(own_metadata(self)):
             # don't want e.g. data_dir
-            if attr not in self.metadata_to_strip and attr not in self.metadata_to_export_to_policy:
+            if (attr not in self.metadata_to_strip
+                    and attr not in self.metadata_to_export_to_policy
+                    and attr not in not_to_clean_fields):
                 val = serialize_field(self._field_data.get(self, attr))
                 try:
                     xml_object.set(attr, val)
diff --git a/lms/djangoapps/courseware/tests/test_video_mongo.py b/lms/djangoapps/courseware/tests/test_video_mongo.py
index c799fb219c5..ca6450e1a90 100644
--- a/lms/djangoapps/courseware/tests/test_video_mongo.py
+++ b/lms/djangoapps/courseware/tests/test_video_mongo.py
@@ -20,6 +20,7 @@ from fs.path import combine
 from edxval.api import (
     ValCannotCreateError,
     ValVideoNotFoundError,
+    create_video_transcript,
     create_or_update_video_transcript,
     create_profile,
     create_video,
@@ -54,7 +55,7 @@ MODULESTORES = {
     ModuleStoreEnum.Type.split: TEST_DATA_SPLIT_MODULESTORE,
 }
 
-TRANSCRIPT_FILE_SRT_DATA = """
+TRANSCRIPT_FILE_SRT_DATA = u"""
 1
 00:00:14,370 --> 00:00:16,530
 I am overwatch.
@@ -1589,16 +1590,19 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
 
         actual = self.descriptor.definition_to_xml(resource_fs=self.file_system)
         expected_str = """
-            <video download_video="false" url_name="SampleProblem">
+            <video download_video="false" url_name="SampleProblem" transcripts='{transcripts}'>
                 <video_asset client_video_id="test_client_video_id" duration="111.0" image="">
                     <encoded_video profile="mobile" url="http://example.com/video" file_size="222" bitrate="333"/>
                     <transcripts>
                         <transcript file_format="srt" language_code="{language_code}" provider="Cielo24"/>
                     </transcripts>
                 </video_asset>
+                <transcript language="{language_code}" src="{transcript_file}"/>
             </video>
         """.format(
-            language_code=language_code
+            language_code=language_code,
+            transcript_file=transcript_file_name,
+            transcripts=json.dumps({language_code: transcript_file_name})
         )
         parser = etree.XMLParser(remove_blank_text=True)
         expected = etree.XML(expected_str, parser=parser)
@@ -1612,6 +1616,66 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
         transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language_code)
         self.assertEqual(transcript['content'], expected_transcript_content)
 
+    @ddt.data(
+        (['en', 'da'], 'test_sub', ''),
+        (['da'], 'test_sub', 'test_sub')
+    )
+    @ddt.unpack
+    def test_export_val_transcripts_backward_compatibility(self, languages, sub, expected_sub):
+        """
+        Tests new transcripts export for backward compatibility.
+        """
+        self.descriptor.edx_video_id = 'test_video_id'
+        self.descriptor.sub = sub
+
+        # Setup VAL encode profile, video and transcripts
+        create_profile('mobile')
+        create_video({
+            'edx_video_id': self.descriptor.edx_video_id,
+            'client_video_id': 'test_client_video_id',
+            'duration': 111.0,
+            'status': 'dummy',
+            'encoded_videos': [{
+                'profile': 'mobile',
+                'url': 'http://example.com/video',
+                'file_size': 222,
+                'bitrate': 333,
+            }],
+        })
+
+        for language in languages:
+            create_video_transcript(
+                video_id=self.descriptor.edx_video_id,
+                language_code=language,
+                file_format=Transcript.SRT,
+                content=ContentFile(TRANSCRIPT_FILE_SRT_DATA)
+            )
+
+        # Export the video module into xml
+        video_xml = self.descriptor.definition_to_xml(resource_fs=self.file_system)
+
+        # Assert `sub` and `transcripts` attribute in the xml
+        self.assertEqual(video_xml.get('sub'), expected_sub)
+
+        expected_transcripts = {
+            language: "{edx_video_id}-{language}.srt".format(
+                edx_video_id=self.descriptor.edx_video_id,
+                language=language
+            )
+            for language in languages
+        }
+        self.assertDictEqual(json.loads(video_xml.get('transcripts')), expected_transcripts)
+
+        # Assert transcript content from course OLX
+        for language in languages:
+            expected_transcript_path = combine(
+                combine(self.temp_dir, EXPORT_IMPORT_COURSE_DIR),
+                combine(EXPORT_IMPORT_STATIC_DIR, expected_transcripts[language])
+            )
+            expected_transcript_content = File(open(expected_transcript_path)).read()
+            transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language)
+            self.assertEqual(transcript['content'], expected_transcript_content)
+
     def test_export_val_data_not_found(self):
         """
         Tests that external video export works as expected.
diff --git a/requirements/edx/base.in b/requirements/edx/base.in
index 9b5e6593d0e..eb2374cb820 100644
--- a/requirements/edx/base.in
+++ b/requirements/edx/base.in
@@ -82,7 +82,7 @@ edx-rest-api-client
 edx-search
 edx-submissions
 edx-user-state-client
-edxval
+#edxval
 enum34==1.1.6                       # Backport of Enum from Python 3.4+
 event-tracking
 feedparser==5.1.3
diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt
index f862b540049..fa178fcd2cd 100644
--- a/requirements/edx/base.txt
+++ b/requirements/edx/base.txt
@@ -127,7 +127,7 @@ edx-rest-api-client==1.7.1
 edx-search==1.1.0
 edx-submissions==2.0.12
 edx-user-state-client==1.0.4
-edxval==0.1.14
+#edxval==0.1.14
 elasticsearch==1.9.0      # via edx-search
 enum34==1.1.6
 event-tracking==0.2.4
diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt
index d70b10c8544..27bb82080da 100644
--- a/requirements/edx/development.txt
+++ b/requirements/edx/development.txt
@@ -149,7 +149,7 @@ edx-search==1.1.0
 edx-sphinx-theme==1.3.0
 edx-submissions==2.0.12
 edx-user-state-client==1.0.4
-edxval==0.1.14
+#edxval==0.1.14
 elasticsearch==1.9.0
 enum34==1.1.6
 event-tracking==0.2.4
diff --git a/requirements/edx/github.in b/requirements/edx/github.in
index 798bec4791c..6bda38b3214 100644
--- a/requirements/edx/github.in
+++ b/requirements/edx/github.in
@@ -98,6 +98,8 @@
 -e git+https://github.com/edx-solutions/xblock-google-drive.git@138e6fa0bf3a2013e904a085b9fed77dab7f3f21#egg=xblock-google-drive
 -e git+https://github.com/edx/xblock-lti-consumer.git@v1.1.8#egg=lti_consumer-xblock==1.1.8
 
+-e git+https://github.com/edx/edx-val.git@mrehan/backward-transcript-export#egg=edxval
+
 # Third Party XBlocks
 
 -e git+https://github.com/mitodl/edx-sga.git@6b2f7aa2a18206023c8407e2c46f86d4b4c3ac96#egg=edx-sga==0.8.0
diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt
index 7df6c40b912..ecccd576780 100644
--- a/requirements/edx/testing.txt
+++ b/requirements/edx/testing.txt
@@ -41,6 +41,9 @@ git+https://github.com/edx-solutions/xblock-drag-and-drop-v2@v2.1.6#egg=xblock-d
 git+https://github.com/open-craft/xblock-poll@7ba819b968fe8faddb78bb22e1fe7637005eb414#egg=xblock-poll==1.2.7
 git+https://github.com/edx/xblock-utils.git@v1.1.1#egg=xblock-utils==1.1.1
 -e common/lib/xmodule
+
+-e git+https://github.com/edx/edx-val.git@mrehan/backward-transcript-export#egg=edxval
+
 amqp==1.4.9
 analytics-python==1.1.0
 anyjson==0.3.3
@@ -143,7 +146,7 @@ edx-rest-api-client==1.7.1
 edx-search==1.1.0
 edx-submissions==2.0.12
 edx-user-state-client==1.0.4
-edxval==0.1.14
+#edxval==0.1.14
 elasticsearch==1.9.0
 enum34==1.1.6
 event-tracking==0.2.4
-- 
GitLab