From 36483f938d1c67bd57246726be469c232f5b006b Mon Sep 17 00:00:00 2001 From: Mushtaq Ali <mushtaque@edx.org> Date: Thu, 15 Feb 2018 15:37:43 +0500 Subject: [PATCH] Export video transcripts - EDUCATOR-1789 --- .../lib/xmodule/xmodule/tests/test_video.py | 35 +++++---- .../xmodule/video_module/video_module.py | 31 ++++---- .../courseware/tests/test_video_mongo.py | 72 +++++++++++++++---- lms/envs/test.py | 10 +++ 4 files changed, 110 insertions(+), 38 deletions(-) diff --git a/common/lib/xmodule/xmodule/tests/test_video.py b/common/lib/xmodule/xmodule/tests/test_video.py index 3ac7226e949..6a27491e993 100644 --- a/common/lib/xmodule/xmodule/tests/test_video.py +++ b/common/lib/xmodule/xmodule/tests/test_video.py @@ -15,8 +15,10 @@ the course, section, subsection, unit, etc. import os import unittest import datetime +import shutil from uuid import uuid4 +from tempfile import mkdtemp from lxml import etree from mock import ANY, Mock, patch, MagicMock import ddt @@ -24,6 +26,7 @@ import ddt from django.conf import settings from django.test.utils import override_settings +from fs.osfs import OSFS from opaque_keys.edx.locator import CourseLocator from opaque_keys.edx.keys import CourseKey from xblock.field_data import DictFieldData @@ -31,7 +34,7 @@ from xblock.fields import ScopeIds from xmodule.tests import get_test_descriptor_system from xmodule.validation import StudioValidationMessage -from xmodule.video_module import VideoDescriptor, create_youtube_string +from xmodule.video_module import VideoDescriptor, create_youtube_string, EXPORT_STATIC_DIR from xmodule.video_module.transcripts_utils import download_youtube_subs, save_to_store from . import LogicTest from .test_import import DummySystem @@ -672,11 +675,18 @@ class VideoExportTestCase(VideoDescriptorTestBase): """ Make sure that VideoDescriptor can export itself to XML correctly. 
""" + def setUp(self): + super(VideoExportTestCase, self).setUp() + self.temp_dir = mkdtemp() + self.file_system = OSFS(self.temp_dir) + self.addCleanup(shutil.rmtree, self.temp_dir) + @patch('xmodule.video_module.video_module.edxval_api') def test_export_to_xml(self, mock_val_api): """ Test that we write the correct XML on export. """ + edx_video_id = u'test_edx_video_id' mock_val_api.export_to_xml = Mock(return_value=etree.Element('video_asset')) self.descriptor.youtube_id_0_75 = 'izygArpw-Qo' self.descriptor.youtube_id_1_0 = 'p2Q6BrNhdh8' @@ -691,10 +701,10 @@ class VideoExportTestCase(VideoDescriptorTestBase): self.descriptor.html5_sources = ['http://www.example.com/source.mp4', 'http://www.example.com/source1.ogg'] self.descriptor.download_video = True self.descriptor.transcripts = {'ua': 'ukrainian_translation.srt', 'ge': 'german_translation.srt'} - self.descriptor.edx_video_id = 'test_edx_video_id' + self.descriptor.edx_video_id = edx_video_id self.descriptor.runtime.course_id = MagicMock() - xml = self.descriptor.definition_to_xml(None) # We don't use the `resource_fs` parameter + xml = self.descriptor.definition_to_xml(self.file_system) parser = etree.XMLParser(remove_blank_text=True) xml_string = '''\ <video url_name="SampleProblem" start_time="0:00:01" youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" show_captions="false" end_time="0:01:00" download_video="true" download_track="true"> @@ -710,9 +720,10 @@ class VideoExportTestCase(VideoDescriptorTestBase): expected = etree.XML(xml_string, parser=parser) self.assertXmlEqual(expected, xml) mock_val_api.export_to_xml.assert_called_once_with( - [u'test_edx_video_id', u'p2Q6BrNhdh8', 'source', 'source1'], - ANY, - external=False + video_id=edx_video_id, + static_dir=EXPORT_STATIC_DIR, + resource_fs=self.file_system, + course_id=unicode(self.descriptor.runtime.course_id.for_branch(None)), ) @patch('xmodule.video_module.video_module.edxval_api') @@ -723,7 +734,7 @@ class 
VideoExportTestCase(VideoDescriptorTestBase): self.descriptor.edx_video_id = 'test_edx_video_id' self.descriptor.runtime.course_id = MagicMock() - xml = self.descriptor.definition_to_xml(None) + xml = self.descriptor.definition_to_xml(self.file_system) parser = etree.XMLParser(remove_blank_text=True) xml_string = '<video url_name="SampleProblem" download_video="false"/>' expected = etree.XML(xml_string, parser=parser) @@ -746,7 +757,7 @@ class VideoExportTestCase(VideoDescriptorTestBase): self.descriptor.html5_sources = ['http://www.example.com/source.mp4', 'http://www.example.com/source.ogg'] self.descriptor.download_video = True - xml = self.descriptor.definition_to_xml(None) # We don't use the `resource_fs` parameter + xml = self.descriptor.definition_to_xml(self.file_system) parser = etree.XMLParser(remove_blank_text=True) xml_string = '''\ <video url_name="SampleProblem" start_time="0:00:05" youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" show_captions="false" download_video="true" download_track="true"> @@ -763,7 +774,7 @@ class VideoExportTestCase(VideoDescriptorTestBase): """ Test XML export with defaults. """ - xml = self.descriptor.definition_to_xml(None) + xml = self.descriptor.definition_to_xml(self.file_system) # Check that download_video field is also set to default (False) in xml for backward compatibility expected = '<video url_name="SampleProblem" download_video="false"/>\n' self.assertEquals(expected, etree.tostring(xml, pretty_print=True)) @@ -774,7 +785,7 @@ class VideoExportTestCase(VideoDescriptorTestBase): Test XML export with transcripts being overridden to None. 
""" self.descriptor.transcripts = None - xml = self.descriptor.definition_to_xml(None) + xml = self.descriptor.definition_to_xml(self.file_system) expected = '<video url_name="SampleProblem" download_video="false"/>\n' self.assertEquals(expected, etree.tostring(xml, pretty_print=True)) @@ -785,7 +796,7 @@ class VideoExportTestCase(VideoDescriptorTestBase): The illegal characters in a String field are removed from the string instead. """ self.descriptor.display_name = 'Display\x1eName' - xml = self.descriptor.definition_to_xml(None) + xml = self.descriptor.definition_to_xml(self.file_system) self.assertEqual(xml.get('display_name'), 'DisplayName') @patch('xmodule.video_module.video_module.edxval_api', None) @@ -794,7 +805,7 @@ class VideoExportTestCase(VideoDescriptorTestBase): Test XML export handles the unicode characters. """ self.descriptor.display_name = u'这是文' - xml = self.descriptor.definition_to_xml(None) + xml = self.descriptor.definition_to_xml(self.file_system) self.assertEqual(xml.get('display_name'), u'\u8fd9\u662f\u6587') diff --git a/common/lib/xmodule/xmodule/video_module/video_module.py b/common/lib/xmodule/xmodule/video_module/video_module.py index 76038922327..b14ab5bbd62 100644 --- a/common/lib/xmodule/xmodule/video_module/video_module.py +++ b/common/lib/xmodule/xmodule/video_module/video_module.py @@ -47,6 +47,7 @@ from .transcripts_utils import ( get_video_ids_info, Transcript, VideoTranscriptsMixin, + clean_video_id, ) from .transcripts_model_utils import ( is_val_transcript_feature_enabled_for_course @@ -98,6 +99,9 @@ log = logging.getLogger(__name__) _ = lambda text: text +EXPORT_STATIC_DIR = u'static' + + @XBlock.wants('settings', 'completion') class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers, XModule, LicenseMixin): """ @@ -711,19 +715,22 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler ele.set('src', self.transcripts[transcript_language]) xml.append(ele) - if edxval_api: - 
external, video_ids = get_video_ids_info(self.edx_video_id, self.youtube_id_1_0, self.html5_sources) - if video_ids: - try: - xml.append( - edxval_api.export_to_xml( - video_ids, - unicode(self.runtime.course_id.for_branch(None)), - external=external - ) + edx_video_id = clean_video_id(self.edx_video_id) + if edx_video_id: + try: + # Create static dir if not created earlier. + resource_fs.makedirs(EXPORT_STATIC_DIR, recreate=True) + + xml.append( + edxval_api.export_to_xml( + video_id=edx_video_id, + resource_fs=resource_fs, + static_dir=EXPORT_STATIC_DIR, + course_id=unicode(self.runtime.course_id.for_branch(None)) ) - except edxval_api.ValVideoNotFoundError: - pass + ) + except edxval_api.ValVideoNotFoundError: + pass # handle license specifically self.add_license_to_xml(xml) diff --git a/lms/djangoapps/courseware/tests/test_video_mongo.py b/lms/djangoapps/courseware/tests/test_video_mongo.py index ad7314441c5..96206e47108 100644 --- a/lms/djangoapps/courseware/tests/test_video_mongo.py +++ b/lms/djangoapps/courseware/tests/test_video_mongo.py @@ -1,14 +1,22 @@ # -*- coding: utf-8 -*- -"""Video xmodule tests in mongo.""" +""" +Video xmodule tests in mongo. 
+""" import json from collections import OrderedDict from uuid import uuid4 +from tempfile import mkdtemp +import shutil import ddt from django.conf import settings +from django.core.files import File +from django.core.files.base import ContentFile from django.test import TestCase from django.test.utils import override_settings +from fs.osfs import OSFS +from fs.path import combine from edxval.api import ( ValCannotCreateError, ValVideoNotFoundError, @@ -16,7 +24,8 @@ from edxval.api import ( create_profile, create_video, get_video_info, - get_video_transcript + get_video_transcript, + get_video_transcript_data ) from lxml import etree from mock import MagicMock, Mock, patch @@ -32,6 +41,7 @@ from xmodule.tests.test_import import DummySystem from xmodule.tests.test_video import VideoDescriptorTestBase, instantiate_descriptor from xmodule.video_module import VideoDescriptor, bumper_utils, rewrite_video_url, video_utils from xmodule.video_module.transcripts_utils import Transcript, save_to_store +from xmodule.video_module.video_module import EXPORT_STATIC_DIR from xmodule.x_module import STUDENT_VIEW from .helpers import BaseTestXmodule @@ -43,6 +53,16 @@ MODULESTORES = { ModuleStoreEnum.Type.split: TEST_DATA_SPLIT_MODULESTORE, } +TRANSCRIPT_FILE_DATA = """ +1 +00:00:14,370 --> 00:00:16,530 +I am overwatch. + +2 +00:00:16,500 --> 00:00:18,600 +可以用“我不太懂艺术 但我知道我喜欢什么”做比喻. 
+""" + @attr(shard=1) class TestVideoYouTube(TestVideo): @@ -1509,12 +1529,15 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase): super(VideoDescriptorTest, self).setUp() self.descriptor.runtime.handler_url = MagicMock() self.descriptor.runtime.course_id = MagicMock() + self.temp_dir = mkdtemp() + self.file_system = OSFS(self.temp_dir) + self.addCleanup(shutil.rmtree, self.temp_dir) def get_video_transcript_data(self, video_id): return dict( video_id=video_id, language_code='ar', - url='/media/ext101.srt', + url='{media_url}ext101.srt'.format(media_url=settings.MEDIA_URL), # MEDIA_URL is /static/uploads/ provider='Cielo24', file_format='srt', ) @@ -1547,7 +1570,14 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase): ) def test_export_val_data_with_internal(self): + """ + Tests that exported VAL videos are working as expected. + """ + language_code = 'ar' + transcript_file_name = 'test_edx_video_id-ar.srt' + expected_transcript_path = combine(self.temp_dir, combine(EXPORT_STATIC_DIR, transcript_file_name)) self.descriptor.edx_video_id = 'test_edx_video_id' + create_profile('mobile') create_video({ 'edx_video_id': self.descriptor.edx_video_id, @@ -1561,34 +1591,48 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase): 'bitrate': 333, }], }) - create_or_update_video_transcript( + transcript_url = create_or_update_video_transcript( video_id=self.descriptor.edx_video_id, - language_code='ar', + language_code=language_code, metadata={ 'provider': 'Cielo24', - 'file_name': 'ext101.srt', 'file_format': 'srt' - } + }, + file_data=ContentFile(TRANSCRIPT_FILE_DATA) ) - actual = self.descriptor.definition_to_xml(resource_fs=None) + actual = self.descriptor.definition_to_xml(resource_fs=self.file_system) expected_str = """ <video download_video="false" url_name="SampleProblem"> <video_asset client_video_id="test_client_video_id" duration="111.0" image=""> <encoded_video profile="mobile" url="http://example.com/video" file_size="222" 
bitrate="333"/> <transcripts> - <transcript file_format="srt" file_name="ext101.srt" language_code="ar" provider="Cielo24" video_id="{video_id}"/> + <transcript file_format="srt" file_name='video-transcripts/{transcript_name}' language_code="{language_code}" provider="Cielo24"/> </transcripts> </video_asset> </video> - """.format(video_id=self.descriptor.edx_video_id) + """.format( + transcript_name=transcript_url.split('/')[-1], + language_code=language_code + ) parser = etree.XMLParser(remove_blank_text=True) expected = etree.XML(expected_str, parser=parser) self.assertXmlEqual(expected, actual) + # Verify transcript file is created. + self.assertEqual([transcript_file_name], self.file_system.listdir(EXPORT_STATIC_DIR)) + + # Also verify the content of created transcript file. + expected_transcript_content = File(open(expected_transcript_path)).read() + transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language_code) + self.assertEqual(transcript['content'], expected_transcript_content) + def test_export_val_data_not_found(self): + """ + Tests that external video export works as expected. + """ self.descriptor.edx_video_id = 'nonexistent' - actual = self.descriptor.definition_to_xml(resource_fs=None) + actual = self.descriptor.definition_to_xml(resource_fs=self.file_system) expected_str = """<video download_video="false" url_name="SampleProblem"/>""" parser = etree.XMLParser(remove_blank_text=True) expected = etree.XML(expected_str, parser=parser) @@ -1597,12 +1641,12 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase): @patch('xmodule.video_module.transcripts_utils.get_video_ids_info') def test_export_no_video_ids(self, mock_get_video_ids_info): """ - Tests export when there are no video ids + Tests export when there is no video id. `export_to_xml` only works in case of video id. 
""" mock_get_video_ids_info.return_value = True, [] - actual = self.descriptor.definition_to_xml(resource_fs=None) - expected_str = '<video url_name="SampleProblem" download_video="false"><video_asset/></video>' + actual = self.descriptor.definition_to_xml(resource_fs=self.file_system) + expected_str = '<video url_name="SampleProblem" download_video="false"></video>' parser = etree.XMLParser(remove_blank_text=True) expected = etree.XML(expected_str, parser=parser) diff --git a/lms/envs/test.py b/lms/envs/test.py index 8969262e5d0..ecd383d87be 100644 --- a/lms/envs/test.py +++ b/lms/envs/test.py @@ -579,6 +579,16 @@ ACTIVATION_EMAIL_FROM_ADDRESS = 'test_activate@edx.org' TEMPLATES[0]['OPTIONS']['debug'] = True +########################## VIDEO TRANSCRIPTS STORAGE ############################ +VIDEO_TRANSCRIPTS_SETTINGS = dict( + VIDEO_TRANSCRIPTS_MAX_BYTES=3 * 1024 * 1024, # 3 MB + STORAGE_KWARGS=dict( + location=MEDIA_ROOT, + base_url=MEDIA_URL, + ), + DIRECTORY_PREFIX='video-transcripts/', +) + ####################### Plugin Settings ########################## from openedx.core.djangoapps.plugins import plugin_settings, constants as plugin_constants -- GitLab