Skip to content
Snippets Groups Projects
Commit e4eac68e authored by Braden MacDonald
Browse files

Support transcript files for video XBlocks in Blockstore

parent 009b96d4
No related branches found
No related tags found
No related merge requests found
......@@ -20,10 +20,12 @@ from six import text_type
from six.moves import range, zip
from six.moves.html_parser import HTMLParser # pylint: disable=import-error
from opaque_keys.edx.locator import CourseLocator, LibraryLocator
from opaque_keys.edx.locator import BundleDefinitionLocator
from xmodule.contentstore.content import StaticContent
from xmodule.contentstore.django import contentstore
from xmodule.exceptions import NotFoundError
from openedx.core.djangolib import blockstore_cache
from openedx.core.lib import blockstore_api
from .bumper_utils import get_bumper_settings
......@@ -994,6 +996,77 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts
return transcript_content, transcript_name, Transcript.mime_types[output_format]
def get_transcript_from_blockstore(video_block, language, output_format, transcripts_info):
    """
    Load a video XBlock's transcript out of its Blockstore bundle and convert it.

    The transcript file is looked up in the 'static/' folder that sits next to
    the block's OLX definition file. For a definition stored at
        video/video1/definition.xml
    the transcript is read from e.g.
        video/video1/static/video1-en.srt
    The filename for each language comes from the 'transcripts' entry of
    `transcripts_info`, e.g. for this OLX:
        <video
            youtube_id_1_0="3_yD_cEKoCk"
            transcripts='{"en": "3_yD_cEKoCk-en.srt"}'
            display_name="Welcome Video with Transcript"
            download_track="true"
        />
    Only filenames ending in '.srt' are accepted; the stored SRT data is then
    converted to `output_format`.

    This method is tested in openedx/core/djangoapps/content_libraries/tests/test_static_assets.py

    Arguments:
        video_block (Video XBlock): The video XBlock
        language (str): transcript language
        output_format (str): transcript output format
        transcripts_info (dict): transcript info for a video, from video_block.get_transcripts_info()

    Returns:
        tuple containing content, filename, mimetype

    Raises:
        NotFoundError: if the output format is not one of SRT/SJSON/TXT, no
            transcript is declared for `language`, the declared file is not
            an .srt file, the file is missing from the bundle, or the
            converted transcript is blank.
    """
    supported_formats = (Transcript.SRT, Transcript.SJSON, Transcript.TXT)
    if output_format not in supported_formats:
        raise NotFoundError('Invalid transcript format `{output_format}`'.format(output_format=output_format))

    declared_files = transcripts_info['transcripts']
    if language not in declared_files:
        raise NotFoundError("Video {} does not have a transcript file defined for the '{}' language in its OLX.".format(
            video_block.scope_ids.usage_id,
            language,
        ))

    filename = declared_files[language]
    # We want to standardize on .srt, so anything else is rejected up front.
    if not filename.endswith('.srt'):
        raise NotFoundError("Video XBlocks in Blockstore only support .srt transcript files.")

    # There is no XBlock API for reading bundle files (something like
    # block.runtime.resources_fs), so go through the blockstore API directly.
    definition_key = video_block.scope_ids.def_id
    bundle_uuid = definition_key.bundle_uuid
    block_folder = definition_key.olx_path.rpartition('/')[0]
    path = '/'.join([block_folder, 'static', filename])
    # Either bundle_version or draft_name will be set.
    bundle_version = definition_key.bundle_version
    draft_name = definition_key.draft_name

    try:
        srt_bytes = blockstore_cache.get_bundle_file_data_with_cache(bundle_uuid, path, bundle_version, draft_name)
    except blockstore_api.BundleFileNotFound:
        raise NotFoundError("Transcript file '{}' missing for video XBlock {}".format(
            path,
            video_block.scope_ids.usage_id,
        ))

    # Convert the stored SRT data to whichever format the caller asked for.
    converted = Transcript.convert(
        srt_bytes.decode('utf-8'),
        input_format=Transcript.SRT,
        output_format=output_format,
    )
    if not converted.strip():
        raise NotFoundError('No transcript content')

    # The returned filename keeps the original stem but takes the new format as its extension.
    stem, _extension = os.path.splitext(filename)
    output_filename = '{}.{}'.format(stem, output_format)
    return converted, output_filename, Transcript.mime_types[output_format]
def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=None):
"""
Get video transcript from edx-val or content store.
......@@ -1011,18 +1084,18 @@ def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=No
if not lang:
lang = video.get_default_transcript_language(transcripts_info)
if isinstance(video.scope_ids.def_id, BundleDefinitionLocator):
# This block is in Blockstore.
# For Blockstore, VAL is considered deprecated and we can load the transcript file
# directly using the Blockstore API:
return get_transcript_from_blockstore(video, lang, output_format, transcripts_info)
try:
edx_video_id = clean_video_id(video.edx_video_id)
if not edx_video_id:
raise NotFoundError
return get_transcript_from_val(edx_video_id, lang, output_format)
except NotFoundError:
# If this is not in a modulestore course or library, don't try loading from contentstore:
if not isinstance(video.scope_ids.usage_id.course_key, (CourseLocator, LibraryLocator)):
raise NotFoundError(
u'Video transcripts cannot yet be loaded from Blockstore (block: {})'.format(video.scope_ids.usage_id),
)
return get_transcript_from_contentstore(
video,
lang,
......
......@@ -13,6 +13,16 @@ SVG_DATA = """<svg xmlns="http://www.w3.org/2000/svg" height="30" width="100">
<text x="0" y="15" fill="red">SVG is 🔥</text>
</svg>""".encode('utf-8')
# Fixture: the opening two cues of an .srt transcript file, as UTF-8 bytes.
# NOTE(review): .srt cues are conventionally separated by a blank line and none
# appear between the cues here - confirm this matches what the parser expects.
TRANSCRIPT_DATA = (
    "1\n"
    "00:00:00,260 --> 00:00:01,510\n"
    "Welcome to edX.\n"
    "2\n"
    "00:00:01,510 --> 00:00:04,480\n"
    "I'm Anant Agarwal, I'm the president of edX,\n"
).encode('utf-8')
class ContentLibrariesStaticAssetsTest(ContentLibrariesRestApiTest):
"""
......@@ -111,3 +121,49 @@ class ContentLibrariesStaticAssetsTest(ContentLibrariesRestApiTest):
# 'a////////b' is not allowed
file_name = "a////////b"
self._set_library_block_asset(block_id, file_name, SVG_DATA, expect_response=400)
def test_video_transcripts(self):
    """
    Video blocks should be able to serve transcript files stored in blockstore.

    The block's "transcript" handler is exercised twice - once against the
    unpublished draft and once after the library changes are committed -
    requesting both the English translation and the transcript download.
    """
    olx = (
        '\n'
        '<video\n'
        'youtube_id_1_0="3_yD_cEKoCk"\n'
        'display_name="Welcome Video with Transcript"\n'
        'download_track="true"\n'
        'transcripts=\'{"en": "3_yD_cEKoCk-en.srt"}\'\n'
        '/>\n'
    )

    library = self._create_library(slug="transcript-test-lib", title="Transcripts Test Library")
    video = self._add_block_to_library(library["id"], "video", "video1")
    video_id = video["id"]
    self._set_library_block_olx(video_id, olx)
    # Upload the transcript file alongside the block's OLX
    self._set_library_block_asset(video_id, "3_yD_cEKoCk-en.srt", TRANSCRIPT_DATA)

    handler_url = self._get_block_handler_url(video_id, "transcript")

    def assert_translation_served():
        """
        Hit the endpoint the video player uses to load the transcript as SJSON
        """
        result = self.client.get(handler_url + 'translation/en')
        self.assertEqual(result.status_code, 200)
        body_text = result.content.decode('utf-8')
        self.assertIn("Welcome to edX", body_text)

    def assert_download_served():
        """
        Hit the endpoint the video player uses to download the transcript SRT file
        """
        result = self.client.get(handler_url + 'download')
        self.assertEqual(result.status_code, 200)
        self.assertEqual(result.content, TRANSCRIPT_DATA)

    # First pass: the OLX and transcript exist only as unpublished changes.
    assert_translation_served()
    assert_download_served()

    # Second pass: publish the OLX and the transcript file, since published
    # data gets served differently by Blockstore and that path needs testing too.
    self._commit_library_changes(library["id"])
    assert_translation_served()
    assert_download_served()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment