Skip to content
Snippets Groups Projects
Commit 77f7f269 authored by MarCnu's avatar MarCnu
Browse files

Add HTTP_RANGE compatibility for ContentStore file streaming

Currently, users can only download ContentStore files from first byte to
last byte.

With this change, when a request to the ContentStore includes the HTTP
"Range" parameter, it is parsed and StaticContentStream will stream the
requested bytes.

This change makes possible to stream video files (.mp4 especially) from
the ContentStore. Users can now seek a specific time in the video
without loading all the file.

This is useful for courses with a limited number of students that
doesn't require a dedicated video server.
parent d5db673a
No related merge requests found
......@@ -75,7 +75,58 @@ class StaticContentServer(object):
if if_modified_since == last_modified_at_str:
return HttpResponseNotModified()
response = HttpResponse(content.stream_data(), content_type=content.content_type)
# *** File streaming within a byte range ***
# If a Range is provided, parse Range attribute of the request
# Add Content-Range in the response if Range is structurally correct
# Request -> Range attribute structure: "Range: bytes=first-[last]"
# Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
response = None
if request.META.get('HTTP_RANGE'):
# Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
if type(content) == StaticContent:
content = contentstore().find(loc, as_stream=True)
# Let's parse the Range header, bytes=first-[last]
range_header = request.META['HTTP_RANGE']
if '=' in range_header:
unit, byte_range = range_header.split('=')
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
if unit == 'bytes' and '-' in byte_range:
first, last = byte_range.split('-')
# "first" must be a valid integer
try:
first = int(first)
except ValueError:
pass
if type(first) is int:
# "last" default value is the last byte of the file
# Users can ask "bytes=0-" to request the whole file when they don't know the length
try:
last = int(last)
except ValueError:
last = content.length - 1
if 0 <= first <= last < content.length:
# Valid Range attribute
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # HTTP_206_PARTIAL_CONTENT
if not response:
# Malformed Range attribute
response = HttpResponse()
response.status_code = 400 # HTTP_400_BAD_REQUEST
return response
else:
# No Range attribute
response = HttpResponse(content.stream_data())
response['Content-Length'] = content.length
response['Accept-Ranges'] = 'bytes'
response['Content-Type'] = content.content_type
response['Last-Modified'] = last_modified_at_str
return response
......@@ -48,12 +48,12 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
# A locked asset
self.locked_asset = self.course_key.make_asset_key('asset', 'sample_static.txt')
self.url_locked = self.locked_asset.to_deprecated_string()
self.contentstore.set_attr(self.locked_asset, 'locked', True)
# An unlocked asset
self.unlocked_asset = self.course_key.make_asset_key('asset', 'another_static.txt')
self.url_unlocked = self.unlocked_asset.to_deprecated_string()
self.contentstore.set_attr(self.locked_asset, 'locked', True)
self.length_unlocked = self.contentstore.get_attr(self.unlocked_asset, 'length')
def test_unlocked_asset(self):
"""
......@@ -61,7 +61,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
"""
self.client.logout()
resp = self.client.get(self.url_unlocked)
self.assertEqual(resp.status_code, 200) # pylint: disable=E1103
self.assertEqual(resp.status_code, 200) # pylint: disable=E1103
def test_locked_asset_not_logged_in(self):
"""
......@@ -70,7 +70,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
"""
self.client.logout()
resp = self.client.get(self.url_locked)
self.assertEqual(resp.status_code, 403) # pylint: disable=E1103
self.assertEqual(resp.status_code, 403) # pylint: disable=E1103
def test_locked_asset_not_registered(self):
"""
......@@ -79,7 +79,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
"""
self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd)
resp = self.client.get(self.url_locked)
self.assertEqual(resp.status_code, 403) # pylint: disable=E1103
self.assertEqual(resp.status_code, 403) # pylint: disable=E1103
def test_locked_asset_registered(self):
"""
......@@ -91,7 +91,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd)
resp = self.client.get(self.url_locked)
self.assertEqual(resp.status_code, 200) # pylint: disable=E1103
self.assertEqual(resp.status_code, 200) # pylint: disable=E1103
def test_locked_asset_staff(self):
"""
......@@ -99,5 +99,87 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
"""
self.client.login(username=self.staff_usr, password=self.staff_pwd)
resp = self.client.get(self.url_locked)
self.assertEqual(resp.status_code, 200) # pylint: disable=E1103
self.assertEqual(resp.status_code, 200) # pylint: disable=E1103
def test_range_request_full_file(self):
"""
Test that a range request from byte 0 to last,
outputs partial content status code and valid Content-Range and Content-Length.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-')
self.assertEqual(resp.status_code, 206) # HTTP_206_PARTIAL_CONTENT
self.assertEqual(resp['Content-Range'], 'bytes {first}-{last}/{length}'.format(
first=0, last=self.length_unlocked-1, length=self.length_unlocked)
)
self.assertEqual(resp['Content-Length'], str(self.length_unlocked))
def test_range_request_partial_file(self):
"""
Test that a range request for a partial file,
outputs partial content status code and valid Content-Range and Content-Length.
first_byte and last_byte are chosen to be simple but non trivial values.
"""
first_byte = self.length_unlocked / 4
last_byte = self.length_unlocked / 2
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=first_byte, last=last_byte)
)
self.assertEqual(resp.status_code, 206) # HTTP_206_PARTIAL_CONTENT
self.assertEqual(resp['Content-Range'], 'bytes {first}-{last}/{length}'.format(
first=first_byte, last=last_byte, length=self.length_unlocked))
self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1))
def test_range_request_malformed_missing_equal(self):
"""
Test that a range request with malformed Range (missing '=') outputs status 400.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_not_bytes(self):
"""
Test that a range request with malformed Range (not "bytes") outputs status 400.
"Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_missing_minus(self):
"""
Test that a range request with malformed Range (missing '-') outputs status 400.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_first_not_integer(self):
"""
Test that a range request with malformed Range (first is not an integer) outputs status 400.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_invalid_range(self):
"""
Test that a range request with malformed Range (first_byte > last_byte) outputs status 400.
"""
first_byte = self.length_unlocked / 2
last_byte = self.length_unlocked / 4
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=first_byte, last=last_byte)
)
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_out_of_bounds(self):
"""
Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error)
outputs status 400.
"""
last_byte = self.length_unlocked
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format(
last=last_byte)
)
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
......@@ -5,6 +5,8 @@ XASSET_SRCREF_PREFIX = 'xasset:'
XASSET_THUMBNAIL_TAIL_NAME = '.jpg'
STREAM_DATA_CHUNK_SIZE = 1024
import os
import logging
import StringIO
......@@ -164,11 +166,26 @@ class StaticContentStream(StaticContent):
def stream_data(self):
while True:
chunk = self._stream.read(1024)
chunk = self._stream.read(STREAM_DATA_CHUNK_SIZE)
if len(chunk) == 0:
break
yield chunk
def stream_data_in_range(self, first_byte, last_byte):
"""
Stream the data between first_byte and last_byte (included)
"""
self._stream.seek(first_byte)
position = first_byte
while True:
if last_byte < position + STREAM_DATA_CHUNK_SIZE - 1:
chunk = self._stream.read(last_byte - position + 1)
yield chunk
break
chunk = self._stream.read(STREAM_DATA_CHUNK_SIZE)
position += STREAM_DATA_CHUNK_SIZE
yield chunk
def close(self):
self._stream.close()
......
import unittest
from xmodule.contentstore.content import StaticContent
from xmodule.contentstore.content import StaticContent, StaticContentStream
from xmodule.contentstore.content import ContentStore
from opaque_keys.edx.locations import SlashSeparatedCourseKey, AssetLocation
SAMPLE_STRING = """
This is a sample string with more than 1024 bytes, the default STREAM_DATA_CHUNK_SIZE
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s,
when an unknown printer took a galley of type and scrambled it to make a type
specimen book. It has survived not only five centuries, but also the leap into
electronic typesetting, remaining essentially unchanged. It was popularised in
the 1960s with the release of Letraset sheets containing Lorem Ipsum passages,
nd more recently with desktop publishing software like Aldus PageMaker including
versions of Lorem Ipsum.
It is a long established fact that a reader will be distracted by the readable
content of a page when looking at its layout. The point of using Lorem Ipsum is
that it has a more-or-less normal distribution of letters, as opposed to using
'Content here, content here', making it look like readable English. Many desktop
ublishing packages and web page editors now use Lorem Ipsum as their default model
text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy.
Various versions have evolved over the years, sometimes by accident, sometimes on purpose
injected humour and the like).
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s,
when an unknown printer took a galley of type and scrambled it to make a type
specimen book. It has survived not only five centuries, but also the leap into
electronic typesetting, remaining essentially unchanged. It was popularised in
the 1960s with the release of Letraset sheets containing Lorem Ipsum passages,
nd more recently with desktop publishing software like Aldus PageMaker including
versions of Lorem Ipsum.
It is a long established fact that a reader will be distracted by the readable
content of a page when looking at its layout. The point of using Lorem Ipsum is
that it has a more-or-less normal distribution of letters, as opposed to using
'Content here, content here', making it look like readable English. Many desktop
ublishing packages and web page editors now use Lorem Ipsum as their default model
text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy.
Various versions have evolved over the years, sometimes by accident, sometimes on purpose
injected humour and the like).
"""
class Content:
def __init__(self, location, content_type):
......@@ -10,6 +49,30 @@ class Content:
self.content_type = content_type
class FakeGridFsItem:
"""
This class provides the basic methods to get data from a GridFS item
"""
def __init__(self, string_data):
self.cursor = 0
self.data = string_data
self.length = len(string_data)
def seek(self, position):
"""
Set the cursor at "position"
"""
self.cursor = position
def read(self, chunk_size):
"""
Read "chunk_size" bytes of data at position cursor and move the cursor
"""
chunk = self.data[self.cursor:(self.cursor + chunk_size)]
self.cursor += chunk_size
return chunk
class ContentTest(unittest.TestCase):
def test_thumbnail_none(self):
# We had a bug where a thumbnail location of None was getting transformed into a Location tuple, with
......@@ -46,3 +109,39 @@ class ContentTest(unittest.TestCase):
AssetLocation(u'foo', u'bar', None, u'asset', u'images_course_image.jpg', None),
asset_location
)
def test_static_content_stream_stream_data(self):
"""
Test StaticContentStream stream_data function, asserts that we get all the bytes
"""
data = SAMPLE_STRING
item = FakeGridFsItem(data)
static_content_stream = StaticContentStream('loc', 'name', 'type', item, length=item.length)
total_length = 0
stream = static_content_stream.stream_data()
for chunck in stream:
total_length += len(chunck)
self.assertEqual(total_length, static_content_stream.length)
def test_static_content_stream_stream_data_in_range(self):
"""
Test StaticContentStream stream_data_in_range function,
asserts that we get the requested number of bytes
first_byte and last_byte are chosen to be simple but non trivial values
and to have total_length > STREAM_DATA_CHUNK_SIZE (1024)
"""
data = SAMPLE_STRING
item = FakeGridFsItem(data)
static_content_stream = StaticContentStream('loc', 'name', 'type', item, length=item.length)
first_byte = 100
last_byte = 1500
total_length = 0
stream = static_content_stream.stream_data_in_range(first_byte, last_byte)
for chunck in stream:
total_length += len(chunck)
self.assertEqual(total_length, last_byte - first_byte + 1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment