Skip to content
Snippets Groups Projects
Commit 1b911985 authored by Michael Terry's avatar Michael Terry
Browse files

AA-614: Add initial effort estimation block transformer

parent 5c952581
No related branches found
No related tags found
No related merge requests found
Showing
with 475 additions and 12 deletions
......@@ -1541,6 +1541,7 @@ INSTALLED_APPS = [
'openedx.features.course_duration_limits',
'openedx.features.content_type_gating',
'openedx.features.discounts',
'openedx.features.effort_estimation',
'lms.djangoapps.experiments',
'openedx.core.djangoapps.external_user_ids',
......
......@@ -9,10 +9,10 @@ from lms.djangoapps.course_blocks.transformers.hidden_content import HiddenConte
from lms.djangoapps.course_blocks.transformers.hide_empty import HideEmptyTransformer
from openedx.core.djangoapps.content.block_structure.transformers import BlockStructureTransformers
from openedx.core.lib.mobile_utils import is_request_from_mobile_app
from openedx.features.effort_estimation.api import EffortEstimationTransformer
from .serializers import BlockDictSerializer, BlockSerializer
from .toggles import HIDE_ACCESS_DENIALS_FLAG
from .transformers.block_completion import BlockCompletionTransformer
from .transformers.blocks_api import BlocksAPITransformer
from .transformers.milestones import MilestonesAndSpecialExamsTransformer
......@@ -72,9 +72,11 @@ def get_blocks(
if requested_fields is None:
requested_fields = []
include_completion = 'completion' in requested_fields
include_effort_estimation = (EffortEstimationTransformer.EFFORT_TIME in requested_fields or
EffortEstimationTransformer.EFFORT_ACTIVITIES in requested_fields)
include_gated_sections = 'show_gated_sections' in requested_fields
include_has_scheduled_content = 'has_scheduled_content' in requested_fields
include_special_exams = 'special_exam_info' in requested_fields
include_gated_sections = 'show_gated_sections' in requested_fields
if user is not None:
transformers += course_blocks_api.get_course_block_access_transformers(user)
......@@ -93,6 +95,9 @@ def get_blocks(
if is_request_from_mobile_app(request):
transformers += [HideEmptyTransformer()]
if include_effort_estimation:
transformers += [EffortEstimationTransformer()]
transformers += [
BlocksAPITransformer(
block_counts,
......@@ -102,15 +107,13 @@ def get_blocks(
)
]
if include_completion:
transformers += [BlockCompletionTransformer()]
# transform
blocks = course_blocks_api.get_course_blocks(
user,
usage_key,
transformers,
allow_start_dates_in_future=allow_start_dates_in_future,
include_completion=include_completion,
include_has_scheduled_content=include_has_scheduled_content
)
......
......@@ -46,6 +46,8 @@ class SupportedFieldType(object):
SUPPORTED_FIELDS = [
SupportedFieldType('category', requested_field_name='type'),
SupportedFieldType('display_name', default_value=''),
SupportedFieldType('effort_activities'),
SupportedFieldType('effort_time'),
SupportedFieldType('graded'),
SupportedFieldType('format'),
SupportedFieldType('start'),
......
......@@ -45,6 +45,8 @@ class CourseBlockSerializer(serializers.Serializer):
'description': description,
'display_name': display_name,
'due': block.get('due'),
'effort_activities': block.get('effort_activities'),
'effort_time': block.get('effort_time'),
'icon': icon,
'id': block_key,
'lms_web_url': block['lms_web_url'] if enable_links else None,
......
......@@ -2952,6 +2952,7 @@ INSTALLED_APPS = [
'openedx.features.course_duration_limits',
'openedx.features.content_type_gating',
'openedx.features.discounts',
'openedx.features.effort_estimation',
'lms.djangoapps.experiments',
......
......@@ -205,17 +205,19 @@ def get_course_outline_block_tree(request, course_id, user=None, allow_start_dat
nav_depth=3,
requested_fields=[
'children',
'display_name',
'type',
'start',
'contains_gated_content',
'display_name',
'due',
'effort_activities',
'effort_time',
'format',
'graded',
'has_score',
'weight',
'special_exam_info',
'show_gated_sections',
'format'
'special_exam_info',
'start',
'type',
'weight',
],
block_types_filter=block_types_filter,
allow_start_dates_in_future=allow_start_dates_in_future,
......
"""
API entry point to the effort_estimation app.
"""
# pylint: disable=unused-import
from .block_transformers import EffortEstimationTransformer
"""
Define the effort_estimation Django App.
"""
from django.apps import AppConfig
class EffortEstimationConfig(AppConfig):
    """Django application configuration for the effort_estimation feature."""

    # Dotted path of the app package, as listed in INSTALLED_APPS.
    name = 'openedx.features.effort_estimation'
"""
Effort Estimation Transformer implementation.
Adds effort estimations for block types it recognizes.
"""
import math
import crum
import lxml.html
from django.utils.functional import cached_property
from edxval.api import get_videos_for_course
from openedx.core.djangoapps.content.block_structure.transformer import BlockStructureTransformer
from openedx.core.lib.mobile_utils import is_request_from_mobile_app
from .toggles import EFFORT_ESTIMATION_LOCATION_FLAG
class EffortEstimationTransformer(BlockStructureTransformer):
    """
    A transformer that adds effort estimation to the block tree.

    There are two fields added by this transformer:
    - effort_activities: The number of "activities" at this block or lower. Note that verticals count as a single
                         activity at most. Activities are basically anything that isn't text or video.
    - effort_time: Our best guess at how long the block and lower will take, in seconds. We use an estimated reading
                   speed and video duration to calculate this. Just a rough guide.

    If there is any missing data (like no video duration), we don't provide any estimates at all for the course.
    We'd rather provide no estimate than a misleading estimate.

    This transformer requires data gathered during the collection phase (from a course publish), so it won't work
    on a course until the next publish.
    """
    WRITE_VERSION = 1
    READ_VERSION = 1

    # Public xblock field names
    EFFORT_ACTIVITIES = 'effort_activities'
    EFFORT_TIME = 'effort_time'

    # Private transformer field names
    DISABLE_ESTIMATION = 'disable_estimation'
    HTML_WORD_COUNT = 'html_word_count'
    VIDEO_CLIP_DURATION = 'video_clip_duration'
    VIDEO_DURATION = 'video_duration'

    # Key used inside the temporary collection cache dict
    CACHE_VIDEO_DURATIONS = 'video.durations'

    DEFAULT_WPM = 265  # words per minute

    class MissingEstimationData(Exception):
        """Raised by a collection helper when a block lacks the data needed to produce an estimate."""

    @classmethod
    def name(cls):
        """Unique identifier for the transformer's class; same identifier used in setup.py."""
        return 'effort_estimation'

    @classmethod
    def collect(cls, block_structure):
        """
        Grabs raw estimates for leaf content.

        Pooling leaf estimates higher up the tree (e.g. in verticals, then sequentials, then chapters) is done by
        transform() below at run time, because which blocks each user sees can be different.

        If any collection helper raises MissingEstimationData, collection stops and the DISABLE_ESTIMATION
        transformer flag is set so that transform() does nothing for this course.
        """
        block_structure.request_xblock_fields('category')
        block_structure.request_xblock_fields('global_speed', 'only_on_web')  # video fields

        collection_cache = {}  # collection methods can stuff some temporary data here

        collections = {
            'html': cls._collect_html_effort,
            'video': cls._collect_video_effort,
        }

        try:
            for block_key in block_structure.topological_traversal():
                xblock = block_structure.get_xblock(block_key)

                if xblock.category in collections:
                    collections[xblock.category](block_structure, block_key, xblock, collection_cache)

        except cls.MissingEstimationData:
            # Some bit of required data is missing. Likely some duration info is missing from the video pipeline.
            # Rather than attempt to work around it, just set a note for ourselves to not show durations for this
            # course at all. Better no estimate than a misleading estimate.
            block_structure.set_transformer_data(cls, cls.DISABLE_ESTIMATION, True)

    @classmethod
    def _collect_html_effort(cls, block_structure, block_key, xblock, _cache):
        """
        Records a word count for later reading speed calculations.

        Raises MissingEstimationData if the block's html cannot be parsed.
        """
        try:
            text = lxml.html.fromstring(xblock.data).text_content() if xblock.data else ''
        except Exception as exc:  # pylint: disable=broad-except
            raise cls.MissingEstimationData() from exc

        block_structure.set_transformer_block_field(block_key, cls, cls.HTML_WORD_COUNT, len(text.split()))

    @classmethod
    def _collect_video_effort(cls, block_structure, block_key, xblock, cache):
        """
        Records a duration for later viewing speed calculations.

        Raises MissingEstimationData if no positive duration is known for the block's edx_video_id.
        """
        # Lookup all course video metadata at once rather than piecemeal, for performance reasons
        if cls.CACHE_VIDEO_DURATIONS not in cache:
            all_videos, _ = get_videos_for_course(str(block_structure.root_block_usage_key.course_key))
            cache[cls.CACHE_VIDEO_DURATIONS] = {v['edx_video_id']: v['duration'] for v in all_videos}

        # Check if we have a duration. If not, raise an exception that will stop this transformer from affecting
        # this course. A falsy duration (None as well as 0) counts as missing, so that an unset value disables
        # estimation rather than failing the comparison below.
        duration = cache[cls.CACHE_VIDEO_DURATIONS].get(xblock.edx_video_id, 0)
        if not duration or duration <= 0:
            raise cls.MissingEstimationData()

        block_structure.set_transformer_block_field(block_key, cls, cls.VIDEO_DURATION, duration)

        # Some videos will suggest specific start & end times, rather than the whole video. Note that this is only
        # supported in some clients (other clients - like the mobile app - will play the whole video anyway). So we
        # record this duration separately, to use instead of the whole video duration if the client supports it.
        clip_duration = (xblock.end_time - xblock.start_time).total_seconds()
        if clip_duration > 0:
            block_structure.set_transformer_block_field(block_key, cls, cls.VIDEO_CLIP_DURATION, clip_duration)

    def transform(self, usage_info, block_structure):
        """
        Writes effort_time and effort_activities onto the blocks of block_structure.

        Does nothing if the collection phase flagged the course with DISABLE_ESTIMATION. Otherwise estimates are
        computed bottom-up for every recognized block category, and then cleared again on every block that the
        current experiment bucket does not expose.
        """
        # Skip any transformation if our collection phase said to
        cls = EffortEstimationTransformer
        if block_structure.get_transformer_data(cls, cls.DISABLE_ESTIMATION, default=False):
            return

        # These estimation methods should return a tuple of (a number in seconds, an activity count)
        estimations = {
            'chapter': self._estimate_children_effort,
            'course': self._estimate_children_effort,
            'html': self._estimate_html_effort,
            'sequential': self._estimate_children_effort,
            'vertical': self._estimate_vertical_effort,
            'video': self._estimate_video_effort,
        }

        # We're good to continue and make user-specific estimates based on collected data
        for block_key in block_structure.post_order_traversal():
            category = block_structure.get_xblock_field(block_key, 'category')
            if category not in estimations:
                continue

            time, activities = estimations[category](usage_info, block_structure, block_key)

            if time is not None:
                # We take the ceiling of the estimate here just for cleanliness. Losing the fractional seconds does
                # technically make our estimate less accurate, especially as we combine these values in parents.
                # But easier to present a simple integer to any consumers, and precise to-the-second accuracy on our
                # estimate is not a primary goal.
                block_structure.override_xblock_field(block_key, self.EFFORT_TIME, math.ceil(time))

            if activities is not None:
                block_structure.override_xblock_field(block_key, self.EFFORT_ACTIVITIES, activities)

        # Get bucket for this experiment. 0 is no estimate. 1 is only on sections. 2 is only on subsections.
        # For cleanup ticket AA-659: remove everything below.

        # We only want to get the bucket if there is data available - i.e. there is something to actually experiment
        # on. This helps avoid rollout issues where we don't want to claim a user is in bucket 1 if we haven't even
        # re-published the course so that it has any estimation data available.
        root_key = block_structure.root_block_usage_key
        total_activities = block_structure.get_xblock_field(root_key, self.EFFORT_ACTIVITIES)
        total_time = block_structure.get_xblock_field(root_key, self.EFFORT_TIME)
        if not total_activities and not total_time:
            return

        # Second pass to clear out collected estimate on levels we don't want to share. Just an easy way to test
        # estimates at different levels, per the experiment.
        bucket = EFFORT_ESTIMATION_LOCATION_FLAG.get_bucket(course_key=root_key.course_key)
        for block_key in block_structure.post_order_traversal():
            category = block_structure.get_xblock_field(block_key, 'category')
            allowed = (bucket == 1 and category == 'chapter') or (bucket == 2 and category == 'sequential')
            if not allowed:
                block_structure.override_xblock_field(block_key, self.EFFORT_TIME, None)
                block_structure.override_xblock_field(block_key, self.EFFORT_ACTIVITIES, None)

    @cached_property
    def _is_on_mobile(self):
        """Returns whether the current request is from our mobile app (always a bool; False with no request)."""
        request = crum.get_current_request()
        return bool(request and is_request_from_mobile_app(request))

    def _gather_child_values(self, block_structure, block_key, field, default=0):
        """Collects and sums all child values for field, using default for children that lack the field."""
        return sum(
            block_structure.get_xblock_field(child_key, field, default=default)
            for child_key in block_structure.get_children(block_key)
        )

    def _estimate_children_effort(self, _usage_info, block_structure, block_key):
        """Collects time and activity counts for children. Returns a (time or None, activities) tuple."""
        time = self._gather_child_values(block_structure, block_key, self.EFFORT_TIME)
        time = time or None  # avoid claiming anything takes 0 seconds by coercing to None (no estimate) instead

        # Use 1 as the default for activity - any block that we don't know for sure is 0, we should count
        activities = self._gather_child_values(block_structure, block_key, self.EFFORT_ACTIVITIES, default=1)

        return time, activities

    def _estimate_html_effort(self, _usage_info, block_structure, block_key):
        """Returns an average expected time to read the contained html, as a (seconds or None, 0) tuple."""
        cls = EffortEstimationTransformer
        word_count = block_structure.get_transformer_block_field(block_key, cls, self.HTML_WORD_COUNT)
        if not word_count:
            return None, 0

        time = word_count / self.DEFAULT_WPM * 60  # in seconds
        return time, 0

    def _estimate_vertical_effort(self, usage_info, block_structure, block_key):
        """A vertical is either an amount of time if we know it, or an activity"""
        time, activities = self._estimate_children_effort(usage_info, block_structure, block_key)

        # Verticals are the basic activity metric - we may have collected all unknown xblocks as activities in the call
        # above, but we reset that count to 1 here.
        return time, (1 if activities else 0)

    def _estimate_video_effort(self, _usage_info, block_structure, block_key):
        """
        Returns an expected time to view the video, at the user's preferred speed.

        The result is a (seconds or None, 0) tuple. None is returned when no duration was collected, or when the
        request comes from the mobile app and the video is marked only_on_web.
        """
        cls = EffortEstimationTransformer
        clip_duration = block_structure.get_transformer_block_field(block_key, cls, self.VIDEO_CLIP_DURATION)
        duration = block_structure.get_transformer_block_field(block_key, cls, self.VIDEO_DURATION)
        global_speed = block_structure.get_xblock_field(block_key, 'global_speed', default=1)
        only_on_web = block_structure.get_xblock_field(block_key, 'only_on_web', default=False)

        if self._is_on_mobile:
            if only_on_web:
                return None, 0
            clip_duration = None  # mobile can't do clips

        user_duration = clip_duration or duration
        if not user_duration:
            return None, 0

        # The default above only applies when the field is absent. Guard against a stored value that is None,
        # zero or negative, which would otherwise raise (or yield a nonsensical estimate) in the division below.
        if not global_speed or global_speed <= 0:
            global_speed = 1

        # We are intentionally only looking at global_speed, not speed (which is last speed user used on this video)
        # because this estimate is meant to be somewhat static.
        return user_duration / global_speed, 0
"""Tests for effort_estimation transformers."""
from datetime import timedelta
from mock import patch
from crum import set_current_request
from django.test.client import RequestFactory
from edxval.api import create_video, remove_video_for_course
from lms.djangoapps.experiments.testutils import override_experiment_waffle_flag
from openedx.core.djangoapps.content.block_structure.factory import BlockStructureFactory
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.tests.factories import SampleCourseFactory
from xmodule.modulestore.tests.sample_courses import BlockInfo
from ..block_transformers import EffortEstimationTransformer
from ..toggles import EFFORT_ESTIMATION_LOCATION_FLAG
# Field names are copied here as literals, rather than read from EffortEstimationTransformer, just to catch any
# accidental changes: if one of the class constants is renamed, the tests below will notice the mismatch.
DISABLE_ESTIMATION = 'disable_estimation'
EFFORT_ACTIVITIES = 'effort_activities'
EFFORT_TIME = 'effort_time'
HTML_WORD_COUNT = 'html_word_count'
VIDEO_CLIP_DURATION = 'video_clip_duration'
VIDEO_DURATION = 'video_duration'
class TestEffortEstimationTransformer(ModuleStoreTestCase):
    """Exercises the collect and transform phases of EffortEstimationTransformer."""

    def setUp(self):
        super().setUp()

        # Build the sample course bottom-up: six leaves inside one vertical, subsection and section.
        clip_fields = {
            'edx_video_id': 'edxval1',
            'start_time': timedelta(seconds=20),
            'end_time': timedelta(seconds=60),
        }
        leaves = [
            BlockInfo('Clip', 'video', clip_fields, []),
            BlockInfo('Video', 'video', {'edx_video_id': 'edxval2'}, []),
            BlockInfo('Web', 'video', {'edx_video_id': 'edxval3', 'only_on_web': True}, []),
            BlockInfo('HTML', 'html', {'data': 'Hello World'}, []),
            BlockInfo('Problem1', 'problem', {}, []),
            BlockInfo('Problem2', 'problem', {}, []),
        ]
        vertical = BlockInfo('Vertical', 'vertical', {}, leaves)
        subsection = BlockInfo('Subsection', 'sequential', {}, [vertical])
        section = BlockInfo('Section', 'chapter', {}, [subsection])

        self.course_key = SampleCourseFactory.create(block_info_tree=[section]).id
        self.course_usage_key = self.store.make_course_usage_key(self.course_key)
        self.block_structure = BlockStructureFactory.create_from_modulestore(self.course_usage_key, self.store)

        make_key = self.course_key.make_usage_key
        self.section_key = make_key('chapter', 'Section')
        self.subsection_key = make_key('sequential', 'Subsection')
        self.vertical_key = make_key('vertical', 'Vertical')
        self.video_clip_key = make_key('video', 'Clip')
        self.video_normal_key = make_key('video', 'Video')
        self.video_web_key = make_key('video', 'Web')
        self.html_key = make_key('html', 'HTML')

        # Set edxval data: (edx_video_id, client_video_id, duration in seconds)
        video_fixtures = (
            ('edxval1', 'Clip Video', 200),
            ('edxval2', 'Normal Video', 30),
            ('edxval3', 'Web Video', 50),
        )
        for video_id, client_id, seconds in video_fixtures:
            create_video({
                'edx_video_id': video_id,
                'status': 'uploaded',
                'client_video_id': client_id,
                'duration': seconds,
                'encoded_videos': [],
                'courses': [str(self.course_key)],
            })

    def collect(self):
        """Run the collection phase and pull in the xblock fields it requested."""
        EffortEstimationTransformer.collect(self.block_structure)
        self.block_structure._collect_requested_xblock_fields()  # pylint: disable=protected-access

    def transform(self):
        """Run the transform phase with no usage info."""
        transformer = EffortEstimationTransformer()
        transformer.transform(None, self.block_structure)

    def collect_and_transform(self):
        """Run both phases back to back."""
        self.collect()
        self.transform()

    def set_mobile_request(self):
        """Install a current request carrying the mobile app user agent; it is cleared again on cleanup."""
        mobile_request = RequestFactory().request()
        mobile_request.META['HTTP_USER_AGENT'] = 'edX/org.edx.mobile'
        self.addCleanup(set_current_request, None)
        set_current_request(mobile_request)

    def get_collection_field(self, key, name):
        """Read a transformer-private block field written during collection."""
        return self.block_structure.get_transformer_block_field(key, EffortEstimationTransformer, name)

    def get_estimate(self, key, name):
        """Read a public estimate field from the block structure."""
        return self.block_structure.get_xblock_field(key, name)

    def assert_no_estimates(self, *keys):
        """Assert that neither estimate field is set on any of the given blocks."""
        for key in keys:
            assert self.get_estimate(key, EFFORT_ACTIVITIES) is None
            assert self.get_estimate(key, EFFORT_TIME) is None

    def test_collection(self):
        self.collect()

        expected_values = [
            (self.video_clip_key, VIDEO_DURATION, 200),
            (self.video_clip_key, VIDEO_CLIP_DURATION, 40),
            (self.video_normal_key, VIDEO_DURATION, 30),
            (self.video_web_key, VIDEO_DURATION, 50),
            (self.html_key, HTML_WORD_COUNT, 2),
        ]
        for key, field_name, value in expected_values:
            assert self.get_collection_field(key, field_name) == value

        # Videos without start/end times never get a clip duration
        assert self.get_collection_field(self.video_normal_key, VIDEO_CLIP_DURATION) is None
        assert self.get_collection_field(self.video_web_key, VIDEO_CLIP_DURATION) is None

        assert self.block_structure.get_transformer_data(EffortEstimationTransformer, DISABLE_ESTIMATION) is None

    @override_experiment_waffle_flag(EFFORT_ESTIMATION_LOCATION_FLAG, bucket=1)
    def test_incomplete_data_collection(self):
        """Ensure that missing video data prevents any estimates from being generated"""
        remove_video_for_course(str(self.course_key), 'edxval3')
        self.collect_and_transform()

        assert self.block_structure.get_transformer_data(EffortEstimationTransformer, DISABLE_ESTIMATION) is True
        self.assert_no_estimates(self.section_key, self.subsection_key)

    @override_experiment_waffle_flag(EFFORT_ESTIMATION_LOCATION_FLAG, bucket=0)
    def test_control_bucket(self):
        self.collect_and_transform()
        self.assert_no_estimates(self.section_key, self.subsection_key)

    @override_experiment_waffle_flag(EFFORT_ESTIMATION_LOCATION_FLAG, bucket=1)
    def test_section_bucket(self):
        self.collect_and_transform()
        assert self.get_estimate(self.section_key, EFFORT_ACTIVITIES) == 1
        assert self.get_estimate(self.section_key, EFFORT_TIME) == 121
        self.assert_no_estimates(self.subsection_key)

    @override_experiment_waffle_flag(EFFORT_ESTIMATION_LOCATION_FLAG, bucket=2)
    def test_subsection_bucket(self):
        self.collect_and_transform()
        self.assert_no_estimates(self.section_key)
        assert self.get_estimate(self.subsection_key, EFFORT_ACTIVITIES) == 1
        assert self.get_estimate(self.subsection_key, EFFORT_TIME) == 121

    def test_no_collection_no_bucket(self):
        """
        Test that if we don't have any collection data, we don't bucket at all.

        Useful to make sure that during rollout before we re-publish a course, we aren't trying to bucket anyone.
        """
        where = 'openedx.features.effort_estimation.toggles.EFFORT_ESTIMATION_LOCATION_FLAG.get_bucket'
        with patch(where, return_value=1) as mock_get_bucket:
            self.transform()  # no collection

        self.assert_no_estimates(self.section_key)
        assert not mock_get_bucket.called

    @override_experiment_waffle_flag(EFFORT_ESTIMATION_LOCATION_FLAG, bucket=1)
    def test_mobile_video_support(self):
        """Clips values are ignored and web only videos should be excluded"""
        self.set_mobile_request()
        self.collect_and_transform()
        assert self.get_estimate(self.section_key, EFFORT_TIME) == 231
"""
Feature/experiment toggles used for effort estimation.
"""
from edx_toggles.toggles import LegacyWaffleFlagNamespace
from lms.djangoapps.experiments.flags import ExperimentWaffleFlag
# Waffle namespace that the flag below is registered under.
WAFFLE_FLAG_NAMESPACE = LegacyWaffleFlagNamespace(name='effort_estimation')

# Temporary experiment flag, used while we test which location works best for showing estimates:
# - Bucket 0: off
# - Bucket 1: section (chapter) estimations
# - Bucket 2: subsection (sequential) estimations
EFFORT_ESTIMATION_LOCATION_FLAG = ExperimentWaffleFlag(
    WAFFLE_FLAG_NAMESPACE,
    'location',
    __name__,
    num_buckets=3,
    use_course_aware_bucketing=False,
)
......@@ -6,7 +6,7 @@ from setuptools import setup
setup(
name="Open edX",
version="0.12",
version='0.13',
install_requires=["setuptools"],
requires=[],
# NOTE: These are not the names we should be installing. This tree should
......@@ -69,6 +69,7 @@ setup(
"content_type_gate = openedx.features.content_type_gating.block_transformers:ContentTypeGateTransformer",
"access_denied_message_filter = lms.djangoapps.course_blocks.transformers.access_denied_filter:AccessDeniedMessageFilterTransformer", # lint-amnesty, pylint: disable=line-too-long
"open_assessment_transformer = lms.djangoapps.courseware.transformers:OpenAssessmentDateTransformer",
'effort_estimation = openedx.features.effort_estimation.api:EffortEstimationTransformer',
],
"openedx.ace.policy": [
"bulk_email_optout = lms.djangoapps.bulk_email.policies:CourseEmailOptout"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment