Skip to content
Snippets Groups Projects
Commit 669677c7 authored by David Ormsbee's avatar David Ormsbee
Browse files

Push Course Outlines to learning_sequences on publish.

The learning_sequences app has its own model for Course Outlines.
Prior to this commit, these course outlines were only populated by
a management command in the learning_sequences app that queried
modulestore. This commit does a few things:

1. Move the update_course_outline command to live in contentstore
   (i.e. Studio). This makes learning_sequences unaware of
   modulestore, and makes it easier for us to extract it from
   edx-platform (or to plug in different kinds of course outlines).
2. Add tests.
3. Add performance and debug logging to course outline creation.
4. Make course outline creation happen every time a course publish
   happens.

This will allow us to start collecting data about how long building
course outlines takes, and get error reporting around any content
edge cases that break the course outline code.
parent 5170fa5a
No related branches found
Tags release-2021-01-28-09.00
No related merge requests found
Showing
with 551 additions and 98 deletions
from django.core.management.base import BaseCommand, CommandError
"""
Management command to create the course outline for a course. This is done
automatically when Studio publishes a course, but this command can be used to
do it manually for debugging, error recovery, or backfilling purposes.
Should be invoked from the Studio process.
"""
from django.core.management.base import BaseCommand
from opaque_keys.edx.keys import CourseKey
from ...tasks import update_from_modulestore
from ...tasks import update_outline_from_modulestore
class Command(BaseCommand):
"""
Invoke with:
python manage.py cms update_course_outline <course_key>
"""
help = "Updates a single course outline based on modulestore content."
def add_arguments(self, parser):
......@@ -13,4 +25,4 @@ class Command(BaseCommand):
def handle(self, *args, **options):
course_key = CourseKey.from_string(options['course_key'])
update_from_modulestore(course_key)
update_outline_from_modulestore(course_key)
"""
This is where Studio interacts with the learning_sequences application, which
is responsible for holding course outline data. Studio _pushes_ that data into
learning_sequences at publish time.
"""
from datetime import timezone
from edx_django_utils.monitoring import function_trace, set_custom_attribute
from openedx.core.djangoapps.content.learning_sequences.api import replace_course_outline
from openedx.core.djangoapps.content.learning_sequences.data import (
CourseLearningSequenceData,
CourseOutlineData,
CourseSectionData,
CourseVisibility,
ExamData,
VisibilityData,
)
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore
class CourseStructureError(Exception):
"""
Raise this if we can't create an outline because of the course structure.
Courses built in Studio conform to a hierarchy that looks like:
Course -> Section (a.k.a. Chapter) -> Subsection (a.k.a. Sequence)
OLX imports are much more freeform and can generate unusual structures that
we won't know how to handle.
"""
def _check_sequence_fields(sequence):
"""
Raise CourseStructureError if `sequence` is missing a required field.
Do this instead of checking against specific block types to better future
proof ourselves against new sequence-types, aliases, changes in the way
dynamic mixing of XBlock types happens, as well as deprecation/removal of
the specific fields we care about. If it quacks like a duck...
"""
expected_fields = [
'display_name',
'hide_after_due',
'hide_from_toc',
'is_practice_exam',
'is_proctored_enabled',
'is_time_limited',
'visible_to_staff_only',
]
for field in expected_fields:
if not hasattr(sequence, field):
msg = (
f"Cannot create CourseOutline: Expected a Sequence at "
f"{sequence.location} (child of {sequence.parent}), "
f"but this object does not have sequence field {field}."
)
raise CourseStructureError(msg)
def _check_section_fields(section):
"""
Raise CourseStructureError if `section` is missing a required field.
Do this instead of checking against specific block types to better future
proof ourselves against new sequence-types, aliases, changes in the way
dynamic mixing of XBlock types happens, as well as deprecation/removal of
the specific fields we care about. If it quacks like a duck...
"""
expected_fields = [
'children',
'hide_from_toc',
'visible_to_staff_only',
]
for field in expected_fields:
if not hasattr(section, field):
msg = (
f"Cannot create CourseOutline: Expected a Section at "
f"{section.location} (child of {section.parent}), "
f"but this object does not have Section field {field}."
)
raise CourseStructureError(msg)
def _remove_version_info(usage_key):
"""
When we ask modulestore for the published branch in the Studio process
after catching a publish signal, the items that have been changed will
return UsageKeys that have full version information in their attached
CourseKeys. This makes them hash and serialize differently. We want to
strip this information and have everything use a CourseKey with no
version information attached.
The fact that this versioned CourseKey appears is likely an unintended
side-effect, rather than an intentional part of the API contract. It
also likely doesn't happen when the modulestore is being processed from
a different process than the one doing the writing (e.g. a celery task
running on any environment other than devstack). But stripping this
version information out is necessary to make devstack and tests work
properly.
"""
unversioned_course_key = usage_key.course_key.replace(branch=None, version_guid=None)
return usage_key.map_into_course(unversioned_course_key)
def _make_section_data(section):
"""
Generate a CourseSectionData from a SectionDescriptor.
This method does a lot of the work to convert modulestore fields to an input
that the learning_sequences app expects. It doesn't check for specific
classes (i.e. you could create your own Sequence-like XBlock), but it will
raise a CourseStructureError if anything you pass in is missing fields that
we expect in a SectionDescriptor or its SequenceDescriptor children.
"""
_check_section_fields(section)
sequences_data = []
for sequence in section.get_children():
_check_sequence_fields(sequence)
sequences_data.append(
CourseLearningSequenceData(
usage_key=_remove_version_info(sequence.location),
title=sequence.display_name,
inaccessible_after_due=sequence.hide_after_due,
exam=ExamData(
is_practice_exam=sequence.is_practice_exam,
is_proctored_enabled=sequence.is_proctored_enabled,
is_time_limited=sequence.is_time_limited,
),
visibility=VisibilityData(
hide_from_toc=sequence.hide_from_toc,
visible_to_staff_only=sequence.visible_to_staff_only,
),
)
)
section_data = CourseSectionData(
usage_key=_remove_version_info(section.location),
title=section.display_name,
sequences=sequences_data,
visibility=VisibilityData(
hide_from_toc=section.hide_from_toc,
visible_to_staff_only=section.visible_to_staff_only,
),
)
return section_data
@function_trace('get_outline_from_modulestore')
def get_outline_from_modulestore(course_key):
"""
Get a learning_sequence.data.CourseOutlineData for a param:course_key
"""
store = modulestore()
with store.branch_setting(ModuleStoreEnum.Branch.published_only, course_key):
course = store.get_course(course_key, depth=2)
sections_data = []
for section in course.get_children():
section_data = _make_section_data(section)
sections_data.append(section_data)
course_outline_data = CourseOutlineData(
course_key=course_key,
title=course.display_name,
# subtree_edited_on has a tzinfo of bson.tz_util.FixedOffset (which
# maps to UTC), but for consistency, we're going to use the standard
# python timezone.utc (which is what the learning_sequence app will
# return from MySQL). They will compare as equal.
published_at=course.subtree_edited_on.replace(tzinfo=timezone.utc),
# .course_version is a BSON obj, so we convert to str (MongoDB-
# specific objects don't go into CourseOutlineData).
published_version=str(course.course_version),
entrance_exam_id=course.entrance_exam_id,
days_early_for_beta=course.days_early_for_beta,
sections=sections_data,
self_paced=course.self_paced,
course_visibility=CourseVisibility(course.course_visibility),
)
return course_outline_data
def update_outline_from_modulestore(course_key):
"""
Update the CourseOutlineData for course_key in the learning_sequences with
ModuleStore data (i.e. what was most recently published in Studio).
"""
# Set the course_id attribute first so that if getting the information
# from the modulestore errors out, we still have the course_key reported in
# New Relic for easier trace debugging.
set_custom_attribute('course_id', str(course_key))
course_outline_data = get_outline_from_modulestore(course_key)
set_custom_attribute('num_sequences', len(course_outline_data.sequences))
replace_course_outline(course_outline_data)
......@@ -64,13 +64,14 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=
# to perform any 'on_publish' workflow
on_course_publish(course_key)
# import here, because signal is registered at startup, but items in tasks are not yet able to be loaded
from cms.djangoapps.contentstore.tasks import update_outline_from_modulestore_task, update_search_index
update_outline_from_modulestore_task.delay(str(course_key))
# Finally call into the course search subsystem
# to kick off an indexing action
if CoursewareSearchIndexer.indexing_is_enabled() and CourseAboutSearchIndexer.indexing_is_enabled():
# import here, because signal is registered at startup, but items in tasks are not yet able to be loaded
from cms.djangoapps.contentstore.tasks import update_search_index
update_search_index.delay(six.text_type(course_key), datetime.now(UTC).isoformat())
update_search_index.delay(str(course_key), datetime.now(UTC).isoformat())
@receiver(SignalHandler.library_updated)
......
......@@ -53,6 +53,7 @@ from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
from xmodule.modulestore.xml_importer import import_course_from_xml, import_library_from_xml
from .outlines import update_outline_from_modulestore
User = get_user_model()
......@@ -560,3 +561,16 @@ def import_olx(self, user_id, course_key_string, archive_path, archive_name, lan
from .views.entrance_exam import add_entrance_exam_milestone
add_entrance_exam_milestone(course.id, entrance_exam_chapter)
LOGGER.info(u'Course %s Entrance exam imported', course.id)
@task(name='cms.djangoapps.contentstore.tasks.update_outline_from_modulestore_task')
def update_outline_from_modulestore_task(course_key_str):
"""
Celery task that creates a learning_sequence course outline.
"""
try:
course_key = CourseKey.from_string(course_key_str)
update_outline_from_modulestore(course_key)
except Exception: # pylint disable=broad-except
LOGGER.exception("Could not create course outline for course %s", course_key_str)
raise # Re-raise so that errors are noted in reporting.
"""
Test the interaction with the learning_sequences app, where course outlines are
stored.
"""
from datetime import datetime, timezone
from opaque_keys.edx.keys import CourseKey
from openedx.core.djangoapps.content.learning_sequences.data import (
CourseOutlineData,
ExamData,
VisibilityData,
)
from openedx.core.djangoapps.content.learning_sequences.api import get_course_outline
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
from ..outlines import CourseStructureError, get_outline_from_modulestore
class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
"""
These tests all set up some sort of course content data in the modulestore
and extract that data using get_course_outline() to make sure that it
creates the CourseOutlineData that we expect.
The learning_sequences app has its own tests to test different scenarios for
creating the outline. This set of tests only cares about making sure the
data comes out of the Modulestore in the way we expect.
Comparisons are done on individual attributes rather than making a complete
CourseOutline object for comparison, so that more data fields can be added
later without breaking tests.
"""
ENABLED_SIGNALS = []
ENABLED_CACHES = []
def setUp(self):
super().setUp()
self.course_key = CourseKey.from_string("course-v1:TNL+7733+OutlineFromModuleStoreTestCase")
# This CourseFactory will be a reference to data in the *draft* branch.
# Creating this does "auto-publish" – all container types changes do,
# and everything we care about for outlines is a container (section,
# sequence, unit). But publish version/time metadata will not match the
# published branch.
self.draft_course = CourseFactory.create(
org=self.course_key.org,
course=self.course_key.course,
run=self.course_key.run,
default_store=ModuleStoreEnum.Type.split,
display_name="OutlineFromModuleStoreTestCase Course",
)
def test_empty_course_metadata(self):
"""Courses start empty, and could have a section with no sequences."""
# The learning_sequences app only uses the published branch, which will
# have slightly different metadata for version and published_at (because
# it's created a tiny fraction of a second later). Explicitly pull from
# published branch to make sure we have the right data.
with self.store.branch_setting(ModuleStoreEnum.Branch.published_only, self.course_key):
published_course = self.store.get_course(self.course_key, depth=2)
outline = get_outline_from_modulestore(self.course_key)
# Check basic metdata...
assert outline.title == "OutlineFromModuleStoreTestCase Course"
# published_at
assert isinstance(outline.published_at, datetime)
assert outline.published_at == published_course.subtree_edited_on
assert outline.published_at.tzinfo == timezone.utc
# published_version
assert isinstance(outline.published_version, str)
assert outline.published_version == str(published_course.course_version) # str, not BSON
# Misc.
assert outline.entrance_exam_id == published_course.entrance_exam_id
assert outline.days_early_for_beta == published_course.days_early_for_beta
assert outline.self_paced == published_course.self_paced
# Outline stores an enum for course_visibility, while Modulestore uses strs...
assert outline.course_visibility.value == published_course.course_visibility
# Check that the contents are empty.
assert len(outline.sections) == 0
assert len(outline.sequences) == 0
def test_normal_sequence(self):
ms_seq = self._create_seq_in_new_section(display_name="Normal Sequence")
outline_seq, usage_key = self._outline_seq_data(ms_seq)
assert outline_seq.usage_key == usage_key
assert outline_seq.title == "Normal Sequence"
assert outline_seq.visibility == VisibilityData()
assert outline_seq.exam == ExamData()
assert outline_seq.inaccessible_after_due is False
def test_hidden_after_due_sequence(self):
ms_seq = self._create_seq_in_new_section(hide_after_due=True)
outline_seq, _usage_key = self._outline_seq_data(ms_seq)
assert outline_seq.inaccessible_after_due is True
def test_staff_only_seq(self):
ms_seq = self._create_seq_in_new_section(visible_to_staff_only=True)
outline_seq, _usage_key = self._outline_seq_data(ms_seq)
assert outline_seq.visibility == VisibilityData(visible_to_staff_only=True)
def test_hidden_from_toc_seq(self):
ms_seq = self._create_seq_in_new_section(hide_from_toc=True)
outline_seq, _usage_key = self._outline_seq_data(ms_seq)
assert outline_seq.visibility == VisibilityData(hide_from_toc=True)
def test_practice_exam_seq(self):
ms_seq = self._create_seq_in_new_section(
is_time_limited=True,
is_practice_exam=True,
is_proctored_enabled=True,
)
outline_seq, _usage_key = self._outline_seq_data(ms_seq)
assert outline_seq.exam == ExamData(
is_time_limited=True,
is_practice_exam=True,
is_proctored_enabled=True,
)
def test_proctored_exam_seq(self):
ms_seq = self._create_seq_in_new_section(
is_time_limited=True,
is_proctored_enabled=True,
)
outline_seq, _usage_key = self._outline_seq_data(ms_seq)
assert outline_seq.exam == ExamData(
is_time_limited=True,
is_proctored_enabled=True,
)
def test_multiple_sections(self):
"""Make sure sequences go into the right places."""
with self.store.bulk_operations(self.course_key):
section_1 = ItemFactory.create(
parent_location=self.draft_course.location,
category='chapter',
display_name="Section 1 - Three Sequences",
)
ItemFactory.create(
parent_location=self.draft_course.location,
category='chapter',
display_name="Section 2 - Empty",
)
for i in range(3):
ItemFactory.create(
parent_location=section_1.location,
category='sequential',
display_name=f"Seq_1_{i}",
)
outline = get_outline_from_modulestore(self.course_key)
assert len(outline.sections) == 2
assert len(outline.sections[0].sequences) == 3
assert outline.sections[0].sequences[0].title == "Seq_1_0"
assert outline.sections[0].sequences[1].title == "Seq_1_1"
assert outline.sections[0].sequences[2].title == "Seq_1_2"
assert len(outline.sections[1].sequences) == 0
def test_unit_in_section(self):
"""
Test when the structure is Course -> Section -> Unit.
Studio disallows this, but it's possible to craft in OLX. This type of
structure is unsupported. We should fail with a CourseStructureError, as
that will emit useful debug information.
"""
# Course -> Section -> Unit (No Sequence)
with self.store.bulk_operations(self.course_key):
section = ItemFactory.create(
parent_location=self.draft_course.location,
category='chapter',
display_name="Section",
)
ItemFactory.create(
parent_location=section.location,
category='vertical',
display_name="Unit"
)
with self.assertRaises(CourseStructureError):
get_outline_from_modulestore(self.course_key)
def test_sequence_without_section(self):
"""
Test when the structure is Course -> Sequence -> Unit.
Studio disallows this, but it's possible to craft in OLX. This type of
structure is unsupported. We should fail with a CourseStructureError, as
that will emit useful debug information.
"""
# Course -> Sequence (No Section)
with self.store.bulk_operations(self.course_key):
seq = ItemFactory.create(
parent_location=self.draft_course.location,
category='sequential',
display_name="Sequence",
)
ItemFactory.create(
parent_location=seq.location,
category='vertical',
display_name="Unit",
)
with self.assertRaises(CourseStructureError):
get_outline_from_modulestore(self.course_key)
def _outline_seq_data(self, modulestore_seq):
"""
(CourseLearningSequenceData, UsageKey) for a Modulestore sequence.
When we return the UsageKey part of the tuple, we'll strip out any
CourseKey branch information that might be present (the most recently
published set of blocks will have version information when they're
published, but learning_sequences ignores all of that).
"""
outline = get_outline_from_modulestore(self.course_key)
# Recently modified content can have full version information on their
# CourseKeys. We need to strip that out and have versionless-CourseKeys
# or they won't be found properly.
versionless_usage_key = modulestore_seq.location.map_into_course(self.course_key)
outline_seq_data = outline.sequences[versionless_usage_key]
return outline_seq_data, versionless_usage_key
def _create_seq_in_new_section(self, **kwargs):
"""
Helper that creates a sequence in a new section and returns it.
Just reduces the boilerplate of "make me a sequence with the following
params in a new section/chapter so I can do asserts on how it translated
over."
"""
with self.store.bulk_operations(self.course_key):
section = ItemFactory.create(
parent_location=self.draft_course.location,
category='chapter',
display_name="Generated Section",
)
sequence = ItemFactory.create(
parent_location=section.location,
category='sequential',
**kwargs,
)
return sequence
class OutlineFromModuleStoreTaskTestCase(ModuleStoreTestCase):
"""
Test to make sure that the outline is created after course publishing. (i.e.
that it correctly receives the course_published signal).
"""
ENABLED_SIGNALS = ['course_published']
def test_task_invocation(self):
"""Test outline auto-creation after course publish"""
course_key = CourseKey.from_string("course-v1:TNL+7733+2021-01-21")
with self.assertRaises(CourseOutlineData.DoesNotExist):
get_course_outline(course_key)
course = CourseFactory.create(
org=course_key.org,
course=course_key.course,
run=course_key.run,
default_store=ModuleStoreEnum.Type.split,
)
section = ItemFactory.create(
parent_location=course.location,
category="chapter",
display_name="First Section"
)
ItemFactory.create(
parent_location=section.location,
category="sequential",
display_name="First Sequence"
)
ItemFactory.create(
parent_location=section.location,
category="sequential",
display_name="Second Sequence"
)
self.store.publish(course.location, self.user.id)
outline = get_course_outline(course_key)
assert len(outline.sections) == 1
assert len(outline.sequences) == 2
......@@ -9,6 +9,8 @@ users through the LMS, though it is also available to Studio for pushing data
into the system. The first API this app implements is computing the Course
Outline.
This package should _not_ depend on the modulestore directly.
---------------
Direction: Keep
---------------
......
......@@ -265,6 +265,7 @@ def _get_user_course_outline_and_processors(course_key: CourseKey,
return user_course_outline, processors
@function_trace('replace_course_outline')
def replace_course_outline(course_outline: CourseOutlineData):
"""
Replace the model data stored for the Course Outline with the contents of
......
......@@ -59,13 +59,13 @@ class VisibilityData:
# lets you define a Sequence that is reachable by direct URL but not shown
# in Course navigation. It was used for things like supplementary tutorials
# that were not considered a part of the normal course path.
hide_from_toc = attr.ib(type=bool)
hide_from_toc = attr.ib(type=bool, default=False)
# Restrict visibility to course staff, regardless of start date. This is
# often used to hide content that either still being built out, or is a
# scratch space of content that will eventually be copied over to other
# sequences.
visible_to_staff_only = attr.ib(type=bool)
visible_to_staff_only = attr.ib(type=bool, default=False)
@attr.s(frozen=True)
......@@ -93,10 +93,9 @@ class CourseLearningSequenceData:
"""
usage_key = attr.ib(type=UsageKey)
title = attr.ib(type=str)
visibility = attr.ib(type=VisibilityData)
visibility = attr.ib(type=VisibilityData, default=VisibilityData())
exam = attr.ib(type=ExamData, default=ExamData())
inaccessible_after_due = attr.ib(type=bool, default=True)
inaccessible_after_due = attr.ib(type=bool, default=False)
@attr.s(frozen=True)
......
"""
This module is here as a placeholder, but knowledge of the modulestore should
eventually be moved out of the learning_sequence app entirely.
Also note that right now we're not hooked into the publish flow. This task code
is only invoked by the "update_course_outline" management command.
"""
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore
from .api import replace_course_outline
from .data import (
CourseOutlineData,
CourseSectionData,
CourseLearningSequenceData,
ExamData,
VisibilityData,
CourseVisibility
)
def update_from_modulestore(course_key):
"""
Update the CourseOutlineData for course_key with ModuleStore data (i.e. what
was most recently published in Studio).
We should move this out so that learning_sequences does not depend on
ModuleStore.
"""
course_outline_data = get_outline_from_modulestore(course_key)
replace_course_outline(course_outline_data)
def get_outline_from_modulestore(course_key):
"""
Get CourseOutlineData corresponding to param:course_key
"""
def _make_section_data(section):
sequences_data = []
for sequence in section.get_children():
sequences_data.append(
CourseLearningSequenceData(
usage_key=sequence.location,
title=sequence.display_name,
inaccessible_after_due=sequence.hide_after_due,
exam=ExamData(
is_practice_exam=sequence.is_practice_exam,
is_proctored_enabled=sequence.is_proctored_enabled,
is_time_limited=sequence.is_timed_exam
),
visibility=VisibilityData(
hide_from_toc=sequence.hide_from_toc,
visible_to_staff_only=sequence.visible_to_staff_only
),
)
)
section_data = CourseSectionData(
usage_key=section.location,
title=section.display_name,
sequences=sequences_data,
visibility=VisibilityData(
hide_from_toc=section.hide_from_toc,
visible_to_staff_only=section.visible_to_staff_only
),
)
return section_data
store = modulestore()
sections = []
with store.branch_setting(ModuleStoreEnum.Branch.published_only, course_key):
course = store.get_course(course_key, depth=2)
sections_data = []
for section in course.get_children():
section_data = _make_section_data(section)
sections_data.append(section_data)
course_outline_data = CourseOutlineData(
course_key=course_key,
title=course.display_name,
published_at=course.subtree_edited_on,
published_version=str(course.course_version), # .course_version is a BSON obj
entrance_exam_id=course.entrance_exam_id,
days_early_for_beta=course.days_early_for_beta,
sections=sections_data,
self_paced=course.self_paced,
course_visibility=CourseVisibility(course.course_visibility),
)
return course_outline_data
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment