Skip to content
Snippets Groups Projects
Commit 71ef28d3 authored by David Ormsbee
Browse files

Add simulate_publish management command

Prior to this commit, any apps that update their data when courses are
published had to create their own management commands for bootstrapping
or error recovery (e.g. generate_course_overviews,
generate_course_blocks). This is a management command to allow us to
generically simulate a course_publish signal so that any async tasks
that trigger actions off of that can do so without having to each write
their own management commands.

It has a few options to make it more ops friendly:

* Can specify a set of courses, but defaults to all courses in the
  modulestore.
* Can specify a set of listeners, so we can bootstrap a new app without
  rebuilding everything.
* Can specify a delay between emitting signals so that we don't flood
  the queues and block author-initiated publishes from going through in
  a timely manner.
* Dry-run mode for a simple preview of what the script will attempt.
parent 04d3e9b9
No related branches found
No related tags found
No related merge requests found
"""
Many apps in the LMS maintain their own optimized data structures that they
update whenever a course publish is detected. To do this, they listen for the
SignalHandler.course_published signal. Sometimes we want to rebuild the data on
these apps regardless of an actual change in course content, either to recover
from a bug or to bootstrap a new app we're rolling out for the first time. To
date, each app has implemented its own management command for this kind of
bootstrapping work (e.g. generate_course_overviews, generate_course_blocks).
This management command will emit the SignalHandler.course_published signal for
some subset of courses and signal listeners, and then rely on existing listener
behavior to trigger the necessary data updates.
"""
from __future__ import print_function
import copy
import logging
import os
import textwrap
import time
import sys
from django.core.management.base import BaseCommand
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from lms.djangoapps.ccx.tasks import course_published_handler as ccx_receiver_fn
from xmodule.modulestore.django import modulestore, SignalHandler
# Module-level logger for this command. It is registered under the fixed name
# 'simulate_publish' (not __name__), so that is the logger name to configure.
log = logging.getLogger('simulate_publish')
class Command(BaseCommand):
    """
    Emit the course_published signal for a set of courses and receivers.

    These examples invoke the LMS. Because handle() exits when the
    SERVICE_VARIANT environment variable starts with "lms", they may also need
    --force-lms (or to be run via cms), depending on how SERVICE_VARIANT is set
    in your environment.

    Example usage:

        # Send the course_published signal to all receivers and courses with 10
        # seconds between courses. We might use a delay like this to make sure we
        # don't flood the queue and unnecessarily delay normal publishing via
        # Studio.
        $ ./manage.py lms --settings=devstack_docker simulate_publish --delay 10

        # Find all available receivers
        $ ./manage.py lms --settings=devstack_docker simulate_publish --show-receivers

        # Send the publish signal to two courses and two receivers
        $ ./manage.py lms --settings=devstack_docker simulate_publish --receivers \
        openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish \
        openedx.core.djangoapps.bookmarks.signals.trigger_update_xblocks_cache_task \
        --courses course-v1:edX+DemoX+Demo_Course edX/MODULESTORE_100/2018

    A Dry Run will produce output that looks like:

        DRY-RUN: This command would have sent course_published to...
        1 Receivers:
            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
        27 Courses:
            course-v1:DEV_153+A2E_CHINESE+JAN2018
            course-v1:edX+100+MITPhysics
            course-v1:edX+DemoX+Demo_Course
            course-v1:edX+E2E-101+course
            course-v1:edX+MEMORY+2018
            course-v1:edX+MK101+2018
            edX/MODULESTORE_100/2018_1
            edX/MODULESTORE_100/2018_2
            edX/MODULESTORE_100/2018_3
            edX/MODULESTORE_100/2018_4
            (+ 17 more)
    """
    help = (
        u"Simulate course publish signals without actually modifying course "
        u"content. This command is useful for triggering various async tasks "
        u"that listen for course_published signals."
    )

    # Having this be a class attribute makes it easier to substitute during
    # tests, and thereby avoid global side-effects that will mysteriously fail
    # tests that need signal handling later on.
    course_published_signal = copy.copy(SignalHandler.course_published)

    def add_arguments(self, parser):
        """Register the command line flags understood by this command."""
        parser.add_argument(
            '--show-receivers',
            dest='show_receivers',
            action='store_true',
            help=(u'Display the list of possible receiver functions and exit.')
        )
        parser.add_argument(
            '--dry-run',
            dest='dry_run',
            action='store_true',
            help=(
                u"Just show a preview of what would happen. This may make an "
                u"expensive modulestore query to find courses, but it will "
                u"not emit any signals."
            )
        )
        parser.add_argument(
            '--receivers',
            dest='receivers',
            action='store',
            nargs='+',
            help=(
                u'Send course_published to specific receivers. If this flag is '
                u'not present, course_published will be sent to all receivers. '
                u'The CCX receiver is always included unless --skip-ccx is '
                u'explicitly passed (otherwise CCX courses would never get '
                u'called for any signal).'
            )
        )
        parser.add_argument(
            '--courses',
            dest='courses',
            action='store',
            nargs='+',
            help=(
                u'Send course_published for specific courses. If this flag is '
                u'not present, course_published will be sent to all courses.'
            )
        )
        parser.add_argument(
            '--delay',
            dest='delay',
            action='store',
            type=int,
            default=0,
            help=(
                u"Number of seconds to sleep between emitting course_published "
                u"signals, so that we don't flood our queues."
            )
        )
        parser.add_argument(
            '--force-lms',
            dest='force_lms',
            action='store_true',
            help=(
                u"This command should be run under cms (Studio), not LMS. "
                u"Regular publishes happen via Studio, and this script will "
                u"exit with an error if you attempt to run it in an LMS "
                u"process. However, if you know what you're doing and need to "
                u"override that behavior, use this flag."
            )
        )
        parser.add_argument(
            '--skip-ccx',
            dest='skip_ccx',
            action='store_true',
            help=(
                u"CCX receivers are special echoing receivers that relay "
                u"the course_published signal to all CCX courses derived from "
                u"a modulestore-stored course. That means we almost always "
                u"want to emit to them (even when using --receivers), or none "
                u"of our signals will reach any CCX derived courses. However, "
                u"if you know what you're doing, you can disable this behavior "
                u"with this flag, so that CCX receivers are omitted."
            )
        )

    def handle(self, *args, **options):
        """
        Run the command: list receivers, preview, or actually emit signals.

        `options` must contain the keys registered in add_arguments():
        show_receivers, dry_run, receivers, courses, delay, force_lms and
        skip_ccx.

        Exits the process with status 1 when run in an LMS process without
        --force-lms, or when a receiver name / course key is invalid (see
        modify_receivers() and get_course_keys()).
        """
        if options['show_receivers']:
            return self.print_show_receivers()

        log.info(
            "simulate_publish starting, dry-run=%s, delay=%d seconds",
            options['dry_run'],
            options['delay']
        )

        # An unset SERVICE_VARIANT is treated as "cms", i.e. allowed to run.
        if os.environ.get('SERVICE_VARIANT', 'cms').startswith('lms'):
            if options['force_lms']:
                log.info("Forcing simulate_publish to run in LMS process.")
            else:
                log.fatal(
                    "simulate_publish should be run as a CMS (Studio) " +
                    "command, not %s (override with --force-lms).",
                    os.environ.get('SERVICE_VARIANT')
                )
                sys.exit(1)

        if options['receivers']:
            self.modify_receivers(options['receivers'], options['skip_ccx'])
        elif options['skip_ccx']:
            log.info("Disconnecting CCX handler (--skip-ccx is True)")
            self.course_published_signal.disconnect(ccx_receiver_fn)

        course_keys = self.get_course_keys(options['courses'])

        if options['dry_run']:
            return self.print_dry_run(course_keys)

        # Now that our signal receivers and courses are set up properly, do the
        # actual work of emitting signals.
        delay = options['delay']
        total = len(course_keys)
        for i, course_key in enumerate(course_keys, start=1):
            # The delay is documented as the pause *between* signals, so there
            # is nothing to wait for before the very first one.
            if delay and i > 1:
                time.sleep(delay)
            log.info(
                "Emitting course_published signal (%d of %d) for course %s",
                i, total, course_key
            )
            responses = self.course_published_signal.send_robust(sender=self, course_key=course_key)
            # send_robust() captures receiver exceptions and hands them back as
            # the response instead of raising; surface them so that failures
            # are not silently lost.
            for receiver, response in responses or []:
                if isinstance(response, Exception):
                    log.error(
                        "Receiver %r raised an error for course %s: %r",
                        receiver, course_key, response
                    )

    def modify_receivers(self, receiver_names, skip_ccx):
        """
        Restrict our signal to only the user-specified receivers.

        Every receiver of `self.course_published_signal` whose name is not in
        `receiver_names` is disconnected. The CCX receiver is kept regardless,
        unless `skip_ccx` is true.

        NOTE(review): course_published_signal is a shallow copy.copy() of
        SignalHandler.course_published. Whether disconnecting here also affects
        the process global signal depends on whether the copy shares its
        receiver list with the original -- confirm before relying on isolation.

        If any of the receiver_names is not found (i.e. is not in the list of
        receivers printed in self.print_show_receivers), it is a fatal error and
        we will exit the process.
        """
        all_receiver_names = get_receiver_names()
        receiver_names_set = set(receiver_names)

        unknown_receiver_names = receiver_names_set - all_receiver_names
        if unknown_receiver_names:
            log.fatal(
                "The following receivers were specified but not recognized: %s",
                ", ".join(sorted(unknown_receiver_names))
            )
            log.fatal("Known receivers: %s", ", ".join(sorted(all_receiver_names)))
            sys.exit(1)

        log.info("%d receivers specified: %s", len(receiver_names), ", ".join(receiver_names))
        for receiver_fn in get_receiver_fns():
            # The CCX receiver stays connected unless explicitly skipped.
            if receiver_fn == ccx_receiver_fn and not skip_ccx:
                continue
            fn_name = name_from_fn(receiver_fn)
            if fn_name not in receiver_names_set:
                log.info("Disconnecting %s", fn_name)
                self.course_published_signal.disconnect(receiver_fn)

    def get_course_keys(self, courses):
        """
        Return a list of CourseKeys that we will emit signals to.

        `courses` is an optional list of strings that can be parsed into
        CourseKeys. If `courses` is empty or None, we will default to returning
        all courses in the modulestore (which can be very expensive). If one of
        the strings passed in the list for `courses` does not parse correctly,
        it is a fatal error and will cause us to exit the entire process.
        """
        # Use specific courses if specified, but fall back to all courses.
        course_keys = []
        if courses:
            log.info("%d courses specified: %s", len(courses), ", ".join(courses))
            for course_id in courses:
                try:
                    course_keys.append(CourseKey.from_string(course_id))
                except InvalidKeyError:
                    log.fatal("%s is not a parseable CourseKey", course_id)
                    sys.exit(1)
        else:
            log.info("No courses specified, reading all courses from modulestore...")
            course_keys = sorted(
                (course.id for course in modulestore().get_course_summaries()),
                key=unicode  # Different types of CourseKeys can't be compared without this.
            )
            log.info("%d courses read from modulestore.", len(course_keys))
        return course_keys

    def print_show_receivers(self):
        """Print receivers with accompanying docstrings for context."""
        receivers = {name_from_fn(fn): fn for fn in get_receiver_fns()}
        print(len(receivers), "receivers found:")
        for receiver_name, receiver_fn in sorted(receivers.items()):
            print(" ", receiver_name)
            docstring = textwrap.dedent(receiver_fn.__doc__ or "[No docstring]").strip()
            for line in docstring.split('\n'):
                print(" ", line)

    def print_dry_run(self, course_keys):
        """Give a preview of what courses and signals we will emit to."""
        print("DRY-RUN: This command would have sent course_published to...")

        dry_run_receiver_names = sorted(get_receiver_names())
        ccx_receiver_name = name_from_fn(ccx_receiver_fn)
        print(len(dry_run_receiver_names), "Receivers:")
        for name in dry_run_receiver_names:
            if name == ccx_receiver_name:
                print(" ", name, "(automatically added, use --skip-ccx to omit)")
            else:
                print(" ", name)

        # Only list the first few courses; summarize the remainder as a count.
        max_courses_shown = 10
        print(len(course_keys), "Courses:")
        for course_key in course_keys[:max_courses_shown]:
            print(" ", course_key)
        if len(course_keys) > max_courses_shown:
            print(" (+ {} more)".format(len(course_keys) - max_courses_shown))
def get_receiver_names():
    """
    Return an unordered set of receiver names (full.module.path.function).

    The names are derived from the receivers currently connected to
    Command.course_published_signal, as reported by get_receiver_fns().
    """
    return {
        name_from_fn(receiver_fn)
        for receiver_fn in get_receiver_fns()
        # A weakly referenced receiver that has already been garbage collected
        # dereferences to None; it has no name and can no longer be called.
        if receiver_fn is not None
    }
def get_receiver_fns():
    """
    Return the list of active receiver functions.

    Reads the (lookup_key, receiver) pairs stored on
    Command.course_published_signal and resolves each one to a live callable.
    """
    import weakref  # Local import so this helper stays self-contained.

    receiver_fns = []
    for _, receiver in Command.course_published_signal.receivers:
        if isinstance(receiver, weakref.ReferenceType):
            # Weakly connected receivers are stored as weak references, and
            # calling the reference yields the function (or None once the
            # function has been garbage collected).
            receiver = receiver()
            if receiver is None:
                continue
        # Anything that is not a weak reference is taken to be the receiver
        # itself (e.g. connected with weak=False) and must not be called here.
        receiver_fns.append(receiver)
    return receiver_fns
def name_from_fn(fn):
    """Build the dotted "module.function" label used to identify a receiver."""
    module_path = fn.__module__
    fn_name = fn.__name__
    return u"{0}.{1}".format(module_path, fn_name)
"""
Tests the simulate_publish management command.
"""
from openedx.core.djangoapps.content.course_overviews.management.commands.simulate_publish import (
Command, name_from_fn
)
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
import openedx.core.djangoapps.content.course_overviews.signals
import lms.djangoapps.ccx.tasks
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import SwitchedSignal
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory
class TestSimulatePublish(SharedModuleStoreTestCase):
    """Test simulate_publish, our fake course-publish signal command."""

    @classmethod
    def setUpClass(cls):
        """
        Create courses in modulestore.

        Modulestore signals are suppressed by ModuleStoreIsolationMixin, so this
        method should not trigger things like CourseOverview creation.
        """
        super(TestSimulatePublish, cls).setUpClass()
        cls.command = Command()
        # org.0/course_0/Run_0
        cls.course_key_1 = CourseFactory.create(default_store=ModuleStoreEnum.Type.mongo).id
        # course-v1:org.1+course_1+Run_1
        cls.course_key_2 = CourseFactory.create(default_store=ModuleStoreEnum.Type.split).id
        # course-v1:org.2+course_2+Run_2
        cls.course_key_3 = CourseFactory.create(default_store=ModuleStoreEnum.Type.split).id

    def setUp(self):
        """
        Most of this is isolating and re-initializing our signal handler. It
        might look like you can move this to setUpClass, but be very careful if
        doing so, to make sure side-effects don't leak out between tests.
        """
        super(TestSimulatePublish, self).setUp()

        # Instead of using the process global SignalHandler.course_published, we
        # create our own SwitchedSignal to manually send to. Put the command's
        # original signal back once the test is done, so the substitution does
        # not leak into whatever uses Command afterwards.
        self.addCleanup(
            setattr, Command, 'course_published_signal', Command.course_published_signal
        )
        Command.course_published_signal = SwitchedSignal('test_course_publish')

        # Course Overviews Handler
        # pylint: disable=protected-access
        Command.course_published_signal.connect(
            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
        )
        # CCX Handler
        Command.course_published_signal.connect(
            lms.djangoapps.ccx.tasks.course_published_handler
        )
        Command.course_published_signal.connect(self.sample_receiver_1)
        Command.course_published_signal.connect(self.sample_receiver_2)

        # Course keys seen by sample_receiver_1 / sample_receiver_2.
        self.received_1 = []
        self.received_2 = []

    def tearDown(self):
        """Clean up our signals."""
        # pylint: disable=protected-access
        Command.course_published_signal.disconnect(
            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
        )
        Command.course_published_signal.disconnect(
            lms.djangoapps.ccx.tasks.course_published_handler
        )
        Command.course_published_signal.disconnect(self.sample_receiver_1)
        Command.course_published_signal.disconnect(self.sample_receiver_2)
        super(TestSimulatePublish, self).tearDown()

    def options(self, **kwargs):
        """
        Return an options dict that can be passed to self.command.handle()

        Passed in **kwargs will override existing defaults. Most defaults are
        the same as they are for running the management command manually (e.g.
        dry_run is False, show_receivers is False), except that the list of
        receivers is by default limited to the two that exist in this test
        class. We do this to keep these tests faster and more self contained.
        """
        default_receivers = [
            name_from_fn(self.sample_receiver_1),
            name_from_fn(self.sample_receiver_2),
        ]
        default_options = dict(
            show_receivers=False,
            dry_run=False,
            receivers=default_receivers,
            courses=None,
            delay=0,
            force_lms=False,
            skip_ccx=False,
        )
        default_options.update(kwargs)
        return default_options

    def test_specific_courses(self):
        """Test sending only to specific courses."""
        self.command.handle(
            **self.options(
                courses=[unicode(self.course_key_1), unicode(self.course_key_2)]
            )
        )
        self.assertIn(self.course_key_1, self.received_1)
        self.assertIn(self.course_key_2, self.received_1)
        self.assertNotIn(self.course_key_3, self.received_1)
        self.assertEqual(self.received_1, self.received_2)

    def test_specific_receivers(self):
        """Test sending only to specific receivers."""
        self.command.handle(
            **self.options(
                receivers=[name_from_fn(self.sample_receiver_1)]
            )
        )
        self.assertIn(self.course_key_1, self.received_1)
        self.assertIn(self.course_key_2, self.received_1)
        self.assertIn(self.course_key_3, self.received_1)
        self.assertEqual(self.received_2, [])

    def test_course_overviews(self):
        """Integration test with CourseOverviews."""
        self.assertEqual(CourseOverview.objects.all().count(), 0)
        # pylint: disable=protected-access
        self.command.handle(
            **self.options(
                receivers=[
                    name_from_fn(openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish)
                ]
            )
        )
        self.assertEqual(CourseOverview.objects.all().count(), 3)
        self.assertEqual(self.received_1, [])
        self.assertEqual(self.received_2, [])

    def sample_receiver_1(self, sender, course_key, **kwargs):  # pylint: disable=unused-argument
        """Custom receiver for testing."""
        self.received_1.append(course_key)

    def sample_receiver_2(self, sender, course_key, **kwargs):  # pylint: disable=unused-argument
        """Custom receiver for testing."""
        self.received_2.append(course_key)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment