Add simulate_publish management command

Prior to this commit, any apps that update their data when courses are published had to create their own management commands for bootstrapping or error recovery (e.g. generate_course_overviews, generate_course_blocks). This commit adds a management command that generically simulates a course_published signal, so that any async tasks triggered by that signal can run without each app having to write its own management command. It has a few options to make it more ops friendly:

* Can specify a set of courses, but defaults to all courses in the modulestore.
* Can specify a set of listeners (receivers), so we can bootstrap a new app without rebuilding everything.
* Can specify a delay between emitting signals so that we don't flood the queues and block author-initiated publishes from going through in a timely manner.
* Dry-run mode for a simple preview of what the script will attempt.

Add simulate_publish management command
Prior to this commit, any apps that update their data when courses are published had to create their own management commands for bootstrapping or error recovery (e.g. generate_course_overviews, generate_course_blocks). This commit adds a management command that generically simulates a course_published signal, so that any async tasks triggered by that signal can run without each app having to write its own management command. It has a few options to make it more ops friendly:

* Can specify a set of courses, but defaults to all courses in the modulestore.
* Can specify a set of listeners (receivers), so we can bootstrap a new app without rebuilding everything.
* Can specify a delay between emitting signals so that we don't flood the queues and block author-initiated publishes from going through in a timely manner.
* Dry-run mode for a simple preview of what the script will attempt.
71ef28d3 · David Ormsbee · 04d3e9b9 · 71ef28d3 · 71ef28d3
Commit 71ef28d3 authored 6 years ago by David Ormsbee
--- a/openedx/core/djangoapps/content/course_overviews/management/commands/simulate_publish.py
+++ b/openedx/core/djangoapps/content/course_overviews/management/commands/simulate_publish.py
+"""
+Many apps in the LMS maintain their own optimized data structures that they
+update whenever a course publish is detected. To do this, they listen for the
+SignalHandler.course_published signal. Sometimes we want to rebuild the data on
+these apps regardless of an actual change in course content, either to recover
+from a bug or to bootstrap a new app we're rolling out for the first time. To
+date, each app has implemented its own management command for this kind of
+bootstrapping work (e.g. generate_course_overviews, generate_course_blocks).
+
+This management command will emit the SignalHandler.course_published signal for
+some subset of courses and signal listeners, and then rely on existing listener
+behavior to trigger the necessary data updates.
+"""
+from __future__ import print_function
+import copy
+import logging
+import os
+import textwrap
+import time
+import sys
+
+from django.core.management.base import BaseCommand
+from opaque_keys import InvalidKeyError
+from opaque_keys.edx.keys import CourseKey
+
+from lms.djangoapps.ccx.tasks import course_published_handler as ccx_receiver_fn
+from xmodule.modulestore.django import modulestore, SignalHandler
+
+
+# Logger for this command, registered under the fixed name 'simulate_publish'
+# rather than the module's dotted path.
+log = logging.getLogger('simulate_publish')
+
+
+class Command(BaseCommand):
+    """
+    Emit simulated course_published signals without changing course content.
+
+    The signal is sent once per course to every receiver still connected to
+    `course_published_signal`. Options can restrict the courses and the
+    receivers, pause between signals, or preview the run without sending
+    anything.
+
+    Example usage:
+
+    # Send the course_published signal to all receivers and courses with 10
+    # seconds between courses. We might use a delay like this to make sure we
+    # don't flood the queue and unnecessarily delay normal publishing via
+    # Studio.
+    $ ./manage.py cms --settings=devstack_docker simulate_publish --delay 10
+
+    # Find all available receivers
+    $ ./manage.py cms --settings=devstack_docker simulate_publish --show-receivers
+
+    # Send the publish signal to two courses and two receivers
+    $ ./manage.py cms --settings=devstack_docker simulate_publish --receivers \
+    openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish \
+    openedx.core.djangoapps.bookmarks.signals.trigger_update_xblocks_cache_task \
+    --courses course-v1:edX+DemoX+Demo_Course edX/MODULESTORE_100/2018
+
+    A Dry Run will produce output that looks like:
+
+        DRY-RUN: This command would have sent course_published to...
+        1 Receivers:
+            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
+        27 Courses:
+            course-v1:DEV_153+A2E_CHINESE+JAN2018
+            course-v1:edX+100+MITPhysics
+            course-v1:edX+DemoX+Demo_Course
+            course-v1:edX+E2E-101+course
+            course-v1:edX+MEMORY+2018
+            course-v1:edX+MK101+2018
+            edX/MODULESTORE_100/2018_1
+            edX/MODULESTORE_100/2018_2
+            edX/MODULESTORE_100/2018_3
+            edX/MODULESTORE_100/2018_4
+            (+ 17 more)
+    """
+    help = (
+        u"Simulate course publish signals without actually modifying course "
+        u"content. This command is useful for triggering various async tasks "
+        u"that listen for course_published signals."
+    )
+
+    # Having this be a class attribute makes it easier to substitute during
+    # tests, and thereby avoid global side-effects that will mysteriously fail
+    # tests that need signal handling later on.
+    # NOTE(review): copy.copy() is a shallow copy, so this object may share its
+    # list of receivers with SignalHandler.course_published -- confirm whether
+    # disconnecting receivers here also affects the process-global signal.
+    course_published_signal = copy.copy(SignalHandler.course_published)
+
+    def add_arguments(self, parser):
+        """Register this command's command-line options on `parser`."""
+        parser.add_argument(
+            '--show-receivers',
+            dest='show_receivers',
+            action='store_true',
+            help=(u'Display the list of possible receiver functions and exit.')
+        )
+        parser.add_argument(
+            '--dry-run',
+            dest='dry_run',
+            action='store_true',
+            help=(
+                u"Just show a preview of what would happen. This may make an "
+                u"expensive modulestore query to find courses, but it will "
+                u"not emit any signals."
+            )
+        )
+        parser.add_argument(
+            '--receivers',
+            dest='receivers',
+            action='store',
+            nargs='+',
+            help=(
+                u'Send course_published to specific receivers. If this flag is '
+                u'not present, course_published will be sent to all receivers. '
+                u'The CCX receiver is always included unless --skip-ccx is '
+                u'explicitly passed (otherwise CCX courses would never get '
+                u'called for any signal).'
+            )
+        )
+        parser.add_argument(
+            '--courses',
+            dest='courses',
+            action='store',
+            nargs='+',
+            help=(
+                u'Send course_published for specific courses. If this flag is '
+                u'not present, course_published will be sent to all courses.'
+            )
+        )
+        parser.add_argument(
+            '--delay',
+            dest='delay',
+            action='store',
+            type=int,
+            default=0,
+            help=(
+                u"Number of seconds to sleep between emitting course_published "
+                u"signals, so that we don't flood our queues."
+            )
+        )
+        parser.add_argument(
+            '--force-lms',
+            dest='force_lms',
+            action='store_true',
+            help=(
+                u"This command should be run under cms (Studio), not LMS. "
+                u"Regular publishes happen via Studio, and this script will "
+                u"exit with an error if you attempt to run it in an LMS "
+                u"process. However, if you know what you're doing and need to "
+                u"override that behavior, use this flag."
+            )
+        )
+        parser.add_argument(
+            '--skip-ccx',
+            dest='skip_ccx',
+            action='store_true',
+            help=(
+                u"CCX receivers are special echoing receivers that relay "
+                u"the course_published signal to all CCX courses derived from "
+                u"a modulestore-stored course. That means we almost always "
+                u"want to emit to them (even when using --receivers), or none "
+                u"of our signals will reach any CCX derived courses. However, "
+                u"if you know what you're doing, you can disable this behavior "
+                u"with this flag, so that CCX receivers are omitted."
+            )
+        )
+
+    def handle(self, *args, **options):
+        """
+        Run the command.
+
+        Depending on `options` this either lists the receivers, prints a
+        dry-run preview, or sends course_published for each selected course
+        to the selected receivers. Exits the process with status 1 when run
+        in an LMS process without --force-lms.
+        """
+        if options['show_receivers']:
+            return self.print_show_receivers()
+
+        log.info(
+            "simulate_publish starting, dry-run=%s, delay=%d seconds",
+            options['dry_run'],
+            options['delay']
+        )
+
+        # An unset SERVICE_VARIANT is treated as cms, i.e. allowed to proceed.
+        if os.environ.get('SERVICE_VARIANT', 'cms').startswith('lms'):
+            if options['force_lms']:
+                log.info("Forcing simulate_publish to run in LMS process.")
+            else:
+                log.fatal(
+                    "simulate_publish should be run as a CMS (Studio) "
+                    "command, not %s (override with --force-lms).",
+                    os.environ.get('SERVICE_VARIANT')
+                )
+                sys.exit(1)
+
+        if options['receivers']:
+            self.modify_receivers(options['receivers'], options['skip_ccx'])
+        elif options['skip_ccx']:
+            log.info("Disconnecting CCX handler (--skip-ccx is True)")
+            self.course_published_signal.disconnect(ccx_receiver_fn)
+
+        course_keys = self.get_course_keys(options['courses'])
+
+        if options['dry_run']:
+            return self.print_dry_run(course_keys)
+
+        # Now that our signal receivers and courses are set up properly, do the
+        # actual work of emitting signals.
+        total_courses = len(course_keys)
+        for i, course_key in enumerate(course_keys, start=1):
+            # The delay throttles the gap *between* signals, so there is
+            # nothing to wait for before the first one.
+            if options['delay'] and i > 1:
+                time.sleep(options['delay'])
+            log.info(
+                "Emitting course_published signal (%d of %d) for course %s",
+                i, total_courses, course_key
+            )
+            responses = self.course_published_signal.send_robust(sender=self, course_key=course_key)
+            # send_robust() does not raise when a receiver fails; it hands the
+            # exception back as that receiver's response. Surface those here
+            # so failures are not silently lost.
+            for receiver_fn, response in responses or ():
+                if isinstance(response, Exception):
+                    log.error(
+                        "Receiver %r raised an error for course %s: %r",
+                        receiver_fn, course_key, response
+                    )
+
+    def modify_receivers(self, receiver_names, skip_ccx):
+        """
+        Modify our signal to only have the user-specified receivers.
+
+        This method disconnects from `self.course_published_signal` any
+        receivers that were not in the `receiver_names` list. The CCX receiver
+        is kept even when it is not listed, unless `skip_ccx` is True.
+        NOTE(review): the signal is a shallow copy of the process global
+        SignalHandler.course_published, so these disconnects may affect the
+        global signal as well -- confirm.
+
+        If any of the receiver_names is not found (i.e. is not in the list of
+        receivers printed in self.print_show_receivers), it is a fatal error and
+        we will exit the process.
+        """
+        all_receiver_names = get_receiver_names()
+        receiver_names_set = set(receiver_names)
+        unknown_receiver_names = receiver_names_set - all_receiver_names
+        if unknown_receiver_names:
+            log.fatal(
+                "The following receivers were specified but not recognized: %s",
+                ", ".join(sorted(unknown_receiver_names))
+            )
+            log.fatal("Known receivers: %s", ", ".join(sorted(all_receiver_names)))
+            sys.exit(1)
+        log.info("%d receivers specified: %s", len(receiver_names), ", ".join(receiver_names))
+        for receiver_fn in get_receiver_fns():
+            if receiver_fn == ccx_receiver_fn and not skip_ccx:
+                continue
+            fn_name = name_from_fn(receiver_fn)
+            if fn_name not in receiver_names_set:
+                log.info("Disconnecting %s", fn_name)
+                self.course_published_signal.disconnect(receiver_fn)
+
+    def get_course_keys(self, courses):
+        """
+        Return a list of CourseKeys that we will emit signals to.
+
+        `courses` is an optional list of strings that can be parsed into
+        CourseKeys. If `courses` is empty or None, we will default to returning
+        all courses in the modulestore (which can be very expensive). If one of
+        the strings passed in the list for `courses` does not parse correctly,
+        it is a fatal error and will cause us to exit the entire process.
+        """
+        # Use specific courses if specified, but fall back to all courses.
+        course_keys = []
+        if courses:
+            log.info("%d courses specified: %s", len(courses), ", ".join(courses))
+            for course_id in courses:
+                try:
+                    course_keys.append(CourseKey.from_string(course_id))
+                except InvalidKeyError:
+                    log.fatal("%s is not a parseable CourseKey", course_id)
+                    sys.exit(1)
+        else:
+            log.info("No courses specified, reading all courses from modulestore...")
+            course_keys = sorted(
+                (course.id for course in modulestore().get_course_summaries()),
+                key=unicode  # Different types of CourseKeys can't be compared without this.
+            )
+            log.info("%d courses read from modulestore.", len(course_keys))
+
+        return course_keys
+
+    def print_show_receivers(self):
+        """Print receivers with accompanying docstrings for context."""
+        receivers = {name_from_fn(fn): fn for fn in get_receiver_fns()}
+        print(len(receivers), "receivers found:")
+        for receiver_name, receiver_fn in sorted(receivers.items()):
+            print("  ", receiver_name)
+            docstring = textwrap.dedent(receiver_fn.__doc__ or "[No docstring]").strip()
+            for line in docstring.split('\n'):
+                print("      ", line)
+
+    def print_dry_run(self, course_keys):
+        """
+        Give a preview of what courses and signals we will emit to.
+
+        Prints every receiver currently connected, then at most the first ten
+        entries of `course_keys` followed by a count of the remainder.
+        """
+        print("DRY-RUN: This command would have sent course_published to...")
+        dry_run_receiver_names = sorted(get_receiver_names())
+        ccx_receiver_name = name_from_fn(ccx_receiver_fn)
+        print(len(dry_run_receiver_names), "Receivers:")
+        for name in dry_run_receiver_names:
+            if name == ccx_receiver_name:
+                print("   ", name, "(automatically added, use --skip-ccx to omit)")
+            else:
+                print("   ", name)
+        max_courses_shown = 10
+        print(len(course_keys), "Courses:")
+        for course_key in course_keys[:max_courses_shown]:
+            print("   ", course_key)
+        if len(course_keys) > max_courses_shown:
+            print("    (+ {} more)".format(len(course_keys) - max_courses_shown))
+
+
+def get_receiver_names():
+    """Return an unordered set of receiver names (full.module.path.function)"""
+    return set(name_from_fn(fn) for fn in get_receiver_fns())
+
+
+def get_receiver_fns():
+    """
+    Return the list of active receiver functions.
+
+    Receivers are read from `Command.course_published_signal.receivers`.
+    Entries held as weak references are resolved to the underlying callable,
+    and any whose target has already been garbage collected are left out.
+    """
+    import weakref
+
+    receiver_fns = []
+    for entry in Command.course_published_signal.receivers:
+        # The receiver is the second element of each entry. Index rather than
+        # unpack, since newer Django versions append extra fields to the
+        # tuple -- TODO confirm the layout for the Django version in use.
+        receiver = entry[1]
+        # Receivers connected with weak=False are stored as the callable
+        # itself; only weak references need to be dereferenced (calling a
+        # strong receiver here would invoke it with no arguments).
+        if isinstance(receiver, weakref.ReferenceType):
+            receiver = receiver()
+            if receiver is None:
+                # The referent is gone; there is nothing left to send to.
+                continue
+        receiver_fns.append(receiver)
+    return receiver_fns
+
+
+def name_from_fn(fn):
+    """Build the dotted "module.function" label used to identify `fn`."""
+    module_path = fn.__module__
+    function_name = fn.__name__
+    return u"{}.{}".format(module_path, function_name)
--- a/openedx/core/djangoapps/content/course_overviews/management/commands/tests/test_simulate_publish.py
+++ b/openedx/core/djangoapps/content/course_overviews/management/commands/tests/test_simulate_publish.py
+"""
+Tests the simulate_publish management command.
+"""
+from openedx.core.djangoapps.content.course_overviews.management.commands.simulate_publish import (
+    Command, name_from_fn
+)
+from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
+import openedx.core.djangoapps.content.course_overviews.signals
+import lms.djangoapps.ccx.tasks
+from xmodule.modulestore import ModuleStoreEnum
+from xmodule.modulestore.django import SwitchedSignal
+from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
+from xmodule.modulestore.tests.factories import CourseFactory
+
+
+class TestSimulatePublish(SharedModuleStoreTestCase):
+    """Test simulate_publish, our fake course-publish signal command."""
+
+    @classmethod
+    def setUpClass(cls):
+        """
+        Create courses in modulestore.
+
+        Modulestore signals are suppressed by ModuleStoreIsolationMixin, so this
+        method should not trigger things like CourseOverview creation.
+        """
+        super(TestSimulatePublish, cls).setUpClass()
+        cls.command = Command()
+        # org.0/course_0/Run_0
+        cls.course_key_1 = CourseFactory.create(default_store=ModuleStoreEnum.Type.mongo).id
+        # course-v1:org.1+course_1+Run_1
+        cls.course_key_2 = CourseFactory.create(default_store=ModuleStoreEnum.Type.split).id
+        # course-v1:org.2+course_2+Run_2
+        cls.course_key_3 = CourseFactory.create(default_store=ModuleStoreEnum.Type.split).id
+
+    def setUp(self):
+        """
+        Most of this is isolating and re-initializing our signal handler. It
+        might look like you can move this to setUpClass, but be very careful if
+        doing so, to make sure side-effects don't leak out between tests.
+        """
+        super(TestSimulatePublish, self).setUp()
+
+        # Instead of using the process global SignalHandler.course_published, we
+        # create our own SwitchedSignal to manually send to. Register a cleanup
+        # first so the original class attribute is put back once the test (and
+        # tearDown) has finished; otherwise the test signal would stay on
+        # Command for anything that uses it afterwards.
+        self.addCleanup(setattr, Command, 'course_published_signal', Command.course_published_signal)
+        Command.course_published_signal = SwitchedSignal('test_course_publish')
+
+        # Course Overviews Handler
+        # pylint: disable=protected-access
+        Command.course_published_signal.connect(
+            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
+        )
+        # CCX Handler
+        Command.course_published_signal.connect(
+            lms.djangoapps.ccx.tasks.course_published_handler
+        )
+        Command.course_published_signal.connect(self.sample_receiver_1)
+        Command.course_published_signal.connect(self.sample_receiver_2)
+
+        # Course keys seen by each sample receiver, in the order received.
+        self.received_1 = []
+        self.received_2 = []
+
+    def tearDown(self):
+        """Clean up our signals."""
+        # pylint: disable=protected-access
+        Command.course_published_signal.disconnect(
+            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
+        )
+        Command.course_published_signal.disconnect(
+            lms.djangoapps.ccx.tasks.course_published_handler
+        )
+        Command.course_published_signal.disconnect(self.sample_receiver_1)
+        Command.course_published_signal.disconnect(self.sample_receiver_2)
+        super(TestSimulatePublish, self).tearDown()
+
+    def options(self, **kwargs):
+        """
+        Return an options dict that can be passed to self.command.handle()
+
+        Passed in **kwargs will override existing defaults. Most defaults are
+        the same as they are for running the management command manually (e.g.
+        dry_run is False, show_receivers is False), except that the list of
+        receivers is by default limited to the two that exist in this test
+        class. We do this to keep these tests faster and more self contained.
+        """
+        default_receivers = [
+            name_from_fn(self.sample_receiver_1),
+            name_from_fn(self.sample_receiver_2),
+        ]
+        default_options = dict(
+            show_receivers=False,
+            dry_run=False,
+            receivers=default_receivers,
+            courses=None,
+            delay=0,
+            force_lms=False,
+            skip_ccx=False,
+        )
+        default_options.update(kwargs)
+        return default_options
+
+    def test_specific_courses(self):
+        """Test sending only to specific courses."""
+        self.command.handle(
+            **self.options(
+                courses=[unicode(self.course_key_1), unicode(self.course_key_2)]
+            )
+        )
+        self.assertIn(self.course_key_1, self.received_1)
+        self.assertIn(self.course_key_2, self.received_1)
+        self.assertNotIn(self.course_key_3, self.received_1)
+        self.assertEqual(self.received_1, self.received_2)
+
+    def test_specific_receivers(self):
+        """Test sending only to specific receivers."""
+        self.command.handle(
+            **self.options(
+                receivers=[name_from_fn(self.sample_receiver_1)]
+            )
+        )
+        self.assertIn(self.course_key_1, self.received_1)
+        self.assertIn(self.course_key_2, self.received_1)
+        self.assertIn(self.course_key_3, self.received_1)
+        self.assertEqual(self.received_2, [])
+
+    def test_course_overviews(self):
+        """Integration test with CourseOverviews."""
+        self.assertEqual(CourseOverview.objects.all().count(), 0)
+        # pylint: disable=protected-access
+        self.command.handle(
+            **self.options(
+                receivers=[
+                    name_from_fn(openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish)
+                ]
+            )
+        )
+        self.assertEqual(CourseOverview.objects.all().count(), 3)
+        self.assertEqual(self.received_1, [])
+        self.assertEqual(self.received_2, [])
+
+    def sample_receiver_1(self, sender, course_key, **kwargs):  # pylint: disable=unused-argument
+        """Custom receiver for testing; records each course_key it is sent."""
+        self.received_1.append(course_key)
+
+    def sample_receiver_2(self, sender, course_key, **kwargs):  # pylint: disable=unused-argument
+        """Custom receiver for testing; records each course_key it is sent."""
+        self.received_2.append(course_key)