From 79de77cf9589885248d82fb8ad1f4c80de8dadc4 Mon Sep 17 00:00:00 2001
From: David Ormsbee <dave@edx.org>
Date: Thu, 26 Feb 2015 18:11:34 -0500
Subject: [PATCH] Optimize grading/progress page to reduce database queries
 (cache max scores).

The progress page did a number of things that made performance terrible for
courses with large numbers of problems, particularly when those problems were
customresponse CapaModule problems that had to be executed via codejail.

The grading code takes pains to avoid instantiating student state and
executing the problem code. If a student has answered the question, the max
score is stored in StudentModule. However, if the student hasn't attempted the
question yet, we have to run the problem code just to call .max_score() on it.
This is necessary in grade() if the student has answered other problems in the
assignment (so we know what to divide by), and it is always necessary in
progress_summary() because we list out every problem there. Code execution can
be especially slow if the problems need to invoke codejail.

To address this, we create a MaxScoresCache that caches the max raw score
possible for every problem. We select the cache keys so that they are
automatically invalidated when a new version of the course is published. The
fundamental assumption here is that a problem cannot have two different max
score values for two unscored students. A problem *can* score two students
differently such that they have different max scores: Carlos can have 2/3 on a
problem while Lyla gets 3/4. But if neither Carlos nor Lyla has ever
interacted with the problem (i.e. they're just seeing it on their progress
page), they must both see 0/4 -- it cannot be the case that Carlos sees 0/3
and Lyla sees 0/4.

We used to load all student state into two separate FieldDataCache instances,
after which we did a bunch of individual queries for scored items. Part of
that split was done because of locking problems, but I think we might have
gotten overzealous with our manual transaction hammer. In this commit, we
consolidate all state access in grade() and progress_summary() to use one
shared FieldDataCache. We also use a filter so that we only pull back
StudentModule state for things that might possibly affect the grade -- items
that either have scores or have children. Because some older XModules (like
Video) do work in their __init__() methods, instantiating them takes time,
particularly on large courses. This commit also changes the code that fetches
the grading_context to filter out children that can't possibly affect the
grade.

Finally, we introduce a ScoresClient that fetches score information all at
once, instead of in separate queries. Technically, we are fetching this
information redundantly, but that's because the state and score interfaces are
being teased apart as we move forward. Still, this only amounts to one extra
SQL query and has very little impact on overall performance.

Much thanks to @adampalay -- his hackathon work in #7168 formed the basis of
this.
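As a rough sketch of the cache-key scheme (helper names here are illustrative
only; the real implementation is the MaxScoresCache class added to
lms/djangoapps/courseware/grades.py in the diff below):

    # Illustrative sketch, simplified from MaxScoresCache in this patch.
    # The cache prefix embeds the course's last-published time, so publishing
    # new content automatically invalidates every cached max score.
    from django.core.cache import cache

    def _max_score_key(course, location):
        prefix = u"{}.{}".format(course.id, course.subtree_edited_on.isoformat())
        return u"grades.MaxScores.{}___{}".format(prefix, unicode(location))

    def get_cached_max_score(course, location):
        return cache.get(_max_score_key(course, location))

    def set_cached_max_score(course, location, max_score):
        # Keep entries for a day; a course publish changes the key anyway.
        cache.set(_max_score_key(course, location), max_score, 60 * 60 * 24)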
https://openedx.atlassian.net/browse/CSM-17 --- common/lib/xmodule/xmodule/course_module.py | 16 +- .../tests/test_field_override_performance.py | 54 ++-- lms/djangoapps/courseware/grades.py | 287 ++++++++++++++---- lms/djangoapps/courseware/model_data.py | 64 +++- lms/djangoapps/courseware/models.py | 5 +- .../courseware/tests/test_grades.py | 77 ++++- .../courseware/tests/test_model_data.py | 3 +- .../tests/test_submitting_problems.py | 43 +++ lms/djangoapps/courseware/views.py | 13 +- .../tests/test_tasks_helper.py | 2 +- lms/envs/common.py | 6 +- 11 files changed, 465 insertions(+), 105 deletions(-) diff --git a/common/lib/xmodule/xmodule/course_module.py b/common/lib/xmodule/xmodule/course_module.py index f3d18eb8be8..91637abe0f7 100644 --- a/common/lib/xmodule/xmodule/course_module.py +++ b/common/lib/xmodule/xmodule/course_module.py @@ -20,6 +20,7 @@ from xmodule.tabs import CourseTabList from xmodule.mixin import LicenseMixin import json +from xblock.core import XBlock from xblock.fields import Scope, List, String, Dict, Boolean, Integer, Float from .fields import Date from django.utils.timezone import UTC @@ -1321,11 +1322,15 @@ class CourseDescriptor(CourseFields, SequenceDescriptor, LicenseMixin): except UndefinedContext: module = self + def possibly_scored(usage_key): + """Can this XBlock type can have a score or children?""" + return usage_key.block_type in self.block_types_affecting_grading + all_descriptors = [] graded_sections = {} def yield_descriptor_descendents(module_descriptor): - for child in module_descriptor.get_children(): + for child in module_descriptor.get_children(usage_key_filter=possibly_scored): yield child for module_descriptor in yield_descriptor_descendents(child): yield module_descriptor @@ -1351,6 +1356,15 @@ class CourseDescriptor(CourseFields, SequenceDescriptor, LicenseMixin): return {'graded_sections': graded_sections, 'all_descriptors': all_descriptors, } + @lazy + def block_types_affecting_grading(self): + """Return all block types that could impact grading (i.e. 
scored, or having children).""" + return frozenset( + cat for (cat, xblock_class) in XBlock.load_classes() if ( + getattr(xblock_class, 'has_score', False) or getattr(xblock_class, 'has_children', False) + ) + ) + @staticmethod def make_id(org, course, url_name): return '/'.join([org, course, url_name]) diff --git a/lms/djangoapps/ccx/tests/test_field_override_performance.py b/lms/djangoapps/ccx/tests/test_field_override_performance.py index afb013e3c16..e012523c285 100644 --- a/lms/djangoapps/ccx/tests/test_field_override_performance.py +++ b/lms/djangoapps/ccx/tests/test_field_override_performance.py @@ -29,7 +29,11 @@ from xmodule.modulestore.tests.utils import ProceduralCourseTestMixin @attr('shard_1') @mock.patch.dict( - 'django.conf.settings.FEATURES', {'ENABLE_XBLOCK_VIEW_ENDPOINT': True} + 'django.conf.settings.FEATURES', + { + 'ENABLE_XBLOCK_VIEW_ENDPOINT': True, + 'ENABLE_MAX_SCORE_CACHE': False, + } ) @ddt.ddt class FieldOverridePerformanceTestCase(ProceduralCourseTestMixin, @@ -173,18 +177,18 @@ class TestFieldOverrideMongoPerformance(FieldOverridePerformanceTestCase): TEST_DATA = { # (providers, course_width, enable_ccx): # of sql queries, # of mongo queries, # of xblocks - ('no_overrides', 1, True): (27, 7, 14), - ('no_overrides', 2, True): (135, 7, 85), - ('no_overrides', 3, True): (595, 7, 336), - ('ccx', 1, True): (27, 7, 14), - ('ccx', 2, True): (135, 7, 85), - ('ccx', 3, True): (595, 7, 336), - ('no_overrides', 1, False): (27, 7, 14), - ('no_overrides', 2, False): (135, 7, 85), - ('no_overrides', 3, False): (595, 7, 336), - ('ccx', 1, False): (27, 7, 14), - ('ccx', 2, False): (135, 7, 85), - ('ccx', 3, False): (595, 7, 336), + ('no_overrides', 1, True): (23, 7, 14), + ('no_overrides', 2, True): (68, 7, 85), + ('no_overrides', 3, True): (263, 7, 336), + ('ccx', 1, True): (23, 7, 14), + ('ccx', 2, True): (68, 7, 85), + ('ccx', 3, True): (263, 7, 336), + ('no_overrides', 1, False): (23, 7, 14), + ('no_overrides', 2, False): (68, 7, 85), + ('no_overrides', 3, False): (263, 7, 336), + ('ccx', 1, False): (23, 7, 14), + ('ccx', 2, False): (68, 7, 85), + ('ccx', 3, False): (263, 7, 336), } @@ -196,16 +200,16 @@ class TestFieldOverrideSplitPerformance(FieldOverridePerformanceTestCase): __test__ = True TEST_DATA = { - ('no_overrides', 1, True): (27, 4, 9), - ('no_overrides', 2, True): (135, 19, 54), - ('no_overrides', 3, True): (595, 84, 215), - ('ccx', 1, True): (27, 4, 9), - ('ccx', 2, True): (135, 19, 54), - ('ccx', 3, True): (595, 84, 215), - ('no_overrides', 1, False): (27, 4, 9), - ('no_overrides', 2, False): (135, 19, 54), - ('no_overrides', 3, False): (595, 84, 215), - ('ccx', 1, False): (27, 4, 9), - ('ccx', 2, False): (135, 19, 54), - ('ccx', 3, False): (595, 84, 215), + ('no_overrides', 1, True): (23, 4, 9), + ('no_overrides', 2, True): (68, 19, 54), + ('no_overrides', 3, True): (263, 84, 215), + ('ccx', 1, True): (23, 4, 9), + ('ccx', 2, True): (68, 19, 54), + ('ccx', 3, True): (263, 84, 215), + ('no_overrides', 1, False): (23, 4, 9), + ('no_overrides', 2, False): (68, 19, 54), + ('no_overrides', 3, False): (263, 84, 215), + ('ccx', 1, False): (23, 4, 9), + ('ccx', 2, False): (68, 19, 54), + ('ccx', 3, False): (263, 84, 215), } diff --git a/lms/djangoapps/courseware/grades.py b/lms/djangoapps/courseware/grades.py index 566f8a3ea06..208d15c7383 100644 --- a/lms/djangoapps/courseware/grades.py +++ b/lms/djangoapps/courseware/grades.py @@ -1,6 +1,7 @@ # Compute grades using real division, with no integer truncation from __future__ import division from collections import 
defaultdict +from functools import partial import json import random import logging @@ -9,11 +10,12 @@ from contextlib import contextmanager from django.conf import settings from django.db import transaction from django.test.client import RequestFactory +from django.core.cache import cache import dogstats_wrapper as dog_stats_api from courseware import courses -from courseware.model_data import FieldDataCache +from courseware.model_data import FieldDataCache, ScoresClient from student.models import anonymous_id_for_user from util.module_utils import yield_dynamic_descriptor_descendants from xmodule import graders @@ -25,12 +27,131 @@ from .module_render import get_module_for_descriptor from submissions import api as sub_api # installed from the edx-submissions repository from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import CourseKey - from openedx.core.djangoapps.signals.signals import GRADES_UPDATED + log = logging.getLogger("edx.courseware") +class MaxScoresCache(object): + """ + A cache for unweighted max scores for problems. + + The key assumption here is that any problem that has not yet recorded a + score for a user is worth the same number of points. An XBlock is free to + score one student at 2/5 and another at 1/3. But a problem that has never + issued a score -- say a problem two students have only seen mentioned in + their progress pages and never interacted with -- should be worth the same + number of points for everyone. + """ + def __init__(self, cache_prefix): + self.cache_prefix = cache_prefix + self._max_scores_cache = {} + self._max_scores_updates = {} + + @classmethod + def create_for_course(cls, course): + """ + Given a CourseDescriptor, return a correctly configured `MaxScoresCache` + + This method will base the `MaxScoresCache` cache prefix value on the + last time something was published to the live version of the course. + This is so that we don't have to worry about stale cached values for + max scores -- any time a content change occurs, we change our cache + keys. + """ + return cls(u"{}.{}".format(course.id, course.subtree_edited_on.isoformat())) + + def fetch_from_remote(self, locations): + """ + Populate the local cache with values from django's cache + """ + remote_dict = cache.get_many([self._remote_cache_key(loc) for loc in locations]) + self._max_scores_cache = { + self._local_cache_key(remote_key): value + for remote_key, value in remote_dict.items() + if value is not None + } + + def push_to_remote(self): + """ + Update the remote cache + """ + if self._max_scores_updates: + cache.set_many( + { + self._remote_cache_key(key): value + for key, value in self._max_scores_updates.items() + }, + 60 * 60 * 24 # 1 day + ) + + def _remote_cache_key(self, location): + """Convert a location to a remote cache key (add our prefixing).""" + return u"grades.MaxScores.{}___{}".format(self.cache_prefix, unicode(location)) + + def _local_cache_key(self, remote_key): + """Convert a remote cache key to a local cache key (i.e. 
location str).""" + return remote_key.split(u"___", 1)[1] + + def num_cached_from_remote(self): + """How many items did we pull down from the remote cache?""" + return len(self._max_scores_cache) + + def num_cached_updates(self): + """How many local updates are we waiting to push to the remote cache?""" + return len(self._max_scores_updates) + + def set(self, location, max_score): + """ + Adds a max score to the max_score_cache + """ + loc_str = unicode(location) + if self._max_scores_cache.get(loc_str) != max_score: + self._max_scores_updates[loc_str] = max_score + + def get(self, location): + """ + Retrieve a max score from the cache + """ + loc_str = unicode(location) + max_score = self._max_scores_updates.get(loc_str) + if max_score is None: + max_score = self._max_scores_cache.get(loc_str) + + return max_score + + +def descriptor_affects_grading(block_types_affecting_grading, descriptor): + """ + Returns True if the descriptor could have any impact on grading, else False. + + Something might be a scored item if it is capable of storing a score + (has_score=True). We also have to include anything that can have children, + since those children might have scores. We can avoid things like Videos, + which have state but cannot ever impact someone's grade. + """ + return descriptor.location.block_type in block_types_affecting_grading + + +def field_data_cache_for_grading(course, user): + """ + Given a CourseDescriptor and User, create the FieldDataCache for grading. + + This will generate a FieldDataCache that only loads state for those things + that might possibly affect the grading process, and will ignore things like + Videos. + """ + descriptor_filter = partial(descriptor_affects_grading, course.block_types_affecting_grading) + return FieldDataCache.cache_for_descriptor_descendents( + course.id, + user, + course, + depth=None, + descriptor_filter=descriptor_filter + ) + + def answer_distributions(course_key): """ Given a course_key, return answer distributions in the form of a dictionary @@ -123,14 +244,14 @@ def answer_distributions(course_key): @transaction.commit_manually -def grade(student, request, course, keep_raw_scores=False): +def grade(student, request, course, keep_raw_scores=False, field_data_cache=None, scores_client=None): """ Wraps "_grade" with the manual_transaction context manager just in case there are unanticipated errors. Send a signal to update the minimum grade requirement status. """ with manual_transaction(): - grade_summary = _grade(student, request, course, keep_raw_scores) + grade_summary = _grade(student, request, course, keep_raw_scores, field_data_cache, scores_client) responses = GRADES_UPDATED.send_robust( sender=None, username=request.user.username, @@ -145,7 +266,7 @@ def grade(student, request, course, keep_raw_scores=False): return grade_summary -def _grade(student, request, course, keep_raw_scores): +def _grade(student, request, course, keep_raw_scores, field_data_cache, scores_client): """ Unwrapped version of "grade" @@ -166,15 +287,25 @@ def _grade(student, request, course, keep_raw_scores): More information on the format is in the docstring for CourseGrader. """ - grading_context = course.grading_context - raw_scores = [] + if field_data_cache is None: + with manual_transaction(): + field_data_cache = field_data_cache_for_grading(course, student) + if scores_client is None: + scores_client = ScoresClient.from_field_data_cache(field_data_cache) # Dict of item_ids -> (earned, possible) point tuples. 
This *only* grabs # scores that were registered with the submissions API, which for the moment # means only openassessment (edx-ora2) - submissions_scores = sub_api.get_scores( - course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id) - ) + submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id)) + max_scores_cache = MaxScoresCache.create_for_course(course) + # For the moment, we have to get scorable_locations from field_data_cache + # and not from scores_client, because scores_client is ignorant of things + # in the submissions API. As a further refactoring step, submissions should + # be hidden behind the ScoresClient. + max_scores_cache.fetch_from_remote(field_data_cache.scorable_locations) + + grading_context = course.grading_context + raw_scores = [] totaled_scores = {} # This next complicated loop is just to collect the totaled_scores, which is @@ -202,13 +333,10 @@ def _grade(student, request, course, keep_raw_scores): ) if not should_grade_section: - with manual_transaction(): - should_grade_section = StudentModule.objects.filter( - student=student, - module_state_key__in=[ - descriptor.location for descriptor in section['xmoduledescriptors'] - ] - ).exists() + should_grade_section = any( + descriptor.location in scores_client + for descriptor in section['xmoduledescriptors'] + ) # If we haven't seen a single problem in the section, we don't have # to grade it at all! We can assume 0% @@ -219,23 +347,24 @@ def _grade(student, request, course, keep_raw_scores): '''creates an XModule instance given a descriptor''' # TODO: We need the request to pass into here. If we could forego that, our arguments # would be simpler - with manual_transaction(): - field_data_cache = FieldDataCache([descriptor], course.id, student) return get_module_for_descriptor( student, request, descriptor, field_data_cache, course.id, course=course ) - for module_descriptor in yield_dynamic_descriptor_descendants( - section_descriptor, student.id, create_module - ): - + descendants = yield_dynamic_descriptor_descendants(section_descriptor, student.id, create_module) + for module_descriptor in descendants: (correct, total) = get_score( - course.id, student, module_descriptor, create_module, scores_cache=submissions_scores + student, + module_descriptor, + create_module, + scores_client, + submissions_scores, + max_scores_cache, ) if correct is None and total is None: continue - if settings.GENERATE_PROFILE_SCORES: # for debugging! + if settings.GENERATE_PROFILE_SCORES: # for debugging! 
if total > 1: correct = random.randrange(max(total - 2, 1), total + 1) else: @@ -256,7 +385,7 @@ def _grade(student, request, course, keep_raw_scores): ) ) - _, graded_total = graders.aggregate_scores(scores, section_name) + __, graded_total = graders.aggregate_scores(scores, section_name) if keep_raw_scores: raw_scores += scores else: @@ -283,11 +412,14 @@ def _grade(student, request, course, keep_raw_scores): letter_grade = grade_for_percentage(course.grade_cutoffs, grade_summary['percent']) grade_summary['grade'] = letter_grade - grade_summary['totaled_scores'] = totaled_scores # make this available, eg for instructor download & debugging + grade_summary['totaled_scores'] = totaled_scores # make this available, eg for instructor download & debugging if keep_raw_scores: # way to get all RAW scores out to instructor # so grader can be double-checked grade_summary['raw_scores'] = raw_scores + + max_scores_cache.push_to_remote() + return grade_summary @@ -314,19 +446,19 @@ def grade_for_percentage(grade_cutoffs, percentage): @transaction.commit_manually -def progress_summary(student, request, course): +def progress_summary(student, request, course, field_data_cache=None, scores_client=None): """ Wraps "_progress_summary" with the manual_transaction context manager just in case there are unanticipated errors. """ with manual_transaction(): - return _progress_summary(student, request, course) + return _progress_summary(student, request, course, field_data_cache, scores_client) # TODO: This method is not very good. It was written in the old course style and # then converted over and performance is not good. Once the progress page is redesigned # to not have the progress summary this method should be deleted (so it won't be copied). -def _progress_summary(student, request, course): +def _progress_summary(student, request, course, field_data_cache=None, scores_client=None): """ Unwrapped version of "progress_summary". @@ -348,21 +480,26 @@ def _progress_summary(student, request, course): """ with manual_transaction(): - field_data_cache = FieldDataCache.cache_for_descriptor_descendents( - course.id, student, course, depth=None - ) - # TODO: We need the request to pass into here. If we could - # forego that, our arguments would be simpler + if field_data_cache is None: + field_data_cache = field_data_cache_for_grading(course, student) + if scores_client is None: + scores_client = ScoresClient.from_field_data_cache(field_data_cache) + course_module = get_module_for_descriptor( student, request, course, field_data_cache, course.id, course=course ) if not course_module: - # This student must not have access to the course. return None course_module = getattr(course_module, '_x_module', course_module) submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id)) + max_scores_cache = MaxScoresCache.create_for_course(course) + # For the moment, we have to get scorable_locations from field_data_cache + # and not from scores_client, because scores_client is ignorant of things + # in the submissions API. As a further refactoring step, submissions should + # be hidden behind the ScoresClient. + max_scores_cache.fetch_from_remote(field_data_cache.scorable_locations) chapters = [] # Don't include chapters that aren't displayable (e.g. 
due to error) @@ -389,7 +526,12 @@ def _progress_summary(student, request, course): ): course_id = course.id (correct, total) = get_score( - course_id, student, module_descriptor, module_creator, scores_cache=submissions_scores + student, + module_descriptor, + module_creator, + scores_client, + submissions_scores, + max_scores_cache, ) if correct is None and total is None: continue @@ -426,10 +568,20 @@ def _progress_summary(student, request, course): 'sections': sections }) + max_scores_cache.push_to_remote() + return chapters -def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=None): +def weighted_score(raw_correct, raw_total, weight): + """Return a tuple that represents the weighted (correct, total) score.""" + # If there is no weighting, or weighting can't be applied, return input. + if weight is None or raw_total == 0: + return (raw_correct, raw_total) + return (float(raw_correct) * weight / raw_total, float(weight)) + + +def get_score(user, problem_descriptor, module_creator, scores_client, submissions_scores_cache, max_scores_cache): """ Return the score for a user on a problem, as a tuple (correct, total). e.g. (5,7) if you got 5 out of 7 points. @@ -439,19 +591,21 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache= user: a Student object problem_descriptor: an XModuleDescriptor + scores_client: an initialized ScoresClient module_creator: a function that takes a descriptor, and returns the corresponding XModule for this user. Can return None if user doesn't have access, or if something else went wrong. - scores_cache: A dict of location names to (earned, possible) point tuples. + submissions_scores_cache: A dict of location names to (earned, possible) point tuples. If an entry is found in this cache, it takes precedence. + max_scores_cache: a MaxScoresCache """ - scores_cache = scores_cache or {} + submissions_scores_cache = submissions_scores_cache or {} if not user.is_authenticated(): return (None, None) location_url = problem_descriptor.location.to_deprecated_string() - if location_url in scores_cache: - return scores_cache[location_url] + if location_url in submissions_scores_cache: + return submissions_scores_cache[location_url] # some problems have state that is updated independently of interaction # with the LMS, so they need to always be scored. (E.g. foldit.) @@ -469,22 +623,27 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache= # These are not problems, and do not have a score return (None, None) - try: - student_module = StudentModule.objects.get( - student=user, - course_id=course_id, - module_state_key=problem_descriptor.location - ) - except StudentModule.DoesNotExist: - student_module = None - - if student_module is not None and student_module.max_grade is not None: - correct = student_module.grade if student_module.grade is not None else 0 - total = student_module.max_grade + # Check the score that comes from the ScoresClient (out of CSM). + # If an entry exists and has a total associated with it, we trust that + # value. This is important for cases where a student might have seen an + # older version of the problem -- they're still graded on what was possible + # when they tried the problem, not what it's worth now. + score = scores_client.get(problem_descriptor.location) + cached_max_score = max_scores_cache.get(problem_descriptor.location) + if score and score.total is not None: + # We have a valid score, just use it. 
+ correct = score.correct if score.correct is not None else 0.0 + total = score.total + elif cached_max_score is not None and settings.FEATURES.get("ENABLE_MAX_SCORE_CACHE"): + # We don't have a valid score entry but we know from our cache what the + # max possible score is, so they've earned 0.0 / cached_max_score + correct = 0.0 + total = cached_max_score else: - # If the problem was not in the cache, or hasn't been graded yet, - # we need to instantiate the problem. - # Otherwise, the max score (cached in student_module) won't be available + # This means we don't have a valid score entry and we don't have a + # cached_max_score on hand. We know they've earned 0.0 points on this, + # but we need to instantiate the module (i.e. load student state) in + # order to find out how much it was worth. problem = module_creator(problem_descriptor) if problem is None: return (None, None) @@ -496,17 +655,11 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache= # In which case total might be None if total is None: return (None, None) + else: + # add location to the max score cache + max_scores_cache.set(problem_descriptor.location, total) - # Now we re-weight the problem, if specified - weight = problem_descriptor.weight - if weight is not None: - if total == 0: - log.exception("Cannot reweight a problem with zero total points. Problem: " + str(student_module)) - return (correct, total) - correct = correct * weight / total - total = weight - - return (correct, total) + return weighted_score(correct, total, problem_descriptor.weight) @contextmanager diff --git a/lms/djangoapps/courseware/model_data.py b/lms/djangoapps/courseware/model_data.py index a0e217ede7c..faa5fdbad3a 100644 --- a/lms/djangoapps/courseware/model_data.py +++ b/lms/djangoapps/courseware/model_data.py @@ -23,7 +23,7 @@ DjangoOrmFieldCache: A base-class for single-row-per-field caches. import json from abc import abstractmethod, ABCMeta -from collections import defaultdict +from collections import defaultdict, namedtuple from .models import ( StudentModule, XModuleUserStateSummaryField, @@ -741,6 +741,7 @@ class FieldDataCache(object): self.course_id, ), } + self.scorable_locations = set() self.add_descriptors_to_cache(descriptors) def add_descriptors_to_cache(self, descriptors): @@ -748,6 +749,7 @@ class FieldDataCache(object): Add all `descriptors` to this FieldDataCache. """ if self.user.is_authenticated(): + self.scorable_locations.update(desc.location for desc in descriptors if desc.has_score) for scope, fields in self._fields_to_cache(descriptors).items(): if scope not in self.cache: continue @@ -955,3 +957,63 @@ class FieldDataCache(object): def __len__(self): return sum(len(cache) for cache in self.cache.values()) + + +class ScoresClient(object): + """ + Basic client interface for retrieving Score information. + + Eventually, this should read and write scores, but at the moment it only + handles the read side of things. + """ + Score = namedtuple('Score', 'correct total') + + def __init__(self, course_key, user_id): + """Basic constructor. 
from_field_data_cache() is more appopriate for most uses.""" + self.course_key = course_key + self.user_id = user_id + self._locations_to_scores = {} + self._has_fetched = False + + def __contains__(self, location): + """Return True if we have a score for this location.""" + return location in self._locations_to_scores + + def fetch_scores(self, locations): + """Grab score information.""" + scores_qset = StudentModule.objects.filter( + student_id=self.user_id, + course_id=self.course_key, + module_state_key__in=set(locations), + ) + # Locations in StudentModule don't necessarily have course key info + # attached to them (since old mongo identifiers don't include runs). + # So we have to add that info back in before we put it into our lookup. + self._locations_to_scores.update({ + UsageKey.from_string(location).map_into_course(self.course_key): self.Score(correct, total) + for location, correct, total + in scores_qset.values_list('module_state_key', 'grade', 'max_grade') + }) + self._has_fetched = True + + def get(self, location): + """ + Get the score for a given location, if it exists. + + If we don't have a score for that location, return `None`. Note that as + convention, you should be passing in a location with full course run + information. + """ + if not self._has_fetched: + raise ValueError( + "Tried to fetch location {} from ScoresClient before fetch_scores() has run." + .format(location) + ) + return self._locations_to_scores.get(location) + + @classmethod + def from_field_data_cache(cls, fd_cache): + """Create a ScoresClient from a populated FieldDataCache.""" + client = cls(fd_cache.course_id, fd_cache.user.id) + client.fetch_scores(fd_cache.scorable_locations) + return client diff --git a/lms/djangoapps/courseware/models.py b/lms/djangoapps/courseware/models.py index ac7b72520bd..32a01e9e6ea 100644 --- a/lms/djangoapps/courseware/models.py +++ b/lms/djangoapps/courseware/models.py @@ -133,7 +133,10 @@ class StudentModule(models.Model): return 'StudentModule<%r>' % ({ 'course_id': self.course_id, 'module_type': self.module_type, - 'student': self.student.username, # pylint: disable=no-member + # We use the student_id instead of username to avoid a database hop. + # This can actually matter in cases where we're logging many of + # these (e.g. on a broken progress page). + 'student_id': self.student_id, # pylint: disable=no-member 'module_state_key': self.module_state_key, 'state': str(self.state)[:20], },) diff --git a/lms/djangoapps/courseware/tests/test_grades.py b/lms/djangoapps/courseware/tests/test_grades.py index c6825c99be9..5935eaae034 100644 --- a/lms/djangoapps/courseware/tests/test_grades.py +++ b/lms/djangoapps/courseware/tests/test_grades.py @@ -2,13 +2,16 @@ Test grade calculation. 
""" from django.http import Http404 +from django.test.client import RequestFactory + from mock import patch from nose.plugins.attrib import attr from opaque_keys.edx.locations import SlashSeparatedCourseKey -from courseware.grades import grade, iterate_grades_for +from courseware.grades import field_data_cache_for_grading, grade, iterate_grades_for, MaxScoresCache from student.tests.factories import UserFactory -from xmodule.modulestore.tests.factories import CourseFactory +from student.models import CourseEnrollment +from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase @@ -121,3 +124,73 @@ class TestGradeIteration(ModuleStoreTestCase): students_to_errors[student] = err_msg return students_to_gradesets, students_to_errors + + +class TestMaxScoresCache(ModuleStoreTestCase): + """ + Tests for the MaxScoresCache + """ + def setUp(self): + super(TestMaxScoresCache, self).setUp() + self.student = UserFactory.create() + self.course = CourseFactory.create() + self.problems = [] + for _ in xrange(3): + self.problems.append( + ItemFactory.create(category='problem', parent=self.course) + ) + + CourseEnrollment.enroll(self.student, self.course.id) + self.request = RequestFactory().get('/') + self.locations = [problem.location for problem in self.problems] + + def test_max_scores_cache(self): + """ + Tests the behavior fo the MaxScoresCache + """ + max_scores_cache = MaxScoresCache("test_max_scores_cache") + self.assertEqual(max_scores_cache.num_cached_from_remote(), 0) + self.assertEqual(max_scores_cache.num_cached_updates(), 0) + + # add score to cache + max_scores_cache.set(self.locations[0], 1) + self.assertEqual(max_scores_cache.num_cached_updates(), 1) + + # push to remote cache + max_scores_cache.push_to_remote() + + # create a new cache with the same params, fetch from remote cache + max_scores_cache = MaxScoresCache("test_max_scores_cache") + max_scores_cache.fetch_from_remote(self.locations) + + # see cache is populated + self.assertEqual(max_scores_cache.num_cached_from_remote(), 1) + + +class TestFieldDataCacheScorableLocations(ModuleStoreTestCase): + """ + Make sure we can filter the locations we pull back student state for via + the FieldDataCache. 
+ """ + def setUp(self): + super(TestFieldDataCacheScorableLocations, self).setUp() + self.student = UserFactory.create() + self.course = CourseFactory.create() + chapter = ItemFactory.create(category='chapter', parent=self.course) + sequential = ItemFactory.create(category='sequential', parent=chapter) + vertical = ItemFactory.create(category='vertical', parent=sequential) + ItemFactory.create(category='video', parent=vertical) + ItemFactory.create(category='html', parent=vertical) + ItemFactory.create(category='discussion', parent=vertical) + ItemFactory.create(category='problem', parent=vertical) + + CourseEnrollment.enroll(self.student, self.course.id) + + def test_field_data_cache_scorable_locations(self): + """Only scorable locations should be in FieldDataCache.scorable_locations.""" + fd_cache = field_data_cache_for_grading(self.course, self.student) + block_types = set(loc.block_type for loc in fd_cache.scorable_locations) + self.assertNotIn('video', block_types) + self.assertNotIn('html', block_types) + self.assertNotIn('discussion', block_types) + self.assertIn('problem', block_types) diff --git a/lms/djangoapps/courseware/tests/test_model_data.py b/lms/djangoapps/courseware/tests/test_model_data.py index 0f87c49de35..a123309f85b 100644 --- a/lms/djangoapps/courseware/tests/test_model_data.py +++ b/lms/djangoapps/courseware/tests/test_model_data.py @@ -6,8 +6,7 @@ from mock import Mock, patch from nose.plugins.attrib import attr from functools import partial -from courseware.model_data import DjangoKeyValueStore -from courseware.model_data import InvalidScopeError, FieldDataCache +from courseware.model_data import DjangoKeyValueStore, FieldDataCache, InvalidScopeError from courseware.models import StudentModule from courseware.models import XModuleStudentInfoField, XModuleStudentPrefsField diff --git a/lms/djangoapps/courseware/tests/test_submitting_problems.py b/lms/djangoapps/courseware/tests/test_submitting_problems.py index 315ee21d2d1..1490d6f1042 100644 --- a/lms/djangoapps/courseware/tests/test_submitting_problems.py +++ b/lms/djangoapps/courseware/tests/test_submitting_problems.py @@ -109,6 +109,15 @@ class TestSubmittingProblems(ModuleStoreTestCase, LoginEnrollmentTestCase): return resp + def look_at_question(self, problem_url_name): + """ + Create state for a problem, but don't answer it + """ + location = self.problem_location(problem_url_name) + modx_url = self.modx_url(location, "problem_get") + resp = self.client.get(modx_url) + return resp + def reset_question_answer(self, problem_url_name): """ Reset specified problem for current user. @@ -457,6 +466,33 @@ class TestCourseGrader(TestSubmittingProblems): current_count = csmh.count() self.assertEqual(current_count, 3) + def test_grade_with_max_score_cache(self): + """ + Tests that the max score cache is populated after a grading run + and that the results of grading runs before and after the cache + warms are the same. 
+ """ + self.basic_setup() + self.submit_question_answer('p1', {'2_1': 'Correct'}) + self.look_at_question('p2') + self.assertTrue( + StudentModule.objects.filter( + module_state_key=self.problem_location('p2') + ).exists() + ) + location_to_cache = unicode(self.problem_location('p2')) + max_scores_cache = grades.MaxScoresCache.create_for_course(self.course) + + # problem isn't in the cache + max_scores_cache.fetch_from_remote([location_to_cache]) + self.assertIsNone(max_scores_cache.get(location_to_cache)) + self.check_grade_percent(0.33) + + # problem is in the cache + max_scores_cache.fetch_from_remote([location_to_cache]) + self.assertIsNotNone(max_scores_cache.get(location_to_cache)) + self.check_grade_percent(0.33) + def test_none_grade(self): """ Check grade is 0 to begin with. @@ -474,6 +510,13 @@ class TestCourseGrader(TestSubmittingProblems): self.check_grade_percent(0.33) self.assertEqual(self.get_grade_summary()['grade'], 'B') + @patch.dict("django.conf.settings.FEATURES", {"ENABLE_MAX_SCORE_CACHE": False}) + def test_grade_no_max_score_cache(self): + """ + Tests grading when the max score cache is disabled + """ + self.test_b_grade_exact() + def test_b_grade_above(self): """ Check grade between cutoffs. diff --git a/lms/djangoapps/courseware/views.py b/lms/djangoapps/courseware/views.py index f1d86af9b04..116daea7a5b 100644 --- a/lms/djangoapps/courseware/views.py +++ b/lms/djangoapps/courseware/views.py @@ -44,7 +44,7 @@ from openedx.core.djangoapps.credit.api import ( is_user_eligible_for_credit, is_credit_course ) -from courseware.model_data import FieldDataCache +from courseware.model_data import FieldDataCache, ScoresClient from .module_render import toc_for_course, get_module_for_descriptor, get_module, get_module_by_usage_id from .entrance_exams import ( course_has_entrance_exam, @@ -1054,10 +1054,15 @@ def _progress(request, course_key, student_id): # The pre-fetching of groups is done to make auth checks not require an # additional DB lookup (this kills the Progress page in particular). 
student = User.objects.prefetch_related("groups").get(id=student.id) - - courseware_summary = grades.progress_summary(student, request, course) + field_data_cache = grades.field_data_cache_for_grading(course, student) + scores_client = ScoresClient.from_field_data_cache(field_data_cache) + courseware_summary = grades.progress_summary( + student, request, course, field_data_cache=field_data_cache, scores_client=scores_client + ) + grade_summary = grades.grade( + student, request, course, field_data_cache=field_data_cache, scores_client=scores_client + ) studio_url = get_studio_url(course, 'settings/grading') - grade_summary = grades.grade(student, request, course) if courseware_summary is None: #This means the student didn't have access to the course (which the instructor requested) diff --git a/lms/djangoapps/instructor_task/tests/test_tasks_helper.py b/lms/djangoapps/instructor_task/tests/test_tasks_helper.py index 7d5084e7a37..12210c4260a 100644 --- a/lms/djangoapps/instructor_task/tests/test_tasks_helper.py +++ b/lms/djangoapps/instructor_task/tests/test_tasks_helper.py @@ -1336,7 +1336,7 @@ class TestCertificateGeneration(InstructorTaskModuleTestCase): current_task = Mock() current_task.update_state = Mock() - with self.assertNumQueries(109): + with self.assertNumQueries(125): with patch('instructor_task.tasks_helper._get_current_task') as mock_current_task: mock_current_task.return_value = current_task with patch('capa.xqueue_interface.XQueueInterface.send_to_queue') as mock_queue: diff --git a/lms/envs/common.py b/lms/envs/common.py index 620425db67b..69ed33a0538 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -40,6 +40,7 @@ from django.utils.translation import ugettext_lazy as _ from .discussionsettings import * import dealer.git from xmodule.modulestore.modulestore_settings import update_module_store_settings +from xmodule.modulestore.edit_info import EditInfoMixin from xmodule.mixin import LicenseMixin from lms.djangoapps.lms_xblock.mixin import LmsBlockMixin @@ -416,6 +417,9 @@ FEATURES = { # The block types to disable need to be specified in "x block disable config" in django admin. 'ENABLE_DISABLING_XBLOCK_TYPES': True, + + # Enable the max score cache to speed up grading + 'ENABLE_MAX_SCORE_CACHE': True, } # Ignore static asset files on import which match this pattern @@ -711,7 +715,7 @@ from xmodule.x_module import XModuleMixin # These are the Mixins that should be added to every XBlock. # This should be moved into an XBlock Runtime/Application object # once the responsibility of XBlock creation is moved out of modulestore - cpennington -XBLOCK_MIXINS = (LmsBlockMixin, InheritanceMixin, XModuleMixin) +XBLOCK_MIXINS = (LmsBlockMixin, InheritanceMixin, XModuleMixin, EditInfoMixin) # Allow any XBlock in the LMS XBLOCK_SELECT_FUNCTION = prefer_xmodules -- GitLab
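For reference, the consolidated call pattern from the
lms/djangoapps/courseware/views.py hunk above, condensed into a usage sketch
(error handling and template context omitted):

    # Usage sketch (condensed from views.py in this patch): build the filtered
    # FieldDataCache once and share it -- along with one ScoresClient --
    # across both grading passes so student state is only loaded a single time.
    from courseware import grades
    from courseware.model_data import ScoresClient

    field_data_cache = grades.field_data_cache_for_grading(course, student)
    scores_client = ScoresClient.from_field_data_cache(field_data_cache)
    courseware_summary = grades.progress_summary(
        student, request, course,
        field_data_cache=field_data_cache, scores_client=scores_client,
    )
    grade_summary = grades.grade(
        student, request, course,
        field_data_cache=field_data_cache, scores_client=scores_client,
    )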