diff --git a/common/djangoapps/student/models.py b/common/djangoapps/student/models.py index d7a918b5690ef684714a348bfb13d7c067d10db7..67746326348b09e3e5f97dc2defe906282d72959 100644 --- a/common/djangoapps/student/models.py +++ b/common/djangoapps/student/models.py @@ -769,7 +769,7 @@ class CourseEnrollment(models.Model): activation_changed = True mode_changed = False - # if mode is None, the call to update_enrollment didn't specify a new + # if mode is None, the call to update_enrollment didn't specify a new # mode, so leave as-is if self.mode != mode and mode is not None: self.mode = mode @@ -967,6 +967,14 @@ class CourseEnrollment(models.Model): def enrollments_for_user(cls, user): return CourseEnrollment.objects.filter(user=user, is_active=1) + @classmethod + def users_enrolled_in(cls, course_id): + """Return a queryset of User for every user enrolled in the course.""" + return User.objects.filter( + courseenrollment__course_id=course_id, + courseenrollment__is_active=True + ) + def activate(self): """Makes this `CourseEnrollment` record active. Saves immediately.""" self.update_enrollment(is_active=True) diff --git a/lms/djangoapps/courseware/grades.py b/lms/djangoapps/courseware/grades.py index 93c0d5bf474a4e63c2a6c7e0a4b97264fe6d0407..438466c0eaa1715ff8fa2b34994e062e0e9194bc 100644 --- a/lms/djangoapps/courseware/grades.py +++ b/lms/djangoapps/courseware/grades.py @@ -8,13 +8,12 @@ from collections import defaultdict from django.conf import settings from django.contrib.auth.models import User from django.db import transaction -from django.core.handlers.base import BaseHandler from django.test.client import RequestFactory from dogapi import dog_stats_api from courseware import courses -from courseware.model_data import FieldDataCache, DjangoKeyValueStore +from courseware.model_data import FieldDataCache from xblock.fields import Scope from xmodule import graders from xmodule.capa_module import CapaModule @@ -431,6 +430,7 @@ def manual_transaction(): else: transaction.commit() + def iterate_grades_for(course_id, students): """Given a course_id and an iterable of students (User), yield a tuple of: @@ -447,7 +447,7 @@ def iterate_grades_for(course_id, students): up the grade. (For display) - grade_breakdown : A breakdown of the major components that make up the final grade. (For display) - - raw_scores contains scores for every graded module + - raw_scores: contains scores for every graded module """ course = courses.get_course_by_id(course_id) @@ -460,11 +460,16 @@ def iterate_grades_for(course_id, students): with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=['action:{}'.format(course_id)]): try: request.user = student + # Grading calls problem rendering, which calls masquerading, + # which checks session vars -- thus the empty session dict below. + # It's not pretty, but untangling that is currently beyond the + # scope of this feature. + request.session = {} gradeset = grade(student, request, course) yield student, gradeset, "" - except Exception as exc: + except Exception as exc: # pylint: disable=broad-except # Keep marching on even if this student couldn't be graded for - # some reason. + # some reason, but log it for future reference. log.exception( 'Cannot grade student %s (%s) in course %s because of exception: %s', student.username, diff --git a/lms/djangoapps/instructor/tests/test_api.py b/lms/djangoapps/instructor/tests/test_api.py index 6744031c6f9674eb7e8356a0bbf7e69bf88c3618..016b5bc39c95d2b05ccc4866474d1fd435cdef3a 100644 --- a/lms/djangoapps/instructor/tests/test_api.py +++ b/lms/djangoapps/instructor/tests/test_api.py @@ -29,7 +29,6 @@ from courseware.models import StudentModule # modules which are mocked in test cases. import instructor_task.api - from instructor.access import allow_access import instructor.views.api from instructor.views.api import _split_input_list, _msk_from_problem_urlname, common_exceptions_400 @@ -128,6 +127,8 @@ class TestInstructorAPIDenyLevels(ModuleStoreTestCase, LoginEnrollmentTestCase): ('send_email', {'send_to': 'staff', 'subject': 'test', 'message': 'asdf'}), ('list_instructor_tasks', {}), ('list_background_email_tasks', {}), + ('list_grade_downloads', {}), + ('calculate_grades_csv', {}), ] # Endpoints that only Instructors can access self.instructor_level_endpoints = [ @@ -790,6 +791,50 @@ class TestInstructorAPILevelsDataDump(ModuleStoreTestCase, LoginEnrollmentTestCa self.assertTrue(body.startswith('"User ID","Anonymized user ID"\n"2","42"\n')) self.assertTrue(body.endswith('"7","42"\n')) + def test_list_grade_downloads(self): + url = reverse('list_grade_downloads', kwargs={'course_id': self.course.id}) + with patch('instructor_task.models.LocalFSGradesStore.links_for') as mock_links_for: + mock_links_for.return_value = [ + ('mock_file_name_1', 'https://1.mock.url'), + ('mock_file_name_2', 'https://2.mock.url'), + ] + response = self.client.get(url, {}) + + expected_response = { + "downloads": [ + { + "url": "https://1.mock.url", + "link": "<a href=\"https://1.mock.url\">mock_file_name_1</a>", + "name": "mock_file_name_1" + }, + { + "url": "https://2.mock.url", + "link": "<a href=\"https://2.mock.url\">mock_file_name_2</a>", + "name": "mock_file_name_2" + } + ] + } + res_json = json.loads(response.content) + self.assertEqual(res_json, expected_response) + + def test_calculate_grades_csv_success(self): + url = reverse('calculate_grades_csv', kwargs={'course_id': self.course.id}) + + with patch('instructor_task.api.submit_calculate_grades_csv') as mock_cal_grades: + mock_cal_grades.return_value = True + response = self.client.get(url, {}) + success_status = "Your grade report is being generated! You can view the status of the generation task in the 'Pending Instructor Tasks' section." + self.assertIn(success_status, response.content) + + def test_calculate_grades_csv_already_running(self): + url = reverse('calculate_grades_csv', kwargs={'course_id': self.course.id}) + + with patch('instructor_task.api.submit_calculate_grades_csv') as mock_cal_grades: + mock_cal_grades.side_effect = AlreadyRunningError() + response = self.client.get(url, {}) + already_running_status = "A grade report generation task is already in progress. Check the 'Pending Instructor Tasks' table for the status of the task. When completed, the report will be available for download in the table below." + self.assertIn(already_running_status, response.content) + def test_get_students_features_csv(self): """ Test that some minimum of information is formatted @@ -802,7 +847,7 @@ class TestInstructorAPILevelsDataDump(ModuleStoreTestCase, LoginEnrollmentTestCa def test_get_distribution_no_feature(self): """ Test that get_distribution lists available features - when supplied no feature quparameter. + when supplied no feature parameter. """ url = reverse('get_distribution', kwargs={'course_id': self.course.id}) response = self.client.get(url) diff --git a/lms/djangoapps/instructor/views/api.py b/lms/djangoapps/instructor/views/api.py index d4180eef0e0be300be38590fe8bbbda6cf327c0c..215f775d7742c692e01dd146085515408baa97db 100644 --- a/lms/djangoapps/instructor/views/api.py +++ b/lms/djangoapps/instructor/views/api.py @@ -761,7 +761,7 @@ def list_grade_downloads(_request, course_id): grades_store = GradesStore.from_config() response_payload = { - 'downloads' : [ + 'downloads': [ dict(name=name, url=url, link='<a href="{}">{}</a>'.format(url, name)) for name, url in grades_store.links_for(course_id) ] diff --git a/lms/djangoapps/instructor/views/instructor_dashboard.py b/lms/djangoapps/instructor/views/instructor_dashboard.py index c1fa3e67bdb75cf7371fae14a52a21a9f61885a7..de14df29404ee1f28dec9d6aa6b05975180e861e 100644 --- a/lms/djangoapps/instructor/views/instructor_dashboard.py +++ b/lms/djangoapps/instructor/views/instructor_dashboard.py @@ -171,8 +171,8 @@ def _section_data_download(course_id, access): 'get_students_features_url': reverse('get_students_features', kwargs={'course_id': course_id}), 'get_anon_ids_url': reverse('get_anon_ids', kwargs={'course_id': course_id}), 'list_instructor_tasks_url': reverse('list_instructor_tasks', kwargs={'course_id': course_id}), - 'list_grade_downloads_url' : reverse('list_grade_downloads', kwargs={'course_id' : course_id}), - 'calculate_grades_csv_url' : reverse('calculate_grades_csv', kwargs={'course_id' : course_id}), + 'list_grade_downloads_url': reverse('list_grade_downloads', kwargs={'course_id': course_id}), + 'calculate_grades_csv_url': reverse('calculate_grades_csv', kwargs={'course_id': course_id}), } return section_data diff --git a/lms/djangoapps/instructor_task/api.py b/lms/djangoapps/instructor_task/api.py index 23d85ebf95b0f943d9947b8f8cff35ea636c9c3f..73af3e43784edcc24fa528569f69e56985917ac7 100644 --- a/lms/djangoapps/instructor_task/api.py +++ b/lms/djangoapps/instructor_task/api.py @@ -208,6 +208,7 @@ def submit_bulk_course_email(request, course_id, email_id): task_key = hashlib.md5(task_key_stub).hexdigest() return submit_task(request, task_type, task_class, course_id, task_input, task_key) + def submit_calculate_grades_csv(request, course_id): """ AlreadyRunningError is raised if the course's grades are already being updated. diff --git a/lms/djangoapps/instructor_task/models.py b/lms/djangoapps/instructor_task/models.py index 2acf78cbaabe8b6300bc54a904fcd6601fe89529..8df6a95c71e57e94e07eb6436b9691870118fd52 100644 --- a/lms/djangoapps/instructor_task/models.py +++ b/lms/djangoapps/instructor_task/models.py @@ -211,7 +211,15 @@ class GradesStore(object): class S3GradesStore(GradesStore): """ + Grades store backed by S3. The directory structure we use to store things + is:: + `{bucket}/{root_path}/{sha1 hash of course_id}/filename` + + We might later use subdirectories or metadata to do more intelligent + grouping and querying, but right now it simply depends on its own + conventions on where files are stored to know what to display. Clients using + this class can name the final file whatever they want. """ def __init__(self, bucket_name, root_path): self.root_path = root_path @@ -224,13 +232,26 @@ class S3GradesStore(GradesStore): @classmethod def from_config(cls): + """ + The expected configuration for an `S3GradesStore` is to have a + `GRADES_DOWNLOAD` dict in settings with the following fields:: + + STORAGE_TYPE : "s3" + BUCKET : Your bucket name, e.g. "grades-bucket" + ROOT_PATH : The path you want to store all course files under. Do not + use a leading or trailing slash. e.g. "staging" or + "staging/2013", not "/staging", or "/staging/" + + Since S3 access relies on boto, you must also define `AWS_ACCESS_KEY_ID` + and `AWS_SECRET_ACCESS_KEY` in settings. + """ return cls( settings.GRADES_DOWNLOAD['BUCKET'], settings.GRADES_DOWNLOAD['ROOT_PATH'] ) def key_for(self, course_id, filename): - """Return the key we would use to store and retrive the data for the + """Return the S3 key we would use to store and retrive the data for the given filename.""" hashed_course_id = hashlib.sha1(course_id) @@ -244,6 +265,16 @@ class S3GradesStore(GradesStore): return key def store(self, course_id, filename, buff): + """ + Store the contents of `buff` in a directory determined by hashing + `course_id`, and name the file `filename`. `buff` is typically a + `StringIO`, but can be anything that implements `.getvalue()`. + + This method assumes that the contents of `buff` are gzip-encoded (it + will add the appropriate headers to S3 to make the decompression + transparent via the browser). Filenames should end in whatever + suffix makes sense for the original file, so `.txt` instead of `.gz` + """ key = self.key_for(course_id, filename) data = buff.getvalue() @@ -251,19 +282,26 @@ class S3GradesStore(GradesStore): key.content_encoding = "gzip" key.content_type = "text/csv" + # Just setting the content encoding and type above should work + # according to the docs, but when experimenting, this was necessary for + # it to actually take. key.set_contents_from_string( data, headers={ - "Content-Encoding" : "gzip", - "Content-Length" : len(data), - "Content-Type" : "text/csv", + "Content-Encoding": "gzip", + "Content-Length": len(data), + "Content-Type": "text/csv", } ) def store_rows(self, course_id, filename, rows): """ - Given a course_id, filename, and rows (each row is an iterable of strings), - write this data out. + Given a `course_id`, `filename`, and `rows` (each row is an iterable of + strings), create a buffer that is a gzip'd csv file, and then `store()` + that buffer. + + Even though we store it in gzip format, browsers will transparently + download and decompress it. Filenames should end in `.csv`, not `.gz`. """ output_buffer = StringIO() gzip_file = GzipFile(fileobj=output_buffer, mode="wb") @@ -291,7 +329,11 @@ class LocalFSGradesStore(GradesStore): """ LocalFS implementation of a GradesStore. This is meant for debugging purposes and is *absolutely not for production use*. Use S3GradesStore for - that. + that. We use this in tests and for local development. When it generates + links, it will make file:/// style links. That means you actually have to + copy them and open them in a separate browser window, for security reasons. + This lets us do the cheap thing locally for debugging without having to open + up a separate URL that would only be used to send files in dev. """ def __init__(self, root_path): """ @@ -309,7 +351,10 @@ class LocalFSGradesStore(GradesStore): that there is a dict in settings named GRADES_DOWNLOAD and that it has a ROOT_PATH that maps to an absolute file path that the web app has write permissions to. `LocalFSGradesStore` will create any intermediate - directories as needed. + directories as needed. Example:: + + STORAGE_TYPE : "localfs" + ROOT_PATH : /tmp/edx/grade-downloads/ """ return cls(settings.GRADES_DOWNLOAD['ROOT_PATH']) @@ -344,7 +389,10 @@ class LocalFSGradesStore(GradesStore): def links_for(self, course_id): """ For a given `course_id`, return a list of `(filename, url)` tuples. `url` - can be plugged straight into an href + can be plugged straight into an href. Note that `LocalFSGradesStore` + will generate `file://` type URLs, so you'll need to copy the URL and + open it in a new browser window. Again, this class is only meant for + local development. """ course_dir = self.path_to(course_id, '') if not os.path.exists(course_dir): @@ -355,4 +403,4 @@ class LocalFSGradesStore(GradesStore): for filename in os.listdir(course_dir) ], reverse=True - ) \ No newline at end of file + ) diff --git a/lms/djangoapps/instructor_task/tasks.py b/lms/djangoapps/instructor_task/tasks.py index 2c8ad51e83190824c9e372f8822f925342dbb7e8..69e8ee1865b7c0591080c7d55500ff5d4b9b615e 100644 --- a/lms/djangoapps/instructor_task/tasks.py +++ b/lms/djangoapps/instructor_task/tasks.py @@ -138,4 +138,4 @@ def calculate_grades_csv(entry_id, xmodule_instance_args): """ action_name = ugettext_noop('graded') task_fn = partial(push_grades_to_s3, xmodule_instance_args) - return run_main_task(entry_id, task_fn, action_name) \ No newline at end of file + return run_main_task(entry_id, task_fn, action_name) diff --git a/lms/djangoapps/instructor_task/tasks_helper.py b/lms/djangoapps/instructor_task/tasks_helper.py index c64f71b5f204fba575a8665a3eeb4fce8c23ac20..06941e7fdf7e94ce95ab1b4df94a27f6cb059289 100644 --- a/lms/djangoapps/instructor_task/tasks_helper.py +++ b/lms/djangoapps/instructor_task/tasks_helper.py @@ -24,6 +24,7 @@ from courseware.models import StudentModule from courseware.model_data import FieldDataCache from courseware.module_render import get_module_for_descriptor_internal from instructor_task.models import GradesStore, InstructorTask, PROGRESS +from student.models import CourseEnrollment # define different loggers for use within tasks and on client side TASK_LOG = get_task_logger(__name__) @@ -477,27 +478,19 @@ def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input, `GradesStore.from_config()`) and calling `link_for()` on it. Writes are buffered, so we'll never write part of a CSV file to S3 -- i.e. any files that are visible in GradesStore will be complete ones. + + As we start to add more CSV downloads, it will probably be worthwhile to + make a more general CSVDoc class instead of building out the rows like we + do here. """ - # Get start time for task: start_time = datetime.now(UTC) status_interval = 100 - # The pre-fetching of groups is done to make auth checks not require an - # additional DB lookup (this kills the Progress page in particular). - # But when doing grading at this scale, the memory required to store the resulting - # enrolled_students is too large to fit comfortably in memory, and subsequent - # course grading requests lead to memory fragmentation. So we will err here on the - # side of smaller memory allocations at the cost of additional lookups. - enrolled_students = User.objects.filter( - courseenrollment__course_id=course_id, - courseenrollment__is_active=True - ) - - # perform the main loop + enrolled_students = CourseEnrollment.users_enrolled_in(course_id) + num_total = enrolled_students.count() num_attempted = 0 num_succeeded = 0 num_failed = 0 - num_total = enrolled_students.count() curr_step = "Calculating Grades" def update_task_progress(): @@ -507,26 +500,27 @@ def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input, 'action_name': action_name, 'attempted': num_attempted, 'succeeded': num_succeeded, - 'failed' : num_failed, - 'total' : num_total, + 'failed': num_failed, + 'total': num_total, 'duration_ms': int((current_time - start_time).total_seconds() * 1000), - 'step' : curr_step, + 'step': curr_step, } _get_current_task().update_state(state=PROGRESS, meta=progress) return progress - # Loop over all our students and build a + # Loop over all our students and build our CSV lists in memory header = None rows = [] err_rows = [["id", "username", "error_msg"]] for student, gradeset, err_msg in iterate_grades_for(course_id, enrolled_students): - # Periodically update task status (this is a db write) + # Periodically update task status (this is a cache write) if num_attempted % status_interval == 0: update_task_progress() num_attempted += 1 if gradeset: + # We were able to successfully grade this student for this course. num_succeeded += 1 if not header: header = [section['label'] for section in gradeset[u'section_breakdown']] @@ -537,28 +531,37 @@ def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input, for section in gradeset[u'section_breakdown'] if 'label' in section } - row_percents = [percents[label] for label in header] + + # Not everybody has the same gradable items. If the item is not + # found in the user's gradeset, just assume it's a 0. The aggregated + # grades for their sections and overall course will be calculated + # without regard for the item they didn't have access to, so it's + # possible for a student to have a 0.0 show up in their row but + # still have 100% for the course. + row_percents = [percents.get(label, 0.0) for label in header] rows.append([student.id, student.email, student.username, gradeset['percent']] + row_percents) else: # An empty gradeset means we failed to grade a student. num_failed += 1 err_rows.append([student.id, student.username, err_msg]) + # By this point, we've got the rows we're going to stuff into our CSV files. curr_step = "Uploading CSVs" update_task_progress() - grades_store = GradesStore.from_config() + # Generate parts of the file name timestamp_str = start_time.strftime("%Y-%m-%d-%H%M") - - TASK_LOG.debug("Uploading CSV files for course {}".format(course_id)) - course_id_prefix = urllib.quote(course_id.replace("/", "_")) + + # Perform the actual upload + grades_store = GradesStore.from_config() grades_store.store_rows( course_id, "{}_grade_report_{}.csv".format(course_id_prefix, timestamp_str), rows ) - # If there are any error rows (don't count the header), write that out as well + + # If there are any error rows (don't count the header), write them out as well if len(err_rows) > 1: grades_store.store_rows( course_id, diff --git a/lms/envs/common.py b/lms/envs/common.py index a8575b89b6dfd5e6cd950228b4f908e36d66fd80..09299b960a532192b8a07c4607b643adc5a6be2f 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -195,10 +195,10 @@ MITX_FEATURES = { # Grade calculation started from the new instructor dashboard will write # grades CSV files to S3 and give links for downloads. - 'ENABLE_S3_GRADE_DOWNLOADS' : True, + 'ENABLE_S3_GRADE_DOWNLOADS': True, # Give course staff unrestricted access to grade downloads (if set to False, # only edX superusers can perform the downloads) - 'ALLOW_COURSE_STAFF_GRADE_DOWNLOADS' : False, + 'ALLOW_COURSE_STAFF_GRADE_DOWNLOADS': False, } # Used for A/B testing @@ -1071,11 +1071,11 @@ REGISTRATION_OPTIONAL_FIELDS = set([ 'goals', ]) -# Grades download +###################### Grade Downloads ###################### GRADES_DOWNLOAD_ROUTING_KEY = HIGH_MEM_QUEUE GRADES_DOWNLOAD = { - 'STORAGE_TYPE' : 'localfs', - 'BUCKET' : 'edx-grades', - 'ROOT_PATH' : '/tmp/edx-s3/grades', + 'STORAGE_TYPE': 'localfs', + 'BUCKET': 'edx-grades', + 'ROOT_PATH': '/tmp/edx-s3/grades', } diff --git a/lms/envs/dev.py b/lms/envs/dev.py index 86e214263d55b1895f857a4977e75d691a5703e0..1fe7fc330ca1c797bbd03c17b58128e79127549b 100644 --- a/lms/envs/dev.py +++ b/lms/envs/dev.py @@ -283,13 +283,6 @@ EDX_API_KEY = None ####################### Shoppingcart ########################### MITX_FEATURES['ENABLE_SHOPPING_CART'] = True -###################### Grade Downloads ###################### -GRADES_DOWNLOAD = { - 'STORAGE_TYPE' : 'localfs', - 'BUCKET' : 'edx-grades', - 'ROOT_PATH' : '/tmp/edx-s3/grades', -} - ##################################################################### # Lastly, see if the developer has any local overrides. try: diff --git a/lms/envs/test.py b/lms/envs/test.py index 4a56b41c091eac41b6dd795fc4a12b85e73e772e..49f8d29aa2e8bd9421e9535063345a56e7e33641 100644 --- a/lms/envs/test.py +++ b/lms/envs/test.py @@ -247,13 +247,6 @@ PASSWORD_HASHERS = ( # 'django.contrib.auth.hashers.CryptPasswordHasher', ) -###################### Grade Downloads ###################### -GRADES_DOWNLOAD = { - 'STORAGE_TYPE' : 'localfs', - 'BUCKET' : 'edx-grades', - 'ROOT_PATH' : '/tmp/edx-s3/grades', -} - ################### Make tests quieter # OpenID spews messages like this to stderr, we don't need to see them: