Skip to content
Snippets Groups Projects
Commit 6c596f7c authored by Calen Pennington's avatar Calen Pennington
Browse files

Allow courses api to return data incrementally

Prior to this commit, the course api (/api/courses/v1/courses/)
performed all the work necessary to return all courses available
to the user, and then only actually returned on page's worth of those
courses.

With this change, the api now does the work incrementally, computing
only the data needed to fetch the courses up to and including the page
being returned. This still increases approximately linearly as
the page number accessed being increases, but should be more cache-friendly.
One side effect of this is that the max_page reported by pagination
will be an overestimate (it will include pages that are removed due
to a users access restrictions).

This change also changes the sort-order of courses being returned by the
course_api. By sorting by course-id, rather than course-number, we
can sort in the database, rather than in Python, and defer loading data
from the end of the list until it is requested.

REVMI-90
parent 9ff9c33f
No related merge requests found
......@@ -15,7 +15,7 @@ from openedx.core.djangoapps.site_configuration import helpers as configuration_
def get_visible_courses(org=None, filter_=None):
"""
Return the set of CourseOverviews that should be visible in this branded
Yield the CourseOverviews that should be visible in this branded
instance.
Arguments:
......@@ -27,9 +27,10 @@ def get_visible_courses(org=None, filter_=None):
# Import is placed here to avoid model import at project startup.
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
courses = []
current_site_orgs = configuration_helpers.get_current_site_orgs()
courses = CourseOverview.objects.none()
if org:
# Check the current site's orgs to make sure the org's courses should be displayed
if not current_site_orgs or org in current_site_orgs:
......@@ -40,7 +41,7 @@ def get_visible_courses(org=None, filter_=None):
else:
courses = CourseOverview.get_all_courses(filter_=filter_)
courses = sorted(courses, key=lambda course: course.number)
courses = courses.order_by('id')
# Filtering can stop here.
if current_site_orgs:
......@@ -57,11 +58,11 @@ def get_visible_courses(org=None, filter_=None):
)
if filtered_visible_ids:
return [course for course in courses if course.id in filtered_visible_ids]
return courses.filter(id__in=filtered_visible_ids)
else:
# Filter out any courses based on current org, to avoid leaking these.
orgs = configuration_helpers.get_all_orgs()
return [course for course in courses if course.location.org not in orgs]
return courses.exclude(org__in=orgs)
def get_university_for_request():
......
......@@ -57,7 +57,7 @@ def course_detail(request, username, course_key):
def list_courses(request, username, org=None, filter_=None):
"""
Return a list of available courses.
Yield all available courses.
The courses returned are all be visible to the user identified by
`username` and the logged in user should have permission to view courses
......@@ -81,7 +81,7 @@ def list_courses(request, username, org=None, filter_=None):
by the given key-value pairs.
Return value:
List of `CourseOverview` objects representing the collection of courses.
Yield `CourseOverview` objects representing the collection of courses.
"""
user = get_effective_user(request.user, username)
return get_courses(user, org=org, filter_=filter_)
......@@ -231,12 +231,12 @@ class TestGetCourseListExtras(CourseListTestMixin, ModuleStoreTestCase):
def test_no_courses(self):
courses = self._make_api_call(self.honor_user, self.honor_user)
self.assertEqual(len(courses), 0)
self.assertEqual(len(list(courses)), 0)
def test_hidden_course_for_honor(self):
self.create_course(visible_to_staff_only=True)
courses = self._make_api_call(self.honor_user, self.honor_user)
self.assertEqual(len(courses), 0)
self.assertEqual(len(list(courses)), 0)
def test_hidden_course_for_staff(self):
self.create_course(visible_to_staff_only=True)
......
......@@ -356,7 +356,9 @@ class CourseListSearchViewTest(CourseApiTestViewMixin, ModuleStoreTestCase, Sear
res = self.verify_response(params={'search_term': 'unique search term'})
self.assertIn('results', res.data)
self.assertNotEqual(res.data['results'], [])
self.assertEqual(res.data['pagination']['count'], 1) # Should list a single course
# Returns a count of 3 courses because that's the estimate before filtering
self.assertEqual(res.data['pagination']['count'], 3)
self.assertEqual(len(res.data['results']), 1) # Should return a single course
def test_too_many_courses(self):
"""
......@@ -390,7 +392,7 @@ class CourseListSearchViewTest(CourseApiTestViewMixin, ModuleStoreTestCase, Sear
self.setup_user(self.audit_user)
# These query counts were found empirically
query_counts = [1266, 349, 349, 349, 349, 349, 349, 349, 349, 349, 322]
query_counts = [174, 196, 226, 256, 286, 316, 346, 376, 406, 436, 331]
ordered_course_ids = sorted([str(cid) for cid in (course_ids + [c.id for c in self.courses])])
self.clear_caches()
......
......@@ -10,6 +10,7 @@ from rest_framework.throttling import UserRateThrottle
from edx_rest_framework_extensions.paginators import NamespacedPageNumberPagination
from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, view_auth_classes
from openedx.core.lib.api.view_utils import LazySequence
from . import USE_RATE_LIMIT_2_FOR_COURSE_LIST_API, USE_RATE_LIMIT_10_FOR_COURSE_LIST_API
from .api import course_detail, list_courses
......@@ -243,7 +244,7 @@ class CourseListView(DeveloperErrorViewMixin, ListAPIView):
def get_queryset(self):
"""
Return a list of courses visible to the user.
Yield courses visible to the user.
"""
form = CourseListGetForm(self.request.query_params, initial={'requesting_user': self.request.user})
if not form.is_valid():
......@@ -264,9 +265,12 @@ class CourseListView(DeveloperErrorViewMixin, ListAPIView):
size=self.results_size_infinity,
)
search_courses_ids = {course['data']['id']: True for course in search_courses['results']}
search_courses_ids = {course['data']['id'] for course in search_courses['results']}
return [
course for course in db_courses
if unicode(course.id) in search_courses_ids
]
return LazySequence(
(
course for course in db_courses
if unicode(course.id) in search_courses_ids
),
est_len=len(db_courses)
)
......@@ -35,6 +35,7 @@ from lms.djangoapps.courseware.exceptions import CourseAccessRedirect
from opaque_keys.edx.keys import UsageKey
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.site_configuration import helpers as configuration_helpers
from openedx.core.lib.api.view_utils import LazySequence
from path import Path as path
from six import text_type
from static_replace import replace_static_urls
......@@ -451,8 +452,7 @@ def get_course_syllabus_section(course, section_key):
def get_courses(user, org=None, filter_=None):
"""
Returns a list of courses available, sorted by course.number and optionally
filtered by org code (case-insensitive).
Return a LazySequence of courses available, optionally filtered by org code (case-insensitive).
"""
courses = branding.get_visible_courses(org=org, filter_=filter_)
......@@ -461,9 +461,10 @@ def get_courses(user, org=None, filter_=None):
settings.COURSE_CATALOG_VISIBILITY_PERMISSION
)
courses = [c for c in courses if has_access(user, permission_name, c)]
return courses
return LazySequence(
(c for c in courses if has_access(user, permission_name, c)),
est_len=courses.count()
)
def get_permission_for_course_about():
......
......@@ -141,7 +141,7 @@ class CoursesTest(ModuleStoreTestCase):
# Request filtering for an org distinct from the designated org.
no_courses = get_courses(user, org=primary)
self.assertEqual(no_courses, [])
self.assertEqual(list(no_courses), [])
# Request filtering for an org matching the designated org.
site_courses = get_courses(user, org=alternate)
......
......@@ -409,7 +409,8 @@ class CourseOverview(TimeStampedModel):
migrate and test switching to display_name_with_default, which is no
longer escaped.
"""
return block_metadata_utils.display_name_with_default_escaped(self)
# pylint: disable=line-too-long
return block_metadata_utils.display_name_with_default_escaped(self) # xss-lint: disable=python-deprecated-display-name
@property
def dashboard_start_display(self):
......@@ -560,7 +561,7 @@ class CourseOverview(TimeStampedModel):
@classmethod
def get_all_courses(cls, orgs=None, filter_=None):
"""
Returns all CourseOverview objects in the database.
Return a queryset containing all CourseOverview objects in the database.
Arguments:
orgs (list[string]): Optional parameter that allows case-insensitive
......
"""
Utilities related to API views
"""
from collections import Sequence
from django.core.exceptions import NON_FIELD_ERRORS, ObjectDoesNotExist, ValidationError
from django.http import Http404
from django.utils.translation import ugettext as _
......@@ -205,3 +206,100 @@ class RetrievePatchAPIView(RetrieveModelMixin, UpdateModelMixin, GenericAPIView)
# PATCH requests where the object does not exist should still
# return a 404 response.
raise
class LazySequence(Sequence):
"""
This class provides an immutable Sequence interface on top of an existing
iterable.
It is immutable, and accepts an estimated length in order to support __len__
without exhausting the underlying sequence
"""
def __init__(self, iterable, est_len=None): # pylint: disable=super-init-not-called
self.iterable = iterable
self.est_len = est_len
self._data = []
self._exhausted = False
def __len__(self):
# Return the actual data length if we know it exactly (because
# the underlying sequence is exhausted), or it's greater than
# the initial estimated length
if len(self._data) > self.est_len or self._exhausted:
return len(self._data)
else:
return self.est_len
def __iter__(self):
# Yield all the known data first
for item in self._data:
yield item
# Capture and yield data from the underlying iterator
# until it is exhausted
while True:
try:
item = next(self.iterable)
self._data.append(item)
yield item
except StopIteration:
self._exhausted = True
return
def __getitem__(self, index):
if isinstance(index, int):
# For a single index, if we haven't already loaded enough
# data, we can load data until we have enough, and then
# return the value from the loaded data
if index < 0:
raise IndexError("Negative indexes aren't supported")
while len(self._data) <= index:
try:
self._data.append(next(self.iterable))
except StopIteration:
self._exhausted = True
raise IndexError("Underlying sequence exhausted")
return self._data[index]
elif isinstance(index, slice):
# For a slice, we can load data until we reach 'stop'.
# Once we have data including 'stop', then we can use
# the underlying list to actually understand the mechanics
# of the slicing operation.
if index.start is not None and index.start < 0:
raise IndexError("Negative indexes aren't supported")
if index.stop is not None and index.stop < 0:
raise IndexError("Negative indexes aren't supported")
if index.step is not None and index.step < 0:
largest_value = index.start + 1
else:
largest_value = index.stop
if largest_value is not None:
while len(self._data) <= largest_value:
try:
self._data.append(next(self.iterable))
except StopIteration:
self._exhausted = True
break
else:
self._data.extend(self.iterable)
return self._data[index]
else:
raise TypeError("Unsupported index type")
def __repr__(self):
if self._exhausted:
return "LazySequence({!r}, {!r})".format(
self._data,
self.est_len,
)
else:
return "LazySequence(itertools.chain({!r}, {!r}), {!r})".format(
self._data,
self.iterable,
self.est_len,
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment