Allow courses api to return data incrementally

Prior to this commit, the course api (/api/courses/v1/courses/) performed all the work necessary to return all courses available to the user, and then only actually returned on page's worth of those courses. With this change, the api now does the work incrementally, computing only the data needed to fetch the courses up to and including the page being returned. This still increases approximately linearly as the page number accessed being increases, but should be more cache-friendly. One side effect of this is that the max_page reported by pagination will be an overestimate (it will include pages that are removed due to a users access restrictions). This change also changes the sort-order of courses being returned by the course_api. By sorting by course-id, rather than course-number, we can sort in the database, rather than in Python, and defer loading data from the end of the list until it is requested. REVMI-90

Allow courses api to return data incrementally
6c596f7c · Calen Pennington · 9ff9c33f · 6c596f7c · 6c596f7c · 6c596f7c
Commit 6c596f7c authored 6 years ago by Calen Pennington
--- a/lms/djangoapps/branding/__init__.py
+++ b/lms/djangoapps/branding/__init__.py
@@ -15,7 +15,7 @@ from openedx.core.djangoapps.site_configuration import helpers as configuration_

 def get_visible_courses(org=None, filter_=None):
    """
-    Return the set of CourseOverviews that should be visible in this branded
+    Yield the CourseOverviews that should be visible in this branded
    instance.

    Arguments:
@@ -27,9 +27,10 @@ def get_visible_courses(org=None, filter_=None):
    # Import is placed here to avoid model import at project startup.
    from openedx.core.djangoapps.content.course_overviews.models import CourseOverview

-    courses = []
    current_site_orgs = configuration_helpers.get_current_site_orgs()

+    courses = CourseOverview.objects.none()
+
    if org:
        # Check the current site's orgs to make sure the org's courses should be displayed
        if not current_site_orgs or org in current_site_orgs:
@@ -40,7 +41,7 @@ def get_visible_courses(org=None, filter_=None):
    else:
        courses = CourseOverview.get_all_courses(filter_=filter_)

-    courses = sorted(courses, key=lambda course: course.number)
+    courses = courses.order_by('id')

    # Filtering can stop here.
    if current_site_orgs:
@@ -57,11 +58,11 @@ def get_visible_courses(org=None, filter_=None):
        )

    if filtered_visible_ids:
-        return [course for course in courses if course.id in filtered_visible_ids]
+        return courses.filter(id__in=filtered_visible_ids)
    else:
        # Filter out any courses based on current org, to avoid leaking these.
        orgs = configuration_helpers.get_all_orgs()
-        return [course for course in courses if course.location.org not in orgs]
+        return courses.exclude(org__in=orgs)


 def get_university_for_request():

--- a/lms/djangoapps/course_api/api.py
+++ b/lms/djangoapps/course_api/api.py
@@ -57,7 +57,7 @@ def course_detail(request, username, course_key):

 def list_courses(request, username, org=None, filter_=None):
    """
-    Return a list of available courses.
+    Yield all available courses.

    The courses returned are all be visible to the user identified by
    `username` and the logged in user should have permission to view courses
@@ -81,7 +81,7 @@ def list_courses(request, username, org=None, filter_=None):
            by the given key-value pairs.

    Return value:
-        List of `CourseOverview` objects representing the collection of courses.
+        Yield `CourseOverview` objects representing the collection of courses.
    """
    user = get_effective_user(request.user, username)
    return get_courses(user, org=org, filter_=filter_)
--- a/lms/djangoapps/course_api/tests/test_api.py
+++ b/lms/djangoapps/course_api/tests/test_api.py
@@ -231,12 +231,12 @@ class TestGetCourseListExtras(CourseListTestMixin, ModuleStoreTestCase):

    def test_no_courses(self):
        courses = self._make_api_call(self.honor_user, self.honor_user)
-        self.assertEqual(len(courses), 0)
+        self.assertEqual(len(list(courses)), 0)

    def test_hidden_course_for_honor(self):
        self.create_course(visible_to_staff_only=True)
        courses = self._make_api_call(self.honor_user, self.honor_user)
-        self.assertEqual(len(courses), 0)
+        self.assertEqual(len(list(courses)), 0)

    def test_hidden_course_for_staff(self):
        self.create_course(visible_to_staff_only=True)

--- a/lms/djangoapps/course_api/tests/test_views.py
+++ b/lms/djangoapps/course_api/tests/test_views.py
@@ -356,7 +356,9 @@ class CourseListSearchViewTest(CourseApiTestViewMixin, ModuleStoreTestCase, Sear
        res = self.verify_response(params={'search_term': 'unique search term'})
        self.assertIn('results', res.data)
        self.assertNotEqual(res.data['results'], [])
-        self.assertEqual(res.data['pagination']['count'], 1)  # Should list a single course
+        # Returns a count of 3 courses because that's the estimate before filtering
+        self.assertEqual(res.data['pagination']['count'], 3)
+        self.assertEqual(len(res.data['results']), 1)  # Should return a single course

    def test_too_many_courses(self):
        """
@@ -390,7 +392,7 @@ class CourseListSearchViewTest(CourseApiTestViewMixin, ModuleStoreTestCase, Sear
        self.setup_user(self.audit_user)

        # These query counts were found empirically
-        query_counts = [1266, 349, 349, 349, 349, 349, 349, 349, 349, 349, 322]
+        query_counts = [174, 196, 226, 256, 286, 316, 346, 376, 406, 436, 331]
        ordered_course_ids = sorted([str(cid) for cid in (course_ids + [c.id for c in self.courses])])

        self.clear_caches()

--- a/lms/djangoapps/course_api/views.py
+++ b/lms/djangoapps/course_api/views.py
@@ -10,6 +10,7 @@ from rest_framework.throttling import UserRateThrottle

 from edx_rest_framework_extensions.paginators import NamespacedPageNumberPagination
 from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, view_auth_classes
+from openedx.core.lib.api.view_utils import LazySequence

 from . import USE_RATE_LIMIT_2_FOR_COURSE_LIST_API, USE_RATE_LIMIT_10_FOR_COURSE_LIST_API
 from .api import course_detail, list_courses
@@ -243,7 +244,7 @@ class CourseListView(DeveloperErrorViewMixin, ListAPIView):

    def get_queryset(self):
        """
-        Return a list of courses visible to the user.
+        Yield courses visible to the user.
        """
        form = CourseListGetForm(self.request.query_params, initial={'requesting_user': self.request.user})
        if not form.is_valid():
@@ -264,9 +265,12 @@ class CourseListView(DeveloperErrorViewMixin, ListAPIView):
            size=self.results_size_infinity,
        )

-        search_courses_ids = {course['data']['id']: True for course in search_courses['results']}
+        search_courses_ids = {course['data']['id'] for course in search_courses['results']}

-        return [
-            course for course in db_courses
-            if unicode(course.id) in search_courses_ids
-        ]
+        return LazySequence(
+            (
+                course for course in db_courses
+                if unicode(course.id) in search_courses_ids
+            ),
+            est_len=len(db_courses)
+        )
--- a/lms/djangoapps/courseware/courses.py
+++ b/lms/djangoapps/courseware/courses.py
@@ -35,6 +35,7 @@ from lms.djangoapps.courseware.exceptions import CourseAccessRedirect
 from opaque_keys.edx.keys import UsageKey
 from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
 from openedx.core.djangoapps.site_configuration import helpers as configuration_helpers
+from openedx.core.lib.api.view_utils import LazySequence
 from path import Path as path
 from six import text_type
 from static_replace import replace_static_urls
@@ -451,8 +452,7 @@ def get_course_syllabus_section(course, section_key):

 def get_courses(user, org=None, filter_=None):
    """
-    Returns a list of courses available, sorted by course.number and optionally
-    filtered by org code (case-insensitive).
+    Return a LazySequence of courses available, optionally filtered by org code (case-insensitive).
    """
    courses = branding.get_visible_courses(org=org, filter_=filter_)

@@ -461,9 +461,10 @@ def get_courses(user, org=None, filter_=None):
        settings.COURSE_CATALOG_VISIBILITY_PERMISSION
    )

-    courses = [c for c in courses if has_access(user, permission_name, c)]
-
-    return courses
+    return LazySequence(
+        (c for c in courses if has_access(user, permission_name, c)),
+        est_len=courses.count()
+    )


 def get_permission_for_course_about():

--- a/lms/djangoapps/courseware/tests/test_courses.py
+++ b/lms/djangoapps/courseware/tests/test_courses.py
@@ -141,7 +141,7 @@ class CoursesTest(ModuleStoreTestCase):

            # Request filtering for an org distinct from the designated org.
            no_courses = get_courses(user, org=primary)
-            self.assertEqual(no_courses, [])
+            self.assertEqual(list(no_courses), [])

            # Request filtering for an org matching the designated org.
            site_courses = get_courses(user, org=alternate)

--- a/openedx/core/djangoapps/content/course_overviews/models.py
+++ b/openedx/core/djangoapps/content/course_overviews/models.py
@@ -409,7 +409,8 @@ class CourseOverview(TimeStampedModel):
        migrate and test switching to display_name_with_default, which is no
        longer escaped.
        """
-        return block_metadata_utils.display_name_with_default_escaped(self)
+        # pylint: disable=line-too-long
+        return block_metadata_utils.display_name_with_default_escaped(self)  # xss-lint: disable=python-deprecated-display-name

    @property
    def dashboard_start_display(self):
@@ -560,7 +561,7 @@ class CourseOverview(TimeStampedModel):
    @classmethod
    def get_all_courses(cls, orgs=None, filter_=None):
        """
-        Returns all CourseOverview objects in the database.
+        Return a queryset containing all CourseOverview objects in the database.

        Arguments:
            orgs (list[string]): Optional parameter that allows case-insensitive

--- a/openedx/core/lib/api/view_utils.py
+++ b/openedx/core/lib/api/view_utils.py
 """
 Utilities related to API views
 """
+from collections import Sequence
 from django.core.exceptions import NON_FIELD_ERRORS, ObjectDoesNotExist, ValidationError
 from django.http import Http404
 from django.utils.translation import ugettext as _
@@ -205,3 +206,100 @@ class RetrievePatchAPIView(RetrieveModelMixin, UpdateModelMixin, GenericAPIView)
                # PATCH requests where the object does not exist should still
                # return a 404 response.
                raise
+
+
+class LazySequence(Sequence):
+    """
+    This class provides an immutable Sequence interface on top of an existing
+    iterable.
+
+    It is immutable, and accepts an estimated length in order to support __len__
+    without exhausting the underlying sequence
+    """
+    def __init__(self, iterable, est_len=None):  # pylint: disable=super-init-not-called
+        self.iterable = iterable
+        self.est_len = est_len
+        self._data = []
+        self._exhausted = False
+
+    def __len__(self):
+        # Return the actual data length if we know it exactly (because
+        # the underlying sequence is exhausted), or it's greater than
+        # the initial estimated length
+        if len(self._data) > self.est_len or self._exhausted:
+            return len(self._data)
+        else:
+            return self.est_len
+
+    def __iter__(self):
+        # Yield all the known data first
+        for item in self._data:
+            yield item
+
+        # Capture and yield data from the underlying iterator
+        # until it is exhausted
+        while True:
+            try:
+                item = next(self.iterable)
+                self._data.append(item)
+                yield item
+            except StopIteration:
+                self._exhausted = True
+                return
+
+    def __getitem__(self, index):
+        if isinstance(index, int):
+            # For a single index, if we haven't already loaded enough
+            # data, we can load data until we have enough, and then
+            # return the value from the loaded data
+            if index < 0:
+                raise IndexError("Negative indexes aren't supported")
+
+            while len(self._data) <= index:
+                try:
+                    self._data.append(next(self.iterable))
+                except StopIteration:
+                    self._exhausted = True
+                    raise IndexError("Underlying sequence exhausted")
+
+            return self._data[index]
+        elif isinstance(index, slice):
+            # For a slice, we can load data until we reach 'stop'.
+            # Once we have data including 'stop', then we can use
+            # the underlying list to actually understand the mechanics
+            # of the slicing operation.
+            if index.start is not None and index.start < 0:
+                raise IndexError("Negative indexes aren't supported")
+            if index.stop is not None and index.stop < 0:
+                raise IndexError("Negative indexes aren't supported")
+
+            if index.step is not None and index.step < 0:
+                largest_value = index.start + 1
+            else:
+                largest_value = index.stop
+
+            if largest_value is not None:
+                while len(self._data) <= largest_value:
+                    try:
+                        self._data.append(next(self.iterable))
+                    except StopIteration:
+                        self._exhausted = True
+                        break
+            else:
+                self._data.extend(self.iterable)
+            return self._data[index]
+        else:
+            raise TypeError("Unsupported index type")
+
+    def __repr__(self):
+        if self._exhausted:
+            return "LazySequence({!r}, {!r})".format(
+                self._data,
+                self.est_len,
+            )
+        else:
+            return "LazySequence(itertools.chain({!r}, {!r}), {!r})".format(
+                self._data,
+                self.iterable,
+                self.est_len,
+            )