Skip to content
Snippets Groups Projects
Commit 3ec3bbbe authored by Calen Pennington's avatar Calen Pennington
Browse files

Merge pull request #5162 from cpennington/split/import-export-performance

Performance improvements to split mongo and old-mongo, based on import/export tests.
parents 35d92f42 b9908ec2
No related merge requests found
Showing with 140 additions and 28 deletions
......@@ -287,3 +287,10 @@ class ContentStore(object):
logging.exception(u"Failed to generate thumbnail for {0}. Exception: {1}".format(content.location, str(e)))
return thumbnail_content, thumbnail_file_location
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
"""
pass
......@@ -375,6 +375,36 @@ class MongoContentStore(ContentStore):
fs_entry['_id'] = dbkey
return dbkey
def ensure_indexes(self):
# Index needed thru 'category' by `_get_all_content_for_course` and others. That query also takes a sort
# which can be `uploadDate`, `display_name`,
self.fs_files.create_index(
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('_id.name', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('content_son.name', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('uploadDate', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('display_name', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('uploadDate', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('display_name', pymongo.ASCENDING)],
sparse=True
)
def query_for_course(course_key, category=None):
"""
......
......@@ -486,6 +486,16 @@ class ModuleStoreRead(object):
"""
yield
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
pass
class ModuleStoreWrite(ModuleStoreRead):
"""
......
......@@ -653,3 +653,14 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
store = self._get_modulestore_for_courseid(course_id)
with store.bulk_operations(course_id):
yield
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
for store in self.modulestores:
store.ensure_indexes()
......@@ -1440,3 +1440,29 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
return {ModuleStoreEnum.Type.mongo: True}
else:
raise HeartbeatFailure("Can't connect to {}".format(self.database.name), 'mongo')
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
# Because we often query for some subset of the id, we define this index:
self.collection.create_index([
('_id.org', pymongo.ASCENDING),
('_id.course', pymongo.ASCENDING),
('_id.category', pymongo.ASCENDING),
('_id.name', pymongo.ASCENDING),
])
# Because we often scan for all category='course' regardless of the value of the other fields:
self.collection.create_index('_id.category')
# Because lms calls get_parent_locations frequently (for path generation):
self.collection.create_index('definition.children', sparse=True)
# To allow prioritizing draft vs published material
self.collection.create_index('_id.revision')
from opaque_keys.edx.locator import DefinitionLocator
from bson import SON
class DefinitionLazyLoader(object):
......@@ -25,9 +24,3 @@ class DefinitionLazyLoader(object):
loader pointer with the result so as not to fetch more than once
"""
return self.modulestore.db_connection.get_definition(self.definition_locator.definition_id)
def as_son(self):
return SON((
('block_type', self.definition_locator.block_type),
('definition', self.definition_locator.definition_id)
))
......@@ -82,7 +82,6 @@ class MongoConnection(object):
host=host,
port=port,
tz_aware=tz_aware,
document_class=son.SON,
**kwargs
),
db
......@@ -167,10 +166,10 @@ class MongoConnection(object):
"""
case_regex = ur"(?i)^{}$" if ignore_case else ur"{}"
return self.course_index.find_one(
son.SON([
(key_attr, re.compile(case_regex.format(getattr(key, key_attr))))
{
key_attr: re.compile(case_regex.format(getattr(key, key_attr)))
for key_attr in ('org', 'course', 'run')
])
}
)
def find_matching_course_indexes(self, branch=None, search_targets=None):
......@@ -182,7 +181,7 @@ class MongoConnection(object):
search_targets: If specified, this must be a dictionary specifying field values
that must exist in the search_targets of the returned courses
"""
query = son.SON()
query = {}
if branch is not None:
query['versions.{}'.format(branch)] = {'$exists': True}
......@@ -206,11 +205,11 @@ class MongoConnection(object):
from_index: If set, only update an index if it matches the one specified in `from_index`.
"""
self.course_index.update(
from_index or son.SON([
('org', course_index['org']),
('course', course_index['course']),
('run', course_index['run'])
]),
from_index or {
'org': course_index['org'],
'course': course_index['course'],
'run': course_index['run'],
},
course_index,
upsert=False,
)
......@@ -219,11 +218,11 @@ class MongoConnection(object):
"""
Delete the course_index from the persistence mechanism whose id is the given course_index
"""
return self.course_index.remove(son.SON([
('org', course_index['org']),
('course', course_index['course']),
('run', course_index['run'])
]))
return self.course_index.remove({
'org': course_index['org'],
'course': course_index['course'],
'run': course_index['run'],
})
def get_definition(self, key):
"""
......@@ -244,4 +243,20 @@ class MongoConnection(object):
"""
self.definitions.insert(definition)
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
self.course_index.create_index(
[
('org', pymongo.ASCENDING),
('course', pymongo.ASCENDING),
('run', pymongo.ASCENDING)
],
unique=True
)
......@@ -2360,6 +2360,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
services=self.services,
)
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
self.db_connection.ensure_indexes()
class SparseList(list):
"""
Enable inserting items into a list in arbitrary order and then retrieving them.
......
......@@ -96,6 +96,7 @@ class MongoModulestoreBuilder(object):
branch_setting_func=lambda: ModuleStoreEnum.Branch.draft_preferred,
metadata_inheritance_cache_subsystem=MemoryCache(),
)
modulestore.ensure_indexes()
try:
yield modulestore
......@@ -139,6 +140,7 @@ class VersioningModulestoreBuilder(object):
fs_root,
render_template=repr,
)
modulestore.ensure_indexes()
try:
yield modulestore
......@@ -210,6 +212,7 @@ class MongoContentstoreBuilder(object):
collection='content',
**COMMON_DOCSTORE_CONFIG
)
contentstore.ensure_indexes()
try:
yield contentstore
......
......@@ -310,7 +310,13 @@ class CourseComparisonTest(BulkAssertionTest):
if actual_item is None and expected_item.location.category == 'course':
actual_item_location = actual_item_location.replace(name='course')
actual_item = actual_item_map.get(map_key(actual_item_location))
self.assertIsNotNone(actual_item, u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map))
# Formatting the message slows down tests of large courses significantly, so only do it if it would be used
if actual_item is None:
msg = u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map)
else:
msg = None
self.assertIsNotNone(actual_item, msg)
# compare fields
self.assertEqual(expected_item.fields, actual_item.fields)
......@@ -328,17 +334,18 @@ class CourseComparisonTest(BulkAssertionTest):
exp_value = map_references(field.read_from(expected_item), field, actual_course_key)
actual_value = field.read_from(actual_item)
self.assertEqual(
exp_value,
actual_value,
"Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
# Formatting the message slows down tests of large courses significantly, so only do it if it would be used
if exp_value != actual_value:
msg = "Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
field_name,
expected_item.scope_ids.usage_id,
actual_item.scope_ids.usage_id,
exp_value,
actual_value,
)
)
else:
msg = None
self.assertEqual(exp_value, actual_value, msg)
# compare children
self.assertEqual(expected_item.has_children, actual_item.has_children)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment