From a6003991a5cabf48294c9c01806d789e3a791e17 Mon Sep 17 00:00:00 2001 From: Nimisha Asthagiri <nasthagiri@edx.org> Date: Wed, 3 Dec 2014 21:41:41 -0500 Subject: [PATCH] MA-157 Mobile analytics end-to-end. Move app-generated context back into properties. Ignore BI events through segment's webhook. Ignore events without Data in Properties. --- common/djangoapps/track/views/segmentio.py | 42 ++++++++++--------- .../track/views/tests/test_segmentio.py | 39 ++++++++++++----- lms/envs/aws.py | 9 +++- lms/envs/common.py | 1 + 4 files changed, 60 insertions(+), 31 deletions(-) diff --git a/common/djangoapps/track/views/segmentio.py b/common/djangoapps/track/views/segmentio.py index 59e1823af2d..6221064e16a 100644 --- a/common/djangoapps/track/views/segmentio.py +++ b/common/djangoapps/track/views/segmentio.py @@ -24,6 +24,7 @@ WARNING_IGNORED_TYPE = 'Type ignored' ERROR_MISSING_USER_ID = 'Required user_id missing from context' ERROR_USER_NOT_EXIST = 'Specified user does not exist' ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer' +ERROR_MISSING_DATA = 'The data field must be specified in the properties dictionary' ERROR_MISSING_NAME = 'The name field must be specified in the properties dictionary' ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found' ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found' @@ -119,7 +120,7 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements full_segment_event = request.json # We mostly care about the properties - segment_event = full_segment_event.get('properties', {}) + segment_properties = full_segment_event.get('properties', {}) # Start with the context provided by segment.io in the "client" field if it exists # We should tightly control which fields actually get included in the event emitted. @@ -136,32 +137,38 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements else: context['event_source'] = event_source - # Ignore types that are unsupported + if 'name' not in segment_properties: + raise EventValidationError(ERROR_MISSING_NAME) + + if 'data' not in segment_properties: + raise EventValidationError(ERROR_MISSING_DATA) + + # Ignore event types and names that are unsupported segment_event_type = full_segment_event.get('type') + segment_event_name = segment_properties['name'] allowed_types = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_TYPES', [])] - if not segment_event_type or segment_event_type.lower() not in allowed_types: + disallowed_substring_names = [ + a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES', []) + ] + if ( + not segment_event_type or + (segment_event_type.lower() not in allowed_types) or + any(disallowed_subs_name in segment_event_name.lower() for disallowed_subs_name in disallowed_substring_names) + ): raise EventValidationError(WARNING_IGNORED_TYPE) if segment_context: - # copy required fields from segment's context dict to our custom context dict - for context_field_name, segment_context_field_name in [ - ('course_id', 'course_id'), - ('open_in_browser_url', 'open_in_browser_url'), - ('agent', 'userAgent') - ]: - if segment_context_field_name in segment_context: - context[context_field_name] = segment_context[segment_context_field_name] - # copy the entire segment's context dict as a sub-field of our custom context dict context['client'] = dict(segment_context) + context['agent'] = segment_context.get('userAgent', '') # remove duplicate and unnecessary fields from our copy - for field in ('traits', 'integrations', 'userAgent', 'course_id', 'open_in_browser_url'): + for field in ('traits', 'integrations', 'userAgent'): if field in context['client']: del context['client'][field] # Overlay any context provided in the properties - context.update(segment_event.get('context', {})) + context.update(segment_properties.get('context', {})) user_id = full_segment_event.get('userId') if not user_id: @@ -203,13 +210,10 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements else: raise EventValidationError(ERROR_MISSING_RECEIVED_AT) - if 'name' not in segment_event: - raise EventValidationError(ERROR_MISSING_NAME) - - context['ip'] = segment_event.get('context', {}).get('ip', '') + context['ip'] = segment_properties.get('context', {}).get('ip', '') with tracker.get_tracker().context('edx.segmentio', context): - tracker.emit(segment_event['name'], segment_event.get('data', {})) + tracker.emit(segment_event_name, segment_properties.get('data', {})) def parse_iso8601_timestamp(timestamp): diff --git a/common/djangoapps/track/views/tests/test_segmentio.py b/common/djangoapps/track/views/tests/test_segmentio.py index 1a02dfb6f5e..7f90be0e09a 100644 --- a/common/djangoapps/track/views/tests/test_segmentio.py +++ b/common/djangoapps/track/views/tests/test_segmentio.py @@ -44,6 +44,7 @@ def expect_failure_with_message(message): TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET, TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT], TRACKING_SEGMENTIO_ALLOWED_TYPES=['track'], + TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES=['.bi.'], TRACKING_SEGMENTIO_SOURCE_MAP={'test-app': 'mobile'}, EVENT_TRACKING_PROCESSORS=MOBILE_SHIM_PROCESSOR, ) @@ -97,6 +98,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): def test_segmentio_ignore_actions(self, action): self.post_segmentio_event(action=action) + @data('edx.bi.some_name', 'EDX.BI.CAPITAL_NAME') + @expect_failure_with_message(segmentio.WARNING_IGNORED_TYPE) + def test_segmentio_ignore_names(self, name): + self.post_segmentio_event(name=name) + def post_segmentio_event(self, **kwargs): """Post a fake segment.io event to the view that processes it""" request = self.create_request( @@ -114,6 +120,9 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): "properties": { 'name': kwargs.get('name', str(sentinel.name)), 'data': kwargs.get('data', {}), + 'context': { + 'course_id': kwargs.get('course_id') or '', + } }, "channel": 'server', "context": { @@ -122,7 +131,6 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): "version": "unknown" }, 'userAgent': str(sentinel.user_agent), - 'course_id': kwargs.get('course_id') or '', }, "receivedAt": "2014-08-27T16:33:39.100Z", "timestamp": "2014-08-27T16:33:39.215Z", @@ -139,10 +147,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): } if 'context' in kwargs: - sample_event['context'].update(kwargs['context']) - - if 'open_in_browser_url' in kwargs: - sample_event['context']['open_in_browser_url'] = kwargs['open_in_browser_url'] + sample_event['properties']['context'].update(kwargs['context']) return sample_event @@ -231,6 +236,18 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): segmentio.track_segmentio_event(request) + @expect_failure_with_message(segmentio.ERROR_MISSING_DATA) + def test_missing_data(self): + sample_event_raw = self.create_segmentio_event() + del sample_event_raw['properties']['data'] + request = self.create_request( + data=json.dumps(sample_event_raw), + content_type='application/json' + ) + User.objects.create(pk=USER_ID, username=str(sentinel.username)) + + segmentio.track_segmentio_event(request) + @expect_failure_with_message(segmentio.ERROR_MISSING_TIMESTAMP) def test_missing_timestamp(self): sample_event_raw = self.create_event_without_fields('timestamp') @@ -305,8 +322,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): data=self.create_segmentio_event_json( name=name, data=input_payload, - open_in_browser_url='https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2', context={ + 'open_in_browser_url': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2', 'course_id': course_id, 'application': { 'name': 'edx.mobileapp.android', @@ -344,11 +361,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): 'name': 'test-app', 'version': 'unknown' }, - 'application': { - 'name': 'edx.mobileapp.android', - 'version': '29', - 'component': 'videoplayer' - } + }, + 'application': { + 'name': 'edx.mobileapp.android', + 'version': '29', + 'component': 'videoplayer' }, 'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"), }, diff --git a/lms/envs/aws.py b/lms/envs/aws.py index bacd50a31ef..db98070efca 100644 --- a/lms/envs/aws.py +++ b/lms/envs/aws.py @@ -397,8 +397,15 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD # Event tracking TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {})) EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {})) -TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET) +TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get( + "TRACKING_SEGMENTIO_WEBHOOK_SECRET", + TRACKING_SEGMENTIO_WEBHOOK_SECRET +) TRACKING_SEGMENTIO_ALLOWED_TYPES = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_TYPES", TRACKING_SEGMENTIO_ALLOWED_TYPES) +TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ENV_TOKENS.get( + "TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES", + TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES +) TRACKING_SEGMENTIO_SOURCE_MAP = ENV_TOKENS.get("TRACKING_SEGMENTIO_SOURCE_MAP", TRACKING_SEGMENTIO_SOURCE_MAP) # Student identity verification settings diff --git a/lms/envs/common.py b/lms/envs/common.py index 6b260e44c02..9e0c63f6cf1 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -540,6 +540,7 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): TRACKING_SEGMENTIO_WEBHOOK_SECRET = None TRACKING_SEGMENTIO_ALLOWED_TYPES = ['track'] +TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ['.bi.'] TRACKING_SEGMENTIO_SOURCE_MAP = { 'analytics-android': 'mobile', 'analytics-ios': 'mobile', -- GitLab