Skip to content
Snippets Groups Projects
Commit a6003991 authored by Nimisha Asthagiri's avatar Nimisha Asthagiri
Browse files

MA-157 Mobile analytics end-to-end.

Move app-generated context back into the event's properties.
Ignore BI events received through Segment's webhook.
Ignore events whose properties do not contain a data field.
parent 3c999122
No related branches found
No related tags found
No related merge requests found
......@@ -24,6 +24,7 @@ WARNING_IGNORED_TYPE = 'Type ignored'
ERROR_MISSING_USER_ID = 'Required user_id missing from context'
ERROR_USER_NOT_EXIST = 'Specified user does not exist'
ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer'
ERROR_MISSING_DATA = 'The data field must be specified in the properties dictionary'
ERROR_MISSING_NAME = 'The name field must be specified in the properties dictionary'
ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found'
ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found'
......@@ -119,7 +120,7 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
full_segment_event = request.json
# We mostly care about the properties
segment_event = full_segment_event.get('properties', {})
segment_properties = full_segment_event.get('properties', {})
# Start with the context provided by segment.io in the "client" field if it exists
# We should tightly control which fields actually get included in the event emitted.
......@@ -136,32 +137,38 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
else:
context['event_source'] = event_source
# Ignore types that are unsupported
if 'name' not in segment_properties:
raise EventValidationError(ERROR_MISSING_NAME)
if 'data' not in segment_properties:
raise EventValidationError(ERROR_MISSING_DATA)
# Ignore event types and names that are unsupported
segment_event_type = full_segment_event.get('type')
segment_event_name = segment_properties['name']
allowed_types = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_TYPES', [])]
if not segment_event_type or segment_event_type.lower() not in allowed_types:
disallowed_substring_names = [
a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES', [])
]
if (
not segment_event_type or
(segment_event_type.lower() not in allowed_types) or
any(disallowed_subs_name in segment_event_name.lower() for disallowed_subs_name in disallowed_substring_names)
):
raise EventValidationError(WARNING_IGNORED_TYPE)
if segment_context:
# copy required fields from segment's context dict to our custom context dict
for context_field_name, segment_context_field_name in [
('course_id', 'course_id'),
('open_in_browser_url', 'open_in_browser_url'),
('agent', 'userAgent')
]:
if segment_context_field_name in segment_context:
context[context_field_name] = segment_context[segment_context_field_name]
# copy the entire segment's context dict as a sub-field of our custom context dict
context['client'] = dict(segment_context)
context['agent'] = segment_context.get('userAgent', '')
# remove duplicate and unnecessary fields from our copy
for field in ('traits', 'integrations', 'userAgent', 'course_id', 'open_in_browser_url'):
for field in ('traits', 'integrations', 'userAgent'):
if field in context['client']:
del context['client'][field]
# Overlay any context provided in the properties
context.update(segment_event.get('context', {}))
context.update(segment_properties.get('context', {}))
user_id = full_segment_event.get('userId')
if not user_id:
......@@ -203,13 +210,10 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
else:
raise EventValidationError(ERROR_MISSING_RECEIVED_AT)
if 'name' not in segment_event:
raise EventValidationError(ERROR_MISSING_NAME)
context['ip'] = segment_event.get('context', {}).get('ip', '')
context['ip'] = segment_properties.get('context', {}).get('ip', '')
with tracker.get_tracker().context('edx.segmentio', context):
tracker.emit(segment_event['name'], segment_event.get('data', {}))
tracker.emit(segment_event_name, segment_properties.get('data', {}))
def parse_iso8601_timestamp(timestamp):
......
......@@ -44,6 +44,7 @@ def expect_failure_with_message(message):
TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET,
TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT],
TRACKING_SEGMENTIO_ALLOWED_TYPES=['track'],
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES=['.bi.'],
TRACKING_SEGMENTIO_SOURCE_MAP={'test-app': 'mobile'},
EVENT_TRACKING_PROCESSORS=MOBILE_SHIM_PROCESSOR,
)
......@@ -97,6 +98,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
def test_segmentio_ignore_actions(self, action):
self.post_segmentio_event(action=action)
@data('edx.bi.some_name', 'EDX.BI.CAPITAL_NAME')
@expect_failure_with_message(segmentio.WARNING_IGNORED_TYPE)
def test_segmentio_ignore_names(self, name):
    """Post an event under each supplied name; WARNING_IGNORED_TYPE is the expected failure.

    Both sample names contain the substring '.bi.' when lower-cased, one of
    them only after case folding.
    """
    # Only the event name is overridden; every other field keeps the
    # default chosen by post_segmentio_event.
    event_overrides = {'name': name}
    self.post_segmentio_event(**event_overrides)
def post_segmentio_event(self, **kwargs):
"""Post a fake segment.io event to the view that processes it"""
request = self.create_request(
......@@ -114,6 +120,9 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
"properties": {
'name': kwargs.get('name', str(sentinel.name)),
'data': kwargs.get('data', {}),
'context': {
'course_id': kwargs.get('course_id') or '',
}
},
"channel": 'server',
"context": {
......@@ -122,7 +131,6 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
"version": "unknown"
},
'userAgent': str(sentinel.user_agent),
'course_id': kwargs.get('course_id') or '',
},
"receivedAt": "2014-08-27T16:33:39.100Z",
"timestamp": "2014-08-27T16:33:39.215Z",
......@@ -139,10 +147,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
}
if 'context' in kwargs:
sample_event['context'].update(kwargs['context'])
if 'open_in_browser_url' in kwargs:
sample_event['context']['open_in_browser_url'] = kwargs['open_in_browser_url']
sample_event['properties']['context'].update(kwargs['context'])
return sample_event
......@@ -231,6 +236,18 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
segmentio.track_segmentio_event(request)
@expect_failure_with_message(segmentio.ERROR_MISSING_DATA)
def test_missing_data(self):
    """Send an event whose properties dict has no 'data' key.

    The decorator names ERROR_MISSING_DATA as the failure message expected
    from the view.
    """
    event = self.create_segmentio_event()
    # Remove only the field under test; the rest of the sample event stays intact.
    properties = event['properties']
    del properties['data']

    payload = json.dumps(event)
    request = self.create_request(data=payload, content_type='application/json')

    # NOTE(review): presumably the user is created so the view does not fail
    # earlier on an unknown user -- confirm against the view's check order.
    User.objects.create(pk=USER_ID, username=str(sentinel.username))
    segmentio.track_segmentio_event(request)
@expect_failure_with_message(segmentio.ERROR_MISSING_TIMESTAMP)
def test_missing_timestamp(self):
sample_event_raw = self.create_event_without_fields('timestamp')
......@@ -305,8 +322,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
data=self.create_segmentio_event_json(
name=name,
data=input_payload,
open_in_browser_url='https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2',
context={
'open_in_browser_url': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2',
'course_id': course_id,
'application': {
'name': 'edx.mobileapp.android',
......@@ -344,11 +361,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
'name': 'test-app',
'version': 'unknown'
},
'application': {
'name': 'edx.mobileapp.android',
'version': '29',
'component': 'videoplayer'
}
},
'application': {
'name': 'edx.mobileapp.android',
'version': '29',
'component': 'videoplayer'
},
'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
},
......
......@@ -397,8 +397,15 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD
# Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {}))
TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET)
TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get(
"TRACKING_SEGMENTIO_WEBHOOK_SECRET",
TRACKING_SEGMENTIO_WEBHOOK_SECRET
)
TRACKING_SEGMENTIO_ALLOWED_TYPES = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_TYPES", TRACKING_SEGMENTIO_ALLOWED_TYPES)
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ENV_TOKENS.get(
"TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES",
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES
)
TRACKING_SEGMENTIO_SOURCE_MAP = ENV_TOKENS.get("TRACKING_SEGMENTIO_SOURCE_MAP", TRACKING_SEGMENTIO_SOURCE_MAP)
# Student identity verification settings
......
......@@ -540,6 +540,7 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
TRACKING_SEGMENTIO_WEBHOOK_SECRET = None
TRACKING_SEGMENTIO_ALLOWED_TYPES = ['track']
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ['.bi.']
TRACKING_SEGMENTIO_SOURCE_MAP = {
'analytics-android': 'mobile',
'analytics-ios': 'mobile',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment