diff --git a/common/djangoapps/track/views.py b/common/djangoapps/track/views/__init__.py
similarity index 100%
rename from common/djangoapps/track/views.py
rename to common/djangoapps/track/views/__init__.py
diff --git a/common/djangoapps/track/views/segmentio.py b/common/djangoapps/track/views/segmentio.py
new file mode 100644
index 0000000000000000000000000000000000000000..01951c94961c7b29d602a147f812efb812bbf04b
--- /dev/null
+++ b/common/djangoapps/track/views/segmentio.py
@@ -0,0 +1,261 @@
+"""Handle events that were forwarded from the segment.io webhook integration"""
+
+import datetime
+import json
+import logging
+
+from django.conf import settings
+from django.contrib.auth.models import User
+from django.utils.crypto import constant_time_compare
+from django.views.decorators.http import require_POST
+from django_future.csrf import csrf_exempt
+
+from eventtracking import tracker as eventtracker
+from opaque_keys.edx.keys import CourseKey
+from opaque_keys import InvalidKeyError
+from util.json_request import expect_json, JsonResponse
+
+from track import tracker
+from track import shim
+
+log = logging.getLogger(__name__)
+
+
+ERROR_UNAUTHORIZED = 'Unauthorized'
+WARNING_IGNORED_CHANNEL = 'Channel ignored'
+WARNING_IGNORED_ACTION = 'Action ignored'
+ERROR_MISSING_USER_ID = 'Required user_id missing from context'
+ERROR_USER_NOT_EXIST = 'Specified user does not exist'
+ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer'
+ERROR_MISSING_EVENT_TYPE = 'The event_type field must be specified in the properties dictionary'
+ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found'
+ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found'
+ERROR_INVALID_TIMESTAMP = 'Unable to parse the timestamp field'
+ERROR_INVALID_RECEIVED_AT = 'Unable to parse the receivedAt field'
+
+# Timestamp layouts accepted by parse_iso8601_timestamp, tried in order. Fractional seconds are optional.
+ISO8601_TIMESTAMP_FORMATS = (
+    "%Y-%m-%dT%H:%M:%S.%fZ",
+    "%Y-%m-%dT%H:%M:%SZ",
+)
+
+
+@require_POST
+@expect_json
+@csrf_exempt
+def track_segmentio_event(request):
+    """
+    An endpoint for logging events using segment.io's webhook integration.
+
+    segment.io provides a custom integration mechanism that initiates a request to a configurable URL every time an
+    event is received by their system. This endpoint is designed to receive those requests and convert the events into
+    standard tracking log entries.
+
+    For now we limit the scope of handled events to track and screen events from mobile devices. In the future we could
+    enable logging of other types of events, however, there is significant overlap with our non-segment.io based event
+    tracking. Given that segment.io is a closed third party solution we are limiting its required usage to just
+    collecting events from mobile devices for the time being.
+
+    Many of the root fields of a standard edX tracking event are read out of the "properties" dictionary provided by the
+    segment.io event, which is, in turn, provided by the client that emitted the event.
+
+    In order for an event to be logged the following preconditions must be met:
+
+    * The "key" query string parameter must exactly match the django setting TRACKING_SEGMENTIO_WEBHOOK_SECRET. While
+      the endpoint is public, we want to limit access to it to the segment.io servers only.
+    * The value of the "channel" field of the event must be included in the list specified by the django setting
+      TRACKING_SEGMENTIO_ALLOWED_CHANNELS. This is intended to restrict the set of events to specific channels. For
+      example: just mobile devices.
+    * The value of the "action" field of the event must be included in the list specified by the django setting
+      TRACKING_SEGMENTIO_ALLOWED_ACTIONS. In order to make use of *all* of the features segment.io offers we would have
+      to implement some sort of persistent storage of information contained in some actions (like identify). For now,
+      we defer support of those actions and just support a limited set that can be handled without storing information
+      in external state.
+    * The value of the standard "userId" field of the event must be an integer that can be used to look up the user
+      using the primary key of the User model.
+    * The "timestamp" and "receivedAt" fields of the event must both be present and must be UTC ISO8601 timestamps
+      of the form accepted by parse_iso8601_timestamp.
+    * Include an "event_type" field in the properties dictionary that indicates the edX event type. Note this can differ
+      from the "event" field found in the root of a segment.io event. The "event" field at the root of the structure is
+      intended to be human readable, the "event_type" field is expected to conform to the standard for naming events
+      found in the edX data documentation.
+
+    Additionally the event can optionally:
+
+    * Provide a "context" dictionary in the properties dictionary. This dictionary will be applied to the
+      existing context on the server overriding any existing keys. This context dictionary should include a "course_id"
+      field when the event is scoped to a particular course. The value of this field should be a valid course key. The
+      context may contain other arbitrary data that will be logged with the event, for example: identification
+      information for the device that emitted the event.
+    * Provide a "page" parameter in the properties dictionary which indicates the page that was being displayed to the
+      user or the mobile application screen that was visible to the user at the time the event was emitted.
+
+    Returns: A JsonResponse. Rejected requests get a 401 (bad secret) or 400 (malformed event) status, ignored
+      channels and actions get a 200 with "committed" set to false, and logged events get a 200 with "committed"
+      set to true.
+
+    """
+
+    # Validate the security token. We must use a query string parameter for this since we cannot customize the POST body
+    # in the segment.io webhook configuration, we can only change the URL that they call, so we force this token to be
+    # included in the URL and reject any requests that do not include it. This also assumes HTTPS is used to make the
+    # connection between their server and ours.
+    #
+    # The comparison is constant time so that the secret cannot be guessed one character at a time by measuring
+    # how long a mismatch takes to be rejected.
+    expected_secret = getattr(settings, 'TRACKING_SEGMENTIO_WEBHOOK_SECRET', None)
+    provided_secret = request.GET.get('key')
+    if not expected_secret or not provided_secret or not constant_time_compare(provided_secret, expected_secret):
+        return failure_response(ERROR_UNAUTHORIZED, status=401)
+
+    # The POST body will contain the JSON encoded event
+    full_segment_event = request.json
+
+    def logged_failure_response(*args, **kwargs):
+        """Indicate a failure and log information about the event that will aid debugging efforts"""
+        failed_response = failure_response(*args, **kwargs)
+        log.warning('Unable to process event received from segment.io: %s', json.dumps(full_segment_event))
+        return failed_response
+
+    # Selectively listen to particular channels
+    channel = full_segment_event.get('channel')
+    allowed_channels = [c.lower() for c in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_CHANNELS', [])]
+    if not channel or channel.lower() not in allowed_channels:
+        return response(WARNING_IGNORED_CHANNEL, committed=False)
+
+    # Ignore actions that are unsupported
+    action = full_segment_event.get('action')
+    allowed_actions = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_ACTIONS', [])]
+    if not action or action.lower() not in allowed_actions:
+        return response(WARNING_IGNORED_ACTION, committed=False)
+
+    # We mostly care about the properties. An explicit null is treated the same as a missing dictionary.
+    segment_event = full_segment_event.get('properties') or {}
+
+    context = {}
+
+    # Start with the context provided by segment.io in the "client" field if it exists
+    segment_context = full_segment_event.get('context')
+    if segment_context:
+        context['client'] = segment_context
+
+    # Overlay any context provided in the properties
+    context.update(segment_event.get('context') or {})
+
+    user_id = full_segment_event.get('userId')
+    if not user_id:
+        return logged_failure_response(ERROR_MISSING_USER_ID)
+
+    # userId is assumed to be the primary key of the django User model
+    try:
+        user = User.objects.get(pk=user_id)
+    except User.DoesNotExist:
+        return logged_failure_response(ERROR_USER_NOT_EXIST)
+    except (TypeError, ValueError):
+        return logged_failure_response(ERROR_INVALID_USER_ID)
+    else:
+        # Record the key of the user that was found rather than the raw value from the request, which may have been
+        # sent as a numeric string.
+        context['user_id'] = user.id
+
+    # course_id is expected to be provided in the context when applicable
+    course_id = context.get('course_id')
+    if course_id:
+        try:
+            course_key = CourseKey.from_string(course_id)
+            context['org_id'] = course_key.org
+        except InvalidKeyError:
+            log.warning(
+                'unable to parse course_id "{course_id}" from event: {event}'.format(
+                    course_id=course_id,
+                    event=json.dumps(full_segment_event),
+                ),
+                exc_info=True
+            )
+
+    # A malformed timestamp is reported to the caller like any other validation failure instead of raising.
+    if 'timestamp' not in full_segment_event:
+        return logged_failure_response(ERROR_MISSING_TIMESTAMP)
+    try:
+        time = parse_iso8601_timestamp(full_segment_event['timestamp'])
+    except (TypeError, ValueError):
+        return logged_failure_response(ERROR_INVALID_TIMESTAMP)
+
+    if 'receivedAt' not in full_segment_event:
+        return logged_failure_response(ERROR_MISSING_RECEIVED_AT)
+    try:
+        context['received_at'] = parse_iso8601_timestamp(full_segment_event['receivedAt'])
+    except (TypeError, ValueError):
+        return logged_failure_response(ERROR_INVALID_RECEIVED_AT)
+
+    if 'event_type' not in segment_event:
+        return logged_failure_response(ERROR_MISSING_EVENT_TYPE)
+    event_type = segment_event['event_type']
+
+    with eventtracker.get_tracker().context('edx.segmentio', context):
+        complete_context = eventtracker.get_tracker().resolve_context()
+        event = {
+            "username": user.username,
+            "event_type": event_type,
+            # Will be either "mobile", "browser" or "server". These names happen to be identical to the names we already
+            # use so no mapping is necessary.
+            "event_source": channel,
+            # This timestamp is reported by the local clock on the device so it may be wildly incorrect.
+            "time": time,
+            "context": complete_context,
+            "page": segment_event.get('page'),
+            "host": complete_context.get('host', ''),
+            "agent": '',
+            "ip": segment_event.get('ip', ''),
+            "event": segment_event.get('event', {}),
+        }
+
+    # Some duplicated fields are passed into event-tracking via the context by track.middleware.
+    # Remove them from the event here since they are captured elsewhere.
+    shim.remove_shim_context(event)
+
+    tracker.send(event)
+
+    return response()
+
+
+def response(message=None, status=200, committed=True):
+    """
+    Produce a response from the segment.io event handler.
+
+    Returns: A JSON encoded string giving more information about what action was taken while processing the request.
+    """
+    result = {
+        'committed': committed
+    }
+
+    if message:
+        result['message'] = message
+
+    return JsonResponse(result, status=status)
+
+
+def failure_response(message, status=400):
+    """
+    Return a failure response when something goes wrong handling segment.io events.
+
+    Returns: A JSON encoded string giving more information about what went wrong when processing the request.
+    """
+    return response(message=message, status=status, committed=False)
+
+
+def parse_iso8601_timestamp(timestamp):
+    """
+    Parse a UTC ISO8601 timestamp of the form "2014-08-27T16:33:39.215Z", with or without fractional seconds.
+
+    Returns: A naive datetime.
+
+    Raises: ValueError if the string matches none of ISO8601_TIMESTAMP_FORMATS, TypeError if it is not a string.
+    """
+    for timestamp_format in ISO8601_TIMESTAMP_FORMATS:
+        try:
+            return datetime.datetime.strptime(timestamp, timestamp_format)
+        except ValueError:
+            continue
+
+    raise ValueError('Unable to parse timestamp: {0!r}'.format(timestamp))
diff --git a/common/djangoapps/track/views/tests/__init__.py b/common/djangoapps/track/views/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/common/djangoapps/track/views/tests/test_segmentio.py b/common/djangoapps/track/views/tests/test_segmentio.py
new file mode 100644
index 0000000000000000000000000000000000000000..54f97638824f6fb90d3aa477e78db80ac45e424a
--- /dev/null
+++ b/common/djangoapps/track/views/tests/test_segmentio.py
@@ -0,0 +1,244 @@
+"""Ensure we can parse events sent to us from the segment.io webhook integration"""
+
+from datetime import datetime
+import json
+
+from ddt import ddt, data
+from freezegun import freeze_time
+from mock import patch, sentinel
+
+from django.contrib.auth.models import User
+from django.test import TestCase
+from django.test.client import RequestFactory
+from django.test.utils import override_settings
+
+from track.middleware import TrackMiddleware
+from track.views import segmentio
+
+
+EXPECTED_TIME = datetime(2013, 10, 3, 8, 24, 55)
+SECRET = 'anything'
+ENDPOINT = '/segmentio/test/event'
+USER_ID = 10
+
+
+@ddt
+@override_settings(
+    TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET,
+    TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT],
+    TRACKING_SEGMENTIO_ALLOWED_ACTIONS=['Track', 'Screen'],
+    TRACKING_SEGMENTIO_ALLOWED_CHANNELS=['mobile']
+)
+@freeze_time(EXPECTED_TIME)
+class SegmentIOTrackingTestCase(TestCase):
+    """Test processing of segment.io events"""
+
+    def setUp(self):
+        self.request_factory = RequestFactory()
+
+        patcher = patch('track.views.segmentio.tracker')
+        self.mock_tracker = patcher.start()
+        self.addCleanup(patcher.stop)
+
+    def test_segmentio_tracking_get_request(self):
+        request = self.request_factory.get(ENDPOINT)
+        response = segmentio.track_segmentio_event(request)
+        self.assertEqual(response.status_code, 405)
+        self.assertFalse(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member
+
+    @override_settings(
+        TRACKING_SEGMENTIO_WEBHOOK_SECRET=None
+    )
+    def test_segmentio_tracking_no_secret_config(self):
+        request = self.request_factory.post(ENDPOINT)
+        response = segmentio.track_segmentio_event(request)
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401)
+
+    def assert_segmentio_uncommitted_response(self, response, expected_message, expected_status=400):
+        """Assert that no event was emitted and a response with "committed" set to false was returned"""
+        self.assertEqual(response.status_code, expected_status)
+        parsed_content = json.loads(response.content)
+        self.assertEqual(parsed_content, {'committed': False, 'message': expected_message})
+        self.assertFalse(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member
+
+    def test_segmentio_tracking_no_secret_provided(self):
+        request = self.request_factory.post(ENDPOINT)
+        response = segmentio.track_segmentio_event(request)
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401)
+
+    def test_segmentio_tracking_secret_mismatch(self):
+        request = self.create_request(key='y')
+        response = segmentio.track_segmentio_event(request)
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401)
+
+    def create_request(self, key=None, **kwargs):
+        """Create a fake request that emulates a request from the segment.io servers to ours"""
+        if key is None:
+            key = SECRET
+
+        return self.request_factory.post(ENDPOINT + "?key=" + key, **kwargs)
+
+    @data('Identify', 'Group', 'Alias', 'Page', 'identify')
+    def test_segmentio_ignore_actions(self, action):
+        response = self.post_segmentio_event(action=action)
+        self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_ACTION, 200)
+
+    def post_segmentio_event(self, **kwargs):
+        """Post a fake segment.io event to the view that processes it"""
+        request = self.create_request(
+            data=self.create_segmentio_event_json(**kwargs),
+            content_type='application/json'
+        )
+        return segmentio.track_segmentio_event(request)
+
+    @data('server', 'browser', 'Browser')
+    def test_segmentio_ignore_channels(self, channel):
+        response = self.post_segmentio_event(channel=channel)
+        self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_CHANNEL, 200)
+
+    def create_segmentio_event(self, **kwargs):
+        """Populate a fake segment.io event with data of interest"""
+        action = kwargs.get('action', 'Track')
+        sample_event = {
+            "userId": kwargs.get('user_id', USER_ID),
+            "event": "Did something",
+            "properties": {
+                'event_type': kwargs.get('event_type', ''),
+                'event': kwargs.get('event', {}),
+                'context': {
+                    'course_id': kwargs.get('course_id') or '',
+                }
+            },
+            "channel": kwargs.get('channel', 'mobile'),
+            "context": {
+                "library": {
+                    "name": "unknown",
+                    "version": "unknown"
+                }
+            },
+            "receivedAt": "2014-08-27T16:33:39.100Z",
+            "timestamp": "2014-08-27T16:33:39.215Z",
+            "type": action.lower(),
+            "projectId": "u0j33yjkr8",
+            "messageId": "qy52hwp4",
+            "version": 2,
+            "integrations": {},
+            "options": {
+                "library": "unknown",
+                "providers": {}
+            },
+            "action": action
+        }
+        return sample_event
+
+    def create_segmentio_event_json(self, **kwargs):
+        """Return a json string containing a fake segment.io event"""
+        return json.dumps(self.create_segmentio_event(**kwargs))
+
+    def test_segmentio_tracking_no_user_for_user_id(self):
+        response = self.post_segmentio_event(user_id=40)
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_USER_NOT_EXIST, 400)
+
+    def test_segmentio_tracking_invalid_user_id(self):
+        response = self.post_segmentio_event(user_id='foobar')
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_INVALID_USER_ID, 400)
+
+    @data('foo/bar/baz', 'course-v1:foo+bar+baz')
+    def test_segmentio_tracking(self, course_id):
+        middleware = TrackMiddleware()
+
+        request = self.create_request(
+            data=self.create_segmentio_event_json(event_type=str(sentinel.event_type), event={'foo': 'bar'}, course_id=course_id),
+            content_type='application/json'
+        )
+        User.objects.create(pk=USER_ID, username=str(sentinel.username))
+
+        middleware.process_request(request)
+        # The middleware normally emits an event, make sure it doesn't in this case.
+        self.assertFalse(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member
+        try:
+            response = segmentio.track_segmentio_event(request)
+            self.assertEqual(response.status_code, 200)
+
+            expected_event = {
+                'username': str(sentinel.username),
+                'ip': '',
+                'event_source': 'mobile',
+                'event_type': str(sentinel.event_type),
+                'event': {'foo': 'bar'},
+                'agent': '',
+                'page': None,
+                'time': datetime.strptime("2014-08-27T16:33:39.215Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
+                'host': 'testserver',
+                'context': {
+                    'user_id': USER_ID,
+                    'course_id': course_id,
+                    'org_id': 'foo',
+                    'path': ENDPOINT,
+                    'client': {
+                        'library': {
+                            'name': 'unknown',
+                            'version': 'unknown'
+                        }
+                    },
+                    'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
+                },
+            }
+        finally:
+            middleware.process_response(request, None)
+
+        self.mock_tracker.send.assert_called_once_with(expected_event)  # pylint: disable=maybe-no-member
+
+    def test_segmentio_tracking_invalid_course_id(self):
+        request = self.create_request(
+            data=self.create_segmentio_event_json(course_id='invalid'),
+            content_type='application/json'
+        )
+        User.objects.create(pk=USER_ID, username=str(sentinel.username))
+        response = segmentio.track_segmentio_event(request)
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member
+
+    def test_segmentio_tracking_missing_event_type(self):
+        sample_event_raw = self.create_segmentio_event()
+        sample_event_raw['properties'] = {}
+        request = self.create_request(
+            data=json.dumps(sample_event_raw),
+            content_type='application/json'
+        )
+        User.objects.create(pk=USER_ID, username=str(sentinel.username))
+
+        response = segmentio.track_segmentio_event(request)
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_EVENT_TYPE, 400)
+
+    def test_segmentio_tracking_missing_timestamp(self):
+        sample_event_raw = self.create_event_without_fields('timestamp')
+        request = self.create_request(
+            data=json.dumps(sample_event_raw),
+            content_type='application/json'
+        )
+        User.objects.create(pk=USER_ID, username=str(sentinel.username))
+
+        response = segmentio.track_segmentio_event(request)
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_TIMESTAMP, 400)
+
+    def create_event_without_fields(self, *fields):
+        """Create a fake event and remove the named top-level fields from it"""
+        event = self.create_segmentio_event()
+
+        for field in fields:
+            if field in event:
+                del event[field]
+
+        return event
+
+    def test_segmentio_tracking_missing_received_at(self):
+        sample_event_raw = self.create_event_without_fields('receivedAt')
+        request = self.create_request(
+            data=json.dumps(sample_event_raw),
+            content_type='application/json'
+        )
+        User.objects.create(pk=USER_ID, username=str(sentinel.username))
+
+        response = segmentio.track_segmentio_event(request)
+        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_RECEIVED_AT, 400)
diff --git a/common/djangoapps/track/tests/test_views.py b/common/djangoapps/track/views/tests/test_views.py
similarity index 100%
rename from common/djangoapps/track/tests/test_views.py
rename to common/djangoapps/track/views/tests/test_views.py
diff --git a/lms/envs/aws.py b/lms/envs/aws.py
index eb4f24ce75165acc6c06d2271ed994c57e80652a..3f656ef97c8f2bcbaa228119ac5310041afad550 100644
--- a/lms/envs/aws.py
+++ b/lms/envs/aws.py
@@ -400,6 +400,10 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD
 # Event tracking
 TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
 EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {}))
+TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET)
+TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_ACTIONS", TRACKING_SEGMENTIO_ALLOWED_ACTIONS)
+TRACKING_SEGMENTIO_ALLOWED_CHANNELS = 
ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_CHANNELS", TRACKING_SEGMENTIO_ALLOWED_CHANNELS) + # Student identity verification settings VERIFY_STUDENT = AUTH_TOKENS.get("VERIFY_STUDENT", VERIFY_STUDENT) diff --git a/lms/envs/common.py b/lms/envs/common.py index 9335e39de0e1293366d5f79ae90c01b4b2786f74..efdea27745e3083053b4175da81dd2bf61f69169 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -460,7 +460,7 @@ TRACKING_BACKENDS = { # We're already logging events, and we don't want to capture user # names/passwords. Heartbeat events are likely not interesting. -TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat'] +TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat', r'^/segmentio/event'] EVENT_TRACKING_ENABLED = True EVENT_TRACKING_BACKENDS = { @@ -492,6 +492,10 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): } }) +TRACKING_SEGMENTIO_WEBHOOK_SECRET = None +TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ['Track', 'Screen'] +TRACKING_SEGMENTIO_ALLOWED_CHANNELS = ['mobile'] + ######################## GOOGLE ANALYTICS ########################### GOOGLE_ANALYTICS_ACCOUNT = None GOOGLE_ANALYTICS_LINKEDIN = 'GOOGLE_ANALYTICS_LINKEDIN_DUMMY' diff --git a/lms/urls.py b/lms/urls.py index f04a1cbbd9ee98ad76b33e3cb5c7a44c20b1ed5a..8754b8de78d963c61f19cf5f113ce94b534ed96e 100644 --- a/lms/urls.py +++ b/lms/urls.py @@ -28,6 +28,7 @@ urlpatterns = ('', # nopep8 url(r'^reject_name_change$', 'student.views.reject_name_change'), url(r'^pending_name_changes$', 'student.views.pending_name_changes'), url(r'^event$', 'track.views.user_track'), + url(r'^segmentio/event$', 'track.views.segmentio.track_segmentio_event'), url(r'^t/(?P<template>[^/]*)$', 'static_template_view.views.index'), # TODO: Is this used anymore? What is STATIC_GRAB? url(r'^accounts/login$', 'student.views.accounts_login', name="accounts_login"),