Skip to content
Snippets Groups Projects
Unverified Commit e0e03dec authored by Tim McCormack's avatar Tim McCormack Committed by GitHub
Browse files

Use more appropriate hash when making tracking ID; document SECRET_KEY use (#26134)

- Make it easier to rotate `SECRET_KEY` by documenting this usage
  for both sensitivity and rotation process. (ARCHBOM-1676).
- Just use a hash of the secret rather than HMAC + MD5. We're not
  authenticating a message, so HMAC isn't really needed -- it just needs
  to be unique, deterministic, and irreversible. SHAKE allows generation
  of an arbitrary length hash without needing to truncate.

Also, rename tracking session ID generator for clarity -- there's no
encryption happening here.

Add additional test for existing claim of uniqueness.
parent 68706bd5
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,6 @@ framework.
import hashlib
import hmac
import json
import logging
import re
......@@ -164,31 +163,55 @@ class TrackMiddleware(MiddlewareMixin):
)
def get_session_key(self, request):
""" Gets and encrypts the Django session key from the request or an empty string if it isn't found."""
"""
Gets a key suitable for representing this Django session for tracking purposes.
Returns an empty string if there is no active session.
"""
try:
return self.encrypt_session_key(request.session.session_key)
return self.substitute_session_key(request.session.session_key)
except AttributeError:
# NB: This can hide a missing SECRET_KEY
return ''
def encrypt_session_key(self, session_key):
"""Encrypts a Django session key to another 32-character hex value."""
def substitute_session_key(self, session_key):
"""
Deterministically generate a tracking session key from the real one.
If a session key is not provided, returns empty string.
The tracking session ID is a 32-character hexadecimal string (matching
Django session key format for convenience, and in case something
downstream makes assumptions.) The tracking ID does not allow recovery
of the original session key but will always be the same unless server
secrets are changed, and will be unique for each session key.
"""
if not session_key:
return ''
# Follow the model of django.utils.crypto.salted_hmac() and
# django.contrib.sessions.backends.base._hash() but use MD5
# instead of SHA1 so that the result has the same length (32)
# as the original session_key.
# TODO: Switch to SHA224, which is secure.
# If necessary, drop the last little bit of the hash to make it the same length.
# Using a known-insecure hash to shorten is silly.
# Also, why do we need same length?
key_salt = "common.djangoapps.track" + self.__class__.__name__
key_bytes = (key_salt + settings.SECRET_KEY).encode('utf-8')
key = hashlib.md5(key_bytes).digest()
encrypted_session_key = hmac.new(key, msg=session_key.encode('utf-8'), digestmod=hashlib.md5).hexdigest()
return encrypted_session_key
# Prevent confirmation attacks by using SECRET_KEY as a pepper (see
# ADR: docs/decisions/0008-secret-key-usage.rst).
# Tracking ID and session key will only be linkable
# by someone in possession of the pepper.
#
# This assumes that session_key is high-entropy and unpredictable (which
# it should be anyway.)
#
# Use SHAKE256 from SHA-3 hash family to generate a hash of arbitrary
# length.
hasher = hashlib.shake_128()
# This is one of several uses of SECRET_KEY.
#
# Impact of exposure: Could result in identifying the tracking data for
# users if their actual session keys are already known.
#
# Rotation process: Can be rotated at will. Results in a one-time
# discontinuity in tracking metrics and should be accompanied by a
# heads-up to data researchers.
hasher.update(settings.SECRET_KEY.encode())
hasher.update(session_key.encode())
# pylint doesn't know that SHAKE's hexdigest takes an arg:
# https://github.com/PyCQA/pylint/issues/4039
return hasher.hexdigest(16) # pylint: disable=too-many-function-args
def get_user_primary_key(self, request):
"""Gets the primary key of the logged in Django user"""
......
......@@ -173,7 +173,7 @@ class TrackMiddlewareTestCase(TestCase):
SessionMiddleware().process_request(request)
request.session.save()
session_key = request.session.session_key
expected_session_key = self.track_middleware.encrypt_session_key(session_key)
expected_session_key = self.track_middleware.substitute_session_key(session_key)
self.assertEqual(len(session_key), len(expected_session_key))
context = self.get_context_for_request(request)
self.assert_dict_subset(context, {
......@@ -181,11 +181,17 @@ class TrackMiddlewareTestCase(TestCase):
})
@override_settings(SECRET_KEY='85920908f28904ed733fe576320db18cabd7b6cd')
def test_session_key_encryption(self):
def test_session_key_substitution(self):
session_key = '665924b49a93e22b46ee9365abf28c2a'
expected_session_key = '3b81f559d14130180065d635a4f35dd2'
encrypted_session_key = self.track_middleware.encrypt_session_key(session_key)
self.assertEqual(encrypted_session_key, expected_session_key)
# Output value pinned to alert on unintended changes to generator
expected_session_key = 'b4103566fc80d20da1970cbb4380bccd'
substitute_session_key = self.track_middleware.substitute_session_key(session_key)
self.assertEqual(substitute_session_key, expected_session_key)
# Confirm that we get *different* outputs for different inputs
expected_session_key_2 = "6f0c784c1087c6bc4624b7eac982fedf"
substitute_session_key_2 = self.track_middleware.substitute_session_key(session_key + "different")
self.assertEqual(expected_session_key_2, substitute_session_key_2)
def test_request_headers(self):
ip_address = '10.0.0.0'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment