From a206e777af4378c2bb191d15064a62016bede987 Mon Sep 17 00:00:00 2001
From: "Albert (AJ) St. Aubin" <astaubin@edx.org>
Date: Thu, 27 Aug 2020 14:19:58 -0400
Subject: [PATCH] Write a migration to clean up and secure duplicate
 UserDemographics entries

[MICROBA-551]
---
 .../0002_clean_duplicate_entries.py           | 48 +++++++++++++++++++
 .../migrations/0003_auto_20200827_1949.py     | 20 ++++++++
 .../core/djangoapps/demographics/models.py    |  2 +-
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py
 create mode 100644 openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py

diff --git a/openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py b/openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py
new file mode 100644
index 00000000000..bfadadf061b
--- /dev/null
+++ b/openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py
@@ -0,0 +1,48 @@
+import logging
+
+from django.conf import settings
+from django.db import migrations, models
+
+log = logging.getLogger(__name__)
+
+
+def _clean_duplicate_entries(apps, schema_editor):
+    """
+    This method finds all the duplicate user entries in the UserDemographics model
+    and then removes all duplicate entries except for the most recently modified one.
+    """
+    demographics_model = apps.get_model('demographics', 'UserDemographics')
+    # Retrieve a list of all users that have more than one entry.
+    duplicate_users = (
+        demographics_model.objects.values(
+            'user'
+        ).annotate(models.Count('id')).values('user').order_by().filter(id__count__gt=1)
+    )
+    # Get a QuerySet of all the UserDemographics instances for the duplicates
+    # sorted by user and modified in descending order.
+    user_demographic_dupes = demographics_model.objects.filter(user__in=duplicate_users).order_by('user', '-modified')
+
+    # Rows arrive newest-first per user: keep the first row seen for a user, delete the rest.
+    existing_user_ids = set()
+    for demographic in user_demographic_dupes:
+        if demographic.user_id in existing_user_ids:
+            log.info('UserDemographics Duplicate User Delete {user} -- {modified}'.format(
+                user=demographic.user_id, modified=demographic.modified
+            ))
+            demographic.delete()
+        else:
+            log.info('UserDemographics {user} -- {modified}'.format(
+                user=demographic.user_id, modified=demographic.modified
+            ))
+            existing_user_ids.add(demographic.user_id)
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+        ('demographics', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.RunPython(_clean_duplicate_entries, migrations.RunPython.noop),
+    ]
diff --git a/openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py b/openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py
new file mode 100644
index 00000000000..d31c24841ea
--- /dev/null
+++ b/openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py
@@ -0,0 +1,20 @@
+# Generated by Django 2.2.15 on 2020-08-27 19:49
+
+from django.conf import settings
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('demographics', '0002_clean_duplicate_entries'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='userdemographics',
+            name='user',
+            field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
+        ),
+    ]
diff --git a/openedx/core/djangoapps/demographics/models.py b/openedx/core/djangoapps/demographics/models.py
index be8251c80c6..f576b77a632 100644
--- a/openedx/core/djangoapps/demographics/models.py
+++ b/openedx/core/djangoapps/demographics/models.py
@@ -11,7 +11,7 @@ class UserDemographics(TimeStampedModel):
     A Users Demographics platform related data in support of the Demographics
     IDA and features
     """
-    user = models.ForeignKey(User, on_delete=models.CASCADE)
+    user = models.OneToOneField(User, on_delete=models.CASCADE)
     show_call_to_action = models.BooleanField(default=True)
     history = HistoricalRecords(app='demographics')
 
-- 
GitLab