From a206e777af4378c2bb191d15064a62016bede987 Mon Sep 17 00:00:00 2001
From: "Albert (AJ) St. Aubin" <astaubin@edx.org>
Date: Thu, 27 Aug 2020 14:19:58 -0400
Subject: [PATCH] Write a migration to clean up and secure duplicate
 UserDemographics entries

[MICROBA-551]
---
 .../0002_clean_duplicate_entries.py           | 48 +++++++++++++++++++
 .../migrations/0003_auto_20200827_1949.py     | 20 ++++++++
 .../core/djangoapps/demographics/models.py    |  2 +-
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py
 create mode 100644 openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py

diff --git a/openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py b/openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py
new file mode 100644
index 00000000000..bfadadf061b
--- /dev/null
+++ b/openedx/core/djangoapps/demographics/migrations/0002_clean_duplicate_entries.py
@@ -0,0 +1,48 @@
+import logging
+
+from django.conf import settings
+from django.db import migrations, models
+
+log = logging.getLogger(__name__)
+
+
+def _clean_duplicate_entries(apps, schema_editor):
+    """
+    Delete duplicate UserDemographics rows, keeping the most recently modified row per user.
+
+    `apps` is the historical app registry used to look up the model; `schema_editor`
+    is accepted because `migrations.RunPython` passes it, but it is not used here.
+    """
+    demographics_model = apps.get_model('demographics', 'UserDemographics')
+    # Users that own more than one row. The empty `order_by()` clears any ordering so
+    # that it cannot leak into the grouping done by `values('user').annotate(...)`.
+    duplicate_users = (
+        demographics_model.objects.values('user').annotate(models.Count('id'))
+        .values('user').order_by().filter(id__count__gt=1)
+    )
+    # Newest row first within each user, so the first row seen per user is the keeper.
+    user_demographic_dupes = demographics_model.objects.filter(user__in=duplicate_users).order_by('user', '-modified')
+
+    existing_user_ids = set()
+    for demographic in user_demographic_dupes:
+        if demographic.user_id in existing_user_ids:
+            # A more recently modified row for this user was already kept: delete this one.
+            log.info(
+                'UserDemographics Duplicate User Delete %s -- %s', demographic.user_id, demographic.modified
+            )
+            demographic.delete()
+        else:
+            # First (most recently modified) row for this user: keep it.
+            log.info('UserDemographics %s -- %s', demographic.user_id, demographic.modified)
+            existing_user_ids.add(demographic.user_id)
+
+
+class Migration(migrations.Migration):
+    """Data migration: run `_clean_duplicate_entries` forwards; reversing is a no-op."""
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+        ('demographics', '0001_initial'),
+    ]
+    operations = [
+        migrations.RunPython(code=_clean_duplicate_entries, reverse_code=migrations.RunPython.noop),
+    ]
diff --git a/openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py b/openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py
new file mode 100644
index 00000000000..d31c24841ea
--- /dev/null
+++ b/openedx/core/djangoapps/demographics/migrations/0003_auto_20200827_1949.py
@@ -0,0 +1,20 @@
+# Generated by Django 2.2.15 on 2020-08-27 19:49
+
+from django.conf import settings
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Alter `UserDemographics.user` to a OneToOneField; runs after 0002_clean_duplicate_entries."""
+    dependencies = [('demographics', '0002_clean_duplicate_entries')]
+
+    operations = [
+        migrations.AlterField(
+            model_name='userdemographics',
+            name='user',
+            field=models.OneToOneField(
+                on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL,
+            ),
+        ),
+    ]
diff --git a/openedx/core/djangoapps/demographics/models.py b/openedx/core/djangoapps/demographics/models.py
index be8251c80c6..f576b77a632 100644
--- a/openedx/core/djangoapps/demographics/models.py
+++ b/openedx/core/djangoapps/demographics/models.py
@@ -11,7 +11,7 @@ class UserDemographics(TimeStampedModel):
     A Users Demographics platform related data in support of the Demographics
     IDA and features
     """
-    user = models.ForeignKey(User, on_delete=models.CASCADE)
+    user = models.OneToOneField(User, on_delete=models.CASCADE)
     show_call_to_action = models.BooleanField(default=True)
     history = HistoricalRecords(app='demographics')
 
-- 
GitLab