]> git.ipfire.org Git - thirdparty/patchwork.git/commitdiff
Fix slow Patch counting query
authorDaniel Axtens <dja@axtens.net>
Thu, 8 Mar 2018 01:28:22 +0000 (12:28 +1100)
committerDaniel Axtens <dja@axtens.net>
Tue, 20 Mar 2018 23:21:06 +0000 (10:21 +1100)
Stephen Rothwell noticed (way back in September - sorry Stephen!) that
the following query is really slow on OzLabs:

SELECT COUNT(*) AS "__count" FROM "patchwork_patch"
    INNER JOIN "patchwork_submission" ON
        ("patchwork_patch"."submission_ptr_id" = "patchwork_submission"."id")
    WHERE ("patchwork_submission"."project_id" = 14 AND
           "patchwork_patch"."state_id" IN
       (SELECT U0."id" AS Col1 FROM "patchwork_state" U0
                WHERE U0."action_required" = true
ORDER BY U0."ordering" ASC));

I think this is really slow because we have to join the patch and
submission table to get the project id, which we need to filter the
patches.

Duplicate the project id in the patch table itself, which allows us to
avoid the JOIN.

The new query reads as:
SELECT COUNT(*) AS "__count" FROM "patchwork_patch"
    WHERE ("patchwork_patch"."patch_project_id" = 1 AND
           "patchwork_patch"."state_id" IN
       (SELECT U0."id" AS Col1 FROM "patchwork_state" U0
        WHERE U0."action_required" = true
ORDER BY U0."ordering" ASC));

Very simple testing on a small, artifical Postgres instance (3
projects, 102711 patches), shows speed gains of ~1.5-5x for this
query. Looking at Postgres' cost estimates (EXPLAIN) of the first
query vs the second query, we see a ~1.75x improvement there too.

I suspect the gains will be bigger on OzLabs.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Daniel Axtens <dja@axtens.net>
patchwork/migrations/0024_patch_patch_project.py [new file with mode: 0644]
patchwork/models.py
patchwork/parser.py
patchwork/tests/utils.py
patchwork/views/__init__.py

diff --git a/patchwork/migrations/0024_patch_patch_project.py b/patchwork/migrations/0024_patch_patch_project.py
new file mode 100644 (file)
index 0000000..76d8f14
--- /dev/null
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.10 on 2018-03-08 01:51
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    # per migration 16, but note this seems to be going away
+    # in new PostgreSQLs (https://stackoverflow.com/questions/12838111/south-cannot-alter-table-because-it-has-pending-trigger-events#comment44629663_12838113)
+    atomic = False
+
+    dependencies = [
+        ('patchwork', '0023_timezone_unify'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='patch',
+            name='patch_project',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='patchwork.Project'),
+            preserve_default=False,
+        ),
+
+        # as with 10, this will break if you use non-default table names
+        migrations.RunSQL('''UPDATE patchwork_patch SET patch_project_id =
+                               (SELECT project_id FROM patchwork_submission
+                                WHERE patchwork_submission.id =
+                                        patchwork_patch.submission_ptr_id);'''
+        ),
+
+        migrations.AlterField(
+            model_name='patch',
+            name='patch_project',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='patchwork.Project'),
+        ),
+
+    ]
index ff1d7dce15f7a2d1317b30a8a4e9b87c8c53cc1f..f91b994c8689a77a9ed082b10d44e922453685c7 100644 (file)
@@ -433,6 +433,10 @@ class Patch(SeriesMixin, Submission):
     archived = models.BooleanField(default=False)
     hash = HashField(null=True, blank=True)
 
+    # duplicate project from submission in subclass so we can count the
+    # patches in a project without needing to do a JOIN.
+    patch_project = models.ForeignKey(Project, on_delete=models.CASCADE)
+
     objects = PatchManager()
 
     @staticmethod
index 803e98592fa88d6aa069e150695e551792f3e4c0..805037c72d73912833571b2126ed774a485f306c 100644 (file)
@@ -1004,6 +1004,7 @@ def parse_mail(mail, list_id=None):
         patch = Patch.objects.create(
             msgid=msgid,
             project=project,
+            patch_project=project,
             name=name[:255],
             date=date,
             headers=headers,
index f0a71fbe5d9de43c5a9d10e35ab36d4c6a03f6f0..eb14a7d00eb105f8517aa9a184e2a2c3cdbf97fc 100644 (file)
@@ -176,6 +176,8 @@ def create_patch(**kwargs):
         'diff': SAMPLE_DIFF,
     }
     values.update(kwargs)
+    if 'patch_project' not in values:
+        values['patch_project'] = values['project']
 
     return Patch.objects.create(**values)
 
index 3baf2999a83654e7f2bc1bf00197e7a2692545fa..f8d23a388ac7d8ca584d23dfebbf36d49145ad54 100644 (file)
@@ -270,7 +270,7 @@ def generic_list(request, project, view, view_args=None, filter_settings=None,
             context['filters'].set_status(filterclass, setting)
 
     if patches is None:
-        patches = Patch.objects.filter(project=project)
+        patches = Patch.objects.filter(patch_project=project)
 
     # annotate with tag counts
     patches = patches.with_tag_counts(project)