Bug 819432: Execute queries in two steps to improve performance

author Frédéric Buclin <LpSolit@gmail.com>

Wed, 16 Jan 2013 18:05:22 +0000 (19:05 +0100)

committer Frédéric Buclin <LpSolit@gmail.com>

Wed, 16 Jan 2013 18:05:22 +0000 (19:05 +0100)
author Frédéric Buclin <LpSolit@gmail.com>
Wed, 16 Jan 2013 18:05:22 +0000 (19:05 +0100)
committer Frédéric Buclin <LpSolit@gmail.com>
Wed, 16 Jan 2013 18:05:22 +0000 (19:05 +0100)
diff --git a/Bugzilla/Search.pm b/Bugzilla/Search.pm

index b05da1a04b3134e30a58ce8c411f4c43a1499939..1c6af782e8c745551c44eca7f955e9915c3389ca 100644 (file)
--- a/Bugzilla/Search.pm
+++ b/Bugzilla/Search.pm
@@ -32,9 +32,10 @@ use Data::Dumper;
  use Date::Format;
  use Date::Parse;
  use Scalar::Util qw(blessed);
-use List::MoreUtils qw(all part uniq);
+use List::MoreUtils qw(all firstidx part uniq);
  use POSIX qw(INT_MAX);
  use Storable qw(dclone);
+use Time::HiRes qw(gettimeofday tv_interval);
  
  # Description Of Boolean Charts
  # -----------------------------
@@ -686,7 +687,70 @@ sub new {
  # Public Accessors #
  ####################
  
-sub sql {
+sub data {
+    my $self = shift;
+    return $self->{data} if $self->{data};
+    my $dbh = Bugzilla->dbh;
+
+    # If all fields belong to the 'bugs' table, there is no need to split
+    # the original query into two pieces. Else we override the 'fields'
+    # argument to first get bug IDs based on the search criteria defined
+    # by the caller, and the desired fields are collected in the 2nd query.
+    my @orig_fields = $self->_input_columns;
+    my $all_in_bugs_table = 1;
+    foreach my $field (@orig_fields) {
+        next if $self->COLUMNS->{$field}->{name} =~ /^bugs\.\w+$/;
+        $self->{fields} = ['bug_id'];
+        $all_in_bugs_table = 0;
+        last;
+    }
+
+    my $start_time = [gettimeofday()];
+    my $sql = $self->_sql;
+    # Do we just want bug IDs to pass to the 2nd query or all the data immediately?
+    my $func = $all_in_bugs_table ? 'selectall_arrayref' : 'selectcol_arrayref';
+    my $bug_ids = $dbh->$func($sql);
+    my @extra_data = ({sql => $sql, time => tv_interval($start_time)});
+    # Restore the original 'fields' argument, just in case.
+    $self->{fields} = \@orig_fields unless $all_in_bugs_table;
+
+    # If there are no bugs found, or all fields are in the 'bugs' table,
+    # there is no need for another query.
+    if (!scalar @$bug_ids || $all_in_bugs_table) {
+        $self->{data} = $bug_ids;
+        return wantarray ? ($self->{data}, \@extra_data) : $self->{data};
+    }
+
+    # Make sure the bug_id will be returned. If not, append it to the list.
+    my $pos = firstidx { $_ eq 'bug_id' } @orig_fields;
+    if ($pos < 0) {
+        push(@orig_fields, 'bug_id');
+        $pos = $#orig_fields;
+    }
+
+    # Now create a query with the buglist above as the single criteria
+    # and the fields that the caller wants. No need to redo security checks;
+    # the list has already been validated above.
+    my $search = $self->new('fields' => \@orig_fields,
+                            'params' => {bug_id => $bug_ids, bug_id_type => 'anyexact'},
+                            'sharer' => $self->_sharer_id,
+                            'user'   => $self->_user,
+                            'allow_unlimited'    => 1,
+                            '_no_security_check' => 1);
+
+    $start_time = [gettimeofday()];
+    $sql = $search->_sql;
+    my $unsorted_data = $dbh->selectall_arrayref($sql);
+    push(@extra_data, {sql => $sql, time => tv_interval($start_time)});
+    # Let's sort the data. We didn't do it in the query itself because
+    # we already know in which order to sort bugs thanks to the first query,
+    # and this avoids additional table joins in the SQL query.
+    my %data = map { $_->[$pos] => $_ } @$unsorted_data;
+    $self->{data} = [map { $data{$_} } @$bug_ids];
+    return wantarray ? ($self->{data}, \@extra_data) : $self->{data};
+}
+
+sub _sql {
      my ($self) = @_;
      return $self->{sql} if $self->{sql};
      my $dbh = Bugzilla->dbh;
@@ -720,7 +784,7 @@ sub search_description {
      # Make sure that the description has actually been generated if
      # people are asking for the whole thing.
      else {
-        $self->sql;
+        $self->_sql;
      }
      return $self->{'search_description'};
  }
@@ -1116,6 +1180,7 @@ sub _standard_joins {
      my ($self) = @_;
      my $user = $self->_user;
      my @joins;
+    return () if $self->{_no_security_check};
  
      my $security_join = {
          table => 'bug_group_map',
@@ -1192,6 +1257,7 @@ sub _translate_join {
  # group security.
  sub _standard_where {
      my ($self) = @_;
+    return ('1=1') if $self->{_no_security_check};
      # If replication lags badly between the shadow db and the main DB,
      # it's possible for bugs to show up in searches before their group
      # controls are properly set. To prevent this, when initially creating
@@ -2960,6 +3026,112 @@ sub _translate_old_column {
  
  1;
  
+__END__
+
+=head1 NAME
+
+Bugzilla::Search - Provides methods to run queries against bugs.
+
+=head1 SYNOPSIS
+
+    use Bugzilla::Search;
+
+    my $search = new Bugzilla::Search({'fields' => \@fields,
+                                       'params' => \%search_criteria,
+                                       'sharer' => $sharer_id,
+                                       'user'   => $user_obj,
+                                       'allow_unlimited' => 1});
+
+    my $data = $search->data;
+    my ($data, $extra_data) = $search->data;
+
+=head1 DESCRIPTION
+
+Search.pm represents a search object. It's the single way to collect
+data about bugs in a secure way. The list of bugs matching criteria
+defined by the caller are filtered based on the user privileges.
+
+=head1 METHODS
+
+=head2 new
+
+=over
+
+=item B<Description>
+
+Create a Bugzilla::Search object.
+
+=item B<Params>
+
+=over
+
+=item C<fields>
+
+An arrayref representing the bug attributes for which data is desired.
+Legal attributes are listed in the fielddefs DB table. At least one field
+must be defined, typically the 'bug_id' field.
+
+=item C<params>
+
+A hashref representing search criteria. Each key => value pair represents
+a search criteria, where the key is the search field and the value is the
+value for this field. At least one search criteria must be defined if the
+'search_allow_no_criteria' parameter is turned off, else an error is thrown.
+
+=item C<sharer>
+
+When a saved search is shared by a user, this is his user ID.
+
+=item C<user>
+
+A L<Bugzilla::User> object representing the user to whom the data is addressed.
+All security checks are done based on this user object, so it's not safe
+to share results of the query with other users as not all users have the
+same privileges or have the same role for all bugs in the list. If this
+parameter is not defined, then the currently logged in user is taken into
+account. If no user is logged in, then only public bugs will be returned.
+
+=item C<allow_unlimited>
+
+If set to a true value, the number of bugs retrieved by the query is not
+limited.
+
+=back
+
+=item B<Returns>
+
+A L<Bugzilla::Search> object.
+
+=back
+
+=head2 data
+
+=over
+
+=item B<Description>
+
+Returns bugs matching search criteria passed to C<new()>.
+
+=item B<Params>
+
+None
+
+=item B<Returns>
+
+In scalar context, this method returns a reference to a list of bugs.
+Each item of the list represents a bug, which is itself a reference to
+a list where each item represents a bug attribute, in the same order as
+specified in the C<fields> parameter of C<new()>.
+
+In list context, this methods also returns a reference to a list containing
+references to hashes. For each hash, two keys are defined: C<sql> contains
+the SQL query which has been executed, and C<time> contains the time spent
+to execute the SQL query, in seconds. There can be either a single hash, or
+two hashes if two SQL queries have been executed sequentially to get all the
+required data.
+
+=back
+
  =head1 B<Methods in need of POD>
  
  =over
@@ -2968,8 +3140,6 @@ sub _translate_old_column {
  
  =item COLUMN_JOINS
  
-=item sql
-
  =item split_order_term
  
  =item SqlifyDate
diff --git a/buglist.cgi b/buglist.cgi

index 8a436a0a2998e45afa95ec1caa6759ecdf06ea72..625b7eab86343de1a853d64b7ff71260537835e3 100755 (executable)
--- a/buglist.cgi
+++ b/buglist.cgi
@@ -25,7 +25,6 @@ use Bugzilla::Status;
  use Bugzilla::Token;
  
  use Date::Parse;
-use Time::HiRes qw(gettimeofday tv_interval);
  
  my $cgi = Bugzilla->cgi;
  my $dbh = Bugzilla->dbh;
@@ -667,8 +666,7 @@ my $search = new Bugzilla::Search('fields' => \@selectcolumns,
                                    'params' => scalar $params->Vars,
                                    'order'  => \@order_columns,
                                    'sharer' => $sharer_id);
-my $query = $search->sql;
-$vars->{'search_description'} = $search->search_description;
+
  $order = join(',', $search->order);
  
  if (scalar @{$search->invalid_order_columns}) {
@@ -688,18 +686,6 @@ $params->delete('limit') if $vars->{'default_limited'};
  # Query Execution
  ################################################################################
  
-if ($cgi->param('debug')) {
-    $vars->{'debug'} = 1;
-    $vars->{'query'} = $query;
-    # Explains are limited to admins because you could use them to figure
-    # out how many hidden bugs are in a particular product (by doing
-    # searches and looking at the number of rows the explain says it's
-    # examining).
-    if ($user->in_group('admin')) {
-        $vars->{'query_explain'} = $dbh->bz_explain($query);
-    }
-}
-
  # Time to use server push to display an interim message to the user until
  # the query completes and we can display the bug list.
  if ($serverpush) {
@@ -732,11 +718,25 @@ $::SIG{TERM} = 'DEFAULT';
  $::SIG{PIPE} = 'DEFAULT';
  
  # Execute the query.
-my $start_time = [gettimeofday()];
-my $buglist_sth = $dbh->prepare($query);
-$buglist_sth->execute();
-$vars->{query_time} = tv_interval($start_time);
+my ($data, $extra_data) = $search->data;
+$vars->{'search_description'} = $search->search_description;
  
+if ($cgi->param('debug')) {
+    $vars->{'debug'} = 1;
+    $vars->{'queries'} = $extra_data;
+    my $query_time = 0;
+    $query_time += $_->{'time'} foreach @$extra_data;
+    $vars->{'query_time'} = $query_time;
+    # Explains are limited to admins because you could use them to figure
+    # out how many hidden bugs are in a particular product (by doing
+    # searches and looking at the number of rows the explain says it's
+    # examining).
+    if ($user->in_group('admin')) {
+        foreach my $query (@$extra_data) {
+            $query->{explain} = $dbh->bz_explain($query->{sql});
+        }
+    }
+}
  
  ################################################################################
  # Results Retrieval
@@ -768,14 +768,14 @@ my @bugidlist;
  
  my @bugs; # the list of records
  
-while (my @row = $buglist_sth->fetchrow_array()) {
+foreach my $row (@$data) {
      my $bug = {}; # a record
  
      # Slurp the row of data into the record.
      # The second from last column in the record is the number of groups
      # to which the bug is restricted.
      foreach my $column (@selectcolumns) {
-        $bug->{$column} = shift @row;
+        $bug->{$column} = shift @$row;
      }
  
      # Process certain values further (i.e. date format conversion).
diff --git a/collectstats.pl b/collectstats.pl

index bf0c6869638f5f2cb6179d9b6bfc338da567beab..330fae5b39f6ce7bc48df66c61bb7356f967bf38 100755 (executable)
--- a/collectstats.pl
+++ b/collectstats.pl
@@ -473,8 +473,7 @@ sub CollectSeriesData {
                                                'fields' => ["bug_id"],
                                                'allow_unlimited' => 1,
                                                'user'   => $user);
-            my $sql = $search->sql;
-            $data = $shadow_dbh->selectall_arrayref($sql);
+            $data = $search->data;
          };
  
          if (!$@) {
diff --git a/report.cgi b/report.cgi

index 69aadddbd8f53f147f7330b7b99a8cde8a0f7ab7..2949a18c32a7fe4074fc329e1e18d0fb3b8d2e15 100755 (executable)
--- a/report.cgi
+++ b/report.cgi
@@ -164,13 +164,12 @@ my $search = new Bugzilla::Search(
      params => scalar $params->Vars,
      allow_unlimited => 1,
  );
-my $query = $search->sql;
  
  $::SIG{TERM} = 'DEFAULT';
  $::SIG{PIPE} = 'DEFAULT';
  
-my $dbh = Bugzilla->switch_to_shadow_db();
-my $results = $dbh->selectall_arrayref($query);
+Bugzilla->switch_to_shadow_db();
+my ($results, $extra_data) = $search->data;
  
  # We have a hash of hashes for the data itself, and a hash to hold the 
  # row/col/table names.
@@ -257,7 +256,7 @@ if ($formatparam eq "bar") {
  
  $vars->{'width'} = $width;
  $vars->{'height'} = $height;
-$vars->{'query'} = $query;
+$vars->{'queries'} = $extra_data;
  $vars->{'saved_report_id'} = $cgi->param('saved_report_id');
  $vars->{'debug'} = $cgi->param('debug');
  
diff --git a/template/en/default/list/list.html.tmpl b/template/en/default/list/list.html.tmpl

index 3be607a38c245285a87078cc80b21a9e8c22a8c2..dcc140cf27bf237649745764c04239c0f563e62d 100644 (file)
--- a/template/en/default/list/list.html.tmpl
+++ b/template/en/default/list/list.html.tmpl
@@ -72,10 +72,13 @@
  
    [% IF debug %]
      <div class="bz_query_debug">
-      <p>[% query FILTER html %]</p>
-      <p>Execution time: [% query_time FILTER html %] seconds</p>
-      [% IF query_explain.defined %]
-        <pre>[% query_explain FILTER html %]</pre>
+      <p>Total execution time: [% query_time FILTER html %] seconds</p>
+      [% FOREACH query = queries %]
+        <p>[% query.sql FILTER html %]</p>
+        <p>Execution time: [% query.time FILTER html %] seconds</p>
+        [% IF query.explain %]
+          <pre>[% query.explain FILTER html %]</pre>
+        [% END %]
        [% END %]
      </div>
    [% END %]
diff --git a/template/en/default/reports/report.html.tmpl b/template/en/default/reports/report.html.tmpl

index 8dc4bc5a71107489ca192df055e802d55801ab6c..2ca5dd90f508e5d3d0b8a80a64fc039e7fd86554 100644 (file)
--- a/template/en/default/reports/report.html.tmpl
+++ b/template/en/default/reports/report.html.tmpl
@@ -61,7 +61,9 @@
  %]
  
  [% IF debug %]
-  <p>[% query FILTER html %]</p>
+  [% FOREACH query = queries %]
+    <p>[% query.sql FILTER html %]</p>
+  [% END %]
  [% END %]
  
  <div align="center">
diff --git a/whine.pl b/whine.pl

index 481b47425d81fb3ba814ee07adabcae4d8a5213c..f7ed6d5c7d782f0d5f9d1ddf8a7a8f444d852455 100755 (executable)
--- a/whine.pl
+++ b/whine.pl
@@ -453,7 +453,7 @@ sub run_queries {
              'order'  => \@orderstrings
          );
          # If a query fails for whatever reason, it shouldn't kill the script.
-        my $sqlquery = eval { $search->sql };
+        my $data = eval { $search->data };
          if ($@) {
              print STDERR get_text('whine_query_failed', { query_name => $thisquery->{'name'},
                                                            author => $args->{'author'},
@@ -461,15 +461,12 @@ sub run_queries {
              next;
          }
  
-        $sth = $dbh->prepare($sqlquery);
-        $sth->execute;
-
-        while (my @row = $sth->fetchrow_array) {
+        foreach my $row (@$data) {
              my $bug = {};
              for my $field (@searchfields) {
                  my $fieldname = $field;
                  $fieldname =~ s/^bugs\.//;  # No need for bugs.whatever
-                $bug->{$fieldname} = shift @row;
+                $bug->{$fieldname} = shift @$row;
              }
  
              if ($thisquery->{'onemailperbug'}) {
diff --git a/xt/lib/Bugzilla/Test/Search/FieldTest.pm b/xt/lib/Bugzilla/Test/Search/FieldTest.pm

index f8295aa70685af5dbd25b8eabe2ce662035e13c3..c92a6a7b644640aaa9e74336cdd7bd0cefc1f673 100644 (file)
--- a/xt/lib/Bugzilla/Test/Search/FieldTest.pm
+++ b/xt/lib/Bugzilla/Test/Search/FieldTest.pm
@@ -530,13 +530,13 @@ sub do_tests {
      my $sql;
      TODO: {
          local $TODO = $search_broken if $search_broken;
-        lives_ok { $sql = $search->sql } "$name: generate SQL";
+        lives_ok { $sql = $search->_sql } "$name: generate SQL";
      }
      
      my $results;
      SKIP: {
          skip "Can't run SQL without any SQL", 1 if !defined $sql;
-        $results = $self->_test_sql($sql);
+        $results = $self->_test_sql($search);
      }
  
      $self->_test_content($results, $sql);
@@ -553,12 +553,11 @@ sub _test_search_object_creation {
  }
  
  sub _test_sql {
-    my ($self, $sql) = @_;
-    my $dbh = Bugzilla->dbh;
+    my ($self, $search) = @_;
      my $name = $self->name;
      my $results;
-    lives_ok { $results = $dbh->selectall_arrayref($sql) } "$name: Run SQL Query"
-        or diag($sql);
+    lives_ok { $results = $search->data } "$name: Run SQL Query"
+        or diag($search->_sql);
      return $results;
  }
author	Frédéric Buclin <LpSolit@gmail.com>
	Wed, 16 Jan 2013 18:05:22 +0000 (19:05 +0100)
committer	Frédéric Buclin <LpSolit@gmail.com>
	Wed, 16 Jan 2013 18:05:22 +0000 (19:05 +0100)
Bugzilla/Search.pm		patch \| blob \| blame \| history
buglist.cgi		patch \| blob \| blame \| history
collectstats.pl		patch \| blob \| blame \| history
report.cgi		patch \| blob \| blame \| history
template/en/default/list/list.html.tmpl		patch \| blob \| blame \| history
template/en/default/reports/report.html.tmpl		patch \| blob \| blame \| history
whine.pl		patch \| blob \| blame \| history
xt/lib/Bugzilla/Test/Search/FieldTest.pm		patch \| blob \| blame \| history