git.ipfire.org Git - thirdparty/bugzilla.git/commitdiff
Bug 1381869 - Use separate elasticsearch index for Bugzilla::User
author Dylan William Hardison <dylan@hardison.net>
Thu, 20 Jul 2017 20:36:56 +0000 (16:36 -0400)
committer GitHub <noreply@github.com>
Thu, 20 Jul 2017 20:36:56 +0000 (16:36 -0400)
This patch removes the concept of a single, bugzilla-wide index in favor of a
per-class index. bugs and comments continue to use
Bugzilla->params->{elasticsearch_index} but users use
Bugzilla->params->{elasticsearch_index} . "_user".

It is assured via the ChildObject trait (role) that comments will share the
index with bugs, and we have kept the index for bugs/comments the same to avoid
the multi-hour reindexing of production. Re-indexing users takes only five
minutes.

Subsequent work on this will allow us to version the index names and use
aliases, but I wanted to keep this patch small.

This patch also corrects some mistakes:

1. $indexer->put_mapping() should not have been a public method.
2. Time::HiRes should be imported at the top of the file, not in a sub.

Bugzilla/Bug.pm
Bugzilla/Comment.pm
Bugzilla/Elastic.pm
Bugzilla/Elastic/Indexer.pm
Bugzilla/Elastic/Role/ChildObject.pm
Bugzilla/Elastic/Role/HasIndexName.pm [deleted file]
Bugzilla/Elastic/Role/Object.pm
Bugzilla/Elastic/Search.pm
Bugzilla/User.pm
scripts/bulk_index.pl

index eb228d27c1fcc77ff234372df7941ceedbf466c8..ee48ed7a2b030840688a6b6a3bee3cde9e8beb9a 100644 (file)
@@ -302,6 +302,73 @@ with 'Bugzilla::Elastic::Role::Object';
 
 sub ES_TYPE {'bug'}
 
+sub ES_INDEX { Bugzilla->params->{elasticsearch_index} }
+
+sub ES_SETTINGS {
+    return {
+        number_of_shards => 2,
+        analysis         => {
+            filter => {
+                asciifolding_original => {
+                    type              => "asciifolding",
+                    preserve_original => \1,
+                },
+            },
+            analyzer => {
+                autocomplete => {
+                    type      => 'custom',
+                    tokenizer => 'keyword',
+                    filter    => [ 'lowercase', 'asciifolding_original' ],
+                },
+                folding => {
+                    tokenizer => 'standard',
+                    filter    => [ 'standard', 'lowercase', 'asciifolding_original' ],
+                },
+                bz_text_analyzer => {
+                    type             => 'standard',
+                    filter           => [ 'lowercase', 'stop' ],
+                    max_token_length => '20'
+                },
+                bz_equals_analyzer => {
+                    type      => 'custom',
+                    filter    => ['lowercase'],
+                    tokenizer => 'keyword',
+                },
+                whiteboard_words => {
+                    type      => 'custom',
+                    tokenizer => 'whiteboard_words_pattern',
+                    filter    => ['stop']
+                },
+                whiteboard_shingle_words => {
+                    type      => 'custom',
+                    tokenizer => 'whiteboard_words_pattern',
+                    filter    => [ 'stop', 'shingle', 'lowercase' ]
+                },
+                whiteboard_tokens => {
+                    type      => 'custom',
+                    tokenizer => 'whiteboard_tokens_pattern',
+                    filter    => [ 'stop', 'lowercase' ]
+                },
+                whiteboard_shingle_tokens => {
+                    type      => 'custom',
+                    tokenizer => 'whiteboard_tokens_pattern',
+                    filter    => [ 'stop', 'shingle', 'lowercase' ]
+                }
+            },
+            tokenizer => {
+                whiteboard_tokens_pattern => {
+                    type    => 'pattern',
+                    pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
+                },
+                whiteboard_words_pattern => {
+                    type    => 'pattern',
+                    pattern => '[\\[\\];,\\s]+'
+                },
+            },
+        },
+    };
+}
+
 sub _bz_field {
     my ($field, @fields) = @_;
 
index 14f28cbe5aa76682c981c1f7ff5e37c0cb28e48c..23c1d3f85c2cd40fd80cbf45eb74f38160c3eea0 100644 (file)
@@ -80,8 +80,8 @@ use constant VALIDATOR_DEPENDENCIES => {
 
 with 'Bugzilla::Elastic::Role::ChildObject';
 
-use constant ES_TYPE        => 'comment';
-use constant ES_PARENT_TYPE => 'bug';
+use constant ES_TYPE         => 'comment';
+use constant ES_PARENT_CLASS => 'Bugzilla::Bug';
 
 sub ES_OBJECTS_AT_ONCE { 50 }
 
index fa032d2a6dc1540d99c2d99bb74aa2f92940d801..3a3829e3bfcbf2dd469856770e55d0a147995591 100644 (file)
@@ -12,7 +12,6 @@ use Bugzilla::Elastic::Search;
 use Bugzilla::Util qw(trick_taint);
 
 with 'Bugzilla::Elastic::Role::HasClient';
-with 'Bugzilla::Elastic::Role::HasIndexName';
 
 sub suggest_users {
     my ($self, $text) = @_;
@@ -30,7 +29,7 @@ sub suggest_users {
 
     my $result = eval {
         $self->client->suggest(
-            index => $self->index_name,
+            index => Bugzilla::User->ES_INDEX,
             body  => {
                 $field => {
                     text       => $text,
index 46eb8f6483fafcdc6dbc5e83336393919a186370..36dd1dcb455a4f2b47377060b4460336cc23ba63 100644 (file)
@@ -11,10 +11,10 @@ use Moo;
 use List::MoreUtils qw(natatime);
 use Storable qw(dclone);
 use Scalar::Util qw(looks_like_number);
+use Time::HiRes;
 use namespace::clean;
 
 with 'Bugzilla::Elastic::Role::HasClient';
-with 'Bugzilla::Elastic::Role::HasIndexName';
 
 has 'shadow_dbh' => ( is => 'lazy' );
 
@@ -28,94 +28,34 @@ has 'progress_bar' => (
     predicate => 'has_progress_bar',
 );
 
-sub create_index {
-    my ($self) = @_;
-    my $indices = $self->client->indices;
-
-    $indices->create(
-        index => $self->index_name,
-        body => {
-            settings => {
-                number_of_shards => 2,
-                analysis => {
-                    filter => {
-                        asciifolding_original => {
-                            type              => "asciifolding",
-                            preserve_original => \1,
-                        },
-                    },
-                    analyzer => {
-                        autocomplete => {
-                            type      => 'custom',
-                            tokenizer => 'keyword',
-                            filter    => ['lowercase', 'asciifolding_original'],
-                        },
-                        folding => {
-                            tokenizer => 'standard',
-                            filter    => ['standard', 'lowercase', 'asciifolding_original'],
-                        },
-                        bz_text_analyzer => {
-                            type             => 'standard',
-                            filter           => ['lowercase', 'stop'],
-                            max_token_length => '20'
-                        },
-                        bz_equals_analyzer => {
-                            type   => 'custom',
-                            filter => ['lowercase'],
-                            tokenizer => 'keyword',
-                        },
-                        whiteboard_words => {
-                            type => 'custom',
-                            tokenizer => 'whiteboard_words_pattern',
-                            filter => ['stop']
-                        },
-                        whiteboard_shingle_words => {
-                            type => 'custom',
-                            tokenizer => 'whiteboard_words_pattern',
-                            filter => ['stop', 'shingle', 'lowercase']
-                        },
-                        whiteboard_tokens => {
-                            type => 'custom',
-                            tokenizer => 'whiteboard_tokens_pattern',
-                            filter => ['stop', 'lowercase']
-                        },
-                        whiteboard_shingle_tokens => {
-                            type => 'custom',
-                            tokenizer => 'whiteboard_tokens_pattern',
-                            filter => ['stop', 'shingle', 'lowercase']
-                        }
-                    },
-                    tokenizer => {
-                        whiteboard_tokens_pattern => {
-                            type => 'pattern',
-                            pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
-                        },
-                        whiteboard_words_pattern => {
-                            type => 'pattern',
-                            pattern => '[\\[\\];,\\s]+'
-                        },
-                    },
-                },
-            },
-        }
-    ) unless $indices->exists(index => $self->index_name);
+
+sub _create_index {
+    my ($self, $class) = @_;
+    my $indices    = $self->client->indices;
+    my $index_name = $class->ES_INDEX;
+
+    unless ($indices->exists(index => $index_name)) {
+        $indices->create(
+            index => $index_name,
+            body  => { settings => $class->ES_SETTINGS },
+        );
+    }
 }
 
 sub _bulk_helper {
     my ($self, $class) = @_;
 
     return $self->client->bulk_helper(
-        index => $self->index_name,
+        index => $class->ES_INDEX,
         type  => $class->ES_TYPE,
     );
 }
 
-
 sub _find_largest {
     my ($self, $class, $field) = @_;
 
     my $result = $self->client->search(
-        index => $self->index_name,
+        index => $class->ES_INDEX,
         type  => $class->ES_TYPE,
         body  => {
             aggs => { $field => { extended_stats => { field => $field } } },
@@ -147,7 +87,7 @@ sub _find_largest_id {
     return $self->_find_largest($class, $class->ID_FIELD);
 }
 
-sub put_mapping {
+sub _put_mapping {
     my ($self, $class) = @_;
 
     my %body = ( properties => scalar $class->ES_PROPERTIES );
@@ -156,7 +96,7 @@ sub put_mapping {
     }
 
     $self->client->indices->put_mapping(
-        index => $self->index_name,
+        index => $class->ES_INDEX,
         type => $class->ES_TYPE,
         body => \%body,
     );
@@ -178,13 +118,15 @@ sub _debug_sql {
 sub bulk_load {
     my ( $self, $class ) = @_;
 
+    $self->_create_index($class);
+
     my $bulk        = $self->_bulk_helper($class);
     my $last_mtime  = $self->_find_largest_mtime($class);
     my $last_id     = $self->_find_largest_id($class);
     my $new_ids     = $self->_select_all_ids($class, $last_id);
     my $updated_ids = $self->_select_updated_ids($class, $last_mtime);
 
-    $self->put_mapping($class);
+    $self->_put_mapping($class);
     $self->_bulk_load_ids($bulk, $class, $new_ids) if @$new_ids;
     $self->_bulk_load_ids($bulk, $class, $updated_ids) if @$updated_ids;
 
@@ -213,7 +155,8 @@ sub _select_updated_ids {
 sub bulk_load_ids {
     my ($self, $class, $ids) = @_;
 
-    $self->put_mapping($class);
+    $self->_create_index($class);
+    $self->_put_mapping($class);
     $self->_bulk_load_ids($self->_bulk_helper($class), $class, $ids);
 }
 
@@ -238,7 +181,6 @@ sub _bulk_load_ids {
     }
 
     my $total = 0;
-    use Time::HiRes;
     my $start = time;
     while (my @ids = $iter->()) {
         if ($progress_bar) {
index 1f7a7483a93e88663d9d16ba88d648c7d5c8ed58..9735cc1ed8ca9c01beb88e835ae99d52a82719b6 100644 (file)
@@ -11,6 +11,10 @@ use Role::Tiny;
 
 with 'Bugzilla::Elastic::Role::Object';
 
-requires qw(ES_PARENT_TYPE es_parent_id);
+requires qw(ES_PARENT_CLASS es_parent_id);
+
+sub ES_PARENT_TYPE { $_[0]->ES_PARENT_CLASS->ES_TYPE }
+sub ES_INDEX       { $_[0]->ES_PARENT_CLASS->ES_INDEX }
+sub ES_SETTINGS    { $_[0]->ES_PARENT_CLASS->ES_SETTINGS }
 
 1;
diff --git a/Bugzilla/Elastic/Role/HasIndexName.pm b/Bugzilla/Elastic/Role/HasIndexName.pm
deleted file mode 100644 (file)
index eaff339..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-# This Source Code Form is "Incompatible With Secondary Licenses", as
-# defined by the Mozilla Public License, v. 2.0.
-package Bugzilla::Elastic::Role::HasIndexName;
-
-use 5.10.1;
-use Moo::Role;
-use Search::Elasticsearch;
-
-has 'index_name' => ( is => 'ro', default => sub { Bugzilla->params->{elasticsearch_index} } );
-
-
-1;
index ad5ab002b6fc055c313ddcd741d9679d35bc529f..c51948ee9de09bf9421fa70644af0c6178afcf28 100644 (file)
@@ -9,7 +9,7 @@ package Bugzilla::Elastic::Role::Object;
 use 5.10.1;
 use Role::Tiny;
 
-requires qw(ES_TYPE ES_PROPERTIES es_document);
+requires qw(ES_TYPE ES_INDEX ES_SETTINGS ES_PROPERTIES es_document);
 requires qw(ID_FIELD DB_TABLE);
 
 sub ES_OBJECTS_AT_ONCE { 100 }
@@ -45,4 +45,6 @@ around 'es_document' => sub {
     return $doc;
 };
 
+
+
 1;
index e1af91032d225ec87c40efce17ba9be514b19b19..26ab71bec1268e0afbdcbff8972d72c968ddbd98 100644 (file)
@@ -31,7 +31,6 @@ has '_order'       => ( is => 'lazy', init_arg => undef );
 has 'invalid_order_columns' => ( is => 'lazy' );
 
 with 'Bugzilla::Elastic::Role::HasClient';
-with 'Bugzilla::Elastic::Role::HasIndexName';
 with 'Bugzilla::Elastic::Role::Search';
 
 my @SUPPORTED_FIELDS = qw(
@@ -92,9 +91,9 @@ sub data {
     my $body = $self->es_query;
     my $result = eval {
         $self->client->search(
-            index => $self->index_name,
-            type => 'bug',
-            body => $body,
+            index => Bugzilla::Bug->ES_INDEX,
+            type  => Bugzilla::Bug->ES_TYPE,
+            body  => $body,
         );
     };
     die $@ unless $result;
index 2d7f386400cb9fa06d102f15ed311f1f3aafda97..5257330698e1e37bd66f9f94e1d4a2b6fc1b9007 100644 (file)
@@ -126,6 +126,11 @@ use constant EXTRA_REQUIRED_FIELDS => qw(is_enabled);
 
 with 'Bugzilla::Elastic::Role::Object';
 
+sub ES_INDEX {
+    my ($class) = @_;
+    sprintf("%s_%s", Bugzilla->params->{elasticsearch_index}, $class->ES_TYPE);
+}
+
 sub ES_TYPE { 'user' }
 
 sub ES_OBJECTS_AT_ONCE { 5000 }
@@ -153,6 +158,31 @@ sub ES_SELECT_ALL_SQL {
     return ("SELECT $id FROM $table WHERE $id > ? AND is_enabled AND NOT disabledtext ORDER BY $id", [$last_id // 0]);
 }
 
+sub ES_SETTINGS {
+    return {
+        number_of_shards => 2,
+        analysis         => {
+            filter => {
+                asciifolding_original => {
+                    type              => "asciifolding",
+                    preserve_original => \1,
+                },
+            },
+            analyzer => {
+                autocomplete => {
+                    type      => 'custom',
+                    tokenizer => 'keyword',
+                    filter    => [ 'lowercase', 'asciifolding_original' ],
+                },
+                folding => {
+                    tokenizer => 'standard',
+                    filter    => [ 'standard', 'lowercase', 'asciifolding_original' ],
+                },
+            }
+        }
+    };
+}
+
 sub ES_PROPERTIES {
     return {
         suggest_user => {
index 8d04b7e8dd1d1620b20aa12e3c504d01a4217e8d..cd249a55c21a602668eeabd6fb084640961a5ac7 100755 (executable)
@@ -43,12 +43,10 @@ if ($progress_bar) {
 }
 
 my $indexer = Bugzilla::Elastic::Indexer->new(
-    $debug_sql ? ( debug_sql => 1 ) : (),
+    $debug_sql    ? ( debug_sql => 1 )                      : (),
     $progress_bar ? ( progress_bar => 'Term::ProgressBar' ) : (),
 );
 
-$indexer->create_index;
-
 my $run_time = time;
 my $loop = IO::Async::Loop->new;
 my $timer = IO::Async::Timer::Periodic->new(