This makes a significant difference when loading large files and during
commit and clear operations, thanks to improved cache locality. In the
measurements below, "master" refers to the code before the whole series of
changes to the patterns code, not just the code before this single commit.
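The change caches a pointer to the current generation inside the pat_ref so
that looking it up no longer has to walk the generation tree each time. A
minimal standalone sketch of that pattern is shown below, with simplified
names (an array scan stands in for the ceb tree, no locking); the real
pat_ref/pat_ref_gen code is in the diff further down:

    #include <stdio.h>

    struct gen {
            unsigned int id;
    };

    struct ref {
            struct gen *gens;       /* all generations (stand-in for the tree) */
            unsigned int nb_gens;
            unsigned int curr_gen;  /* id of the current generation */
            struct {                /* cached lookup result for curr_gen */
                    struct gen *data;
                    unsigned int id;
            } cached_gen;
    };

    /* slow path: scan the container (a tree walk in the real code) */
    static struct gen *gen_lookup(struct ref *ref, unsigned int id)
    {
            unsigned int i;

            for (i = 0; i < ref->nb_gens; i++)
                    if (ref->gens[i].id == id)
                            return &ref->gens[i];
            return NULL;
    }

    /* fast path: return the cached pointer when the current generation is wanted */
    static struct gen *gen_get(struct ref *ref, unsigned int id)
    {
            struct gen *g;

            if (id == ref->curr_gen && id == ref->cached_gen.id && ref->cached_gen.data)
                    return ref->cached_gen.data;

            g = gen_lookup(ref, id);
            if (g && id == ref->curr_gen) {
                    ref->cached_gen.id = id;
                    ref->cached_gen.data = g;
            }
            return g;
    }

    int main(void)
    {
            struct gen gens[2] = { { .id = 0 }, { .id = 1 } };
            struct ref ref = { .gens = gens, .nb_gens = 2, .curr_gen = 1,
                               .cached_gen = { .data = NULL, .id = 1 } };

            /* the first call scans and fills the cache, the second returns it directly */
            printf("%p\n", (void *)gen_get(&ref, 1));
            printf("%p\n", (void *)gen_get(&ref, 1));
            return 0;
    }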
Timing the replacement of 10M entries from the CLI with this command, which
also reports timestamps at the start, at the end of the upload and at the
end of the clear:
$ (echo "prompt i"; echo "show activity"; echo "prepare acl #0";
awk '{print "add acl @1 #0",$0}' < bad-ip.map; echo "show activity";
echo "commit acl @1 #0"; echo "clear acl @0 #0";echo "show activity") |
socat -t 10 - /tmp/sock1 | grep ^uptim
master, on a 3.7 GHz EPYC, 3 samples:
uptime_now: 6.087030
uptime_now: 25.981777 => 19.9 sec insertion time
uptime_now: 29.286368 => 3.3 sec commit+clear
uptime_now: 5.748087
uptime_now: 25.740675 => 20.0s insertion time
uptime_now: 29.039023 => 3.3 s commit+clear
uptime_now: 7.065362
uptime_now: 26.769596 => 19.7s insertion time
uptime_now: 30.065044 => 3.3s commit+clear
And after this commit:
uptime_now: 6.119215
uptime_now: 25.023019 => 18.9 sec insertion time
uptime_now: 27.155503 => 2.1 sec commit+clear
uptime_now: 5.675931
uptime_now: 24.551035 => 18.9s insertion
uptime_now: 26.652352 => 2.1s commit+clear
uptime_now: 6.722256
uptime_now: 25.593952 => 18.9s insertion
uptime_now: 27.724153 => 2.1s commit+clear
Now timing the startup time with a 10M-entry file (on another machine),
on master, 20 samples:
Standard Deviation, s: 0.061652677408033
Mean: 4.217
And after this commit:
Standard Deviation, s: 0.081821371548669
Mean: 3.78
unsigned int flags; /* flags PAT_REF_*. */
unsigned int curr_gen; /* current generation number (anything below can be removed) */
unsigned int next_gen; /* next generation number (insertions use this one) */
+ /* We keep a cached pointer to the current generation for performance. */
+ struct {
+ struct pat_ref_gen *data;
+ unsigned int id;
+ } cached_gen;
int unique_id; /* Each pattern reference have unique id. */
unsigned long long revision; /* updated for each update */
unsigned long long entry_cnt; /* the total number of entries */
*/
struct pat_ref_gen *pat_ref_gen_get(struct pat_ref *ref, unsigned int gen_id)
{
- return cebu32_item_lookup(&ref->gen_root, gen_node, gen_id, gen_id, struct pat_ref_gen);
+ struct pat_ref_gen *gen;
+
+ /* We optimistically try to use the cached generation if it's the current one. */
+ if (likely(gen_id == ref->curr_gen && gen_id == ref->cached_gen.id && ref->cached_gen.data))
+ return ref->cached_gen.data;
+
+ gen = cebu32_item_lookup(&ref->gen_root, gen_node, gen_id, gen_id, struct pat_ref_gen);
+ if (unlikely(!gen))
+ return NULL;
+
+ if (gen_id == ref->curr_gen) {
+ ref->cached_gen.id = gen_id;
+ ref->cached_gen.data = gen;
+ }
+ return gen;
}
/* This function removes all elements belonging to <gen_id> and matching <key>
ref->revision = 0;
ref->entry_cnt = 0;
ceb_init_root(&ref->gen_root);
+ ref->cached_gen.id = ref->curr_gen;
+ ref->cached_gen.data = NULL;
LIST_INIT(&ref->pat);
HA_RWLOCK_INIT(&ref->lock);
event_hdl_sub_list_init(&ref->e_subs);
BUG_ON(!LIST_ISEMPTY(&gen->head));
BUG_ON(!ceb_isempty(&gen->elt_root));
cebu32_item_delete(&ref->gen_root, gen_node, gen_id, gen);
+ if (gen->gen_id == ref->cached_gen.id)
+ ref->cached_gen.data = NULL;
free(gen);
}
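The cached pointer only has to be dropped when the generation it points to
is freed, as done in the hunk above; advancing curr_gen on its own is
harmless because the fast path also checks gen_id == ref->curr_gen before
using the cache. If more deletion paths appear, the invalidation could be
centralized in a small helper; the following is a hypothetical sketch, not
part of the patch:

    /* Hypothetical helper (not in the patch): forget the cached generation
     * pointer when it refers to generation <gen_id>, to be called right
     * before that generation is freed.
     */
    static inline void pat_ref_gen_uncache(struct pat_ref *ref, unsigned int gen_id)
    {
            if (ref->cached_gen.id == gen_id)
                    ref->cached_gen.data = NULL;
    }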