git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
stats: Add support for linear stats group-by fxn
author: Josef 'Jeff' Sipek <jeff.sipek@open-xchange.com>
Wed, 19 Feb 2020 16:02:24 +0000 (11:02 -0500)
committer: jeff.sipek <jeff.sipek@open-xchange.com>
Fri, 13 Mar 2020 08:25:17 +0000 (08:25 +0000)
One can specify the quantization parameters in the config file as:

<field>:linear:<min val>:<max val>:<step>

For example:

group_by = bytes_out:linear:0:1000:100

This quantizes the bytes_out values into the buckets: (-inf, 0],
(0, 100], (100, 200], ... (900, 1000], (1000, +inf).

src/stats/stats-metrics.c
src/stats/stats-metrics.h
src/stats/stats-settings.c
src/stats/stats-settings.h
src/stats/test-stats-metrics.c

index beb3f3dae2a5c3b0f9fdc5801fb829d7d3112dcb..dcdcb0fd5954621ffb4316f210e8147cb44867a5 100644 (file)
@@ -329,6 +329,10 @@ stats_metric_get_sub_metric(struct metric *metric,
                        if ((*sub_metrics)->group_value.intmax == value->intmax)
                                return *sub_metrics;
                        break;
+               case METRIC_VALUE_TYPE_BUCKET_INDEX:
+                       if ((*sub_metrics)->group_value.intmax == value->intmax)
+                               return *sub_metrics;
+                       break;
                }
        }
        return NULL;
@@ -373,6 +377,64 @@ stats_metric_group_by_discrete(const struct event_field *field,
        i_unreached();
 }
 
+/* Convert the integer value of 'field' to the index of the bucket in
+   group_by->ranges[] that contains it.
+
+   Returns FALSE if the field is a string or a timeval, which cannot be
+   quantized.  Otherwise sets value->type to METRIC_VALUE_TYPE_BUCKET_INDEX,
+   stores the bucket index in value->intmax and returns TRUE.  Panics if no
+   bucket contains the value. */
+static bool
+stats_metric_group_by_quantized(const struct event_field *field,
+                               struct metric_value *value,
+                               const struct stats_metric_settings_group_by *group_by)
+{
+       switch (field->value_type) {
+       case EVENT_FIELD_VALUE_TYPE_STR:
+       case EVENT_FIELD_VALUE_TYPE_TIMEVAL:
+               return FALSE;
+       case EVENT_FIELD_VALUE_TYPE_INTMAX:
+               break;
+       }
+
+       value->type = METRIC_VALUE_TYPE_BUCKET_INDEX;
+
+       for (unsigned int i = 0; i < group_by->num_ranges; i++) {
+               const struct stats_metric_settings_bucket_range *range =
+                       &group_by->ranges[i];
+
+               /* Buckets are (min, max].  A min of INTMAX_MIN stands for
+                  -Inf, so such a bucket has no lower bound at all.  Without
+                  this exception a value of exactly INTMAX_MIN would be
+                  excluded from every bucket and hit the panic below. */
+               if ((range->min != INTMAX_MIN) &&
+                   (field->value.intmax <= range->min))
+                       continue;
+               if (field->value.intmax > range->max)
+                       continue;
+
+               value->intmax = i;
+               return TRUE;
+       }
+
+       i_panic("failed to find a matching bucket for '%s'=%jd",
+               group_by->field, field->value.intmax);
+}
+
+/* Build the label for the bucket at 'bucket_index' of group_by->ranges[].
+   The label is "<field>_ninf_<max>" for a bucket whose min is INTMAX_MIN,
+   "<field>_<min+1>_inf" for a bucket whose max is INTMAX_MAX, and
+   "<field>_<min+1>_<max>" otherwise.  Only integer fields are expected
+   here. */
+static const char *
+stats_metric_group_by_quantized_label(const struct event_field *field,
+                                     const struct stats_metric_settings_group_by *group_by,
+                                     const size_t bucket_index)
+{
+       const struct stats_metric_settings_bucket_range *bucket =
+               &group_by->ranges[bucket_index];
+
+       switch (field->value_type) {
+       case EVENT_FIELD_VALUE_TYPE_INTMAX:
+               break;
+       case EVENT_FIELD_VALUE_TYPE_STR:
+       case EVENT_FIELD_VALUE_TYPE_TIMEVAL:
+               i_unreached();
+       }
+
+       /* bucket without a lower bound */
+       if (bucket->min == INTMAX_MIN) {
+               return t_strdup_printf("%s_ninf_%jd", group_by->field,
+                                      bucket->max);
+       }
+
+       /* bucket without an upper bound */
+       if (bucket->max == INTMAX_MAX) {
+               return t_strdup_printf("%s_%jd_inf", group_by->field,
+                                      bucket->min + 1);
+       }
+
+       /* the minimum is exclusive, so the first value in the bucket is
+          min + 1 */
+       return t_strdup_printf("%s_%jd_%jd", group_by->field,
+                              bucket->min + 1, bucket->max);
+}
+
 static void
 stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool)
 {
@@ -390,6 +452,10 @@ stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool)
                if (!stats_metric_group_by_discrete(field, &value))
                        return;
                break;
+       case STATS_METRIC_GROUPBY_QUANTIZED:
+               if (!stats_metric_group_by_quantized(field, &value, group_by))
+                       return;
+               break;
        }
 
        if (!array_is_created(&metric->sub_metrics))
@@ -407,6 +473,17 @@ stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool)
                case METRIC_VALUE_TYPE_INT:
                        value_label = dec2str(field->value.intmax);
                        break;
+               case METRIC_VALUE_TYPE_BUCKET_INDEX:
+                       switch (group_by->func) {
+                       case STATS_METRIC_GROUPBY_DISCRETE:
+                               i_unreached();
+                       case STATS_METRIC_GROUPBY_QUANTIZED:
+                               value_label = stats_metric_group_by_quantized_label(field,
+                                                                                   group_by,
+                                                                                   value.intmax);
+                               break;
+                       }
+                       break;
                }
 
                sub_metric = stats_metric_sub_metric_alloc(metric, value_label,
@@ -415,6 +492,7 @@ stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool)
                        sub_metric->group_by_count = metric->group_by_count - 1;
                        sub_metric->group_by = &metric->group_by[1];
                }
+               sub_metric->group_value.type = value.type;
                sub_metric->group_value.intmax = value.intmax;
                memcpy(sub_metric->group_value.hash, value.hash, SHA1_RESULTLEN);
        } T_END;
index 5481acb3e28fd38a7825afb95f6172f092aa3090..fe9048a0cea2a2c41d7da9dd24cdc292547fd5c4 100644 (file)
@@ -55,6 +55,7 @@ struct metric_field {
 enum metric_value_type {
        METRIC_VALUE_TYPE_STR,
        METRIC_VALUE_TYPE_INT,
+       /* the value's intmax is an index into the group_by's ranges[] */
+       METRIC_VALUE_TYPE_BUCKET_INDEX,
 };
 
 struct metric_value {
index db2ad5df26a92c8335a89244002fe1d226f5e141..ed6464f524a93fd0e2eb238530912b3ce5dfb0d8 100644 (file)
@@ -280,6 +280,90 @@ static bool stats_exporter_settings_check(void *_set, pool_t pool ATTR_UNUSED,
        return TRUE;
 }
 
+/* Parse the three integer parameters of a quantizing group_by function:
+   <min>:<max>:<other>.  'func' is the function's name and is used only in
+   error messages.  The meaning of the third parameter is up to the caller
+   (the 'linear' function uses it as the step).
+
+   All three values must be valid non-negative integers and min must be less
+   than max.  On success the values are stored in *min_r, *max_r and
+   *other_r and TRUE is returned.  On failure *error_r is set and FALSE is
+   returned. */
+static bool parse_metric_group_by_common(const char *func,
+                                        const char *const *params,
+                                        intmax_t *min_r,
+                                        intmax_t *max_r,
+                                        intmax_t *other_r,
+                                        const char **error_r)
+{
+       intmax_t min, max, other;
+
+       if ((str_array_length(params) != 3) ||
+           (str_to_intmax(params[0], &min) < 0) ||
+           (str_to_intmax(params[1], &max) < 0) ||
+           (str_to_intmax(params[2], &other) < 0)) {
+               *error_r = t_strdup_printf("group_by '%s' aggregate function takes "
+                                          "3 int args", func);
+               return FALSE;
+       }
+
+       if ((min < 0) || (max < 0) || (other < 0)) {
+               *error_r = t_strdup_printf("group_by '%s' aggregate function "
+                                          "arguments must be >= 0", func);
+               return FALSE;
+       }
+
+       if (min >= max) {
+               /* min and max are intmax_t, so they need %jd rather than
+                  %ju */
+               *error_r = t_strdup_printf("group_by '%s' aggregate function "
+                                          "min must be < max (%jd must be < %jd)",
+                                          func, min, max);
+               return FALSE;
+       }
+
+       *min_r = min;
+       *max_r = max;
+       *other_r = other;
+
+       return TRUE;
+}
+
+/* Parse the parameters of the 'linear' group_by function
+   (<min val>:<max val>:<step>) and fill in group_by->func,
+   group_by->num_ranges and group_by->ranges, which is allocated from 'pool'.
+
+   Returns TRUE on success.  On failure sets *error_r and returns FALSE
+   without modifying group_by. */
+static bool parse_metric_group_by_lin(pool_t pool, struct stats_metric_settings_group_by *group_by,
+                                     const char *const *params, const char **error_r)
+{
+       intmax_t min, max, step, num_steps;
+       uintmax_t total_ranges;
+
+       if (!parse_metric_group_by_common("linear", params, &min, &max, &step, error_r))
+               return FALSE;
+
+       /* a zero step would cause a division by zero below */
+       if (step == 0) {
+               *error_r = "group_by 'linear' aggregate function "
+                          "step must be > 0";
+               return FALSE;
+       }
+
+       /* 0 <= min < max holds here, so 'max - min' cannot overflow, while
+          'min + step' could.  The sum for the error message is computed as
+          unsigned for the same reason. */
+       if (step > (max - min)) {
+               *error_r = t_strdup_printf("group_by 'linear' aggregate function "
+                                          "min+step must be <= max (%ju must be <= %ju)",
+                                          (uintmax_t)min + (uintmax_t)step,
+                                          (uintmax_t)max);
+               return FALSE;
+       }
+
+       /* number of buckets between 'min' and 'max', rounded up so that a
+          step which does not evenly divide the range leaves no gap below
+          'max' */
+       num_steps = (max - min) / step;
+       if (((max - min) % step) != 0)
+               num_steps++;
+
+       /* two more for the (-Inf, min] and (max, +Inf) buckets; make sure
+          the total survives being stored in an unsigned int */
+       total_ranges = (uintmax_t)num_steps + 2;
+       if ((uintmax_t)(unsigned int)total_ranges != total_ranges) {
+               *error_r = "group_by 'linear' aggregate function "
+                          "results in too many buckets";
+               return FALSE;
+       }
+
+       group_by->func = STATS_METRIC_GROUPBY_QUANTIZED;
+
+       /*
+        * Allocate the bucket range array and fill it in
+        *
+        * The first bucket is special - it contains everything less than or
+        * equal to 'min'.  The last bucket is also special - it contains
+        * everything greater than 'max'.
+        *
+        * The second bucket begins at 'min + 1', the third bucket begins at
+        * 'min + 1 * step + 1', the fourth at 'min + 2 * step + 1', and so on.
+        * If 'step' does not evenly divide 'max - min', the last of these
+        * buckets is shorter than 'step' and ends at 'max'.
+        */
+       group_by->num_ranges = (unsigned int)total_ranges;
+       group_by->ranges = p_new(pool, struct stats_metric_settings_bucket_range,
+                                group_by->num_ranges);
+
+       /* set up min & max buckets */
+       group_by->ranges[0].min = INTMAX_MIN;
+       group_by->ranges[0].max = min;
+       group_by->ranges[group_by->num_ranges - 1].min = max;
+       group_by->ranges[group_by->num_ranges - 1].max = INTMAX_MAX;
+
+       /* remaining buckets */
+       for (unsigned int i = 1; i < group_by->num_ranges - 1; i++) {
+               group_by->ranges[i].min = min + (i - 1) * step;
+               /* end the last regular bucket exactly at 'max'; this also
+                  avoids computing a 'min + i * step' that is past 'max' */
+               if (i == group_by->num_ranges - 2)
+                       group_by->ranges[i].max = max;
+               else
+                       group_by->ranges[i].max = min + i * step;
+       }
+
+       return TRUE;
+}
+
 static bool parse_metric_group_by(struct stats_metric_settings *set,
                                  pool_t pool, const char **error_r)
 {
@@ -311,6 +395,10 @@ static bool parse_metric_group_by(struct stats_metric_settings *set,
                                           "does not take any args";
                                return FALSE;
                        }
+               } else if (strcmp(params[1], "linear") == 0) {
+                       /* <field>:linear:<min val>:<max val>:<step> */
+                       if (!parse_metric_group_by_lin(pool, &group_by, &params[2], error_r))
+                               return FALSE;
                } else {
                        *error_r = t_strdup_printf("unknown aggregation function "
                                                   "'%s' on field '%s'", params[1], params[0]);
index 38bfbf38532ca9186ef581f2241953e4b31cb3cf..d5a722477939e8c8d909bb0389a93e90330178c3 100644 (file)
@@ -72,11 +72,25 @@ struct stats_exporter_settings {
 /* <settings checks> */
 enum stats_metric_group_by_func {
        STATS_METRIC_GROUPBY_DISCRETE = 0,
+       /* integer values are quantized into the buckets listed in
+          stats_metric_settings_group_by.ranges */
+       STATS_METRIC_GROUPBY_QUANTIZED,
+};
+
+/*
+ * A range covering a stats bucket.  The interval is half closed - the
+ * minimum is excluded and the maximum is included.  In other words: (min, max].
+ * Because we don't have a -Inf and +Inf, we use INTMAX_MIN and INTMAX_MAX
+ * respectively.
+ */
+struct stats_metric_settings_bucket_range {
+       intmax_t min;
+       intmax_t max;
 };
 
 struct stats_metric_settings_group_by {
        const char *field;
        enum stats_metric_group_by_func func;
+       /* number of entries in ranges[] and the bucket ranges themselves;
+          filled in when a 'linear' group_by function is parsed */
+       unsigned int num_ranges;
+       struct stats_metric_settings_bucket_range *ranges;
 };
 /* </settings checks> */
 
index 81b19a5eed48518ec2f288ed35e8de73e41cb504..183e81155a76952f986d0e50f56384c162029253 100644 (file)
@@ -233,11 +233,149 @@ static void test_stats_metrics_group_by_discrete(void)
                test_stats_metrics_group_by_discrete_real(&discrete_tests[i]);
 }
 
+/* capacity of the input_vals[] and ranges[] arrays of a quantized_test */
+#define QUANTIZED_TEST_VAL_COUNT       15
+/* one test case for a quantizing group_by function */
+struct quantized_test {
+       /* group_by function and its parameters; inserted after "foobar:"
+          in the test's group_by setting */
+       const char *settings_blob;
+       /* number of used entries in input_vals[]; one event is sent per
+          entry with the value as the "foobar" field */
+       unsigned int num_inputs;
+       intmax_t input_vals[QUANTIZED_TEST_VAL_COUNT];
+
+       /* expected number of second level sub-metrics */
+       unsigned int num_sub_metrics;
+
+       /* expected number of bucket ranges, and for each of them the
+          expected range together with the count that the sub-metric of
+          that bucket is checked against */
+       unsigned int num_ranges;
+       struct {
+               struct stats_metric_settings_bucket_range range;
+               intmax_t count;
+       } ranges[QUANTIZED_TEST_VAL_COUNT];
+};
+
+/* test cases run by test_stats_metrics_group_by_quantized() */
+static const struct quantized_test quantized_tests[] = {
+       {
+               .settings_blob = "linear:100:1000:100",
+               .num_inputs = 13,
+               .input_vals = {
+                       0, 50, 100,
+                       101, 200,
+                       201, 250,
+                       301,
+                       900,
+                       901, 1000,
+                       1001, 2000,
+               },
+               .num_sub_metrics = 7,
+               .num_ranges = 11,
+               .ranges = {
+                       { .range = { .min = INTMAX_MIN, .max = 100 }, .count = 3 },
+                       { .range = { .min = 100, .max = 200 }, .count = 2 },
+                       { .range = { .min = 200, .max = 300 }, .count = 2 },
+                       { .range = { .min = 300, .max = 400 }, .count = 1 },
+                       { .range = { .min = 400, .max = 500 }, .count = 0 },
+                       { .range = { .min = 500, .max = 600 }, .count = 0 },
+                       { .range = { .min = 600, .max = 700 }, .count = 0 },
+                       { .range = { .min = 700, .max = 800 }, .count = 0 },
+                       { .range = { .min = 800, .max = 900 }, .count = 1 },
+                       { .range = { .min = 900, .max = 1000 }, .count = 2 },
+                       { .range = { .min = 1000, .max = INTMAX_MAX }, .count = 2 },
+               },
+       },
+};
+
+/* Run one quantized group-by test case: configure a metric grouped by
+   "test_name" and then by "foobar:<settings_blob>", send one event per input
+   value, and verify the configured bucket ranges and the resulting
+   sub-metrics against the expectations in 'test'. */
+static void test_stats_metrics_group_by_quantized_real(const struct quantized_test *test)
+{
+       unsigned int i;
+
+       test_begin(t_strdup_printf("stats metrics (quantized group by) - %s",
+                                  test->settings_blob));
+
+       test_init(t_strdup_printf("metric=test\n"
+                                 "metric/test/name=test\n"
+                                 "metric/test/event_name=test\n"
+                                 "metric/test/group_by=test_name foobar:%s\n"
+                                 "\n", test->settings_blob));
+
+       struct event *event;
+
+       /* every event carries test_name=alpha, so only the "foobar" value
+          differs between them */
+       for (i = 0; i < test->num_inputs; i++) {
+               event = event_create(NULL);
+               event_add_category(event, &test_category);
+               event_set_name(event, "test");
+               event_add_str(event, "test_name", "alpha");
+               event_add_int(event, "foobar", test->input_vals[i]);
+               test_event_send(event);
+               event_unref(&event);
+       }
+
+       /* check total number of events */
+       test_assert(get_stats_dist_field("test", STATS_DIST_COUNT) == test->num_inputs);
+
+       /* analyze the structure */
+       struct stats_metrics_iter *iter = stats_metrics_iterate_init(metrics);
+       const struct metric *root_metric = stats_metrics_iterate(iter);
+       stats_metrics_iterate_deinit(&iter);
+
+       test_stats_metrics_group_by_check_one(root_metric, NULL, test->num_inputs,
+                                             1, 2, STATS_METRIC_GROUPBY_DISCRETE,
+                                             "test_name", 0);
+
+       /* examine first level sub-metric */
+       struct metric *const *first = array_idx(&root_metric->sub_metrics, 0);
+       test_stats_metrics_group_by_check_one(first[0],
+                                             "alpha",
+                                             test->num_inputs,
+                                             test->num_sub_metrics,
+                                             1,
+                                             STATS_METRIC_GROUPBY_QUANTIZED,
+                                             "foobar",
+                                             METRIC_VALUE_TYPE_STR);
+
+       /* check the ranges */
+       test_assert(first[0]->group_by[0].num_ranges == test->num_ranges);
+       for (i = 0; i < test->num_ranges; i++) {
+               test_assert(first[0]->group_by[0].ranges[i].min == test->ranges[i].range.min);
+               test_assert(first[0]->group_by[0].ranges[i].max == test->ranges[i].range.max);
+       }
+
+       /* examine second level sub-metrics */
+       struct metric *const *second = array_idx(&first[0]->sub_metrics, 0);
+
+       for (i = 0; i < test->num_sub_metrics; i++) {
+               const char *sub_name;
+               intmax_t range_idx;
+
+               /* we check these first, before we use the value below */
+               /* NOTE(review): if test_assert() only records a failure and
+                  does not abort, an out-of-range or negative
+                  group_value.intmax is still used as an array index below -
+                  confirm */
+               test_assert(second[i]->group_value.type == METRIC_VALUE_TYPE_BUCKET_INDEX);
+               test_assert(second[i]->group_value.intmax < test->num_ranges);
+
+               range_idx = second[i]->group_value.intmax;
+
+               /* construct the expected sub-metric name */
+               if (test->ranges[range_idx].range.min == INTMAX_MIN) {
+                       sub_name = t_strdup_printf("foobar_ninf_%jd",
+                                                  test->ranges[range_idx].range.max);
+               } else if (test->ranges[range_idx].range.max == INTMAX_MAX) {
+                       sub_name = t_strdup_printf("foobar_%jd_inf",
+                                                  test->ranges[range_idx].range.min + 1);
+               } else {
+                       sub_name = t_strdup_printf("foobar_%jd_%jd",
+                                                  test->ranges[range_idx].range.min + 1,
+                                                  test->ranges[range_idx].range.max);
+               }
+
+               test_stats_metrics_group_by_check_one(second[i],
+                                                     sub_name,
+                                                     test->ranges[second[i]->group_value.intmax].count,
+                                                     0, 0, 0, NULL,
+                                                     METRIC_VALUE_TYPE_BUCKET_INDEX);
+       }
+
+       test_deinit();
+       test_end();
+}
+
+/* Run the quantized group-by test for every entry of quantized_tests[]. */
+static void test_stats_metrics_group_by_quantized(void)
+{
+       const struct quantized_test *test = quantized_tests;
+       const struct quantized_test *const end =
+               quantized_tests + N_ELEMENTS(quantized_tests);
+
+       for (; test != end; test++)
+               test_stats_metrics_group_by_quantized_real(test);
+}
+
 int main(void) {
        void (*const test_functions[])(void) = {
                test_stats_metrics,
                test_stats_metrics_filter,
                test_stats_metrics_group_by_discrete,
+               test_stats_metrics_group_by_quantized,
                NULL
        };