From: Josef 'Jeff' Sipek Date: Wed, 19 Feb 2020 16:02:24 +0000 (-0500) Subject: stats: Add support for linear stats group-by fxn X-Git-Tag: 2.3.11.2~537 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1542585e9cf0fe8d3135e102ff91da8c11f384eb;p=thirdparty%2Fdovecot%2Fcore.git stats: Add support for linear stats group-by fxn One can specify the quantization parameters in the config file as: <field>:linear:<min>:<max>:<step> For example: group_by = bytes_out:linear:0:1000:100 Which will quantize the bytes_out values into the buckets: (-inf, 0], (0,100], (100,200], ... (900, 1000], (1000, +inf). --- diff --git a/src/stats/stats-metrics.c b/src/stats/stats-metrics.c index beb3f3dae2..dcdcb0fd59 100644 --- a/src/stats/stats-metrics.c +++ b/src/stats/stats-metrics.c @@ -329,6 +329,10 @@ stats_metric_get_sub_metric(struct metric *metric, if ((*sub_metrics)->group_value.intmax == value->intmax) return *sub_metrics; break; + case METRIC_VALUE_TYPE_BUCKET_INDEX: + if ((*sub_metrics)->group_value.intmax == value->intmax) + return *sub_metrics; + break; } } return NULL; @@ -373,6 +377,64 @@ stats_metric_group_by_discrete(const struct event_field *field, i_unreached(); } +/* convert the value to a bucket index */ +static bool +stats_metric_group_by_quantized(const struct event_field *field, + struct metric_value *value, + const struct stats_metric_settings_group_by *group_by) +{ + switch (field->value_type) { + case EVENT_FIELD_VALUE_TYPE_STR: + case EVENT_FIELD_VALUE_TYPE_TIMEVAL: + return FALSE; + case EVENT_FIELD_VALUE_TYPE_INTMAX: + break; + } + + value->type = METRIC_VALUE_TYPE_BUCKET_INDEX; + + for (unsigned int i = 0; i < group_by->num_ranges; i++) { + if ((field->value.intmax <= group_by->ranges[i].min) || + (field->value.intmax > group_by->ranges[i].max)) + continue; + + value->intmax = i; + return TRUE; + } + + i_panic("failed to find a matching bucket for '%s'=%jd", + group_by->field, field->value.intmax); +} + +/* convert value to a bucket label */ +static const 
char * +stats_metric_group_by_quantized_label(const struct event_field *field, + const struct stats_metric_settings_group_by *group_by, + const size_t bucket_index) +{ + const struct stats_metric_settings_bucket_range *range = &group_by->ranges[bucket_index]; + const char *name = group_by->field; + const char *label; + + switch (field->value_type) { + case EVENT_FIELD_VALUE_TYPE_STR: + case EVENT_FIELD_VALUE_TYPE_TIMEVAL: + i_unreached(); + case EVENT_FIELD_VALUE_TYPE_INTMAX: + break; + } + + if (range->min == INTMAX_MIN) + label = t_strdup_printf("%s_ninf_%jd", name, range->max); + else if (range->max == INTMAX_MAX) + label = t_strdup_printf("%s_%jd_inf", name, range->min + 1); + else + label = t_strdup_printf("%s_%jd_%jd", name, + range->min + 1, range->max); + + return label; +} + static void stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool) { @@ -390,6 +452,10 @@ stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool) if (!stats_metric_group_by_discrete(field, &value)) return; break; + case STATS_METRIC_GROUPBY_QUANTIZED: + if (!stats_metric_group_by_quantized(field, &value, group_by)) + return; + break; } if (!array_is_created(&metric->sub_metrics)) @@ -407,6 +473,17 @@ stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool) case METRIC_VALUE_TYPE_INT: value_label = dec2str(field->value.intmax); break; + case METRIC_VALUE_TYPE_BUCKET_INDEX: + switch (group_by->func) { + case STATS_METRIC_GROUPBY_DISCRETE: + i_unreached(); + case STATS_METRIC_GROUPBY_QUANTIZED: + value_label = stats_metric_group_by_quantized_label(field, + group_by, + value.intmax); + break; + } + break; } sub_metric = stats_metric_sub_metric_alloc(metric, value_label, @@ -415,6 +492,7 @@ stats_metric_group_by(struct metric *metric, struct event *event, pool_t pool) sub_metric->group_by_count = metric->group_by_count - 1; sub_metric->group_by = &metric->group_by[1]; } + sub_metric->group_value.type = value.type; 
sub_metric->group_value.intmax = value.intmax; memcpy(sub_metric->group_value.hash, value.hash, SHA1_RESULTLEN); } T_END; diff --git a/src/stats/stats-metrics.h b/src/stats/stats-metrics.h index 5481acb3e2..fe9048a0ce 100644 --- a/src/stats/stats-metrics.h +++ b/src/stats/stats-metrics.h @@ -55,6 +55,7 @@ struct metric_field { enum metric_value_type { METRIC_VALUE_TYPE_STR, METRIC_VALUE_TYPE_INT, + METRIC_VALUE_TYPE_BUCKET_INDEX, }; struct metric_value { diff --git a/src/stats/stats-settings.c b/src/stats/stats-settings.c index db2ad5df26..ed6464f524 100644 --- a/src/stats/stats-settings.c +++ b/src/stats/stats-settings.c @@ -280,6 +280,90 @@ static bool stats_exporter_settings_check(void *_set, pool_t pool ATTR_UNUSED, return TRUE; } +static bool parse_metric_group_by_common(const char *func, + const char *const *params, + intmax_t *min_r, + intmax_t *max_r, + intmax_t *other_r, + const char **error_r) +{ + intmax_t min, max, other; + + if ((str_array_length(params) != 3) || + (str_to_intmax(params[0], &min) < 0) || + (str_to_intmax(params[1], &max) < 0) || + (str_to_intmax(params[2], &other) < 0)) { + *error_r = t_strdup_printf("group_by '%s' aggregate function takes " + "3 int args", func); + return FALSE; + } + + if ((min < 0) || (max < 0) || (other < 0)) { + *error_r = t_strdup_printf("group_by '%s' aggregate function " + "arguments must be >= 0", func); + return FALSE; + } + + if (min >= max) { + *error_r = t_strdup_printf("group_by '%s' aggregate function " + "min must be < max (%ju must be < %ju)", + func, min, max); + return FALSE; + } + + *min_r = min; + *max_r = max; + *other_r = other; + + return TRUE; +} + +static bool parse_metric_group_by_lin(pool_t pool, struct stats_metric_settings_group_by *group_by, + const char *const *params, const char **error_r) +{ + intmax_t min, max, step; + + if (!parse_metric_group_by_common("linear", params, &min, &max, &step, error_r)) + return FALSE; + + if ((min + step) > max) { + *error_r = t_strdup_printf("group_by 
'linear' aggregate function " + "min+step must be <= max (%ju must be <= %ju)", + min + step, max); + return FALSE; + } + + group_by->func = STATS_METRIC_GROUPBY_QUANTIZED; + + /* + * Allocate the bucket range array and fill it in + * + * The first bucket is special - it contains everything less than or + * equal to 'min'. The last bucket is also special - it contains + * everything greater than 'max'. + * + * The second bucket begins at 'min + 1', the third bucket begins at + * 'min + 1 * step + 1', the fourth at 'min + 2 * step + 1', and so on. + */ + group_by->num_ranges = (max - min) / step + 2; + group_by->ranges = p_new(pool, struct stats_metric_settings_bucket_range, + group_by->num_ranges); + + /* set up min & max buckets */ + group_by->ranges[0].min = INTMAX_MIN; + group_by->ranges[0].max = min; + group_by->ranges[group_by->num_ranges - 1].min = max; + group_by->ranges[group_by->num_ranges - 1].max = INTMAX_MAX; + + /* remaining buckets */ + for (unsigned int i = 1; i < group_by->num_ranges - 1; i++) { + group_by->ranges[i].min = min + (i - 1) * step; + group_by->ranges[i].max = min + i * step; + } + + return TRUE; +} + static bool parse_metric_group_by(struct stats_metric_settings *set, pool_t pool, const char **error_r) { @@ -311,6 +395,10 @@ static bool parse_metric_group_by(struct stats_metric_settings *set, "does not take any args"; return FALSE; } + } else if (strcmp(params[1], "linear") == 0) { + /* <field>:linear:<min>:<max>:<step> */ + if (!parse_metric_group_by_lin(pool, &group_by, &params[2], error_r)) + return FALSE; } else { *error_r = t_strdup_printf("unknown aggregation function " "'%s' on field '%s'", params[1], params[0]); diff --git a/src/stats/stats-settings.h b/src/stats/stats-settings.h index 38bfbf3853..d5a7224779 100644 --- a/src/stats/stats-settings.h +++ b/src/stats/stats-settings.h @@ -72,11 +72,25 @@ struct stats_exporter_settings { /* */ enum stats_metric_group_by_func { STATS_METRIC_GROUPBY_DISCRETE = 0, + STATS_METRIC_GROUPBY_QUANTIZED, +}; + +/* + * A 
range covering a stats bucket. The interval is half closed - the + * minimum is excluded and the maximum is included. In other words: (min, max]. + * Because we don't have a +Inf and -Inf, we use INTMAX_MIN and INTMAX_MAX + * respectively. + */ +struct stats_metric_settings_bucket_range { + intmax_t min; + intmax_t max; }; struct stats_metric_settings_group_by { const char *field; enum stats_metric_group_by_func func; + unsigned int num_ranges; + struct stats_metric_settings_bucket_range *ranges; }; /* */ diff --git a/src/stats/test-stats-metrics.c b/src/stats/test-stats-metrics.c index 81b19a5eed..183e81155a 100644 --- a/src/stats/test-stats-metrics.c +++ b/src/stats/test-stats-metrics.c @@ -233,11 +233,149 @@ static void test_stats_metrics_group_by_discrete(void) test_stats_metrics_group_by_discrete_real(&discrete_tests[i]); } +#define QUANTIZED_TEST_VAL_COUNT 15 +struct quantized_test { + const char *settings_blob; + unsigned int num_inputs; + intmax_t input_vals[QUANTIZED_TEST_VAL_COUNT]; + + unsigned int num_sub_metrics; + + unsigned int num_ranges; + struct { + struct stats_metric_settings_bucket_range range; + intmax_t count; + } ranges[QUANTIZED_TEST_VAL_COUNT]; +}; + +static const struct quantized_test quantized_tests[] = { + { + "linear:100:1000:100", + 13, + { 0, 50, 100, 101, 200, 201, 250, 301, 900, 901, 1000, 1001, 2000 }, + 7, + 11, + { { { INTMAX_MIN, 100 }, 3 }, + { { 100, 200 }, 2 }, + { { 200, 300 }, 2 }, + { { 300, 400 }, 1 }, + { { 400, 500 }, 0 }, + { { 500, 600 }, 0 }, + { { 600, 700 }, 0 }, + { { 700, 800 }, 0 }, + { { 800, 900 }, 1 }, + { { 900, 1000 }, 2 }, + { { 1000, INTMAX_MAX }, 2 }, + } + }, +}; + +static void test_stats_metrics_group_by_quantized_real(const struct quantized_test *test) +{ + unsigned int i; + + test_begin(t_strdup_printf("stats metrics (quantized group by) - %s", + test->settings_blob)); + + test_init(t_strdup_printf("metric=test\n" + "metric/test/name=test\n" + "metric/test/event_name=test\n" + 
"metric/test/group_by=test_name foobar:%s\n" + "\n", test->settings_blob)); + + struct event *event; + + for (i = 0; i < test->num_inputs; i++) { + event = event_create(NULL); + event_add_category(event, &test_category); + event_set_name(event, "test"); + event_add_str(event, "test_name", "alpha"); + event_add_int(event, "foobar", test->input_vals[i]); + test_event_send(event); + event_unref(&event); + } + + /* check total number of events */ + test_assert(get_stats_dist_field("test", STATS_DIST_COUNT) == test->num_inputs); + + /* analyze the structure */ + struct stats_metrics_iter *iter = stats_metrics_iterate_init(metrics); + const struct metric *root_metric = stats_metrics_iterate(iter); + stats_metrics_iterate_deinit(&iter); + + test_stats_metrics_group_by_check_one(root_metric, NULL, test->num_inputs, + 1, 2, STATS_METRIC_GROUPBY_DISCRETE, + "test_name", 0); + + /* examine first level sub-metric */ + struct metric *const *first = array_idx(&root_metric->sub_metrics, 0); + test_stats_metrics_group_by_check_one(first[0], + "alpha", + test->num_inputs, + test->num_sub_metrics, + 1, + STATS_METRIC_GROUPBY_QUANTIZED, + "foobar", + METRIC_VALUE_TYPE_STR); + + /* check the ranges */ + test_assert(first[0]->group_by[0].num_ranges == test->num_ranges); + for (i = 0; i < test->num_ranges; i++) { + test_assert(first[0]->group_by[0].ranges[i].min == test->ranges[i].range.min); + test_assert(first[0]->group_by[0].ranges[i].max == test->ranges[i].range.max); + } + + /* examine second level sub-metrics */ + struct metric *const *second = array_idx(&first[0]->sub_metrics, 0); + + for (i = 0; i < test->num_sub_metrics; i++) { + const char *sub_name; + intmax_t range_idx; + + /* we check these first, before we use the value below */ + test_assert(second[i]->group_value.type == METRIC_VALUE_TYPE_BUCKET_INDEX); + test_assert(second[i]->group_value.intmax < test->num_ranges); + + range_idx = second[i]->group_value.intmax; + + /* construct the expected sub-metric name */ + if 
(test->ranges[range_idx].range.min == INTMAX_MIN) { + sub_name = t_strdup_printf("foobar_ninf_%jd", + test->ranges[range_idx].range.max); + } else if (test->ranges[range_idx].range.max == INTMAX_MAX) { + sub_name = t_strdup_printf("foobar_%jd_inf", + test->ranges[range_idx].range.min + 1); + } else { + sub_name = t_strdup_printf("foobar_%jd_%jd", + test->ranges[range_idx].range.min + 1, + test->ranges[range_idx].range.max); + } + + test_stats_metrics_group_by_check_one(second[i], + sub_name, + test->ranges[second[i]->group_value.intmax].count, + 0, 0, 0, NULL, + METRIC_VALUE_TYPE_BUCKET_INDEX); + } + + test_deinit(); + test_end(); +} + +static void test_stats_metrics_group_by_quantized(void) +{ + unsigned int i; + + for (i = 0; i < N_ELEMENTS(quantized_tests); i++) + test_stats_metrics_group_by_quantized_real(&quantized_tests[i]); +} + int main(void) { void (*const test_functions[])(void) = { test_stats_metrics, test_stats_metrics_filter, test_stats_metrics_group_by_discrete, + test_stats_metrics_group_by_quantized, NULL };