From: Leonard Göhrs Date: Fri, 24 Mar 2023 13:03:43 +0000 (+0100) Subject: src/daemon/plugin.c: init fam.{time,interval} before uc_update(fam) on dispatch X-Git-Tag: 6.0.0-rc0~55 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=44185083d848b4618811d001a8be51a53cc3c1dd;p=thirdparty%2Fcollectd.git src/daemon/plugin.c: init fam.{time,interval} before uc_update(fam) on dispatch The changes in commit 55efb56a ([collectd 6] src/daemon/plugin.c: Use one thread per write plugin) changed the order in which the time and interval on a to-be-dispatched metric family were set and uc_update() was called on the metric family. The time and interval inside a metric family have to be set before calling uc_update() on it, otherwise warnings like these will be generated en masse[1]: uc_update: Value too old: … value time = 0.000; last cache update = 0.000; And the "missing" handlers for the metric family will be called resulting in plugins like write_prometheus dropping the value, resulting in it not showing up when queried [2]. This change requires the inroduction of a second metric_family_clone() in the dispatch path: plugin_dispatch_metric_family() | \ | Has to modify the passed fam to initialize the correct time and interval. | So it has to metric_family_clone() the input. v plugin_dispatch_metric_internal() | \ | Calls the filter chains and uc_update(fam), which both require correct | times and intervals to be set. | | plugin_dispatch_metric_internal() calls either fc_process_chain() or v <-- fc_default_action() depending on if a chain is configured. |\ | fc_process_chain() | | | v | plugin_write() | \ | A chain may call plugin_write() multiple times with the same fam. | This means plugin_write() can not take "ownership" of the fam, | put it in a queue and free it once it feels like it. | It must instead create another clone to put into the queue. v fc_default_action() | v plugin_write() \ This is the much more common case, as filter chains are a niche feature, and in this case we could actually transfer ownership of the fam to plugin_write and save a clone, but we would have to tell plugin_write() that it is responsible for freeing the passed fam. The negative performance impact of the clone could be mitigated by adding a reference count to the metric_family_t struct and only freeing it once all references to it are gone. But that would be a larger change and not a bug fix. Only fix the "uninitialized time and interval" bug for now. [1]: https://github.com/collectd/collectd/pull/4026#issuecomment-1463884930 [2]: https://github.com/collectd/collectd/issues/4098 Signed-off-by: Leonard Göhrs --- diff --git a/src/daemon/plugin.c b/src/daemon/plugin.c index a733803bf..0827d3f78 100644 --- a/src/daemon/plugin.c +++ b/src/daemon/plugin.c @@ -1944,6 +1944,8 @@ EXPORT int plugin_write(const char *plugin, metric_family_t const *fam) { return EINVAL; } + /* Create a copy of the metric_family_t so we can metric_family_free() it + * ourself once it is processed. */ metric_family_t *fam_copy = metric_family_clone(fam); if (fam_copy == NULL) { int status = errno; @@ -1951,22 +1953,9 @@ EXPORT int plugin_write(const char *plugin, metric_family_t const *fam) { return status; } - cdtime_t time = cdtime(); - cdtime_t interval = plugin_get_interval(); - - for (size_t i = 0; i < fam_copy->metric.num; i++) { - if (fam_copy->metric.ptr[i].time == 0) { - fam_copy->metric.ptr[i].time = time; - } - if (fam_copy->metric.ptr[i].interval == 0) { - fam_copy->metric.ptr[i].interval = interval; - } - - /* TODO(octo): set target labels here. */ - } - write_queue_elem_t *elem = calloc(1, sizeof(*elem)); if (elem == NULL) { + metric_family_free(fam_copy); return ENOMEM; } @@ -2213,13 +2202,41 @@ EXPORT int plugin_dispatch_metric_family(metric_family_t const *fam) { return EINVAL; } - int status = plugin_dispatch_metric_internal(fam); + /* Create a copy of the metric_family_t so we can modify the time and + * interval without causing confusion when the callee later passes the same + * fam again. */ + metric_family_t *fam_copy = metric_family_clone(fam); + if (fam_copy == NULL) { + int status = errno; + ERROR("plugin_dispatch_metric_family: metric_family_clone failed: %s", + STRERROR(status)); + return status; + } + + cdtime_t time = cdtime(); + cdtime_t interval = plugin_get_interval(); + + for (size_t i = 0; i < fam_copy->metric.num; i++) { + if (fam_copy->metric.ptr[i].time == 0) { + fam_copy->metric.ptr[i].time = time; + } + if (fam_copy->metric.ptr[i].interval == 0) { + fam_copy->metric.ptr[i].interval = interval; + } + + /* TODO(octo): set target labels here. */ + } + + int status = plugin_dispatch_metric_internal(fam_copy); if (status != 0) { ERROR( "plugin_dispatch_metric_family: plugin_dispatch_metric_internal failed " "with status %i (%s).", status, STRERROR(status)); } + + metric_family_free(fam_copy); + return status; }