]> git.ipfire.org Git - thirdparty/collectd.git/commitdiff
src/daemon/metric.c: Allow arbitrary UTF-8 strings as label names.
author: Florian Forster <octo@collectd.org>
Fri, 22 Dec 2023 21:12:53 +0000 (22:12 +0100)
committer: Florian Forster <octo@collectd.org>
Thu, 28 Dec 2023 08:54:46 +0000 (09:54 +0100)
Makefile.am
src/daemon/metric.c
src/daemon/metric_test.c

index c7ebd2d1ddc7978149b64fa32dce7da9e7ea0cf0..ab7a2086c4ee2d1cb55834c951c8a9a6b267784d 100644 (file)
@@ -438,7 +438,7 @@ libmetadata_la_SOURCES = \
 libmetric_la_SOURCES = \
                       src/daemon/metric.c \
                       src/daemon/metric.h
-libmetric_la_LIBADD = libmetadata.la $(COMMON_LIBS)
+libmetric_la_LIBADD = libmetadata.la libutf8.la $(COMMON_LIBS)
 
 libplugin_mock_la_SOURCES = \
        src/daemon/plugin_mock.c \
index f7b816c90c67e6091c57932e331dd686a877f473..7e861a3dd1985c2691c8296971b1aa2afbc90120 100644 (file)
 
 #include "metric.h"
 #include "plugin.h"
+#include "utils/utf8/utf8.h"
 
-/* Label names must match the regex `[a-zA-Z_][a-zA-Z0-9_]*`. Label names
- * beginning with __ are reserved for internal use.
- *
- * Source:
- * https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels */
-#define VALID_LABEL_CHARS                                                      \
+/* If these characters are used in resource attribute names or metric label
+ * names, they will not cause quotes to be printed when formatting the metric
+ * name. Resource attribute values and metric label values are always printed in
+ * quotes. */
+#define UNQUOTED_LABEL_CHARS                                                   \
   "abcdefghijklmnopqrstuvwxyz"                                                 \
   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"                                                 \
-  "0123456789_.-"
+  "0123456789_.-:"
 
 /* Metric names must match the regex `[a-zA-Z_:][a-zA-Z0-9_:]*` */
 // instrument-name = ALPHA 0*254 ("_" / "." / "-" / "/" / ALPHA / DIGIT)
-#define VALID_NAME_CHARS VALID_LABEL_CHARS "/"
+#define VALID_NAME_CHARS                                                       \
+  "abcdefghijklmnopqrstuvwxyz"                                                 \
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"                                                 \
+  "0123456789_.-/"
 
 #define RESOURCE_LABEL_PREFIX "resource:"
 
@@ -98,8 +101,7 @@ int label_set_add(label_set_t *labels, char const *name, char const *value) {
     return EINVAL;
   }
 
-  size_t valid_len = strspn(name, VALID_LABEL_CHARS);
-  if ((valid_len != name_len) || isdigit((int)name[0])) {
+  if (!utf8_valid(name) || !utf8_valid(value)) {
     return EINVAL;
   }
 
@@ -265,8 +267,18 @@ static int internal_label_set_format(strbuf_t *buf, label_set_t const *labels,
       status = status || strbuf_print(buf, ",");
     }
 
-    status = status || strbuf_print(buf, prefix);
-    status = status || strbuf_print(buf, labels->ptr[i].name);
+    bool needs_quotes = strlen(labels->ptr[i].name) !=
+                        strspn(labels->ptr[i].name, UNQUOTED_LABEL_CHARS);
+    if (needs_quotes) {
+      status = status || strbuf_print(buf, "\"");
+      status = status || strbuf_print(buf, prefix);
+      status = status || strbuf_print_escaped(buf, labels->ptr[i].name,
+                                              "\\\"\n\r\t", '\\');
+      status = status || strbuf_print(buf, "\"");
+    } else {
+      status = status || strbuf_print(buf, prefix);
+      status = status || strbuf_print(buf, labels->ptr[i].name);
+    }
     status = status || strbuf_print(buf, "=\"");
     status = status || strbuf_print_escaped(buf, labels->ptr[i].value,
                                             "\\\"\n\r\t", '\\');
@@ -502,11 +514,11 @@ metric_family_t *metric_family_clone(metric_family_t const *fam) {
   return ret;
 }
 
-/* parse_label_value reads a label value, unescapes it and prints it to buf. On
- * success, inout is updated to point to the character just *after* the label
- * value, i.e. the character *following* the ending quotes - either a comma or
- * closing curlies. */
-static int parse_label_value(strbuf_t *buf, char const **inout) {
+/* parse_quoted_string reads a quoted string, unescapes it and prints it to
+ * buf. On success, inout is updated to point to the character just *after* the
+ * string, i.e. the character *following* the ending quotes - either an equal
+ * sign, a comma, or closing curlies. */
+static int parse_quoted_string(strbuf_t *buf, char const **inout) {
   char const *ptr = *inout;
 
   if (ptr[0] != '"') {
@@ -602,21 +614,19 @@ static int metric_family_unmarshal_identity(metric_family_t *fam,
   while ((ptr[0] == '{') || (ptr[0] == ',')) {
     ptr++;
 
-    bool is_resource_label =
-        strncmp(ptr, RESOURCE_LABEL_PREFIX, strlen(RESOURCE_LABEL_PREFIX)) == 0;
-    if (is_resource_label) {
-      ptr += strlen(RESOURCE_LABEL_PREFIX);
-    }
-
-    size_t key_len = strspn(ptr, VALID_LABEL_CHARS);
-    if (key_len == 0) {
-      ret = EINVAL;
-      break;
+    strbuf_t key = STRBUF_CREATE;
+    if (ptr[0] == '"') {
+      int status = parse_quoted_string(&key, &ptr);
+      if (status != 0) {
+        ret = status;
+        STRBUF_DESTROY(key);
+        break;
+      }
+    } else {
+      size_t key_len = strspn(ptr, UNQUOTED_LABEL_CHARS);
+      strbuf_printn(&key, ptr, key_len);
+      ptr += key_len;
     }
-    char key[key_len + 1];
-    strncpy(key, ptr, key_len);
-    key[key_len] = 0;
-    ptr += key_len;
 
     if (ptr[0] != '=') {
       ret = EINVAL;
@@ -625,9 +635,10 @@ static int metric_family_unmarshal_identity(metric_family_t *fam,
     ptr++;
 
     strbuf_t value = STRBUF_CREATE;
-    int status = parse_label_value(&value, &ptr);
+    int status = parse_quoted_string(&value, &ptr);
     if (status != 0) {
       ret = status;
+      STRBUF_DESTROY(key);
       STRBUF_DESTROY(value);
       break;
     }
@@ -635,11 +646,15 @@ static int metric_family_unmarshal_identity(metric_family_t *fam,
     /* one metric is added to the family by metric_family_unmarshal_text. */
     assert(fam->metric.num >= 1);
 
+    bool is_resource_label = strncmp(key.ptr, RESOURCE_LABEL_PREFIX,
+                                     strlen(RESOURCE_LABEL_PREFIX)) == 0;
     if (is_resource_label) {
-      status = metric_family_resource_attribute_update(fam, key, value.ptr);
+      status = metric_family_resource_attribute_update(
+          fam, key.ptr + strlen(RESOURCE_LABEL_PREFIX), value.ptr);
     } else {
-      status = metric_label_set(m, key, value.ptr);
+      status = metric_label_set(m, key.ptr, value.ptr);
     }
+    STRBUF_DESTROY(key);
     STRBUF_DESTROY(value);
     if (status != 0) {
       ret = status;
index 7242f85ef6cf636c166d6e0f06f33716b53ccaec..5007241df975a5899037639923016cd1f21877ad 100644 (file)
@@ -253,6 +253,23 @@ DEF_TEST(metric_identity) {
               "metric_with_resource_and_labels{resource:alpha=\"resources\","
               "resource:omega=\"always\",beta=\"come\",gamma=\"first\"}",
       },
+      {
+          .name = "complex_names.are.quoted",
+          .rattr =
+              (label_pair_t[]){
+                  {"with space", "gets quotes"},
+              },
+          .rattr_num = 1,
+          .labels =
+              (label_pair_t[]){
+                  {"and \"quotes\" are", "escaped"},
+              },
+          .labels_num = 1,
+          .want = "complex_names.are.quoted{"
+                  "\"resource:with space\"=\"gets quotes\","
+                  "\"and \\\"quotes\\\" are\"=\"escaped\""
+                  "}",
+      },
   };
 
   for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); i++) {
@@ -374,6 +391,34 @@ DEF_TEST(metric_parse_identity) {
                       },
               },
       },
+      {
+          .name = "complex names are quoted",
+          .input = "complex_names.are.quoted{\"resource:with space\"=\"gets "
+                   "quotes\",\"and \\\"quotes\\\" are\"=\"escaped\"}",
+          .want =
+              {
+                  .label =
+                      {
+                          .ptr =
+                              (label_pair_t[]){
+                                  {"and \"quotes\" are", "escaped"},
+                              },
+                          .num = 1,
+                      },
+                  .family =
+                      &(metric_family_t){
+                          .name = "complex_names.are.quoted",
+                          .resource =
+                              {
+                                  .ptr =
+                                      (label_pair_t[]){
+                                          {"with space", "gets quotes"},
+                                      },
+                                  .num = 1,
+                              },
+                      },
+              },
+      },
   };
 
   for (size_t i = 0; i < STATIC_ARRAY_SIZE(cases); i++) {