git.ipfire.org Git - thirdparty/freeradius-server.git/commitdiff
merge adjacent constant strings in xlat_tokenize_input()
author: Alan T. DeKok <aland@freeradius.org>
Mon, 25 May 2026 02:58:15 +0000 (22:58 -0400)
committer: Alan T. DeKok <aland@freeradius.org>
Mon, 25 May 2026 03:39:11 +0000 (23:39 -0400)
src/lib/unlang/xlat_tokenize.c
src/tests/unit/xlat/base.txt

index b6c619876fa06682ea103c1808af4b1bc7e94f0b..982cb17e1e71da17d041090839b421078d539ccb 100644 (file)
@@ -871,6 +871,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head
                                                           fr_sbuff_parse_rules_t const *p_rules, tmpl_rules_t const *t_rules)
 {
        xlat_exp_t                      *node = NULL;
+       xlat_exp_t                      *prev = NULL;
        fr_slen_t                       slen;
        fr_sbuff_term_t                 terminals = FR_SBUFF_TERMS(
                                                L("%"),
@@ -897,6 +898,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head
                /*
                 *      Find the next token
                 */
+       skip_alloc:
                fr_sbuff_marker(&m_s, &our_in);
                slen = fr_sbuff_out_aunescape_until(node, &str, &our_in, SIZE_MAX, tokens, escapes);
 
@@ -917,6 +919,35 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head
                 */
                if (slen > 0) {
                do_value_box:
+                       /*
+                        *      If the previous node was also a constant value-box, we can merge the new
+                        *      string into it instead of inserting a fresh node.  This merge ensures that we
+                        *      only have one constant value-box produced, instead of many.
+                        */
+                       if (prev && (prev->type == XLAT_BOX)) {
+                               size_t  prev_len = prev->data.vb_length;
+                               size_t  add_len = talloc_strlen(str);
+                               size_t  total = prev_len + add_len;
+                               char    *merged;
+
+                               fr_assert(prev->data.type == FR_TYPE_STRING);
+
+                               MEM(fr_value_box_bstr_realloc(prev, &merged, &prev->data, total) == 0);
+                               memcpy(merged + prev_len, str, add_len + 1);
+
+                               xlat_exp_set_name(prev, merged, total);
+                               talloc_free(str);
+
+                               XLAT_DEBUG("VALUE-BOX merged --> %s", prev->fmt);
+
+                               fr_sbuff_marker_release(&m_s);
+
+                               /*
+                                *      Keep "node", as we haven't used it.
+                                */
+                               goto skip_alloc;
+                       }
+
                        xlat_exp_set_name_shallow(node, str);
                        fr_value_box_bstrndup(node, &node->data, NULL, str, talloc_strlen(str), false);
                        fr_value_box_mark_safe_for(&node->data, t_rules->literals_safe_for);
@@ -932,6 +963,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head
 
                        xlat_exp_insert_tail(head, node);
 
+                       prev = node;
                        node = NULL;
                        fr_sbuff_marker_release(&m_s);
                        continue;
@@ -957,6 +989,8 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head
                                goto error;
                        }
 
+                       prev = NULL;    /* non-value-box inserted; subsequent text must not merge */
+
                next:
                        fr_sbuff_marker_release(&m_s);
                        continue;
@@ -982,6 +1016,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head
                         *      Tokenize the function arguments using the new method.
                         */
                        if (xlat_tokenize_function_args(head, &our_in, t_rules) < 0) goto error;
+                       prev = NULL;    /* non-value-box inserted; subsequent text must not merge */
                        goto next;
                }
 
index 3d41d08d88d4727007d47bece559e9cfb56fc151..d1894ea334cc9ff551d4ad8ef26ebffd64b7c855 100644 (file)
@@ -22,6 +22,27 @@ match \%
 xlat \
 match \\
 
+#
+#  Constant strings that contain a literal '%' adjacent to other text.  The
+#  tokenizer breaks the input at every '%' and emits a separate value-box per
+#  chunk; xlat_tokenize_input() must merge the adjacent constants into a
+#  single value-box so callers see exactly one constant child.
+#
+xlat 100%
+match 100\%
+
+xlat a%
+match a\%
+
+xlat x% y
+match x\% y
+
+xlat a%%b
+match a\%b
+
+xlat %%
+match \%
+
 #
 #  Literals mixed with attributes
 #
@@ -250,6 +271,24 @@ match ERROR offset 44: Unexpected text after attribute reference
 xlat_argv echo hello %{Filter-Id}:1234 world
 match [0]{ echo }, [1]{ hello }, [2]{ %{Filter-Id}:1234 }, [3]{ world }
 
+#
+#  Double-quoted constant arguments containing a literal '%'.  Without the
+#  in-tokenizer merge in xlat_tokenize_input(), the group node would contain
+#  multiple constant value-box children and trip the
+#  fr_dlist_num_elements(...) == 1 assertion in xlat_tokenize_word().
+#
+xlat_argv "100%"
+match [0]{ "100\%" }
+
+xlat_argv "a%%b"
+match [0]{ "a\%b" }
+
+xlat_argv /bin/sh "100%" "%{User-Name}"
+match [0]{ /bin/sh }, [1]{ "100\%" }, [2]{ "%{User-Name}" }
+
+xlat_argv foo "ratio is %" bar
+match [0]{ foo }, [1]{ "ratio is \%" }, [2]{ bar }
+
 xlat %debug(5)
 match %debug(5)
 
@@ -304,4 +343,4 @@ xlat %hash.md5('arg"')
 match %hash.md5(0x61726722)
 
 count
-match 161
+match 179