From b17b28a027e67cf0e08ebed427b7d29684079879 Mon Sep 17 00:00:00 2001 From: "Alan T. DeKok" Date: Sun, 24 May 2026 22:58:15 -0400 Subject: [PATCH] merge adjacent constant strings in xlat_tokenize_input() --- src/lib/unlang/xlat_tokenize.c | 35 +++++++++++++++++++++++++++++ src/tests/unit/xlat/base.txt | 41 +++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/lib/unlang/xlat_tokenize.c b/src/lib/unlang/xlat_tokenize.c index b6c619876fa..982cb17e1e7 100644 --- a/src/lib/unlang/xlat_tokenize.c +++ b/src/lib/unlang/xlat_tokenize.c @@ -871,6 +871,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head fr_sbuff_parse_rules_t const *p_rules, tmpl_rules_t const *t_rules) { xlat_exp_t *node = NULL; + xlat_exp_t *prev = NULL; fr_slen_t slen; fr_sbuff_term_t terminals = FR_SBUFF_TERMS( L("%"), @@ -897,6 +898,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head /* * Find the next token */ + skip_alloc: fr_sbuff_marker(&m_s, &our_in); slen = fr_sbuff_out_aunescape_until(node, &str, &our_in, SIZE_MAX, tokens, escapes); @@ -917,6 +919,35 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head */ if (slen > 0) { do_value_box: + /* + * If the previous node was also a constant value-box, we can merge the new + * string into it instead of inserting a fresh node. This merge ensures that we + * only have one constant value-box produced, instead of many. + */ + if (prev && (prev->type == XLAT_BOX)) { + size_t prev_len = prev->data.vb_length; + size_t add_len = talloc_strlen(str); + size_t total = prev_len + add_len; + char *merged; + + fr_assert(prev->data.type == FR_TYPE_STRING); + + MEM(fr_value_box_bstr_realloc(prev, &merged, &prev->data, total) == 0); + memcpy(merged + prev_len, str, add_len + 1); + + xlat_exp_set_name(prev, merged, total); + talloc_free(str); + + XLAT_DEBUG("VALUE-BOX merged --> %s", prev->fmt); + + fr_sbuff_marker_release(&m_s); + + /* + * Keep "node", as we haven't used it. + */ + goto skip_alloc; + } + xlat_exp_set_name_shallow(node, str); fr_value_box_bstrndup(node, &node->data, NULL, str, talloc_strlen(str), false); fr_value_box_mark_safe_for(&node->data, t_rules->literals_safe_for); @@ -932,6 +963,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head xlat_exp_insert_tail(head, node); + prev = node; node = NULL; fr_sbuff_marker_release(&m_s); continue; @@ -957,6 +989,8 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head goto error; } + prev = NULL; /* non-value-box inserted; subsequent text must not merge */ + next: fr_sbuff_marker_release(&m_s); continue; @@ -982,6 +1016,7 @@ static CC_HINT(nonnull(1,2,4)) ssize_t xlat_tokenize_input(xlat_exp_head_t *head * Tokenize the function arguments using the new method. */ if (xlat_tokenize_function_args(head, &our_in, t_rules) < 0) goto error; + prev = NULL; /* non-value-box inserted; subsequent text must not merge */ goto next; } diff --git a/src/tests/unit/xlat/base.txt b/src/tests/unit/xlat/base.txt index 3d41d08d88d..d1894ea334c 100644 --- a/src/tests/unit/xlat/base.txt +++ b/src/tests/unit/xlat/base.txt @@ -22,6 +22,27 @@ match \% xlat \ match \\ +# +# Constant strings that contain a literal '%' adjacent to other text. The +# tokenizer breaks the input at every '%' and emits a separate value-box per +# chunk; xlat_tokenize_input() must merge the adjacent constants into a +# single value-box so callers see exactly one constant child. +# +xlat 100% +match 100\% + +xlat a% +match a\% + +xlat x% y +match x\% y + +xlat a%%b +match a\%b + +xlat %% +match \% + # # Literals mixed with attributes # @@ -250,6 +271,24 @@ match ERROR offset 44: Unexpected text after attribute reference xlat_argv echo hello %{Filter-Id}:1234 world match [0]{ echo }, [1]{ hello }, [2]{ %{Filter-Id}:1234 }, [3]{ world } +# +# Double-quoted constant arguments containing a literal '%'. Without the +# in-tokenizer merge in xlat_tokenize_input(), the group node would contain +# multiple constant value-box children and trip the +# fr_dlist_num_elements(...) == 1 assertion in xlat_tokenize_word(). +# +xlat_argv "100%" +match [0]{ "100\%" } + +xlat_argv "a%%b" +match [0]{ "a\%b" } + +xlat_argv /bin/sh "100%" "%{User-Name}" +match [0]{ /bin/sh }, [1]{ "100\%" }, [2]{ "%{User-Name}" } + +xlat_argv foo "ratio is %" bar +match [0]{ foo }, [1]{ "ratio is \%" }, [2]{ bar } + xlat %debug(5) match %debug(5) @@ -304,4 +343,4 @@ xlat %hash.md5('arg"') match %hash.md5(0x61726722) count -match 161 +match 179 -- 2.47.3