git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bpf: allow UTF-8 literals in bpf_bprintf_prepare()
author Yihan Ding <dingyihan@uniontech.com>
Thu, 16 Apr 2026 12:01:41 +0000 (20:01 +0800)
committer Alexei Starovoitov <ast@kernel.org>
Thu, 16 Apr 2026 22:53:32 +0000 (15:53 -0700)
bpf_bprintf_prepare() only needs ASCII parsing for conversion
specifiers. Plain text can safely carry bytes >= 0x80, so allow
UTF-8 literals outside '%' sequences while keeping ASCII control
bytes rejected and format specifiers ASCII-only.

This keeps existing parsing rules for format directives unchanged,
while allowing helpers such as bpf_trace_printk() to emit UTF-8
literal text.

Update test_snprintf_negative() in the same commit so selftests keep
matching the new plain-text vs format-specifier split during bisection.

Fixes: 48cac3f4a96d ("bpf: Implement formatted output helpers with bstr_printf")
Signed-off-by: Yihan Ding <dingyihan@uniontech.com>
Acked-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/20260416120142.1420646-2-dingyihan@uniontech.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/helpers.c
tools/testing/selftests/bpf/prog_tests/snprintf.c

index bb95e287b0dc81d681953651f18d644ffde84845..2bb60200c2665e40252a2ecb2a6ebef455b0b5ec 100644 (file)
@@ -845,7 +845,13 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
                data->buf = buffers->buf;
 
        for (i = 0; i < fmt_size; i++) {
-               if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+               unsigned char c = fmt[i];
+
+               /*
+                * Permit bytes >= 0x80 in plain text so UTF-8 literals can pass
+                * through unchanged, while still rejecting ASCII control bytes.
+                */
+               if (isascii(c) && !isprint(c) && !isspace(c)) {
                        err = -EINVAL;
                        goto out;
                }
@@ -867,6 +873,15 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
                 * always access fmt[i + 1], in the worst case it will be a 0
                 */
                i++;
+               c = fmt[i];
+               /*
+                * The format parser below only understands ASCII conversion
+                * specifiers and modifiers, so reject non-ASCII after '%'.
+                */
+               if (!isascii(c)) {
+                       err = -EINVAL;
+                       goto out;
+               }
 
                /* skip optional "[0 +-][num]" width formatting field */
                while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
index 594441acb7071fe13891100c4f425a31f253915e..4e4a82d54f799dcbad3bded0e16171c37a9a0224 100644 (file)
@@ -114,7 +114,8 @@ static void test_snprintf_negative(void)
        ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
        ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
        ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
-       ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+       ASSERT_OK(load_single_snprintf("\x80"), "non ascii plain text");
+       ASSERT_ERR(load_single_snprintf("%\x80"), "non ascii in specifier");
        ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
        ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
        ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");