--- /dev/null
+From 6fad274f06f038c29660aa53fbad14241c9fd976 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 21 Oct 2024 17:28:05 +0200
+Subject: bpf: Add MEM_WRITE attribute
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 6fad274f06f038c29660aa53fbad14241c9fd976 upstream.
+
+Add a MEM_WRITE attribute for BPF helper functions which can be used in
+bpf_func_proto to annotate an argument type in order to let the verifier
+know that the helper writes into the memory passed as an argument. In
+the past MEM_UNINIT has been (ab)used for this function, but the latter
+merely tells the verifier that the passed memory can be uninitialized.
+
+There have been bugs with overloading the latter but aside from that
+there are also cases where the passed memory is read + written which
+currently cannot be expressed, see also 4b3786a6c539 ("bpf: Zero former
+ARG_PTR_TO_{LONG,INT} args in case of error").
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20241021152809.33343-1-daniel@iogearbox.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: BRUNO VERNAY <bruno.vernay@se.com>
+Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>
+Stable-dep-of: 8ea607330a39 ("bpf: Fix overloading of MEM_UNINIT's meaning")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h | 14 +++++++++++---
+ kernel/bpf/helpers.c | 10 +++++-----
+ kernel/bpf/ringbuf.c | 2 +-
+ kernel/bpf/syscall.c | 2 +-
+ kernel/trace/bpf_trace.c | 4 ++--
+ net/core/filter.c | 4 ++--
+ 6 files changed, 22 insertions(+), 14 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -464,6 +464,7 @@ enum bpf_type_flag {
+ */
+ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS),
+
++ /* MEM can be uninitialized. */
+ MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to memory local to the bpf program. */
+@@ -480,6 +481,13 @@ enum bpf_type_flag {
+ */
+ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS),
+
++ /* MEM is being written to, often combined with MEM_UNINIT. Non-presence
++ * of MEM_WRITE means that MEM is only being read. MEM_WRITE without the
++ * MEM_UNINIT means that memory needs to be initialized since it is also
++ * read.
++ */
++ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS),
++
+ __BPF_TYPE_FLAG_MAX,
+ __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
+ };
+@@ -537,10 +545,10 @@ enum bpf_arg_type {
+ ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
+ ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
+ ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
+- /* pointer to memory does not need to be initialized, helper function must fill
+- * all bytes or clear them in error case.
++ /* Pointer to memory does not need to be initialized, since helper function
++ * fills all bytes or clears them in error case.
+ */
+- ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
++ ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | MEM_WRITE | ARG_PTR_TO_MEM,
+ /* Pointer to valid memory of size known at compile time. */
+ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
+
+--- a/kernel/bpf/helpers.c
++++ b/kernel/bpf/helpers.c
+@@ -107,7 +107,7 @@ const struct bpf_func_proto bpf_map_pop_
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+- .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
++ .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
+ };
+
+ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
+@@ -120,7 +120,7 @@ const struct bpf_func_proto bpf_map_peek
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+- .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
++ .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
+ };
+
+ BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
+@@ -531,7 +531,7 @@ const struct bpf_func_proto bpf_strtol_p
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg4_size = sizeof(s64),
+ };
+
+@@ -561,7 +561,7 @@ const struct bpf_func_proto bpf_strtoul_
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg4_size = sizeof(u64),
+ };
+
+@@ -1502,7 +1502,7 @@ static const struct bpf_func_proto bpf_d
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
++ .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
+ };
+
+ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
+--- a/kernel/bpf/ringbuf.c
++++ b/kernel/bpf/ringbuf.c
+@@ -618,7 +618,7 @@ const struct bpf_func_proto bpf_ringbuf_
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT,
++ .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT | MEM_WRITE,
+ };
+
+ BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -5265,7 +5265,7 @@ static const struct bpf_func_proto bpf_k
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg4_size = sizeof(u64),
+ };
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -1192,7 +1192,7 @@ static const struct bpf_func_proto bpf_g
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg3_size = sizeof(u64),
+ };
+
+@@ -1209,7 +1209,7 @@ static const struct bpf_func_proto bpf_g
+ .func = get_func_ret,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg2_size = sizeof(u64),
+ };
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -6243,7 +6243,7 @@ static const struct bpf_func_proto bpf_s
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg3_size = sizeof(u32),
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6255,7 +6255,7 @@ static const struct bpf_func_proto bpf_x
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg3_size = sizeof(u32),
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
--- /dev/null
+From 8ea607330a39184f51737c6ae706db7fdca7628e Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 21 Oct 2024 17:28:06 +0200
+Subject: bpf: Fix overloading of MEM_UNINIT's meaning
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 8ea607330a39184f51737c6ae706db7fdca7628e upstream.
+
+Lonial reported an issue in the BPF verifier where check_mem_size_reg()
+has the following code:
+
+ if (!tnum_is_const(reg->var_off))
+ /* For unprivileged variable accesses, disable raw
+ * mode so that the program is required to
+ * initialize all the memory that the helper could
+ * just partially fill up.
+ */
+ meta = NULL;
+
+This means that writes are not checked when the register containing the
+size of the passed buffer is not a known constant. Through this bug, a BPF
+program can write to a map which is marked as read-only, for example,
+.rodata global maps.
+
+The problem is that MEM_UNINIT's initial meaning that "the passed buffer
+to the BPF helper does not need to be initialized" which was added back
+in commit 435faee1aae9 ("bpf, verifier: add ARG_PTR_TO_RAW_STACK type")
+got overloaded over time with "the passed buffer is being written to".
+
+The problem however is that checks such as the above which were added later
+via 06c1c049721a ("bpf: allow helpers access to variable memory") set meta
+to NULL in order to force the user to always initialize the passed buffer to
+the helper. Due to the current double meaning of MEM_UNINIT, this bypasses
+verifier write checks to the memory (not boundary checks though) and only
+assumes the latter memory is read instead.
+
+Fix this by reverting MEM_UNINIT back to its original meaning, and having
+MEM_WRITE as an annotation to BPF helpers in order to then trigger the
+BPF verifier checks for writing to memory.
+
+Some notes: check_arg_pair_ok() ensures that for ARG_CONST_SIZE{,_OR_ZERO}
+we can access fn->arg_type[arg - 1] since it must contain a preceding
+ARG_PTR_TO_MEM. For check_mem_reg() the meta argument can be removed
+altogether since we do check both BPF_READ and BPF_WRITE. Same for the
+equivalent check_kfunc_mem_size_reg().
+
+Fixes: 7b3552d3f9f6 ("bpf: Reject writes for PTR_TO_MAP_KEY in check_helper_mem_access")
+Fixes: 97e6d7dab1ca ("bpf: Check PTR_TO_MEM | MEM_RDONLY in check_helper_mem_access")
+Fixes: 15baa55ff5b0 ("bpf/verifier: allow all functions to read user provided context")
+Reported-by: Lonial Con <kongln9170@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20241021152809.33343-2-daniel@iogearbox.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: BRUNO VERNAY <bruno.vernay@se.com>
+Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 76 +++++++++++++++++++++++---------------------------
+ 1 file changed, 36 insertions(+), 40 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5416,7 +5416,8 @@ mark:
+ }
+
+ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
+- int access_size, bool zero_size_allowed,
++ int access_size, enum bpf_access_type access_type,
++ bool zero_size_allowed,
+ struct bpf_call_arg_meta *meta)
+ {
+ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno];
+@@ -5428,7 +5429,7 @@ static int check_helper_mem_access(struc
+ return check_packet_access(env, regno, reg->off, access_size,
+ zero_size_allowed);
+ case PTR_TO_MAP_KEY:
+- if (meta && meta->raw_mode) {
++ if (access_type == BPF_WRITE) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+@@ -5436,15 +5437,13 @@ static int check_helper_mem_access(struc
+ return check_mem_region_access(env, regno, reg->off, access_size,
+ reg->map_ptr->key_size, false);
+ case PTR_TO_MAP_VALUE:
+- if (check_map_access_type(env, regno, reg->off, access_size,
+- meta && meta->raw_mode ? BPF_WRITE :
+- BPF_READ))
++ if (check_map_access_type(env, regno, reg->off, access_size, access_type))
+ return -EACCES;
+ return check_map_access(env, regno, reg->off, access_size,
+ zero_size_allowed, ACCESS_HELPER);
+ case PTR_TO_MEM:
+ if (type_is_rdonly_mem(reg->type)) {
+- if (meta && meta->raw_mode) {
++ if (access_type == BPF_WRITE) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+@@ -5455,7 +5454,7 @@ static int check_helper_mem_access(struc
+ zero_size_allowed);
+ case PTR_TO_BUF:
+ if (type_is_rdonly_mem(reg->type)) {
+- if (meta && meta->raw_mode) {
++ if (access_type == BPF_WRITE) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+@@ -5480,7 +5479,6 @@ static int check_helper_mem_access(struc
+ * Dynamically check it now.
+ */
+ if (!env->ops->convert_ctx_access) {
+- enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
+ int offset = access_size - 1;
+
+ /* Allow zero-byte read from PTR_TO_CTX */
+@@ -5488,7 +5486,7 @@ static int check_helper_mem_access(struc
+ return zero_size_allowed ? 0 : -EACCES;
+
+ return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
+- atype, -1, false);
++ access_type, -1, false);
+ }
+
+ fallthrough;
+@@ -5507,6 +5505,7 @@ static int check_helper_mem_access(struc
+
+ static int check_mem_size_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
++ enum bpf_access_type access_type,
+ bool zero_size_allowed,
+ struct bpf_call_arg_meta *meta)
+ {
+@@ -5522,15 +5521,12 @@ static int check_mem_size_reg(struct bpf
+ */
+ meta->msize_max_value = reg->umax_value;
+
+- /* The register is SCALAR_VALUE; the access check
+- * happens using its boundaries.
++ /* The register is SCALAR_VALUE; the access check happens using
++ * its boundaries. For unprivileged variable accesses, disable
++ * raw mode so that the program is required to initialize all
++ * the memory that the helper could just partially fill up.
+ */
+ if (!tnum_is_const(reg->var_off))
+- /* For unprivileged variable accesses, disable raw
+- * mode so that the program is required to
+- * initialize all the memory that the helper could
+- * just partially fill up.
+- */
+ meta = NULL;
+
+ if (reg->smin_value < 0) {
+@@ -5541,8 +5537,7 @@ static int check_mem_size_reg(struct bpf
+
+ if (reg->umin_value == 0) {
+ err = check_helper_mem_access(env, regno - 1, 0,
+- zero_size_allowed,
+- meta);
++ access_type, zero_size_allowed, meta);
+ if (err)
+ return err;
+ }
+@@ -5552,9 +5547,8 @@ static int check_mem_size_reg(struct bpf
+ regno);
+ return -EACCES;
+ }
+- err = check_helper_mem_access(env, regno - 1,
+- reg->umax_value,
+- zero_size_allowed, meta);
++ err = check_helper_mem_access(env, regno - 1, reg->umax_value,
++ access_type, zero_size_allowed, meta);
+ if (!err)
+ err = mark_chain_precision(env, regno);
+ return err;
+@@ -5565,13 +5559,11 @@ int check_mem_reg(struct bpf_verifier_en
+ {
+ bool may_be_null = type_may_be_null(reg->type);
+ struct bpf_reg_state saved_reg;
+- struct bpf_call_arg_meta meta;
+ int err;
+
+ if (register_is_null(reg))
+ return 0;
+
+- memset(&meta, 0, sizeof(meta));
+ /* Assuming that the register contains a value check if the memory
+ * access is safe. Temporarily save and restore the register's state as
+ * the conversion shouldn't be visible to a caller.
+@@ -5581,10 +5573,8 @@ int check_mem_reg(struct bpf_verifier_en
+ mark_ptr_not_null_reg(reg);
+ }
+
+- err = check_helper_mem_access(env, regno, mem_size, true, &meta);
+- /* Check access for BPF_WRITE */
+- meta.raw_mode = true;
+- err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
++ err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL);
++ err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL);
+
+ if (may_be_null)
+ *reg = saved_reg;
+@@ -5610,13 +5600,12 @@ int check_kfunc_mem_size_reg(struct bpf_
+ mark_ptr_not_null_reg(mem_reg);
+ }
+
+- err = check_mem_size_reg(env, reg, regno, true, &meta);
+- /* Check access for BPF_WRITE */
+- meta.raw_mode = true;
+- err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
++ err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta);
++ err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta);
+
+ if (may_be_null)
+ *mem_reg = saved_reg;
++
+ return err;
+ }
+
+@@ -6227,9 +6216,8 @@ skip_type_check:
+ verbose(env, "invalid map_ptr to access map->key\n");
+ return -EACCES;
+ }
+- err = check_helper_mem_access(env, regno,
+- meta->map_ptr->key_size, false,
+- NULL);
++ err = check_helper_mem_access(env, regno, meta->map_ptr->key_size,
++ BPF_READ, false, NULL);
+ break;
+ case ARG_PTR_TO_MAP_VALUE:
+ if (type_may_be_null(arg_type) && register_is_null(reg))
+@@ -6244,9 +6232,9 @@ skip_type_check:
+ return -EACCES;
+ }
+ meta->raw_mode = arg_type & MEM_UNINIT;
+- err = check_helper_mem_access(env, regno,
+- meta->map_ptr->value_size, false,
+- meta);
++ err = check_helper_mem_access(env, regno, meta->map_ptr->value_size,
++ arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
++ false, meta);
+ break;
+ case ARG_PTR_TO_PERCPU_BTF_ID:
+ if (!reg->btf_id) {
+@@ -6281,7 +6269,9 @@ skip_type_check:
+ */
+ meta->raw_mode = arg_type & MEM_UNINIT;
+ if (arg_type & MEM_FIXED_SIZE) {
+- err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
++ err = check_helper_mem_access(env, regno, fn->arg_size[arg],
++ arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
++ false, meta);
+ if (err)
+ return err;
+ if (arg_type & MEM_ALIGNED)
+@@ -6289,10 +6279,16 @@ skip_type_check:
+ }
+ break;
+ case ARG_CONST_SIZE:
+- err = check_mem_size_reg(env, reg, regno, false, meta);
++ err = check_mem_size_reg(env, reg, regno,
++ fn->arg_type[arg - 1] & MEM_WRITE ?
++ BPF_WRITE : BPF_READ,
++ false, meta);
+ break;
+ case ARG_CONST_SIZE_OR_ZERO:
+- err = check_mem_size_reg(env, reg, regno, true, meta);
++ err = check_mem_size_reg(env, reg, regno,
++ fn->arg_type[arg - 1] & MEM_WRITE ?
++ BPF_WRITE : BPF_READ,
++ true, meta);
+ break;
+ case ARG_PTR_TO_DYNPTR:
+ /* We only need to check for initialized / uninitialized helper
--- /dev/null
+From 6df90c02bae468a3a6110bafbc659884d0c4966c Mon Sep 17 00:00:00 2001
+From: Milan Broz <gmazyland@gmail.com>
+Date: Wed, 18 Dec 2024 13:56:58 +0100
+Subject: dm-verity FEC: Fix RS FEC repair for roots unaligned to block size (take 2)
+
+From: Milan Broz <gmazyland@gmail.com>
+
+commit 6df90c02bae468a3a6110bafbc659884d0c4966c upstream.
+
+This patch fixes an issue that was fixed in the commit
+ df7b59ba9245 ("dm verity: fix FEC for RS roots unaligned to block size")
+but later broken again in the commit
+ 8ca7cab82bda ("dm verity fec: fix misaligned RS roots IO")
+
+If the Reed-Solomon roots setting spans multiple blocks, the code does not
+use proper parity bytes and randomly fails to repair even trivial errors.
+
+This bug cannot happen if the sector size is a multiple of the RS roots
+setting (Android case with roots 2).
+
+The previous solution was to find a dm-bufio block size that is a multiple
+of the device sector size and roots size. Unfortunately, the optimization
+in commit 8ca7cab82bda ("dm verity fec: fix misaligned RS roots IO")
+is incorrect and uses data block size for some roots (for example, it uses
+4096 block size for roots = 20).
+
+This patch uses a different approach:
+
+ - It always uses a configured data block size for dm-bufio to avoid
+ possible misaligned IOs.
+
+ - and it caches the processed parity bytes, so it can join it
+ if it spans two blocks.
+
+As the RS calculation is called only if an error is detected and
+the process is computationally intensive, copying a few more bytes
+should not introduce performance issues.
+
+The issue was reported to cryptsetup with trivial reproducer
+ https://gitlab.com/cryptsetup/cryptsetup/-/issues/923
+
+Reproducer (with roots=20):
+
+ # create verity device with RS FEC
+ dd if=/dev/urandom of=data.img bs=4096 count=8 status=none
+ veritysetup format data.img hash.img --fec-device=fec.img --fec-roots=20 | \
+ awk '/^Root hash/{ print $3 }' >roothash
+
+ # create an erasure that should always be repairable with this roots setting
+ dd if=/dev/zero of=data.img conv=notrunc bs=1 count=4 seek=4 status=none
+
+ # try to read it through dm-verity
+ veritysetup open data.img test hash.img --fec-device=fec.img --fec-roots=20 $(cat roothash)
+ dd if=/dev/mapper/test of=/dev/null bs=4096 status=noxfer
+
+ Even now the log says it cannot repair it:
+ : verity-fec: 7:1: FEC 0: failed to correct: -74
+ : device-mapper: verity: 7:1: data block 0 is corrupted
+ ...
+
+With this fix, errors are properly repaired.
+ : verity-fec: 7:1: FEC 0: corrected 4 errors
+
+Signed-off-by: Milan Broz <gmazyland@gmail.com>
+Fixes: 8ca7cab82bda ("dm verity fec: fix misaligned RS roots IO")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Milan Broz <gmazyland@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-verity-fec.c | 39 ++++++++++++++++++++++++++-------------
+ 1 file changed, 26 insertions(+), 13 deletions(-)
+
+--- a/drivers/md/dm-verity-fec.c
++++ b/drivers/md/dm-verity-fec.c
+@@ -60,14 +60,19 @@ static int fec_decode_rs8(struct dm_veri
+ * to the data block. Caller is responsible for releasing buf.
+ */
+ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
+- unsigned int *offset, struct dm_buffer **buf)
++ unsigned int *offset, unsigned int par_buf_offset,
++ struct dm_buffer **buf)
+ {
+ u64 position, block, rem;
+ u8 *res;
+
++ /* We have already part of parity bytes read, skip to the next block */
++ if (par_buf_offset)
++ index++;
++
+ position = (index + rsb) * v->fec->roots;
+ block = div64_u64_rem(position, v->fec->io_size, &rem);
+- *offset = (unsigned int)rem;
++ *offset = par_buf_offset ? 0 : (unsigned int)rem;
+
+ res = dm_bufio_read(v->fec->bufio, block, buf);
+ if (IS_ERR(res)) {
+@@ -127,10 +132,11 @@ static int fec_decode_bufs(struct dm_ver
+ {
+ int r, corrected = 0, res;
+ struct dm_buffer *buf;
+- unsigned int n, i, offset;
+- u8 *par, *block;
++ unsigned int n, i, offset, par_buf_offset = 0;
++ u8 *par, *block, par_buf[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
+
+- par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
++ par = fec_read_parity(v, rsb, block_offset, &offset,
++ par_buf_offset, &buf);
+ if (IS_ERR(par))
+ return PTR_ERR(par);
+
+@@ -140,7 +146,8 @@ static int fec_decode_bufs(struct dm_ver
+ */
+ fec_for_each_buffer_rs_block(fio, n, i) {
+ block = fec_buffer_rs_block(v, fio, n, i);
+- res = fec_decode_rs8(v, fio, block, &par[offset], neras);
++ memcpy(&par_buf[par_buf_offset], &par[offset], v->fec->roots - par_buf_offset);
++ res = fec_decode_rs8(v, fio, block, par_buf, neras);
+ if (res < 0) {
+ r = res;
+ goto error;
+@@ -153,12 +160,21 @@ static int fec_decode_bufs(struct dm_ver
+ if (block_offset >= 1 << v->data_dev_block_bits)
+ goto done;
+
+- /* read the next block when we run out of parity bytes */
+- offset += v->fec->roots;
++ /* Read the next block when we run out of parity bytes */
++ offset += (v->fec->roots - par_buf_offset);
++ /* Check if parity bytes are split between blocks */
++ if (offset < v->fec->io_size && (offset + v->fec->roots) > v->fec->io_size) {
++ par_buf_offset = v->fec->io_size - offset;
++ memcpy(par_buf, &par[offset], par_buf_offset);
++ offset += par_buf_offset;
++ } else
++ par_buf_offset = 0;
++
+ if (offset >= v->fec->io_size) {
+ dm_bufio_release(buf);
+
+- par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
++ par = fec_read_parity(v, rsb, block_offset, &offset,
++ par_buf_offset, &buf);
+ if (IS_ERR(par))
+ return PTR_ERR(par);
+ }
+@@ -743,10 +759,7 @@ int verity_fec_ctr(struct dm_verity *v)
+ return -E2BIG;
+ }
+
+- if ((f->roots << SECTOR_SHIFT) & ((1 << v->data_dev_block_bits) - 1))
+- f->io_size = 1 << v->data_dev_block_bits;
+- else
+- f->io_size = v->fec->roots << SECTOR_SHIFT;
++ f->io_size = 1 << v->data_dev_block_bits;
+
+ f->bufio = dm_bufio_client_create(f->dev->bdev,
+ f->io_size,
acpi-resource-add-tongfang-gm5hg0a-to-irq1_edge_low_force_override.patch
acpi-resource-add-asus-vivobook-x1504vap-to-irq1_level_low_skip_override.patch
drm-amd-display-increase-max_surfaces-to-the-value-supported-by-hw.patch
+dm-verity-fec-fix-rs-fec-repair-for-roots-unaligned-to-block-size-take-2.patch
+bpf-add-mem_write-attribute.patch
+bpf-fix-overloading-of-mem_uninit-s-meaning.patch