--- /dev/null
+From 0af2ffc93a4b50948f9dad2786b7f1bd253bf0b9 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 15 Jan 2020 21:47:33 +0100
+Subject: bpf: Fix incorrect verifier simulation of ARSH under ALU32
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 0af2ffc93a4b50948f9dad2786b7f1bd253bf0b9 upstream.
+
+Anatoly has been fuzzing with kBdysch harness and reported a hang in one
+of the outcomes:
+
+ 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 0: (85) call bpf_get_socket_cookie#46
+ 1: R0_w=invP(id=0) R10=fp0
+ 1: (57) r0 &= 808464432
+ 2: R0_w=invP(id=0,umax_value=808464432,var_off=(0x0; 0x30303030)) R10=fp0
+ 2: (14) w0 -= 810299440
+ 3: R0_w=invP(id=0,umax_value=4294967295,var_off=(0xcf800000; 0x3077fff0)) R10=fp0
+ 3: (c4) w0 s>>= 1
+ 4: R0_w=invP(id=0,umin_value=1740636160,umax_value=2147221496,var_off=(0x67c00000; 0x183bfff8)) R10=fp0
+ 4: (76) if w0 s>= 0x30303030 goto pc+216
+ 221: R0_w=invP(id=0,umin_value=1740636160,umax_value=2147221496,var_off=(0x67c00000; 0x183bfff8)) R10=fp0
+ 221: (95) exit
+ processed 6 insns (limit 1000000) [...]
+
+Taking a closer look, the program was xlated as follows:
+
+ # ./bpftool p d x i 12
+ 0: (85) call bpf_get_socket_cookie#7800896
+ 1: (bf) r6 = r0
+ 2: (57) r6 &= 808464432
+ 3: (14) w6 -= 810299440
+ 4: (c4) w6 s>>= 1
+ 5: (76) if w6 s>= 0x30303030 goto pc+216
+ 6: (05) goto pc-1
+ 7: (05) goto pc-1
+ 8: (05) goto pc-1
+ [...]
+ 220: (05) goto pc-1
+ 221: (05) goto pc-1
+ 222: (95) exit
+
+Meaning, the visible effect is very similar to f54c7898ed1c ("bpf: Fix
+precision tracking for unbounded scalars"), that is, the fall-through
+branch in the instruction 5 is considered to be never taken given the
+conclusion from the min/max bounds tracking in w6, and therefore the
+dead-code sanitation rewrites it as goto pc-1. However, real-life input
+disagrees with verification analysis since a soft-lockup was observed.
+
+The bug sits in the analysis of the ARSH. The definition is that we shift
+the target register value right by K bits through shifting in copies of
+its sign bit. In adjust_scalar_min_max_vals(), we do first coerce the
+register into 32 bit mode, same happens after simulating the operation.
+However, for the case of simulating the actual ARSH, we don't take the
+mode into account and act as if it's always 64 bit, but location of sign
+bit is different:
+
+ dst_reg->smin_value >>= umin_val;
+ dst_reg->smax_value >>= umin_val;
+ dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
+
+Consider an unknown R0 where bpf_get_socket_cookie() (or others) would
+for example return 0xffff. With the above ARSH simulation, we'd see the
+following results:
+
+ [...]
+ 1: R1=ctx(id=0,off=0,imm=0) R2_w=invP65535 R10=fp0
+ 1: (85) call bpf_get_socket_cookie#46
+ 2: R0_w=invP(id=0) R10=fp0
+ 2: (57) r0 &= 808464432
+ -> R0_runtime = 0x3030
+ 3: R0_w=invP(id=0,umax_value=808464432,var_off=(0x0; 0x30303030)) R10=fp0
+ 3: (14) w0 -= 810299440
+ -> R0_runtime = 0xcfb40000
+ 4: R0_w=invP(id=0,umax_value=4294967295,var_off=(0xcf800000; 0x3077fff0)) R10=fp0
+ (0xffffffff)
+ 4: (c4) w0 s>>= 1
+ -> R0_runtime = 0xe7da0000
+ 5: R0_w=invP(id=0,umin_value=1740636160,umax_value=2147221496,var_off=(0x67c00000; 0x183bfff8)) R10=fp0
+ (0x67c00000) (0x7ffbfff8)
+ [...]
+
+In insn 3, we have a runtime value of 0xcfb40000, which is '1100 1111 1011
+0100 0000 0000 0000 0000', the result after the shift is 0xe7da0000, that
+is '1110 0111 1101 1010 0000 0000 0000 0000', where the sign bit is correctly
+retained in 32 bit mode. In insn 4, the umax was 0xffffffff, and changed into
+0x7ffbfff8 after the shift, that is, '0111 1111 1111 1011 1111 1111 1111 1000'
+and means here that the simulation didn't retain the sign bit. With above
+logic, the updates happen on the 64 bit min/max bounds and given we coerced
+the register, the sign bits of the bounds are cleared as well, meaning, we
+need to force the simulation into s32 space for 32 bit alu mode.
+
+Verification after the fix below. We're first analyzing the fall-through branch
+on 32 bit signed >= test eventually leading to rejection of the program in this
+specific case:
+
+ 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 0: (b7) r2 = 808464432
+ 1: R1=ctx(id=0,off=0,imm=0) R2_w=invP808464432 R10=fp0
+ 1: (85) call bpf_get_socket_cookie#46
+ 2: R0_w=invP(id=0) R10=fp0
+ 2: (bf) r6 = r0
+ 3: R0_w=invP(id=0) R6_w=invP(id=0) R10=fp0
+ 3: (57) r6 &= 808464432
+ 4: R0_w=invP(id=0) R6_w=invP(id=0,umax_value=808464432,var_off=(0x0; 0x30303030)) R10=fp0
+ 4: (14) w6 -= 810299440
+ 5: R0_w=invP(id=0) R6_w=invP(id=0,umax_value=4294967295,var_off=(0xcf800000; 0x3077fff0)) R10=fp0
+ 5: (c4) w6 s>>= 1
+ 6: R0_w=invP(id=0) R6_w=invP(id=0,umin_value=3888119808,umax_value=4294705144,var_off=(0xe7c00000; 0x183bfff8)) R10=fp0
+ (0x67c00000) (0xfffbfff8)
+ 6: (76) if w6 s>= 0x30303030 goto pc+216
+ 7: R0_w=invP(id=0) R6_w=invP(id=0,umin_value=3888119808,umax_value=4294705144,var_off=(0xe7c00000; 0x183bfff8)) R10=fp0
+ 7: (30) r0 = *(u8 *)skb[808464432]
+ BPF_LD_[ABS|IND] uses reserved fields
+ processed 8 insns (limit 1000000) [...]
+
+Fixes: 9cbe1f5a32dc ("bpf/verifier: improve register value range tracking with ARSH")
+Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Yonghong Song <yhs@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/bpf/20200115204733.16648-1-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/tnum.h | 2 +-
+ kernel/bpf/tnum.c | 9 +++++++--
+ kernel/bpf/verifier.c | 13 ++++++++++---
+ 3 files changed, 18 insertions(+), 6 deletions(-)
+
+--- a/include/linux/tnum.h
++++ b/include/linux/tnum.h
+@@ -30,7 +30,7 @@ struct tnum tnum_lshift(struct tnum a, u
+ /* Shift (rsh) a tnum right (by a fixed shift) */
+ struct tnum tnum_rshift(struct tnum a, u8 shift);
+ /* Shift (arsh) a tnum right (by a fixed min_shift) */
+-struct tnum tnum_arshift(struct tnum a, u8 min_shift);
++struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness);
+ /* Add two tnums, return @a + @b */
+ struct tnum tnum_add(struct tnum a, struct tnum b);
+ /* Subtract two tnums, return @a - @b */
+--- a/kernel/bpf/tnum.c
++++ b/kernel/bpf/tnum.c
+@@ -44,14 +44,19 @@ struct tnum tnum_rshift(struct tnum a, u
+ return TNUM(a.value >> shift, a.mask >> shift);
+ }
+
+-struct tnum tnum_arshift(struct tnum a, u8 min_shift)
++struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness)
+ {
+ /* if a.value is negative, arithmetic shifting by minimum shift
+ * will have larger negative offset compared to more shifting.
+ * If a.value is nonnegative, arithmetic shifting by minimum shift
+ * will have larger positive offset compare to more shifting.
+ */
+- return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
++ if (insn_bitness == 32)
++ return TNUM((u32)(((s32)a.value) >> min_shift),
++ (u32)(((s32)a.mask) >> min_shift));
++ else
++ return TNUM((s64)a.value >> min_shift,
++ (s64)a.mask >> min_shift);
+ }
+
+ struct tnum tnum_add(struct tnum a, struct tnum b)
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4824,9 +4824,16 @@ static int adjust_scalar_min_max_vals(st
+ /* Upon reaching here, src_known is true and
+ * umax_val is equal to umin_val.
+ */
+- dst_reg->smin_value >>= umin_val;
+- dst_reg->smax_value >>= umin_val;
+- dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
++ if (insn_bitness == 32) {
++ dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val);
++ dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val);
++ } else {
++ dst_reg->smin_value >>= umin_val;
++ dst_reg->smax_value >>= umin_val;
++ }
++
++ dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val,
++ insn_bitness);
+
+ /* blow away the dst_reg umin_value/umax_value and rely on
+ * dst_reg var_off to refine the result.
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
- fs/btrfs/inode.c | 46 ++++++++++++++++++++--------------------------
+ fs/btrfs/inode.c | 46 ++++++++++++++++++++--------------------------
1 file changed, 20 insertions(+), 26 deletions(-)
-diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
-index 9e2a9b5449d9..6f0568fb5899 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
-@@ -4215,18 +4215,30 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+@@ -4215,18 +4215,30 @@ out:
}
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
-@@ -4475,8 +4487,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
+@@ -4464,8 +4476,7 @@ int btrfs_delete_subvolume(struct inode
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
if (ret) {
err = ret;
btrfs_abort_transaction(trans, ret);
-@@ -4571,10 +4582,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+@@ -4560,10 +4571,7 @@ static int btrfs_rmdir(struct inode *dir
return PTR_ERR(trans);
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
goto out;
}
-@@ -9530,7 +9538,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+@@ -9519,7 +9527,6 @@ static int btrfs_rename_exchange(struct
u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
u64 old_idx = 0;
u64 new_idx = 0;
int ret;
bool root_log_pinned = false;
bool dest_log_pinned = false;
-@@ -9636,10 +9643,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+@@ -9625,10 +9632,7 @@ static int btrfs_rename_exchange(struct
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
} else { /* src is an inode */
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(old_dentry->d_inode),
-@@ -9655,10 +9659,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+@@ -9644,10 +9648,7 @@ static int btrfs_rename_exchange(struct
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
} else { /* dest is an inode */
ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
BTRFS_I(new_dentry->d_inode),
-@@ -9856,7 +9857,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+@@ -9845,7 +9846,6 @@ static int btrfs_rename(struct inode *ol
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
u64 index = 0;
int ret;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
bool log_pinned = false;
-@@ -9964,10 +9964,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+@@ -9953,10 +9953,7 @@ static int btrfs_rename(struct inode *ol
BTRFS_I(old_inode), 1);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
} else {
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
-@@ -9986,10 +9983,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+@@ -9975,10 +9972,7 @@ static int btrfs_rename(struct inode *ol
new_inode->i_ctime = current_time(new_inode);
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
BUG_ON(new_inode->i_nlink == 0);
} else {
ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
---
-2.20.1
-