From c08b5d2acf798e1c870b2e752b529f0aed0bf73d Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 18 Apr 2021 20:27:09 -0400 Subject: [PATCH] Fixes for 5.11 Signed-off-by: Sasha Levin --- ...eg-has-no-mixed-signed-bounds-for-al.patch | 87 ++++ ...f-move-off_reg-into-sanitize_ptr_alu.patch | 60 +++ .../kasan-fix-hwasan-build-for-gcc.patch | 84 ++++ ...kasan-remove-redundant-config-option.patch | 216 ++++++++++ ...vert-vcpu_vmx.exit_reason-to-a-union.patch | 401 ++++++++++++++++++ ...e-vcpu-run-internal.ndata-as-an-arra.patch | 59 +++ ...-don-t-advertise-pause-in-jumbo-mode.patch | 57 +++ ...read-request-size-for-newer-chips-al.patch | 64 +++ queue-5.11/series | 8 + 9 files changed, 1036 insertions(+) create mode 100644 queue-5.11/bpf-ensure-off_reg-has-no-mixed-signed-bounds-for-al.patch create mode 100644 queue-5.11/bpf-move-off_reg-into-sanitize_ptr_alu.patch create mode 100644 queue-5.11/kasan-fix-hwasan-build-for-gcc.patch create mode 100644 queue-5.11/kasan-remove-redundant-config-option.patch create mode 100644 queue-5.11/kvm-vmx-convert-vcpu_vmx.exit_reason-to-a-union.patch create mode 100644 queue-5.11/kvm-vmx-don-t-use-vcpu-run-internal.ndata-as-an-arra.patch create mode 100644 queue-5.11/r8169-don-t-advertise-pause-in-jumbo-mode.patch create mode 100644 queue-5.11/r8169-tweak-max-read-request-size-for-newer-chips-al.patch diff --git a/queue-5.11/bpf-ensure-off_reg-has-no-mixed-signed-bounds-for-al.patch b/queue-5.11/bpf-ensure-off_reg-has-no-mixed-signed-bounds-for-al.patch new file mode 100644 index 00000000000..f5349324846 --- /dev/null +++ b/queue-5.11/bpf-ensure-off_reg-has-no-mixed-signed-bounds-for-al.patch @@ -0,0 +1,87 @@ +From 83f4669778de43a91b3d30a8c5d695b51c8e15fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Mar 2021 08:51:02 +0100 +Subject: bpf: Ensure off_reg has no mixed signed bounds for all types + +From: Daniel Borkmann + +[ Upstream commit 24c109bb1537c12c02aeed2d51a347b4d6a9b76e ] + +The mixed signed bounds check really belongs into retrieve_ptr_limit() +instead of outside of it in adjust_ptr_min_max_vals(). The reason is +that this check is not tied to PTR_TO_MAP_VALUE only, but to all pointer +types that we handle in retrieve_ptr_limit() and given errors from the latter +propagate back to adjust_ptr_min_max_vals() and lead to rejection of the +program, it's a better place to reside to avoid anything slipping through +for future types. The reason why we must reject such off_reg is that we +otherwise would not be able to derive a mask, see details in 9d7eceede769 +("bpf: restrict unknown scalars of mixed signed bounds for unprivileged"). 
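+
+Put differently, the verifier can only mask a pointer ALU operation when
+it knows which way the offset moves the pointer.  A minimal standalone
+sketch of the rejected case (the reg_bounds struct and names here are
+made up, standing in for struct bpf_reg_state; only the comparison
+itself mirrors the hunk below):
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    /* Stand-in for the verifier's register state; illustrative only. */
+    struct reg_bounds {
+        long long smin;   /* smallest signed value the register may hold */
+        long long smax;   /* largest signed value the register may hold */
+        bool is_const;    /* corresponds to tnum_is_const(var_off) */
+    };
+
+    /* Mirrors the check added to retrieve_ptr_limit(): a non-constant
+     * offset whose signed bounds straddle zero is rejected, because no
+     * single masking direction (left for negative, right for positive
+     * offsets) would be safe. */
+    static int check_off_reg(const struct reg_bounds *off)
+    {
+        if (!off->is_const && (off->smin < 0) != (off->smax < 0))
+            return -1;    /* the kernel returns -EACCES here */
+        return 0;
+    }
+
+    int main(void)
+    {
+        struct reg_bounds mixed = { .smin = -4, .smax = 8, .is_const = false };
+        struct reg_bounds neg = { .smin = -8, .smax = -1, .is_const = false };
+
+        printf("mixed bounds:  %s\n", check_off_reg(&mixed) ? "rejected" : "ok");
+        printf("negative-only: %s\n", check_off_reg(&neg) ? "rejected" : "ok");
+        return 0;
+    }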
+
+Signed-off-by: Daniel Borkmann
+Reviewed-by: John Fastabend
+Acked-by: Alexei Starovoitov
+Signed-off-by: Sasha Levin
+---
+ kernel/bpf/verifier.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index b654174619e5..fe473e565092 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5385,12 +5385,18 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+ }
+
+ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
+-			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
++			      const struct bpf_reg_state *off_reg,
++			      u32 *ptr_limit, u8 opcode)
+ {
++	bool off_is_neg = off_reg->smin_value < 0;
+ 	bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
+ 			    (opcode == BPF_SUB && !off_is_neg);
+ 	u32 off, max;
+
++	if (!tnum_is_const(off_reg->var_off) &&
++	    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
++		return -EACCES;
++
+ 	switch (ptr_reg->type) {
+ 	case PTR_TO_STACK:
+ 		/* Offset 0 is out-of-bounds, but acceptable start for the
+@@ -5483,7 +5489,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
+ 	alu_state |= ptr_is_dst_reg ?
+ 		     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+-	err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg);
++	err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode);
+ 	if (err < 0)
+ 		return err;
+
+@@ -5528,8 +5534,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
+ 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
+ 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
+-	u32 dst = insn->dst_reg, src = insn->src_reg;
+ 	u8 opcode = BPF_OP(insn->code);
++	u32 dst = insn->dst_reg;
+ 	int ret;
+
+ 	dst_reg = &regs[dst];
+@@ -5577,13 +5583,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
+ 			dst, reg_type_str[ptr_reg->type]);
+ 		return -EACCES;
+-	case PTR_TO_MAP_VALUE:
+-		if (!env->bypass_spec_v1 && !known && (smin_val < 0) != (smax_val < 0)) {
+-			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
+-				off_reg == dst_reg ? dst : src);
+-			return -EACCES;
+-		}
+-		fallthrough;
+ 	default:
+ 		break;
+ 	}
+--
+2.30.2
+
diff --git a/queue-5.11/bpf-move-off_reg-into-sanitize_ptr_alu.patch b/queue-5.11/bpf-move-off_reg-into-sanitize_ptr_alu.patch
new file mode 100644
index 00000000000..eee3fbb235d
--- /dev/null
+++ b/queue-5.11/bpf-move-off_reg-into-sanitize_ptr_alu.patch
@@ -0,0 +1,60 @@
+From c5e027419158e72176bb6ef7cc3a69aab4342eb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 22 Mar 2021 15:45:52 +0100
+Subject: bpf: Move off_reg into sanitize_ptr_alu
+
+From: Daniel Borkmann
+
+[ Upstream commit 6f55b2f2a1178856c19bbce2f71449926e731914 ]
+
+Small refactor to drag off_reg into sanitize_ptr_alu(), so we later on can
+use off_reg for generalizing some of the checks for all pointer types.
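+
+The calling-convention change, reduced to a standalone sketch (the
+types and names here are illustrative, not the verifier's own): rather
+than having every call site pre-compute an off_is_neg bool, the
+register state is passed down and the callee derives the flag itself:
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    struct reg_state {        /* stand-in for struct bpf_reg_state */
+        long long smin_value;
+        long long smax_value;
+    };
+
+    /* Old shape: every caller had to pre-compute the flag. */
+    static bool sanitize_old(bool off_is_neg)
+    {
+        return off_is_neg;
+    }
+
+    /* New shape: the callee receives the register state and derives
+     * what it needs, so later checks can consult other off_reg fields
+     * as well (as the previous patch then does). */
+    static bool sanitize_new(const struct reg_state *off_reg)
+    {
+        return off_reg->smin_value < 0;
+    }
+
+    int main(void)
+    {
+        struct reg_state off = { .smin_value = -1, .smax_value = 7 };
+
+        printf("old: %d, new: %d\n",
+               sanitize_old(off.smin_value < 0), sanitize_new(&off));
+        return 0;
+    }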
+ +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Acked-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index fe473e565092..afb2ae55c9a7 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5463,11 +5463,12 @@ static int sanitize_val_alu(struct bpf_verifier_env *env, + static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, +- struct bpf_reg_state *dst_reg, +- bool off_is_neg) ++ const struct bpf_reg_state *off_reg, ++ struct bpf_reg_state *dst_reg) + { + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_insn_aux_data *aux = cur_aux(env); ++ bool off_is_neg = off_reg->smin_value < 0; + bool ptr_is_dst_reg = ptr_reg == dst_reg; + u8 opcode = BPF_OP(insn->code); + u32 alu_state, alu_limit; +@@ -5602,7 +5603,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + + switch (opcode) { + case BPF_ADD: +- ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); + if (ret < 0) { + verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); + return ret; +@@ -5657,7 +5658,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + } + break; + case BPF_SUB: +- ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); + if (ret < 0) { + verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); + return ret; +-- +2.30.2 + diff --git a/queue-5.11/kasan-fix-hwasan-build-for-gcc.patch b/queue-5.11/kasan-fix-hwasan-build-for-gcc.patch new file mode 100644 index 00000000000..bc698479111 --- /dev/null +++ b/queue-5.11/kasan-fix-hwasan-build-for-gcc.patch @@ -0,0 +1,84 @@ +From 46972ae8971d848e40ae654e52719e52ea8d3a62 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Apr 2021 15:45:57 -0700 +Subject: kasan: fix hwasan build for gcc + +From: Arnd Bergmann + +[ Upstream commit 5c595ac4c776c44b5c59de22ab43b3fe256d9fbb ] + +gcc-11 adds support for -fsanitize=kernel-hwaddress, so it becomes +possible to enable CONFIG_KASAN_SW_TAGS. + +Unfortunately this fails to build at the moment, because the +corresponding command line arguments use llvm specific syntax. + +Change it to use the cc-param macro instead, which works on both clang +and gcc. 
+
+[elver@google.com: fixup for "kasan: fix hwasan build for gcc"]
+ Link: https://lkml.kernel.org/r/YHQZVfVVLE/LDK2v@elver.google.com
+
+Link: https://lkml.kernel.org/r/20210323124112.1229772-1-arnd@kernel.org
+Signed-off-by: Arnd Bergmann
+Signed-off-by: Marco Elver
+Reviewed-by: Marco Elver
+Acked-by: Andrey Konovalov
+Cc: Masahiro Yamada
+Cc: Michal Marek
+Cc: Andrey Ryabinin
+Cc: Nathan Chancellor
+Cc: Nick Desaulniers
+Cc: Alexander Potapenko
+Cc: Dmitry Vyukov
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ scripts/Makefile.kasan | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan
+index 1e000cc2e7b4..127012f45166 100644
+--- a/scripts/Makefile.kasan
++++ b/scripts/Makefile.kasan
+@@ -2,6 +2,8 @@
+ CFLAGS_KASAN_NOSANITIZE := -fno-builtin
+ KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET)
+
++cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1)))
++
+ ifdef CONFIG_KASAN_GENERIC
+
+ ifdef CONFIG_KASAN_INLINE
+@@ -12,8 +14,6 @@ endif
+
+ CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address
+
+-cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1)))
+-
+ # -fasan-shadow-offset fails without -fsanitize
+ CFLAGS_KASAN_SHADOW := $(call cc-option, -fsanitize=kernel-address \
+ 			-fasan-shadow-offset=$(KASAN_SHADOW_OFFSET), \
+@@ -36,14 +36,14 @@ endif # CONFIG_KASAN_GENERIC
+ ifdef CONFIG_KASAN_SW_TAGS
+
+ ifdef CONFIG_KASAN_INLINE
+-	instrumentation_flags := -mllvm -hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET)
++	instrumentation_flags := $(call cc-param,hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET))
+ else
+-	instrumentation_flags := -mllvm -hwasan-instrument-with-calls=1
++	instrumentation_flags := $(call cc-param,hwasan-instrument-with-calls=1)
+ endif
+
+ CFLAGS_KASAN := -fsanitize=kernel-hwaddress \
+-		-mllvm -hwasan-instrument-stack=$(CONFIG_KASAN_STACK) \
+-		-mllvm -hwasan-use-short-granules=0 \
++		$(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \
++		$(call cc-param,hwasan-use-short-granules=0) \
+ 		$(instrumentation_flags)
+
+ endif # CONFIG_KASAN_SW_TAGS
+--
+2.30.2
+
diff --git a/queue-5.11/kasan-remove-redundant-config-option.patch b/queue-5.11/kasan-remove-redundant-config-option.patch
new file mode 100644
index 00000000000..2322ed8c573
--- /dev/null
+++ b/queue-5.11/kasan-remove-redundant-config-option.patch
@@ -0,0 +1,216 @@
+From ea48828e5c9d4055b5e42955898e4612e0d4bf27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 16 Apr 2021 15:46:00 -0700
+Subject: kasan: remove redundant config option
+
+From: Walter Wu
+
+[ Upstream commit 02c587733c8161355a43e6e110c2e29bd0acff72 ]
+
+CONFIG_KASAN_STACK and CONFIG_KASAN_STACK_ENABLE both enable KASAN stack
+instrumentation, but only one config option is needed, so remove
+CONFIG_KASAN_STACK_ENABLE and make CONFIG_KASAN_STACK usable on its own;
+see [1].
+
+With stack instrumentation enabled, the option is offered without a
+prompt and defaults to y for gcc, while for clang it is prompted and
+defaults to n.
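+
+For background, a sketch of why #ifdef is needed once the option becomes
+bool (simulated symbol, not kernel code): a bool Kconfig option is
+either defined to 1 or not defined at all, never to 0, so a plain #if
+trips -Wundef whenever the option is off:
+
+    #include <stdio.h>
+
+    /* With the bool option enabled, Kconfig generates exactly:
+     *     #define CONFIG_KASAN_STACK 1
+     * and generates no definition at all when it is unset.
+     * (Uncomment the next line to simulate "enabled".) */
+    /* #define CONFIG_KASAN_STACK 1 */
+
+    #if defined(CONFIG_KASAN_STACK)    /* safe whether or not it exists */
+    static const int stack_instrumented = 1;
+    #else
+    static const int stack_instrumented = 0;
+    #endif
+
+    /* A plain "#if CONFIG_KASAN_STACK" would instead evaluate an
+     * undefined symbol and, when built with -Wundef, produce the
+     * warning quoted below. */
+
+    int main(void)
+    {
+        printf("stack instrumentation: %d\n", stack_instrumented);
+        return 0;
+    }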
+ +This patch fixes the following compilation warning: + + include/linux/kasan.h:333:30: warning: 'CONFIG_KASAN_STACK' is not defined, evaluates to 0 [-Wundef] + +[akpm@linux-foundation.org: fix merge snafu] + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=210221 [1] +Link: https://lkml.kernel.org/r/20210226012531.29231-1-walter-zh.wu@mediatek.com +Fixes: d9b571c885a8 ("kasan: fix KASAN_STACK dependency for HW_TAGS") +Signed-off-by: Walter Wu +Suggested-by: Dmitry Vyukov +Reviewed-by: Nathan Chancellor +Acked-by: Arnd Bergmann +Reviewed-by: Andrey Konovalov +Cc: Andrey Ryabinin +Cc: Dmitry Vyukov +Cc: Alexander Potapenko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + arch/arm64/kernel/sleep.S | 2 +- + arch/x86/kernel/acpi/wakeup_64.S | 2 +- + include/linux/kasan.h | 2 +- + lib/Kconfig.kasan | 9 ++------- + mm/kasan/common.c | 2 +- + mm/kasan/kasan.h | 2 +- + mm/kasan/report_generic.c | 2 +- + scripts/Makefile.kasan | 10 ++++++++-- + security/Kconfig.hardening | 4 ++-- + 9 files changed, 18 insertions(+), 17 deletions(-) + +diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S +index 6bdef7362c0e..7c44ede122a9 100644 +--- a/arch/arm64/kernel/sleep.S ++++ b/arch/arm64/kernel/sleep.S +@@ -133,7 +133,7 @@ SYM_FUNC_START(_cpu_resume) + */ + bl cpu_do_resume + +-#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK ++#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) + mov x0, sp + bl kasan_unpoison_task_stack_below + #endif +diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S +index 5d3a0b8fd379..c7f412f4e07d 100644 +--- a/arch/x86/kernel/acpi/wakeup_64.S ++++ b/arch/x86/kernel/acpi/wakeup_64.S +@@ -112,7 +112,7 @@ SYM_FUNC_START(do_suspend_lowlevel) + movq pt_regs_r14(%rax), %r14 + movq pt_regs_r15(%rax), %r15 + +-#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK ++#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) + /* + * The suspend path may have poisoned some areas deeper in the stack, + * which we now need to unpoison. +diff --git a/include/linux/kasan.h b/include/linux/kasan.h +index 0aea9e2a2a01..f2980f010a48 100644 +--- a/include/linux/kasan.h ++++ b/include/linux/kasan.h +@@ -306,7 +306,7 @@ static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} + + #endif /* CONFIG_KASAN */ + +-#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK ++#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) + void kasan_unpoison_task_stack(struct task_struct *task); + #else + static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan +index 0d3b7940cf43..fde82ec85f8f 100644 +--- a/lib/Kconfig.kasan ++++ b/lib/Kconfig.kasan +@@ -138,9 +138,10 @@ config KASAN_INLINE + + endchoice + +-config KASAN_STACK_ENABLE ++config KASAN_STACK + bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST + depends on KASAN_GENERIC || KASAN_SW_TAGS ++ default y if CC_IS_GCC + help + The LLVM stack address sanitizer has a know problem that + causes excessive stack usage in a lot of functions, see +@@ -154,12 +155,6 @@ config KASAN_STACK_ENABLE + CONFIG_COMPILE_TEST. On gcc it is assumed to always be safe + to use and enabled by default. 
+ +-config KASAN_STACK +- int +- depends on KASAN_GENERIC || KASAN_SW_TAGS +- default 1 if KASAN_STACK_ENABLE || CC_IS_GCC +- default 0 +- + config KASAN_SW_TAGS_IDENTIFY + bool "Enable memory corruption identification" + depends on KASAN_SW_TAGS +diff --git a/mm/kasan/common.c b/mm/kasan/common.c +index b25167664ead..38ceb759f853 100644 +--- a/mm/kasan/common.c ++++ b/mm/kasan/common.c +@@ -63,7 +63,7 @@ void __kasan_unpoison_range(const void *address, size_t size) + unpoison_range(address, size); + } + +-#if CONFIG_KASAN_STACK ++#ifdef CONFIG_KASAN_STACK + /* Unpoison the entire stack for a task. */ + void kasan_unpoison_task_stack(struct task_struct *task) + { +diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h +index 8c706e7652f2..daa06aa5ea19 100644 +--- a/mm/kasan/kasan.h ++++ b/mm/kasan/kasan.h +@@ -224,7 +224,7 @@ void *find_first_bad_addr(void *addr, size_t size); + const char *get_bug_type(struct kasan_access_info *info); + void metadata_fetch_row(char *buffer, void *row); + +-#if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK ++#if defined(CONFIG_KASAN_GENERIC) && defined(CONFIG_KASAN_STACK) + void print_address_stack_frame(const void *addr); + #else + static inline void print_address_stack_frame(const void *addr) { } +diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c +index 8a9c889872da..4e16518d9877 100644 +--- a/mm/kasan/report_generic.c ++++ b/mm/kasan/report_generic.c +@@ -128,7 +128,7 @@ void metadata_fetch_row(char *buffer, void *row) + memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); + } + +-#if CONFIG_KASAN_STACK ++#ifdef CONFIG_KASAN_STACK + static bool __must_check tokenize_frame_descr(const char **frame_descr, + char *token, size_t max_tok_len, + unsigned long *value) +diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan +index 127012f45166..3d791908ed36 100644 +--- a/scripts/Makefile.kasan ++++ b/scripts/Makefile.kasan +@@ -4,6 +4,12 @@ KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) + + cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) + ++ifdef CONFIG_KASAN_STACK ++ stack_enable := 1 ++else ++ stack_enable := 0 ++endif ++ + ifdef CONFIG_KASAN_GENERIC + + ifdef CONFIG_KASAN_INLINE +@@ -27,7 +33,7 @@ else + CFLAGS_KASAN := $(CFLAGS_KASAN_SHADOW) \ + $(call cc-param,asan-globals=1) \ + $(call cc-param,asan-instrumentation-with-call-threshold=$(call_threshold)) \ +- $(call cc-param,asan-stack=$(CONFIG_KASAN_STACK)) \ ++ $(call cc-param,asan-stack=$(stack_enable)) \ + $(call cc-param,asan-instrument-allocas=1) + endif + +@@ -42,7 +48,7 @@ else + endif + + CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ +- $(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \ ++ $(call cc-param,hwasan-instrument-stack=$(stack_enable)) \ + $(call cc-param,hwasan-use-short-granules=0) \ + $(instrumentation_flags) + +diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening +index 269967c4fc1b..a56c36470cb1 100644 +--- a/security/Kconfig.hardening ++++ b/security/Kconfig.hardening +@@ -64,7 +64,7 @@ choice + config GCC_PLUGIN_STRUCTLEAK_BYREF + bool "zero-init structs passed by reference (strong)" + depends on GCC_PLUGINS +- depends on !(KASAN && KASAN_STACK=1) ++ depends on !(KASAN && KASAN_STACK) + select GCC_PLUGIN_STRUCTLEAK + help + Zero-initialize any structures on the stack that may +@@ -82,7 +82,7 @@ choice + config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL + bool "zero-init anything passed by reference (very strong)" + depends on GCC_PLUGINS +- depends on !(KASAN && KASAN_STACK=1) ++ depends on 
!(KASAN && KASAN_STACK) + select GCC_PLUGIN_STRUCTLEAK + help + Zero-initialize any stack variables that may be passed +-- +2.30.2 + diff --git a/queue-5.11/kvm-vmx-convert-vcpu_vmx.exit_reason-to-a-union.patch b/queue-5.11/kvm-vmx-convert-vcpu_vmx.exit_reason-to-a-union.patch new file mode 100644 index 00000000000..db0baba0e7c --- /dev/null +++ b/queue-5.11/kvm-vmx-convert-vcpu_vmx.exit_reason-to-a-union.patch @@ -0,0 +1,401 @@ +From e15928c2ee60567a1ede4431549da95e49ce4f11 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Nov 2020 17:03:12 +0800 +Subject: KVM: VMX: Convert vcpu_vmx.exit_reason to a union + +From: Sean Christopherson + +[ Upstream commit 8e53324021645f820a01bf8aa745711c802c8542 ] + +Convert vcpu_vmx.exit_reason from a u32 to a union (of size u32). The +full VM_EXIT_REASON field is comprised of a 16-bit basic exit reason in +bits 15:0, and single-bit modifiers in bits 31:16. + +Historically, KVM has only had to worry about handling the "failed +VM-Entry" modifier, which could only be set in very specific flows and +required dedicated handling. I.e. manually stripping the FAILED_VMENTRY +bit was a somewhat viable approach. But even with only a single bit to +worry about, KVM has had several bugs related to comparing a basic exit +reason against the full exit reason store in vcpu_vmx. + +Upcoming Intel features, e.g. SGX, will add new modifier bits that can +be set on more or less any VM-Exit, as opposed to the significantly more +restricted FAILED_VMENTRY, i.e. correctly handling everything in one-off +flows isn't scalable. Tracking exit reason in a union forces code to +explicitly choose between consuming the full exit reason and the basic +exit, and is a convenient way to document and access the modifiers. + +No functional change intended. 
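+
+As a rough standalone illustration of the union-with-bitfields pattern
+(the field layout is copied from the vmx.h hunk below; bitfield ordering
+is implementation-defined in general, which the kernel's supported
+toolchains pin down): writing the raw hardware value through .full lets
+readers consume either the 16-bit basic reason or an individual modifier
+bit without manual masking:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    union exit_reason {
+        struct {
+            uint32_t basic             : 16; /* bits 15:0 */
+            uint32_t reserved          : 11; /* bits 26:16, collapsed here */
+            uint32_t enclave_mode      : 1;  /* bit 27 */
+            uint32_t smi_pending_mtf   : 1;  /* bit 28 */
+            uint32_t smi_from_vmx_root : 1;  /* bit 29 */
+            uint32_t reserved30        : 1;  /* bit 30 */
+            uint32_t failed_vmentry    : 1;  /* bit 31 */
+        };
+        uint32_t full;                       /* the raw VM_EXIT_REASON */
+    };
+
+    int main(void)
+    {
+        union exit_reason er;
+
+        /* Failed VM-Entry modifier set, basic exit reason 33. */
+        er.full = (1u << 31) | 33;
+        printf("basic=%u failed_vmentry=%u full=0x%08x\n",
+               er.basic, er.failed_vmentry, er.full);
+        return 0;
+    }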
+ +Cc: Xiaoyao Li +Signed-off-by: Sean Christopherson +Signed-off-by: Chenyi Qiang +Message-Id: <20201106090315.18606-2-chenyi.qiang@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/vmx/nested.c | 42 +++++++++++++++--------- + arch/x86/kvm/vmx/vmx.c | 68 ++++++++++++++++++++------------------- + arch/x86/kvm/vmx/vmx.h | 25 +++++++++++++- + 3 files changed, 86 insertions(+), 49 deletions(-) + +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index f2b9bfb58206..cb48236cc24d 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3330,7 +3330,11 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + enum vm_entry_failure_code entry_failure_code; + bool evaluate_pending_interrupts; +- u32 exit_reason, failed_index; ++ union vmx_exit_reason exit_reason = { ++ .basic = EXIT_REASON_INVALID_STATE, ++ .failed_vmentry = 1, ++ }; ++ u32 failed_index; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); +@@ -3382,7 +3386,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + + if (nested_vmx_check_guest_state(vcpu, vmcs12, + &entry_failure_code)) { +- exit_reason = EXIT_REASON_INVALID_STATE; ++ exit_reason.basic = EXIT_REASON_INVALID_STATE; + vmcs12->exit_qualification = entry_failure_code; + goto vmentry_fail_vmexit; + } +@@ -3393,7 +3397,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + vcpu->arch.tsc_offset += vmcs12->tsc_offset; + + if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) { +- exit_reason = EXIT_REASON_INVALID_STATE; ++ exit_reason.basic = EXIT_REASON_INVALID_STATE; + vmcs12->exit_qualification = entry_failure_code; + goto vmentry_fail_vmexit_guest_mode; + } +@@ -3403,7 +3407,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + vmcs12->vm_entry_msr_load_addr, + vmcs12->vm_entry_msr_load_count); + if (failed_index) { +- exit_reason = EXIT_REASON_MSR_LOAD_FAIL; ++ exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL; + vmcs12->exit_qualification = failed_index; + goto vmentry_fail_vmexit_guest_mode; + } +@@ -3471,7 +3475,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + return NVMX_VMENTRY_VMEXIT; + + load_vmcs12_host_state(vcpu, vmcs12); +- vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; ++ vmcs12->vm_exit_reason = exit_reason.full; + if (enable_shadow_vmcs || vmx->nested.hv_evmcs) + vmx->nested.need_vmcs12_to_shadow_sync = true; + return NVMX_VMENTRY_VMEXIT; +@@ -5559,7 +5563,12 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) + return kvm_skip_emulated_instruction(vcpu); + + fail: +- nested_vmx_vmexit(vcpu, vmx->exit_reason, ++ /* ++ * This is effectively a reflected VM-Exit, as opposed to a synthesized ++ * nested VM-Exit. Pass the original exit reason, i.e. don't hardcode ++ * EXIT_REASON_VMFUNC as the exit reason. ++ */ ++ nested_vmx_vmexit(vcpu, vmx->exit_reason.full, + vmx_get_intr_info(vcpu), + vmx_get_exit_qual(vcpu)); + return 1; +@@ -5627,7 +5636,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. 
+ */ + static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, +- struct vmcs12 *vmcs12, u32 exit_reason) ++ struct vmcs12 *vmcs12, ++ union vmx_exit_reason exit_reason) + { + u32 msr_index = kvm_rcx_read(vcpu); + gpa_t bitmap; +@@ -5641,7 +5651,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, + * First we need to figure out which of the four to use: + */ + bitmap = vmcs12->msr_bitmap; +- if (exit_reason == EXIT_REASON_MSR_WRITE) ++ if (exit_reason.basic == EXIT_REASON_MSR_WRITE) + bitmap += 2048; + if (msr_index >= 0xc0000000) { + msr_index -= 0xc0000000; +@@ -5778,11 +5788,12 @@ static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) + * Return true if L0 wants to handle an exit from L2 regardless of whether or not + * L1 wants the exit. Only call this when in is_guest_mode (L2). + */ +-static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) ++static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, ++ union vmx_exit_reason exit_reason) + { + u32 intr_info; + +- switch ((u16)exit_reason) { ++ switch ((u16)exit_reason.basic) { + case EXIT_REASON_EXCEPTION_NMI: + intr_info = vmx_get_intr_info(vcpu); + if (is_nmi(intr_info)) +@@ -5838,12 +5849,13 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) + * Return 1 if L1 wants to intercept an exit from L2. Only call this when in + * is_guest_mode (L2). + */ +-static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) ++static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, ++ union vmx_exit_reason exit_reason) + { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 intr_info; + +- switch ((u16)exit_reason) { ++ switch ((u16)exit_reason.basic) { + case EXIT_REASON_EXCEPTION_NMI: + intr_info = vmx_get_intr_info(vcpu); + if (is_nmi(intr_info)) +@@ -5962,7 +5974,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) + bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); +- u32 exit_reason = vmx->exit_reason; ++ union vmx_exit_reason exit_reason = vmx->exit_reason; + unsigned long exit_qual; + u32 exit_intr_info; + +@@ -5981,7 +5993,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) + goto reflect_vmexit; + } + +- trace_kvm_nested_vmexit(exit_reason, vcpu, KVM_ISA_VMX); ++ trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX); + + /* If L0 (KVM) wants the exit, it trumps L1's desires. */ + if (nested_vmx_l0_wants_exit(vcpu, exit_reason)) +@@ -6007,7 +6019,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) + exit_qual = vmx_get_exit_qual(vcpu); + + reflect_vmexit: +- nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual); ++ nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual); + return true; + } + +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index eb69fef57485..880a2617820c 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -1577,7 +1577,7 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) + * i.e. we end up advancing IP with some random value. 
+ */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || +- to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { ++ to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) { + orig_rip = kvm_rip_read(vcpu); + rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + #ifdef CONFIG_X86_64 +@@ -5667,7 +5667,7 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, + struct vcpu_vmx *vmx = to_vmx(vcpu); + + *info1 = vmx_get_exit_qual(vcpu); +- if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { ++ if (!(vmx->exit_reason.failed_vmentry)) { + *info2 = vmx->idt_vectoring_info; + *intr_info = vmx_get_intr_info(vcpu); + if (is_exception_with_error_code(*intr_info)) +@@ -5911,8 +5911,9 @@ void dump_vmcs(void) + static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); +- u32 exit_reason = vmx->exit_reason; ++ union vmx_exit_reason exit_reason = vmx->exit_reason; + u32 vectoring_info = vmx->idt_vectoring_info; ++ u16 exit_handler_index; + + /* + * Flush logged GPAs PML buffer, this will make dirty_bitmap more +@@ -5954,11 +5955,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) + return 1; + } + +- if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { ++ if (exit_reason.failed_vmentry) { + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->run->fail_entry.hardware_entry_failure_reason +- = exit_reason; ++ = exit_reason.full; + vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu; + return 0; + } +@@ -5980,18 +5981,18 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) + * will cause infinite loop. + */ + if ((vectoring_info & VECTORING_INFO_VALID_MASK) && +- (exit_reason != EXIT_REASON_EXCEPTION_NMI && +- exit_reason != EXIT_REASON_EPT_VIOLATION && +- exit_reason != EXIT_REASON_PML_FULL && +- exit_reason != EXIT_REASON_APIC_ACCESS && +- exit_reason != EXIT_REASON_TASK_SWITCH)) { ++ (exit_reason.basic != EXIT_REASON_EXCEPTION_NMI && ++ exit_reason.basic != EXIT_REASON_EPT_VIOLATION && ++ exit_reason.basic != EXIT_REASON_PML_FULL && ++ exit_reason.basic != EXIT_REASON_APIC_ACCESS && ++ exit_reason.basic != EXIT_REASON_TASK_SWITCH)) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; + vcpu->run->internal.ndata = 3; + vcpu->run->internal.data[0] = vectoring_info; +- vcpu->run->internal.data[1] = exit_reason; ++ vcpu->run->internal.data[1] = exit_reason.full; + vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; +- if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { ++ if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) { + vcpu->run->internal.ndata++; + vcpu->run->internal.data[3] = + vmcs_read64(GUEST_PHYSICAL_ADDRESS); +@@ -6023,38 +6024,39 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) + if (exit_fastpath != EXIT_FASTPATH_NONE) + return 1; + +- if (exit_reason >= kvm_vmx_max_exit_handlers) ++ if (exit_reason.basic >= kvm_vmx_max_exit_handlers) + goto unexpected_vmexit; + #ifdef CONFIG_RETPOLINE +- if (exit_reason == EXIT_REASON_MSR_WRITE) ++ if (exit_reason.basic == EXIT_REASON_MSR_WRITE) + return kvm_emulate_wrmsr(vcpu); +- else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) ++ else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER) + return handle_preemption_timer(vcpu); +- else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW) ++ else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW) + return 
handle_interrupt_window(vcpu); +- else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) ++ else if (exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) + return handle_external_interrupt(vcpu); +- else if (exit_reason == EXIT_REASON_HLT) ++ else if (exit_reason.basic == EXIT_REASON_HLT) + return kvm_emulate_halt(vcpu); +- else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) ++ else if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) + return handle_ept_misconfig(vcpu); + #endif + +- exit_reason = array_index_nospec(exit_reason, +- kvm_vmx_max_exit_handlers); +- if (!kvm_vmx_exit_handlers[exit_reason]) ++ exit_handler_index = array_index_nospec((u16)exit_reason.basic, ++ kvm_vmx_max_exit_handlers); ++ if (!kvm_vmx_exit_handlers[exit_handler_index]) + goto unexpected_vmexit; + +- return kvm_vmx_exit_handlers[exit_reason](vcpu); ++ return kvm_vmx_exit_handlers[exit_handler_index](vcpu); + + unexpected_vmexit: +- vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason); ++ vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", ++ exit_reason.full); + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; +- vcpu->run->internal.data[0] = exit_reason; ++ vcpu->run->internal.data[0] = exit_reason.full; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + return 0; + } +@@ -6373,9 +6375,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + +- if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) ++ if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) + handle_external_interrupt_irqoff(vcpu); +- else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) ++ else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) + handle_exception_nmi_irqoff(vmx); + } + +@@ -6567,7 +6569,7 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) + + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + { +- switch (to_vmx(vcpu)->exit_reason) { ++ switch (to_vmx(vcpu)->exit_reason.basic) { + case EXIT_REASON_MSR_WRITE: + return handle_fastpath_set_msr_irqoff(vcpu); + case EXIT_REASON_PREEMPTION_TIMER: +@@ -6768,17 +6770,17 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) + vmx->idt_vectoring_info = 0; + + if (unlikely(vmx->fail)) { +- vmx->exit_reason = 0xdead; ++ vmx->exit_reason.full = 0xdead; + return EXIT_FASTPATH_NONE; + } + +- vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); +- if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)) ++ vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON); ++ if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY)) + kvm_machine_check(); + +- trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); ++ trace_kvm_exit(vmx->exit_reason.full, vcpu, KVM_ISA_VMX); + +- if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) ++ if (unlikely(vmx->exit_reason.failed_vmentry)) + return EXIT_FASTPATH_NONE; + + vmx->loaded_vmcs->launched = 1; +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index 9d3a557949ac..4dd71b7494ea 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -70,6 +70,29 @@ struct pt_desc { + struct pt_ctx guest; + }; + ++union vmx_exit_reason { ++ struct { ++ u32 basic : 16; ++ u32 reserved16 : 1; ++ u32 reserved17 : 1; ++ u32 reserved18 : 1; ++ u32 reserved19 : 1; ++ u32 reserved20 : 1; ++ u32 reserved21 : 1; ++ u32 reserved22 : 1; ++ u32 
reserved23 : 1; ++ u32 reserved24 : 1; ++ u32 reserved25 : 1; ++ u32 reserved26 : 1; ++ u32 enclave_mode : 1; ++ u32 smi_pending_mtf : 1; ++ u32 smi_from_vmx_root : 1; ++ u32 reserved30 : 1; ++ u32 failed_vmentry : 1; ++ }; ++ u32 full; ++}; ++ + /* + * The nested_vmx structure is part of vcpu_vmx, and holds information we need + * for correct emulation of VMX (i.e., nested VMX) on this vcpu. +@@ -244,7 +267,7 @@ struct vcpu_vmx { + int vpid; + bool emulation_required; + +- u32 exit_reason; ++ union vmx_exit_reason exit_reason; + + /* Posted interrupt descriptor */ + struct pi_desc pi_desc; +-- +2.30.2 + diff --git a/queue-5.11/kvm-vmx-don-t-use-vcpu-run-internal.ndata-as-an-arra.patch b/queue-5.11/kvm-vmx-don-t-use-vcpu-run-internal.ndata-as-an-arra.patch new file mode 100644 index 00000000000..071d8e5b17f --- /dev/null +++ b/queue-5.11/kvm-vmx-don-t-use-vcpu-run-internal.ndata-as-an-arra.patch @@ -0,0 +1,59 @@ +From 7ee3fa3365a94f1e81b4ede5029d14ca1f155617 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Apr 2021 15:47:40 +0000 +Subject: KVM: VMX: Don't use vcpu->run->internal.ndata as an array index + +From: Reiji Watanabe + +[ Upstream commit 04c4f2ee3f68c9a4bf1653d15f1a9a435ae33f7a ] + +__vmx_handle_exit() uses vcpu->run->internal.ndata as an index for +an array access. Since vcpu->run is (can be) mapped to a user address +space with a writer permission, the 'ndata' could be updated by the +user process at anytime (the user process can set it to outside the +bounds of the array). +So, it is not safe that __vmx_handle_exit() uses the 'ndata' that way. + +Fixes: 1aa561b1a4c0 ("kvm: x86: Add "last CPU" to some KVM_EXIT information") +Signed-off-by: Reiji Watanabe +Reviewed-by: Jim Mattson +Message-Id: <20210413154739.490299-1-reijiw@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/vmx/vmx.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 880a2617820c..95f836fbceb2 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -5986,19 +5986,19 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) + exit_reason.basic != EXIT_REASON_PML_FULL && + exit_reason.basic != EXIT_REASON_APIC_ACCESS && + exit_reason.basic != EXIT_REASON_TASK_SWITCH)) { ++ int ndata = 3; ++ + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; +- vcpu->run->internal.ndata = 3; + vcpu->run->internal.data[0] = vectoring_info; + vcpu->run->internal.data[1] = exit_reason.full; + vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; + if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) { +- vcpu->run->internal.ndata++; +- vcpu->run->internal.data[3] = ++ vcpu->run->internal.data[ndata++] = + vmcs_read64(GUEST_PHYSICAL_ADDRESS); + } +- vcpu->run->internal.data[vcpu->run->internal.ndata++] = +- vcpu->arch.last_vmentry_cpu; ++ vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu; ++ vcpu->run->internal.ndata = ndata; + return 0; + } + +-- +2.30.2 + diff --git a/queue-5.11/r8169-don-t-advertise-pause-in-jumbo-mode.patch b/queue-5.11/r8169-don-t-advertise-pause-in-jumbo-mode.patch new file mode 100644 index 00000000000..452969d9c2b --- /dev/null +++ b/queue-5.11/r8169-don-t-advertise-pause-in-jumbo-mode.patch @@ -0,0 +1,57 @@ +From e2b5b467a282aaae917e81c02bc0e9ecb69a56b7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Apr 2021 10:47:10 
+0200 +Subject: r8169: don't advertise pause in jumbo mode + +From: Heiner Kallweit + +[ Upstream commit 453a77894efa4d9b6ef9644d74b9419c47ac427c ] + +It has been reported [0] that using pause frames in jumbo mode impacts +performance. There's no available chip documentation, but vendor +drivers r8168 and r8125 don't advertise pause in jumbo mode. So let's +do the same, according to Roman it fixes the issue. + +[0] https://bugzilla.kernel.org/show_bug.cgi?id=212617 + +Fixes: 9cf9b84cc701 ("r8169: make use of phy_set_asym_pause") +Reported-by: Roman Mamedov +Tested-by: Roman Mamedov +Signed-off-by: Heiner Kallweit +Cc: stable@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/r8169_main.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index 47a4ff832d75..e04e885f2893 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -2384,6 +2384,13 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) + + if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) + pcie_set_readrq(tp->pci_dev, readrq); ++ ++ /* Chip doesn't support pause in jumbo mode */ ++ linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, ++ tp->phydev->advertising, !jumbo); ++ linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, ++ tp->phydev->advertising, !jumbo); ++ phy_start_aneg(tp->phydev); + } + + DECLARE_RTL_COND(rtl_chipcmd_cond) +@@ -4662,8 +4669,6 @@ static int r8169_phy_connect(struct rtl8169_private *tp) + if (!tp->supports_gmii) + phy_set_max_speed(phydev, SPEED_100); + +- phy_support_asym_pause(phydev); +- + phy_attached_info(phydev); + + return 0; +-- +2.30.2 + diff --git a/queue-5.11/r8169-tweak-max-read-request-size-for-newer-chips-al.patch b/queue-5.11/r8169-tweak-max-read-request-size-for-newer-chips-al.patch new file mode 100644 index 00000000000..25dd37ed6af --- /dev/null +++ b/queue-5.11/r8169-tweak-max-read-request-size-for-newer-chips-al.patch @@ -0,0 +1,64 @@ +From 9cd611c882b1134dab256623b52de1ef21746831 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Jan 2021 23:01:18 +0100 +Subject: r8169: tweak max read request size for newer chips also in jumbo mtu + mode + +From: Heiner Kallweit + +[ Upstream commit 5e00e16cb98935bcf06f51931876d898c226f65c ] + +So far we don't increase the max read request size if we switch to +jumbo mode before bringing up the interface for the first time. +Let's change this. 
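+
+The reworked flow boils down to compute-then-apply: each branch only
+picks the desired value, and one unconditional call at the end applies
+it, so the 4096-byte default now also takes effect when the MTU was set
+to jumbo before the first ifup.  A simplified standalone sketch of that
+control flow (a stub stands in for the real pcie_set_readrq() helper;
+chip handling is condensed to a flag):
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    /* Placeholder for pcie_set_readrq(); just reports the value here. */
+    static void set_readrq(int bytes)
+    {
+        printf("max read request size -> %d bytes\n", bytes);
+    }
+
+    /* Mirrors the reworked rtl_jumbo_config() shape: pick the value in
+     * the branches, apply it once at the end, in both directions. */
+    static void jumbo_config(bool jumbo, bool old_chip)
+    {
+        int readrq = 4096;        /* default for newer chips */
+
+        if (jumbo && old_chip)
+            readrq = 512;         /* older chips need it lowered in jumbo mode */
+
+        set_readrq(readrq);       /* no longer skipped when jumbo is on */
+    }
+
+    int main(void)
+    {
+        jumbo_config(true, false);    /* newer chip, jumbo: now gets 4096 */
+        jumbo_config(true, true);     /* older chip, jumbo: 512 */
+        return 0;
+    }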
+ +Signed-off-by: Heiner Kallweit +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/r8169_main.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index 7c1a057dcf3d..47a4ff832d75 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -2342,13 +2342,14 @@ static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp) + static void rtl_jumbo_config(struct rtl8169_private *tp) + { + bool jumbo = tp->dev->mtu > ETH_DATA_LEN; ++ int readrq = 4096; + + rtl_unlock_config_regs(tp); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_12: + case RTL_GIGA_MAC_VER_17: + if (jumbo) { +- pcie_set_readrq(tp->pci_dev, 512); ++ readrq = 512; + r8168b_1_hw_jumbo_enable(tp); + } else { + r8168b_1_hw_jumbo_disable(tp); +@@ -2356,7 +2357,7 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) + break; + case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: + if (jumbo) { +- pcie_set_readrq(tp->pci_dev, 512); ++ readrq = 512; + r8168c_hw_jumbo_enable(tp); + } else { + r8168c_hw_jumbo_disable(tp); +@@ -2381,8 +2382,8 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) + } + rtl_lock_config_regs(tp); + +- if (!jumbo && pci_is_pcie(tp->pci_dev) && tp->supports_gmii) +- pcie_set_readrq(tp->pci_dev, 4096); ++ if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) ++ pcie_set_readrq(tp->pci_dev, readrq); + } + + DECLARE_RTL_COND(rtl_chipcmd_cond) +-- +2.30.2 + diff --git a/queue-5.11/series b/queue-5.11/series index bc253317adb..c3e31e507a6 100644 --- a/queue-5.11/series +++ b/queue-5.11/series @@ -108,3 +108,11 @@ arm-omap2-fix-uninitialized-sr_inst.patch arm64-dts-allwinner-fix-sd-card-cd-gpio-for-sopine-s.patch arm64-dts-allwinner-h6-beelink-gs1-remove-ext.-32-kh.patch bpf-use-correct-permission-flag-for-mixed-signed-bou.patch +kvm-vmx-convert-vcpu_vmx.exit_reason-to-a-union.patch +kvm-vmx-don-t-use-vcpu-run-internal.ndata-as-an-arra.patch +kasan-fix-hwasan-build-for-gcc.patch +kasan-remove-redundant-config-option.patch +r8169-tweak-max-read-request-size-for-newer-chips-al.patch +r8169-don-t-advertise-pause-in-jumbo-mode.patch +bpf-ensure-off_reg-has-no-mixed-signed-bounds-for-al.patch +bpf-move-off_reg-into-sanitize_ptr_alu.patch -- 2.47.3