From 183f888b4ed082b5b4df7e51f7e115babddae4c3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 13 Aug 2021 11:03:39 +0200 Subject: [PATCH] 4.19-stable patches added patches: bpf-do-not-mark-insn-as-seen-under-speculative-path-verification.patch bpf-fix-leakage-under-speculation-on-mispredicted-branches.patch bpf-inherit-expanded-patched-seen-count-from-old-aux-data.patch bpf-selftests-adjust-few-selftest-outcomes-wrt-unreachable-code.patch kvm-x86-mmu-use-the-correct-inherited-permissions-to-get-shadow-page.patch ovl-prevent-private-clone-if-bind-mount-is-not-allowed.patch ppp-fix-generating-ppp-unit-id-when-ifname-is-not-specified.patch tracing-reject-string-operand-in-the-histogram-expression.patch usb-ehci-fix-kunpeng920-ehci-hardware-problem.patch --- ...-under-speculative-path-verification.patch | 77 ++++++ ...speculation-on-mispredicted-branches.patch | 223 ++++++++++++++++++ ...patched-seen-count-from-old-aux-data.patch | 59 +++++ ...lftest-outcomes-wrt-unreachable-code.patch | 48 ++++ ...rited-permissions-to-get-shadow-page.patch | 154 ++++++++++++ ...e-clone-if-bind-mount-is-not-allowed.patch | 97 ++++++++ ...unit-id-when-ifname-is-not-specified.patch | 109 +++++++++ queue-4.19/series | 10 +- ...-operand-in-the-histogram-expression.patch | 61 +++++ ...use-list_replace_init-before-travers.patch | 112 --------- ...fix-kunpeng920-ehci-hardware-problem.patch | 42 ++++ 11 files changed, 879 insertions(+), 113 deletions(-) create mode 100644 queue-4.19/bpf-do-not-mark-insn-as-seen-under-speculative-path-verification.patch create mode 100644 queue-4.19/bpf-fix-leakage-under-speculation-on-mispredicted-branches.patch create mode 100644 queue-4.19/bpf-inherit-expanded-patched-seen-count-from-old-aux-data.patch create mode 100644 queue-4.19/bpf-selftests-adjust-few-selftest-outcomes-wrt-unreachable-code.patch create mode 100644 queue-4.19/kvm-x86-mmu-use-the-correct-inherited-permissions-to-get-shadow-page.patch create mode 100644 
queue-4.19/ovl-prevent-private-clone-if-bind-mount-is-not-allowed.patch create mode 100644 queue-4.19/ppp-fix-generating-ppp-unit-id-when-ifname-is-not-specified.patch create mode 100644 queue-4.19/tracing-reject-string-operand-in-the-histogram-expression.patch delete mode 100644 queue-4.19/usb-dwc3-gadget-use-list_replace_init-before-travers.patch create mode 100644 queue-4.19/usb-ehci-fix-kunpeng920-ehci-hardware-problem.patch diff --git a/queue-4.19/bpf-do-not-mark-insn-as-seen-under-speculative-path-verification.patch b/queue-4.19/bpf-do-not-mark-insn-as-seen-under-speculative-path-verification.patch new file mode 100644 index 00000000000..86dfcd77eb9 --- /dev/null +++ b/queue-4.19/bpf-do-not-mark-insn-as-seen-under-speculative-path-verification.patch @@ -0,0 +1,77 @@ +From foo@baz Fri Aug 13 10:38:10 AM CEST 2021 +From: Ovidiu Panait +Date: Thu, 12 Aug 2021 20:00:35 +0300 +Subject: bpf: Do not mark insn as seen under speculative path verification +To: stable@vger.kernel.org +Cc: bpf@vger.kernel.org +Message-ID: <20210812170037.2370387-3-ovidiu.panait@windriver.com> + +From: Daniel Borkmann + +commit fe9a5ca7e370e613a9a75a13008a3845ea759d6e upstream. + +... in such circumstances, we do not want to mark the instruction as seen given +the goal is still to jmp-1 rewrite/sanitize dead code, if it is not reachable +from the non-speculative path verification. We do however want to verify it for +safety regardless. + +With the patch as-is all the insns that have been marked as seen before the +patch will also be marked as seen after the patch (just with a potentially +different non-zero count). An upcoming patch will also verify paths that are +unreachable in the non-speculative domain, hence this extension is needed. 
+ +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Reviewed-by: Benedict Schlueter +Reviewed-by: Piotr Krysiuk +Acked-by: Alexei Starovoitov +[OP: - env->pass_cnt is not used in 4.19, so adjust sanitize_mark_insn_seen() + to assign "true" instead + - drop sanitize_insn_aux_data() comment changes, as the function is not + present in 4.19] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2901,6 +2901,19 @@ do_sim: + return !ret ? REASON_STACK : 0; + } + ++static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ ++ /* If we simulate paths under speculation, we don't update the ++ * insn as 'seen' such that when we verify unreachable paths in ++ * the non-speculative domain, sanitize_dead_code() can still ++ * rewrite/sanitize them. 
++ */ ++ if (!vstate->speculative) ++ env->insn_aux_data[env->insn_idx].seen = true; ++} ++ + static int sanitize_err(struct bpf_verifier_env *env, + const struct bpf_insn *insn, int reason, + const struct bpf_reg_state *off_reg, +@@ -5254,7 +5267,7 @@ static int do_check(struct bpf_verifier_ + } + + regs = cur_regs(env); +- env->insn_aux_data[env->insn_idx].seen = true; ++ sanitize_mark_insn_seen(env); + + if (class == BPF_ALU || class == BPF_ALU64) { + err = check_alu_op(env, insn); +@@ -5472,7 +5485,7 @@ process_bpf_exit: + return err; + + env->insn_idx++; +- env->insn_aux_data[env->insn_idx].seen = true; ++ sanitize_mark_insn_seen(env); + } else { + verbose(env, "invalid BPF_LD mode\n"); + return -EINVAL; diff --git a/queue-4.19/bpf-fix-leakage-under-speculation-on-mispredicted-branches.patch b/queue-4.19/bpf-fix-leakage-under-speculation-on-mispredicted-branches.patch new file mode 100644 index 00000000000..6e503fa8370 --- /dev/null +++ b/queue-4.19/bpf-fix-leakage-under-speculation-on-mispredicted-branches.patch @@ -0,0 +1,223 @@ +From foo@baz Fri Aug 13 10:38:10 AM CEST 2021 +From: Ovidiu Panait +Date: Thu, 12 Aug 2021 20:00:36 +0300 +Subject: bpf: Fix leakage under speculation on mispredicted branches +To: stable@vger.kernel.org +Cc: bpf@vger.kernel.org +Message-ID: <20210812170037.2370387-4-ovidiu.panait@windriver.com> + +From: Daniel Borkmann + +commit 9183671af6dbf60a1219371d4ed73e23f43b49db upstream. + +The verifier only enumerates valid control-flow paths and skips paths that +are unreachable in the non-speculative domain. And so it can miss issues +under speculative execution on mispredicted branches. 
+ +For example, a type confusion has been demonstrated with the following +crafted program: + + // r0 = pointer to a map array entry + // r6 = pointer to readable stack slot + // r9 = scalar controlled by attacker + 1: r0 = *(u64 *)(r0) // cache miss + 2: if r0 != 0x0 goto line 4 + 3: r6 = r9 + 4: if r0 != 0x1 goto line 6 + 5: r9 = *(u8 *)(r6) + 6: // leak r9 + +Since line 3 runs iff r0 == 0 and line 5 runs iff r0 == 1, the verifier +concludes that the pointer dereference on line 5 is safe. But: if the +attacker trains both the branches to fall-through, such that the following +is speculatively executed ... + + r6 = r9 + r9 = *(u8 *)(r6) + // leak r9 + +... then the program will dereference an attacker-controlled value and could +leak its content under speculative execution via side-channel. This requires +to mistrain the branch predictor, which can be rather tricky, because the +branches are mutually exclusive. However such training can be done at +congruent addresses in user space using different branches that are not +mutually exclusive. That is, by training branches in user space ... + + A: if r0 != 0x0 goto line C + B: ... + C: if r0 != 0x0 goto line D + D: ... + +... such that addresses A and C collide to the same CPU branch prediction +entries in the PHT (pattern history table) as those of the BPF program's +lines 2 and 4, respectively. A non-privileged attacker could simply brute +force such collisions in the PHT until observing the attack succeeding. + +Alternative methods to mistrain the branch predictor are also possible that +avoid brute forcing the collisions in the PHT. A reliable attack has been +demonstrated, for example, using the following crafted program: + + // r0 = pointer to a [control] map array entry + // r7 = *(u64 *)(r0 + 0), training/attack phase + // r8 = *(u64 *)(r0 + 8), oob address + // [...] 
+ // r0 = pointer to a [data] map array entry + 1: if r7 == 0x3 goto line 3 + 2: r8 = r0 + // crafted sequence of conditional jumps to separate the conditional + // branch in line 193 from the current execution flow + 3: if r0 != 0x0 goto line 5 + 4: if r0 == 0x0 goto exit + 5: if r0 != 0x0 goto line 7 + 6: if r0 == 0x0 goto exit + [...] + 187: if r0 != 0x0 goto line 189 + 188: if r0 == 0x0 goto exit + // load any slowly-loaded value (due to cache miss in phase 3) ... + 189: r3 = *(u64 *)(r0 + 0x1200) + // ... and turn it into known zero for verifier, while preserving slowly- + // loaded dependency when executing: + 190: r3 &= 1 + 191: r3 &= 2 + // speculatively bypassed phase dependency + 192: r7 += r3 + 193: if r7 == 0x3 goto exit + 194: r4 = *(u8 *)(r8 + 0) + // leak r4 + +As can be seen, in training phase (phase != 0x3), the condition in line 1 +turns into false and therefore r8 with the oob address is overridden with +the valid map value address, which in line 194 we can read out without +issues. However, in attack phase, line 2 is skipped, and due to the cache +miss in line 189 where the map value is (zeroed and later) added to the +phase register, the condition in line 193 takes the fall-through path due +to prior branch predictor training, where under speculation, it'll load the +byte at oob address r8 (unknown scalar type at that point) which could then +be leaked via side-channel. + +One way to mitigate these is to 'branch off' an unreachable path, meaning, +the current verification path keeps following the is_branch_taken() path +and we push the other branch to the verification stack. Given this is +unreachable from the non-speculative domain, this branch's vstate is +explicitly marked as speculative. 
This is needed for two reasons: i) if +this path is solely seen from speculative execution, then we later on still +want the dead code elimination to kick in in order to sanitize these +instructions with jmp-1s, and ii) to ensure that paths walked in the +non-speculative domain are not pruned from earlier walks of paths walked in +the speculative domain. Additionally, for robustness, we mark the registers +which have been part of the conditional as unknown in the speculative path +given there should be no assumptions made on their content. + +The fix in here mitigates type confusion attacks described earlier due to +i) all code paths in the BPF program being explored and ii) existing +verifier logic already ensuring that given memory access instruction +references one specific data structure. + +An alternative to this fix that has also been looked at in this scope was to +mark aux->alu_state at the jump instruction with a BPF_JMP_TAKEN state as +well as direction encoding (always-goto, always-fallthrough, unknown), such +that mixing of different always-* directions themselves as well as mixing of +always-* with unknown directions would cause a program rejection by the +verifier, e.g. programs with constructs like 'if ([...]) { x = 0; } else +{ x = 1; }' with subsequent 'if (x == 1) { [...] }'. For unprivileged, this +would result in only single direction always-* taken paths, and unknown taken +paths being allowed, such that the former could be patched from a conditional +jump to an unconditional jump (ja). Compared to this approach here, it would +have two downsides: i) valid programs that otherwise are not performing any +pointer arithmetic, etc, would potentially be rejected/broken, and ii) we are +required to turn off path pruning for unprivileged, where both can be avoided +in this work through pushing the invalid branch to the verification stack. 
+ +The issue was originally discovered by Adam and Ofek, and later independently +discovered and reported as a result of Benedict and Piotr's research work. + +Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") +Reported-by: Adam Morrison +Reported-by: Ofek Kirzner +Reported-by: Benedict Schlueter +Reported-by: Piotr Krysiuk +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Reviewed-by: Benedict Schlueter +Reviewed-by: Piotr Krysiuk +Acked-by: Alexei Starovoitov +[OP: use allow_ptr_leaks instead of bypass_spec_v1] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 46 +++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 41 insertions(+), 5 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2812,6 +2812,27 @@ struct bpf_sanitize_info { + bool mask_to_left; + }; + ++static struct bpf_verifier_state * ++sanitize_speculative_path(struct bpf_verifier_env *env, ++ const struct bpf_insn *insn, ++ u32 next_idx, u32 curr_idx) ++{ ++ struct bpf_verifier_state *branch; ++ struct bpf_reg_state *regs; ++ ++ branch = push_stack(env, next_idx, curr_idx, true); ++ if (branch && insn) { ++ regs = branch->frame[branch->curframe]->regs; ++ if (BPF_SRC(insn->code) == BPF_K) { ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ } else if (BPF_SRC(insn->code) == BPF_X) { ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ mark_reg_unknown(env, regs, insn->src_reg); ++ } ++ } ++ return branch; ++} ++ + static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, +@@ -2895,7 +2916,8 @@ do_sim: + tmp = *dst_reg; + *dst_reg = *ptr_reg; + } +- ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); ++ ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, ++ env->insn_idx); + if (!ptr_is_dst_reg && ret) + *dst_reg = tmp; + return !ret ? 
REASON_STACK : 0; +@@ -4288,14 +4310,28 @@ static int check_cond_jmp_op(struct bpf_ + tnum_is_const(src_reg->var_off)) + pred = is_branch_taken(dst_reg, src_reg->var_off.value, + opcode); ++ + if (pred == 1) { +- /* only follow the goto, ignore fall-through */ ++ /* Only follow the goto, ignore fall-through. If needed, push ++ * the fall-through branch for simulation under speculative ++ * execution. ++ */ ++ if (!env->allow_ptr_leaks && ++ !sanitize_speculative_path(env, insn, *insn_idx + 1, ++ *insn_idx)) ++ return -EFAULT; + *insn_idx += insn->off; + return 0; + } else if (pred == 0) { +- /* only follow fall-through branch, since +- * that's where the program will go +- */ ++ /* Only follow the fall-through branch, since that's where the ++ * program will go. If needed, push the goto branch for ++ * simulation under speculative execution. ++ */ ++ if (!env->allow_ptr_leaks && ++ !sanitize_speculative_path(env, insn, ++ *insn_idx + insn->off + 1, ++ *insn_idx)) ++ return -EFAULT; + return 0; + } + diff --git a/queue-4.19/bpf-inherit-expanded-patched-seen-count-from-old-aux-data.patch b/queue-4.19/bpf-inherit-expanded-patched-seen-count-from-old-aux-data.patch new file mode 100644 index 00000000000..cca7dc6a140 --- /dev/null +++ b/queue-4.19/bpf-inherit-expanded-patched-seen-count-from-old-aux-data.patch @@ -0,0 +1,59 @@ +From foo@baz Fri Aug 13 10:38:10 AM CEST 2021 +From: Ovidiu Panait +Date: Thu, 12 Aug 2021 20:00:34 +0300 +Subject: bpf: Inherit expanded/patched seen count from old aux data +To: stable@vger.kernel.org +Cc: bpf@vger.kernel.org +Message-ID: <20210812170037.2370387-2-ovidiu.panait@windriver.com> + +From: Daniel Borkmann + +commit d203b0fd863a2261e5d00b97f3d060c4c2a6db71 upstream. + +Instead of relying on current env->pass_cnt, use the seen count from the +old aux data in adjust_insn_aux_data(), and expand it to the new range of +patched instructions. 
This change is valid given we always expand 1:n +with n>=1, so what applies to the old/original instruction needs to apply +for the replacement as well. + +Not relying on env->pass_cnt is a prerequisite for a later change where we +want to avoid marking an instruction seen when verified under speculative +execution path. + +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Reviewed-by: Benedict Schlueter +Reviewed-by: Piotr Krysiuk +Acked-by: Alexei Starovoitov +[OP: - declare old_data as bool instead of u32 (struct bpf_insn_aux_data.seen + is bool in 5.4) + - adjusted context for 4.19] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5690,6 +5690,7 @@ static int adjust_insn_aux_data(struct b + u32 off, u32 cnt) + { + struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; ++ bool old_seen = old_data[off].seen; + int i; + + if (cnt == 1) +@@ -5701,8 +5702,10 @@ static int adjust_insn_aux_data(struct b + memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); + memcpy(new_data + off + cnt - 1, old_data + off, + sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); +- for (i = off; i < off + cnt - 1; i++) +- new_data[i].seen = true; ++ for (i = off; i < off + cnt - 1; i++) { ++ /* Expand insni[off]'s seen count to the patched range. 
*/ ++ new_data[i].seen = old_seen; ++ } + env->insn_aux_data = new_data; + vfree(old_data); + return 0; diff --git a/queue-4.19/bpf-selftests-adjust-few-selftest-outcomes-wrt-unreachable-code.patch b/queue-4.19/bpf-selftests-adjust-few-selftest-outcomes-wrt-unreachable-code.patch new file mode 100644 index 00000000000..ffc2382b579 --- /dev/null +++ b/queue-4.19/bpf-selftests-adjust-few-selftest-outcomes-wrt-unreachable-code.patch @@ -0,0 +1,48 @@ +From foo@baz Fri Aug 13 10:38:10 AM CEST 2021 +From: Ovidiu Panait +Date: Thu, 12 Aug 2021 20:00:37 +0300 +Subject: bpf, selftests: Adjust few selftest outcomes wrt unreachable code +To: stable@vger.kernel.org +Cc: bpf@vger.kernel.org +Message-ID: <20210812170037.2370387-5-ovidiu.panait@windriver.com> + +From: Daniel Borkmann + +commit 973377ffe8148180b2651825b92ae91988141b05 upstream. + +In almost all cases from test_verifier that have been changed in here, we've +had an unreachable path with a load from a register which has an invalid +address on purpose. This was basically to make sure that we never walk this +path and to have the verifier complain if it would otherwise. Change it to +match on the right error for unprivileged given we now test these paths +under speculative execution. + +There's one case where we match on exact # of insns_processed. Due to the +extra path, this will of course mismatch on unprivileged. Thus, restrict the +test->insn_processed check to privileged-only. + +In one other case, we result in a 'pointer comparison prohibited' error. This +is similarly due to verifying an 'invalid' branch where we end up with a value +pointer on one side of the comparison. 
+ +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Acked-by: Alexei Starovoitov +[OP: ignore changes to tests that do not exist in 4.19] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/test_verifier.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -2792,6 +2792,8 @@ static struct bpf_test tests[] = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R7 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 0, + }, diff --git a/queue-4.19/kvm-x86-mmu-use-the-correct-inherited-permissions-to-get-shadow-page.patch b/queue-4.19/kvm-x86-mmu-use-the-correct-inherited-permissions-to-get-shadow-page.patch new file mode 100644 index 00000000000..4d069fa2a0d --- /dev/null +++ b/queue-4.19/kvm-x86-mmu-use-the-correct-inherited-permissions-to-get-shadow-page.patch @@ -0,0 +1,154 @@ +From b1bd5cba3306691c771d558e94baa73e8b0b96b7 Mon Sep 17 00:00:00 2001 +From: Lai Jiangshan +Date: Thu, 3 Jun 2021 13:24:55 +0800 +Subject: KVM: X86: MMU: Use the correct inherited permissions to get shadow page + +From: Lai Jiangshan + +commit b1bd5cba3306691c771d558e94baa73e8b0b96b7 upstream. + +When computing the access permissions of a shadow page, use the effective +permissions of the walk up to that point, i.e. the logic AND of its parents' +permissions. Two guest PxE entries that point at the same table gfn need to +be shadowed with different shadow pages if their parents' permissions are +different. KVM currently uses the effective permissions of the last +non-leaf entry for all non-leaf entries. Because all non-leaf SPTEs have +full ("uwx") permissions, and the effective permissions are recorded only +in role.access and merged into the leaves, this can lead to incorrect +reuse of a shadow page and eventually to a missing guest protection page +fault. 
+ +For example, here is a shared pagetable: + + pgd[] pud[] pmd[] virtual address pointers + /->pmd1(u--)->pte1(uw-)->page1 <- ptr1 (u--) + /->pud1(uw-)--->pmd2(uw-)->pte2(uw-)->page2 <- ptr2 (uw-) + pgd-| (shared pmd[] as above) + \->pud2(u--)--->pmd1(u--)->pte1(uw-)->page1 <- ptr3 (u--) + \->pmd2(uw-)->pte2(uw-)->page2 <- ptr4 (u--) + + pud1 and pud2 point to the same pmd table, so: + - ptr1 and ptr3 points to the same page. + - ptr2 and ptr4 points to the same page. + +(pud1 and pud2 here are pud entries, while pmd1 and pmd2 here are pmd entries) + +- First, the guest reads from ptr1 first and KVM prepares a shadow + page table with role.access=u--, from ptr1's pud1 and ptr1's pmd1. + "u--" comes from the effective permissions of pgd, pud1 and + pmd1, which are stored in pt->access. "u--" is used also to get + the pagetable for pud1, instead of "uw-". + +- Then the guest writes to ptr2 and KVM reuses pud1 which is present. + The hypervisor set up a shadow page for ptr2 with pt->access is "uw-" + even though the pud1 pmd (because of the incorrect argument to + kvm_mmu_get_page in the previous step) has role.access="u--". + +- Then the guest reads from ptr3. The hypervisor reuses pud1's + shadow pmd for pud2, because both use "u--" for their permissions. + Thus, the shadow pmd already includes entries for both pmd1 and pmd2. + +- At last, the guest writes to ptr4. This causes no vmexit or pagefault, + because pud1's shadow page structures included an "uw-" page even though + its role.access was "u--". + +Any kind of shared pagetable might have the similar problem when in +virtual machine without TDP enabled if the permissions are different +from different ancestors. + +In order to fix the problem, we change pt->access to be an array, and +any access in it will not include permissions ANDed from child ptes. + +The test code is: https://lore.kernel.org/kvm/20210603050537.19605-1-jiangshanlai@gmail.com/ +Remember to test it with TDP disabled. 
+ +The problem had existed long before the commit 41074d07c78b ("KVM: MMU: +Fix inherited permissions for emulated guest pte updates"), and it +is hard to find which is the culprit. So there is no fixes tag here. + +Signed-off-by: Lai Jiangshan +Message-Id: <20210603052455.21023-1-jiangshanlai@gmail.com> +Cc: stable@vger.kernel.org +Fixes: cea0f0e7ea54 ("[PATCH] KVM: MMU: Shadow page table caching") +Signed-off-by: Paolo Bonzini +[OP: - apply arch/x86/kvm/mmu/* changes to arch/x86/kvm + - apply documentation changes to Documentation/virtual/kvm/mmu.txt + - adjusted context in arch/x86/kvm/paging_tmpl.h] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/virtual/kvm/mmu.txt | 4 ++-- + arch/x86/kvm/paging_tmpl.h | 14 +++++++++----- + 2 files changed, 11 insertions(+), 7 deletions(-) + +--- a/Documentation/virtual/kvm/mmu.txt ++++ b/Documentation/virtual/kvm/mmu.txt +@@ -152,8 +152,8 @@ Shadow pages contain the following infor + shadow pages) so role.quadrant takes values in the range 0..3. Each + quadrant maps 1GB virtual address space. + role.access: +- Inherited guest access permissions in the form uwx. Note execute +- permission is positive, not negative. ++ Inherited guest access permissions from the parent ptes in the form uwx. ++ Note execute permission is positive, not negative. + role.invalid: + The page is invalid and should not be used. 
It is a root page that is + currently pinned (by a cpu hardware register pointing to it); once it is +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -93,8 +93,8 @@ struct guest_walker { + gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; + pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; +- unsigned pt_access; +- unsigned pte_access; ++ unsigned int pt_access[PT_MAX_FULL_LEVELS]; ++ unsigned int pte_access; + gfn_t gfn; + struct x86_exception fault; + }; +@@ -388,13 +388,15 @@ retry_walk: + } + + walker->ptes[walker->level - 1] = pte; ++ ++ /* Convert to ACC_*_MASK flags for struct guest_walker. */ ++ walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask); + } while (!is_last_gpte(mmu, walker->level, pte)); + + pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); + accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; + + /* Convert to ACC_*_MASK flags for struct guest_walker. */ +- walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask); + walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask); + errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); + if (unlikely(errcode)) +@@ -433,7 +435,8 @@ retry_walk: + } + + pgprintk("%s: pte %llx pte_access %x pt_access %x\n", +- __func__, (u64)pte, walker->pte_access, walker->pt_access); ++ __func__, (u64)pte, walker->pte_access, ++ walker->pt_access[walker->level - 1]); + return 1; + + error: +@@ -602,7 +605,7 @@ static int FNAME(fetch)(struct kvm_vcpu + { + struct kvm_mmu_page *sp = NULL; + struct kvm_shadow_walk_iterator it; +- unsigned direct_access, access = gw->pt_access; ++ unsigned int direct_access, access; + int top_level, ret; + gfn_t gfn, base_gfn; + +@@ -634,6 +637,7 @@ static int FNAME(fetch)(struct kvm_vcpu + sp = NULL; + if (!is_shadow_present_pte(*it.sptep)) { + table_gfn = gw->table_gfn[it.level - 2]; ++ access = gw->pt_access[it.level - 2]; + sp = kvm_mmu_get_page(vcpu, 
table_gfn, addr, it.level-1, + false, access); + } diff --git a/queue-4.19/ovl-prevent-private-clone-if-bind-mount-is-not-allowed.patch b/queue-4.19/ovl-prevent-private-clone-if-bind-mount-is-not-allowed.patch new file mode 100644 index 00000000000..a3055f1c181 --- /dev/null +++ b/queue-4.19/ovl-prevent-private-clone-if-bind-mount-is-not-allowed.patch @@ -0,0 +1,97 @@ +From 427215d85e8d1476da1a86b8d67aceb485eb3631 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Mon, 9 Aug 2021 10:19:47 +0200 +Subject: ovl: prevent private clone if bind mount is not allowed + +From: Miklos Szeredi + +commit 427215d85e8d1476da1a86b8d67aceb485eb3631 upstream. + +Add the following checks from __do_loopback() to clone_private_mount() as +well: + + - verify that the mount is in the current namespace + + - verify that there are no locked children + +Reported-by: Alois Wohlschlager +Fixes: c771d683a62e ("vfs: introduce clone_private_mount()") +Cc: # v3.18 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/namespace.c | 42 ++++++++++++++++++++++++++++-------------- + 1 file changed, 28 insertions(+), 14 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1799,6 +1799,20 @@ void drop_collected_mounts(struct vfsmou + namespace_unlock(); + } + ++static bool has_locked_children(struct mount *mnt, struct dentry *dentry) ++{ ++ struct mount *child; ++ ++ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { ++ if (!is_subdir(child->mnt_mountpoint, dentry)) ++ continue; ++ ++ if (child->mnt.mnt_flags & MNT_LOCKED) ++ return true; ++ } ++ return false; ++} ++ + /** + * clone_private_mount - create a private clone of a path + * +@@ -1813,14 +1827,27 @@ struct vfsmount *clone_private_mount(con + struct mount *old_mnt = real_mount(path->mnt); + struct mount *new_mnt; + ++ down_read(&namespace_sem); + if (IS_MNT_UNBINDABLE(old_mnt)) +- return ERR_PTR(-EINVAL); ++ goto invalid; ++ ++ if (!check_mnt(old_mnt)) ++ goto invalid; ++ ++ if 
(has_locked_children(old_mnt, path->dentry)) ++ goto invalid; + new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); ++ up_read(&namespace_sem); ++ + if (IS_ERR(new_mnt)) + return ERR_CAST(new_mnt); + + return &new_mnt->mnt; ++ ++invalid: ++ up_read(&namespace_sem); ++ return ERR_PTR(-EINVAL); + } + EXPORT_SYMBOL_GPL(clone_private_mount); + +@@ -2136,19 +2163,6 @@ static int do_change_type(struct path *p + return err; + } + +-static bool has_locked_children(struct mount *mnt, struct dentry *dentry) +-{ +- struct mount *child; +- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { +- if (!is_subdir(child->mnt_mountpoint, dentry)) +- continue; +- +- if (child->mnt.mnt_flags & MNT_LOCKED) +- return true; +- } +- return false; +-} +- + /* + * do loopback mount. + */ diff --git a/queue-4.19/ppp-fix-generating-ppp-unit-id-when-ifname-is-not-specified.patch b/queue-4.19/ppp-fix-generating-ppp-unit-id-when-ifname-is-not-specified.patch new file mode 100644 index 00000000000..479a6ab5948 --- /dev/null +++ b/queue-4.19/ppp-fix-generating-ppp-unit-id-when-ifname-is-not-specified.patch @@ -0,0 +1,109 @@ +From 3125f26c514826077f2a4490b75e9b1c7a644c42 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Pali=20Roh=C3=A1r?= +Date: Sat, 7 Aug 2021 18:00:50 +0200 +Subject: ppp: Fix generating ppp unit id when ifname is not specified +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Pali Rohár + +commit 3125f26c514826077f2a4490b75e9b1c7a644c42 upstream. + +When registering new ppp interface via PPPIOCNEWUNIT ioctl then kernel has +to choose interface name as this ioctl API does not support specifying it. + +Kernel in this case register new interface with name "ppp<id>" where <id> +is the ppp unit id, which can be obtained via PPPIOCGUNIT ioctl. This +applies also in the case when registering new ppp interface via rtnl +without supplying IFLA_IFNAME. 
+ +PPPIOCNEWUNIT ioctl allows to specify own ppp unit id which will kernel +assign to ppp interface, in case this ppp id is not already used by other +ppp interface. + +In case user does not specify ppp unit id then kernel choose the first free +ppp unit id. This applies also for case when creating ppp interface via +rtnl method as it does not provide a way for specifying own ppp unit id. + +If some network interface (does not have to be ppp) has name "ppp<id>" +with this first free ppp id then PPPIOCNEWUNIT ioctl or rtnl call fails. + +And registering new ppp interface is not possible anymore, until interface +which holds conflicting name is renamed. Or when using rtnl method with +custom interface name in IFLA_IFNAME. + +As list of allocated / used ppp unit ids is not possible to retrieve from +kernel to userspace, userspace has no idea what happens nor which interface +is doing this conflict. + +So change the algorithm how ppp unit id is generated. And choose the first +number which is not neither used as ppp unit id nor in some network +interface with pattern "ppp<id>". + +This issue can be simply reproduced by following pppd call when there is no +ppp interface registered and also no interface with name pattern "ppp<id>": + + pppd ifname ppp1 +ipv6 noip noauth nolock local nodetach pty "pppd +ipv6 noip noauth nolock local nodetach notty" + +Or by creating the one ppp interface (which gets assigned ppp unit id 0), +renaming it to "ppp1" and then trying to create a new ppp interface (which +will always fails as next free ppp unit id is 1, but network interface with +name "ppp1" exists). + +This patch fixes above described issue by generating new and new ppp unit +id until some non-conflicting id with network interfaces is generated. + +Signed-off-by: Pali Rohár +Cc: stable@vger.kernel.org +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -287,7 +287,7 @@ static struct channel *ppp_find_channel( + static int ppp_connect_channel(struct channel *pch, int unit); + static int ppp_disconnect_channel(struct channel *pch); + static void ppp_destroy_channel(struct channel *pch); +-static int unit_get(struct idr *p, void *ptr); ++static int unit_get(struct idr *p, void *ptr, int min); + static int unit_set(struct idr *p, void *ptr, int n); + static void unit_put(struct idr *p, int n); + static void *unit_find(struct idr *p, int n); +@@ -963,9 +963,20 @@ static int ppp_unit_register(struct ppp + mutex_lock(&pn->all_ppp_mutex); + + if (unit < 0) { +- ret = unit_get(&pn->units_idr, ppp); ++ ret = unit_get(&pn->units_idr, ppp, 0); + if (ret < 0) + goto err; ++ if (!ifname_is_set) { ++ while (1) { ++ snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ret); ++ if (!__dev_get_by_name(ppp->ppp_net, ppp->dev->name)) ++ break; ++ unit_put(&pn->units_idr, ret); ++ ret = unit_get(&pn->units_idr, ppp, ret + 1); ++ if (ret < 0) ++ goto err; ++ } ++ } + } else { + /* Caller asked for a specific unit number. Fail with -EEXIST + * if unavailable. 
For backward compatibility, return -EEXIST +@@ -3252,9 +3263,9 @@ static int unit_set(struct idr *p, void + } + + /* get new free unit number and associate pointer with it */ +-static int unit_get(struct idr *p, void *ptr) ++static int unit_get(struct idr *p, void *ptr, int min) + { +- return idr_alloc(p, ptr, 0, 0, GFP_KERNEL); ++ return idr_alloc(p, ptr, min, 0, GFP_KERNEL); + } + + /* put unit number back to a pool */ diff --git a/queue-4.19/series b/queue-4.19/series index 3bf2f669090..cb0c97284fe 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -1,2 +1,10 @@ kvm-svm-fix-off-by-one-indexing-when-nullifying-last.patch -usb-dwc3-gadget-use-list_replace_init-before-travers.patch +tracing-reject-string-operand-in-the-histogram-expression.patch +bpf-inherit-expanded-patched-seen-count-from-old-aux-data.patch +bpf-do-not-mark-insn-as-seen-under-speculative-path-verification.patch +bpf-fix-leakage-under-speculation-on-mispredicted-branches.patch +bpf-selftests-adjust-few-selftest-outcomes-wrt-unreachable-code.patch +kvm-x86-mmu-use-the-correct-inherited-permissions-to-get-shadow-page.patch +usb-ehci-fix-kunpeng920-ehci-hardware-problem.patch +ppp-fix-generating-ppp-unit-id-when-ifname-is-not-specified.patch +ovl-prevent-private-clone-if-bind-mount-is-not-allowed.patch diff --git a/queue-4.19/tracing-reject-string-operand-in-the-histogram-expression.patch b/queue-4.19/tracing-reject-string-operand-in-the-histogram-expression.patch new file mode 100644 index 00000000000..48f283db10f --- /dev/null +++ b/queue-4.19/tracing-reject-string-operand-in-the-histogram-expression.patch @@ -0,0 +1,61 @@ +From a9d10ca4986571bffc19778742d508cc8dd13e02 Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Wed, 28 Jul 2021 07:55:43 +0900 +Subject: tracing: Reject string operand in the histogram expression + +From: Masami Hiramatsu + +commit a9d10ca4986571bffc19778742d508cc8dd13e02 upstream. 
+ +Since the string type can not be the target of the addition / subtraction +operation, it must be rejected. Without this fix, the string type silently +converted to digits. + +Link: https://lkml.kernel.org/r/162742654278.290973.1523000673366456634.stgit@devnote2 + +Cc: stable@vger.kernel.org +Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers") +Signed-off-by: Masami Hiramatsu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events_hist.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -2790,6 +2790,12 @@ static struct hist_field *parse_unary(st + ret = PTR_ERR(operand1); + goto free; + } ++ if (operand1->flags & HIST_FIELD_FL_STRING) { ++ /* String type can not be the operand of unary operator. */ ++ destroy_hist_field(operand1, 0); ++ ret = -EINVAL; ++ goto free; ++ } + + expr->flags |= operand1->flags & + (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); +@@ -2890,6 +2896,10 @@ static struct hist_field *parse_expr(str + operand1 = NULL; + goto free; + } ++ if (operand1->flags & HIST_FIELD_FL_STRING) { ++ ret = -EINVAL; ++ goto free; ++ } + + /* rest of string could be another expression e.g. 
b+c in a+b+c */ + operand_flags = 0; +@@ -2899,6 +2909,10 @@ static struct hist_field *parse_expr(str + operand2 = NULL; + goto free; + } ++ if (operand2->flags & HIST_FIELD_FL_STRING) { ++ ret = -EINVAL; ++ goto free; ++ } + + ret = check_expr_operands(operand1, operand2); + if (ret) diff --git a/queue-4.19/usb-dwc3-gadget-use-list_replace_init-before-travers.patch b/queue-4.19/usb-dwc3-gadget-use-list_replace_init-before-travers.patch deleted file mode 100644 index 0c037514bbc..00000000000 --- a/queue-4.19/usb-dwc3-gadget-use-list_replace_init-before-travers.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 9c65ea2597a5fec2e57551b71c2a70e034a435d6 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 29 Jul 2021 00:33:14 -0700 -Subject: usb: dwc3: gadget: Use list_replace_init() before traversing lists - -From: Wesley Cheng - -[ Upstream commit d25d85061bd856d6be221626605319154f9b5043 ] - -The list_for_each_entry_safe() macro saves the current item (n) and -the item after (n+1), so that n can be safely removed without -corrupting the list. However, when traversing the list and removing -items using gadget giveback, the DWC3 lock is briefly released, -allowing other routines to execute. There is a situation where, while -items are being removed from the cancelled_list using -dwc3_gadget_ep_cleanup_cancelled_requests(), the pullup disable -routine is running in parallel (due to UDC unbind). As the cleanup -routine removes n, and the pullup disable removes n+1, once the -cleanup retakes the DWC3 lock, it references a request who was already -removed/handled. With list debug enabled, this leads to a panic. -Ensure all instances of the macro are replaced where gadget giveback -is used. 
- -Example call stack: - -Thread#1: -__dwc3_gadget_ep_set_halt() - CLEAR HALT - -> dwc3_gadget_ep_cleanup_cancelled_requests() - ->list_for_each_entry_safe() - ->dwc3_gadget_giveback(n) - ->dwc3_gadget_del_and_unmap_request()- n deleted[cancelled_list] - ->spin_unlock - ->Thread#2 executes - ... - ->dwc3_gadget_giveback(n+1) - ->Already removed! - -Thread#2: -dwc3_gadget_pullup() - ->waiting for dwc3 spin_lock - ... - ->Thread#1 released lock - ->dwc3_stop_active_transfers() - ->dwc3_remove_requests() - ->fetches n+1 item from cancelled_list (n removed by Thread#1) - ->dwc3_gadget_giveback() - ->dwc3_gadget_del_and_unmap_request()- n+1 -deleted[cancelled_list] - ->spin_unlock - -Fix this condition by utilizing list_replace_init(), and traversing -through a local copy of the current elements in the endpoint lists. -This will also set the parent list as empty, so if another thread is -also looping through the list, it will be empty on the next iteration. - -Fixes: d4f1afe5e896 ("usb: dwc3: gadget: move requests to cancelled_list") -Cc: stable -Acked-by: Felipe Balbi -Signed-off-by: Wesley Cheng -Link: https://lore.kernel.org/r/1627543994-20327-1-git-send-email-wcheng@codeaurora.org -Signed-off-by: Greg Kroah-Hartman -Signed-off-by: Sasha Levin ---- - drivers/usb/dwc3/gadget.c | 18 ++++++++++++++++-- - 1 file changed, 16 insertions(+), 2 deletions(-) - ---- a/drivers/usb/dwc3/gadget.c -+++ b/drivers/usb/dwc3/gadget.c -@@ -1466,11 +1466,18 @@ static void dwc3_gadget_ep_cleanup_cance - { - struct dwc3_request *req; - struct dwc3_request *tmp; -+ struct list_head local; - -- list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) { -+restart: -+ list_replace_init(&dep->cancelled_list, &local); -+ -+ list_for_each_entry_safe(req, tmp, &local, list) { - dwc3_gadget_ep_skip_trbs(dep, req); - dwc3_gadget_giveback(dep, req, -ECONNRESET); - } -+ -+ if (!list_empty(&dep->cancelled_list)) -+ goto restart; - } - - static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, -@@ 
-2443,8 +2450,12 @@ static void dwc3_gadget_ep_cleanup_compl - { - struct dwc3_request *req; - struct dwc3_request *tmp; -+ struct list_head local; - -- list_for_each_entry_safe(req, tmp, &dep->started_list, list) { -+restart: -+ list_replace_init(&dep->started_list, &local); -+ -+ list_for_each_entry_safe(req, tmp, &local, list) { - int ret; - - ret = dwc3_gadget_ep_cleanup_completed_request(dep, event, -@@ -2452,6 +2463,9 @@ static void dwc3_gadget_ep_cleanup_compl - if (ret) - break; - } -+ -+ if (!list_empty(&dep->started_list)) -+ goto restart; - } - - static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep) diff --git a/queue-4.19/usb-ehci-fix-kunpeng920-ehci-hardware-problem.patch b/queue-4.19/usb-ehci-fix-kunpeng920-ehci-hardware-problem.patch new file mode 100644 index 00000000000..2bb3273d838 --- /dev/null +++ b/queue-4.19/usb-ehci-fix-kunpeng920-ehci-hardware-problem.patch @@ -0,0 +1,42 @@ +From 26b75952ca0b8b4b3050adb9582c8e2f44d49687 Mon Sep 17 00:00:00 2001 +From: Longfang Liu +Date: Fri, 9 Apr 2021 16:48:01 +0800 +Subject: USB:ehci:fix Kunpeng920 ehci hardware problem + +From: Longfang Liu + +commit 26b75952ca0b8b4b3050adb9582c8e2f44d49687 upstream. + +Kunpeng920's EHCI controller does not have SBRN register. +Reading the SBRN register when the controller driver is +initialized will get 0. + +When rebooting the EHCI driver, ehci_shutdown() will be called. +if the sbrn flag is 0, ehci_shutdown() will return directly. +The sbrn flag being 0 will cause the EHCI interrupt signal to +not be turned off after reboot. this interrupt that is not closed +will cause an exception to the device sharing the interrupt. + +Therefore, the EHCI controller of Kunpeng920 needs to skip +the read operation of the SBRN register. 
+ +Acked-by: Alan Stern +Signed-off-by: Longfang Liu +Link: https://lore.kernel.org/r/1617958081-17999-1-git-send-email-liulongfang@huawei.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/host/ehci-pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/usb/host/ehci-pci.c ++++ b/drivers/usb/host/ehci-pci.c +@@ -298,6 +298,9 @@ static int ehci_pci_setup(struct usb_hcd + if (pdev->vendor == PCI_VENDOR_ID_STMICRO + && pdev->device == PCI_DEVICE_ID_STMICRO_USB_HOST) + ; /* ConneXT has no sbrn register */ ++ else if (pdev->vendor == PCI_VENDOR_ID_HUAWEI ++ && pdev->device == 0xa239) ++ ; /* HUAWEI Kunpeng920 USB EHCI has no sbrn register */ + else + pci_read_config_byte(pdev, 0x60, &ehci->sbrn); + -- 2.47.3