From: Greg Kroah-Hartman Date: Tue, 8 Sep 2020 13:21:34 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.14.197~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=05eb9c912c0a407b90e01e40a8c3ab09890b794e;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: kvm-arm64-add-kvm_extable-for-vaxorcism-code.patch kvm-arm64-defer-guest-entry-when-an-asynchronous-exception-is-pending.patch kvm-arm64-set-hcr_el2.ptw-to-prevent-at-taking-synchronous-exception.patch kvm-arm64-survive-synchronous-exceptions-caused-by-at-instructions.patch net-initialize-fastreuse-on-inet_inherit_port.patch net-refactor-bind_bucket-fastreuse-into-helper.patch --- diff --git a/queue-4.9/kvm-arm64-add-kvm_extable-for-vaxorcism-code.patch b/queue-4.9/kvm-arm64-add-kvm_extable-for-vaxorcism-code.patch new file mode 100644 index 00000000000..1c9d6a95f1f --- /dev/null +++ b/queue-4.9/kvm-arm64-add-kvm_extable-for-vaxorcism-code.patch @@ -0,0 +1,230 @@ +From foo@baz Tue Sep 8 03:16:28 PM CEST 2020 +From: Andre Przywara +Date: Fri, 4 Sep 2020 12:28:57 +0100 +Subject: KVM: arm64: Add kvm_extable for vaxorcism code +To: stable@vger.kernel.org +Cc: James Morse , Marc Zyngier , Catalin Marinas +Message-ID: <20200904112900.230831-2-andre.przywara@arm.com> + +From: James Morse + +commit e9ee186bb735bfc17fa81dbc9aebf268aee5b41e upstream. + +KVM has a one instruction window where it will allow an SError exception +to be consumed by the hypervisor without treating it as a hypervisor bug. +This is used to consume asynchronous external abort that were caused by +the guest. + +As we are about to add another location that survives unexpected exceptions, +generalise this code to make it behave like the host's extable. + +KVM's version has to be mapped to EL2 to be accessible on nVHE systems. + +The SError vaxorcism code is a one instruction window, so has two entries +in the extable. Because the KVM code is copied for VHE and nVHE, we end up +with four entries, half of which correspond with code that isn't mapped. + +Cc: stable@vger.kernel.org # v4.9 +Signed-off-by: James Morse +Reviewed-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Andre Przywara +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/kvm_asm.h | 15 +++++++++++ + arch/arm64/kernel/vmlinux.lds.S | 8 ++++++ + arch/arm64/kvm/hyp/entry.S | 16 +++++++----- + arch/arm64/kvm/hyp/hyp-entry.S | 51 ++++++++++++++++++++++++--------------- + arch/arm64/kvm/hyp/switch.c | 31 +++++++++++++++++++++++ + 5 files changed, 96 insertions(+), 25 deletions(-) + +--- a/arch/arm64/include/asm/kvm_asm.h ++++ b/arch/arm64/include/asm/kvm_asm.h +@@ -106,6 +106,21 @@ extern u32 __init_stage2_translation(voi + kern_hyp_va \vcpu + .endm + ++/* ++ * KVM extable for unexpected exceptions. ++ * In the same format _asm_extable, but output to a different section so that ++ * it can be mapped to EL2. The KVM version is not sorted. The caller must ++ * ensure: ++ * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented ++ * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. ++ */ ++.macro _kvm_extable, from, to ++ .pushsection __kvm_ex_table, "a" ++ .align 3 ++ .long (\from - .), (\to - .) ++ .popsection ++.endm ++ + #endif + + #endif /* __ARM_KVM_ASM_H__ */ +--- a/arch/arm64/kernel/vmlinux.lds.S ++++ b/arch/arm64/kernel/vmlinux.lds.S +@@ -23,6 +23,13 @@ ENTRY(_text) + + jiffies = jiffies_64; + ++ ++#define HYPERVISOR_EXTABLE \ ++ . 
= ALIGN(SZ_8); \ ++ VMLINUX_SYMBOL(__start___kvm_ex_table) = .; \ ++ *(__kvm_ex_table) \ ++ VMLINUX_SYMBOL(__stop___kvm_ex_table) = .; ++ + #define HYPERVISOR_TEXT \ + /* \ + * Align to 4 KB so that \ +@@ -38,6 +45,7 @@ jiffies = jiffies_64; + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ ++ HYPERVISOR_EXTABLE \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + + #define IDMAP_TEXT \ +--- a/arch/arm64/kvm/hyp/entry.S ++++ b/arch/arm64/kvm/hyp/entry.S +@@ -135,18 +135,22 @@ ENTRY(__guest_exit) + // This is our single instruction exception window. A pending + // SError is guaranteed to occur at the earliest when we unmask + // it, and at the latest just after the ISB. +- .global abort_guest_exit_start + abort_guest_exit_start: + + isb + +- .global abort_guest_exit_end + abort_guest_exit_end: ++ msr daifset, #4 // Mask aborts ++ ret + +- // If the exception took place, restore the EL1 exception +- // context so that we can report some information. +- // Merge the exception code with the SError pending bit. +- tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f ++ _kvm_extable abort_guest_exit_start, 9997f ++ _kvm_extable abort_guest_exit_end, 9997f ++9997: ++ msr daifset, #4 // Mask aborts ++ mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) ++ ++ // restore the EL1 exception context so that we can report some ++ // information. Merge the exception code with the SError pending bit. + msr elr_el2, x2 + msr esr_el2, x3 + msr spsr_el2, x4 +--- a/arch/arm64/kvm/hyp/hyp-entry.S ++++ b/arch/arm64/kvm/hyp/hyp-entry.S +@@ -25,6 +25,30 @@ + #include + #include + ++.macro save_caller_saved_regs_vect ++ stp x0, x1, [sp, #-16]! ++ stp x2, x3, [sp, #-16]! ++ stp x4, x5, [sp, #-16]! ++ stp x6, x7, [sp, #-16]! ++ stp x8, x9, [sp, #-16]! ++ stp x10, x11, [sp, #-16]! ++ stp x12, x13, [sp, #-16]! ++ stp x14, x15, [sp, #-16]! ++ stp x16, x17, [sp, #-16]! ++.endm ++ ++.macro restore_caller_saved_regs_vect ++ ldp x16, x17, [sp], #16 ++ ldp x14, x15, [sp], #16 ++ ldp x12, x13, [sp], #16 ++ ldp x10, x11, [sp], #16 ++ ldp x8, x9, [sp], #16 ++ ldp x6, x7, [sp], #16 ++ ldp x4, x5, [sp], #16 ++ ldp x2, x3, [sp], #16 ++ ldp x0, x1, [sp], #16 ++.endm ++ + .text + .pushsection .hyp.text, "ax" + +@@ -178,25 +202,14 @@ el1_error: + b __guest_exit + + el2_error: +- /* +- * Only two possibilities: +- * 1) Either we come from the exit path, having just unmasked +- * PSTATE.A: change the return code to an EL2 fault, and +- * carry on, as we're already in a sane state to handle it. +- * 2) Or we come from anywhere else, and that's a bug: we panic. +- * +- * For (1), x0 contains the original return code and x1 doesn't +- * contain anything meaningful at that stage. We can reuse them +- * as temp registers. +- * For (2), who cares? +- */ +- mrs x0, elr_el2 +- adr x1, abort_guest_exit_start +- cmp x0, x1 +- adr x1, abort_guest_exit_end +- ccmp x0, x1, #4, ne +- b.ne __hyp_panic +- mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) ++ save_caller_saved_regs_vect ++ stp x29, x30, [sp, #-16]! 
++ ++ bl kvm_unexpected_el2_exception ++ ++ ldp x29, x30, [sp], #16 ++ restore_caller_saved_regs_vect ++ + eret + + ENTRY(__hyp_do_panic) +--- a/arch/arm64/kvm/hyp/switch.c ++++ b/arch/arm64/kvm/hyp/switch.c +@@ -25,6 +25,10 @@ + #include + #include + #include ++#include ++ ++extern struct exception_table_entry __start___kvm_ex_table; ++extern struct exception_table_entry __stop___kvm_ex_table; + + static bool __hyp_text __fpsimd_enabled_nvhe(void) + { +@@ -454,3 +458,30 @@ void __hyp_text __noreturn hyp_panic(str + + unreachable(); + } ++ ++asmlinkage void __hyp_text kvm_unexpected_el2_exception(void) ++{ ++ unsigned long addr, fixup; ++ struct kvm_cpu_context *host_ctxt; ++ struct exception_table_entry *entry, *end; ++ unsigned long elr_el2 = read_sysreg(elr_el2); ++ ++ entry = hyp_symbol_addr(__start___kvm_ex_table); ++ end = hyp_symbol_addr(__stop___kvm_ex_table); ++ host_ctxt = __hyp_this_cpu_ptr(kvm_host_cpu_state); ++ ++ while (entry < end) { ++ addr = (unsigned long)&entry->insn + entry->insn; ++ fixup = (unsigned long)&entry->fixup + entry->fixup; ++ ++ if (addr != elr_el2) { ++ entry++; ++ continue; ++ } ++ ++ write_sysreg(fixup, elr_el2); ++ return; ++ } ++ ++ hyp_panic(host_ctxt); ++} diff --git a/queue-4.9/kvm-arm64-defer-guest-entry-when-an-asynchronous-exception-is-pending.patch b/queue-4.9/kvm-arm64-defer-guest-entry-when-an-asynchronous-exception-is-pending.patch new file mode 100644 index 00000000000..1d0693949ae --- /dev/null +++ b/queue-4.9/kvm-arm64-defer-guest-entry-when-an-asynchronous-exception-is-pending.patch @@ -0,0 +1,74 @@ +From foo@baz Tue Sep 8 03:16:28 PM CEST 2020 +From: Andre Przywara +Date: Fri, 4 Sep 2020 12:28:58 +0100 +Subject: KVM: arm64: Defer guest entry when an asynchronous exception is pending +To: stable@vger.kernel.org +Cc: James Morse , Marc Zyngier , Catalin Marinas +Message-ID: <20200904112900.230831-3-andre.przywara@arm.com> + +From: James Morse + +commit 5dcd0fdbb492d49dac6bf21c436dfcb5ded0a895 upstream. + +SError that occur during world-switch's entry to the guest will be +accounted to the guest, as the exception is masked until we enter the +guest... but we want to attribute the SError as precisely as possible. + +Reading DISR_EL1 before guest entry requires free registers, and using +ESB+DISR_EL1 to consume and read back the ESR would leave KVM holding +a host SError... We would rather leave the SError pending and let the +host take it once we exit world-switch. To do this, we need to defer +guest-entry if an SError is pending. + +Read the ISR to see if SError (or an IRQ) is pending. If so fake an +exit. Place this check between __guest_enter()'s save of the host +registers, and restore of the guest's. SError that occur between +here and the eret into the guest must have affected the guest's +registers, which we can naturally attribute to the guest. + +The dsb is needed to ensure any previous writes have been done before +we read ISR_EL1. On systems without the v8.2 RAS extensions this +doesn't give us anything as we can't contain errors, and the ESR bits +to describe the severity are all implementation-defined. Replace +this with a nop for these systems. + +v4.9-backport: as this kernel version doesn't have the RAS support at +all, remove the RAS alternative. 
+ +Cc: stable@vger.kernel.org # v4.9 +Signed-off-by: James Morse +Signed-off-by: Marc Zyngier +[ James: Removed v8.2 RAS related barriers ] +Signed-off-by: James Morse +Signed-off-by: Andre Przywara +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/entry.S | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/arm64/kvm/hyp/entry.S ++++ b/arch/arm64/kvm/hyp/entry.S +@@ -17,6 +17,7 @@ + + #include + ++#include + #include + #include + #include +@@ -62,6 +63,15 @@ ENTRY(__guest_enter) + // Store the host regs + save_callee_saved_regs x1 + ++ // Now the host state is stored if we have a pending RAS SError it must ++ // affect the host. If any asynchronous exception is pending we defer ++ // the guest entry. ++ mrs x1, isr_el1 ++ cbz x1, 1f ++ mov x0, #ARM_EXCEPTION_IRQ ++ ret ++ ++1: + add x18, x0, #VCPU_CONTEXT + + // Restore guest regs x0-x17 diff --git a/queue-4.9/kvm-arm64-set-hcr_el2.ptw-to-prevent-at-taking-synchronous-exception.patch b/queue-4.9/kvm-arm64-set-hcr_el2.ptw-to-prevent-at-taking-synchronous-exception.patch new file mode 100644 index 00000000000..dce472c0b8d --- /dev/null +++ b/queue-4.9/kvm-arm64-set-hcr_el2.ptw-to-prevent-at-taking-synchronous-exception.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Sep 8 03:16:28 PM CEST 2020 +From: Andre Przywara +Date: Fri, 4 Sep 2020 12:29:00 +0100 +Subject: KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception +To: stable@vger.kernel.org +Cc: James Morse , Marc Zyngier , Catalin Marinas +Message-ID: <20200904112900.230831-5-andre.przywara@arm.com> + +From: James Morse + +commit 71a7f8cb1ca4ca7214a700b1243626759b6c11d4 upstream. + +AT instructions do a translation table walk and return the result, or +the fault in PAR_EL1. KVM uses these to find the IPA when the value is +not provided by the CPU in HPFAR_EL1. + +If a translation table walk causes an external abort it is taken as an +exception, even if it was due to an AT instruction. (DDI0487F.a's D5.2.11 +"Synchronous faults generated by address translation instructions") + +While we previously made KVM resilient to exceptions taken due to AT +instructions, the device access causes mismatched attributes, and may +occur speculatively. Prevent this, by forbidding a walk through memory +described as device at stage2. Now such AT instructions will report a +stage2 fault. + +Such a fault will cause KVM to restart the guest. If the AT instructions +always walk the page tables, but guest execution uses the translation cached +in the TLB, the guest can't make forward progress until the TLB entry is +evicted. This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: +Defer guest entry when an asynchronous exception is pending"), KVM will +return to the host to process IRQs allowing the rest of the system to keep +running. 
+ +Cc: stable@vger.kernel.org # v4.9 +Signed-off-by: James Morse +Reviewed-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Andre Przywara +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/kvm_arm.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/arm64/include/asm/kvm_arm.h ++++ b/arch/arm64/include/asm/kvm_arm.h +@@ -78,10 +78,11 @@ + * IMO: Override CPSR.I and enable signaling with VI + * FMO: Override CPSR.F and enable signaling with VF + * SWIO: Turn set/way invalidates into set/way clean+invalidate ++ * PTW: Take a stage2 fault if a stage1 walk steps in device memory + */ + #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ + HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ +- HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) ++ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_PTW) + #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) + #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) + #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) diff --git a/queue-4.9/kvm-arm64-survive-synchronous-exceptions-caused-by-at-instructions.patch b/queue-4.9/kvm-arm64-survive-synchronous-exceptions-caused-by-at-instructions.patch new file mode 100644 index 00000000000..6ae604f20a2 --- /dev/null +++ b/queue-4.9/kvm-arm64-survive-synchronous-exceptions-caused-by-at-instructions.patch @@ -0,0 +1,141 @@ +From foo@baz Tue Sep 8 03:16:28 PM CEST 2020 +From: Andre Przywara +Date: Fri, 4 Sep 2020 12:28:59 +0100 +Subject: KVM: arm64: Survive synchronous exceptions caused by AT instructions +To: stable@vger.kernel.org +Cc: James Morse , Marc Zyngier , Catalin Marinas +Message-ID: <20200904112900.230831-4-andre.przywara@arm.com> + +From: James Morse + +commit 88a84ccccb3966bcc3f309cdb76092a9892c0260 upstream. + +KVM doesn't expect any synchronous exceptions when executing, any such +exception leads to a panic(). AT instructions access the guest page +tables, and can cause a synchronous external abort to be taken. + +The arm-arm is unclear on what should happen if the guest has configured +the hardware update of the access-flag, and a memory type in TCR_EL1 that +does not support atomic operations. B2.2.6 "Possible implementation +restrictions on using atomic instructions" from DDI0487F.a lists +synchronous external abort as a possible behaviour of atomic instructions +that target memory that isn't writeback cacheable, but the page table +walker may behave differently. + +Make KVM robust to synchronous exceptions caused by AT instructions. +Add a get_user() style helper for AT instructions that returns -EFAULT +if an exception was generated. + +While KVM's version of the exception table mixes synchronous and +asynchronous exceptions, only one of these can occur at each location. + +Re-enter the guest when the AT instructions take an exception on the +assumption the guest will take the same exception. This isn't guaranteed +to make forward progress, as the AT instructions may always walk the page +tables, but guest execution may use the translation cached in the TLB. + +This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: Defer guest +entry when an asynchronous exception is pending"), KVM will return to the +host to process IRQs allowing the rest of the system to keep running. 
+ +Cc: stable@vger.kernel.org # v4.9 +Signed-off-by: James Morse +Reviewed-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Andre Przywara +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/kvm_asm.h | 28 ++++++++++++++++++++++++++++ + arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++-- + arch/arm64/kvm/hyp/switch.c | 8 ++++---- + 3 files changed, 42 insertions(+), 6 deletions(-) + +--- a/arch/arm64/include/asm/kvm_asm.h ++++ b/arch/arm64/include/asm/kvm_asm.h +@@ -82,6 +82,34 @@ extern u32 __init_stage2_translation(voi + *__hyp_this_cpu_ptr(sym); \ + }) + ++#define __KVM_EXTABLE(from, to) \ ++ " .pushsection __kvm_ex_table, \"a\"\n" \ ++ " .align 3\n" \ ++ " .long (" #from " - .), (" #to " - .)\n" \ ++ " .popsection\n" ++ ++ ++#define __kvm_at(at_op, addr) \ ++( { \ ++ int __kvm_at_err = 0; \ ++ u64 spsr, elr; \ ++ asm volatile( \ ++ " mrs %1, spsr_el2\n" \ ++ " mrs %2, elr_el2\n" \ ++ "1: at "at_op", %3\n" \ ++ " isb\n" \ ++ " b 9f\n" \ ++ "2: msr spsr_el2, %1\n" \ ++ " msr elr_el2, %2\n" \ ++ " mov %w0, %4\n" \ ++ "9:\n" \ ++ __KVM_EXTABLE(1b, 2b) \ ++ : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \ ++ : "r" (addr), "i" (-EFAULT)); \ ++ __kvm_at_err; \ ++} ) ++ ++ + #else /* __ASSEMBLY__ */ + + .macro hyp_adr_this_cpu reg, sym, tmp +--- a/arch/arm64/kvm/hyp/hyp-entry.S ++++ b/arch/arm64/kvm/hyp/hyp-entry.S +@@ -201,6 +201,15 @@ el1_error: + mov x0, #ARM_EXCEPTION_EL1_SERROR + b __guest_exit + ++el2_sync: ++ save_caller_saved_regs_vect ++ stp x29, x30, [sp, #-16]! ++ bl kvm_unexpected_el2_exception ++ ldp x29, x30, [sp], #16 ++ restore_caller_saved_regs_vect ++ ++ eret ++ + el2_error: + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! +@@ -238,7 +247,6 @@ ENDPROC(\label) + invalid_vector el2t_irq_invalid + invalid_vector el2t_fiq_invalid + invalid_vector el2t_error_invalid +- invalid_vector el2h_sync_invalid + invalid_vector el2h_irq_invalid + invalid_vector el2h_fiq_invalid + invalid_vector el1_sync_invalid +@@ -255,7 +263,7 @@ ENTRY(__kvm_hyp_vector) + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + +- ventry el2h_sync_invalid // Synchronous EL2h ++ ventry el2_sync // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2_error // Error EL2h +--- a/arch/arm64/kvm/hyp/switch.c ++++ b/arch/arm64/kvm/hyp/switch.c +@@ -206,10 +206,10 @@ static bool __hyp_text __translate_far_t + * saved the guest context yet, and we may return early... + */ + par = read_sysreg(par_el1); +- asm volatile("at s1e1r, %0" : : "r" (far)); +- isb(); +- +- tmp = read_sysreg(par_el1); ++ if (!__kvm_at("s1e1r", far)) ++ tmp = read_sysreg(par_el1); ++ else ++ tmp = 1; /* back to the guest */ + write_sysreg(par, par_el1); + + if (unlikely(tmp & 1)) diff --git a/queue-4.9/net-initialize-fastreuse-on-inet_inherit_port.patch b/queue-4.9/net-initialize-fastreuse-on-inet_inherit_port.patch new file mode 100644 index 00000000000..ad8df26d96b --- /dev/null +++ b/queue-4.9/net-initialize-fastreuse-on-inet_inherit_port.patch @@ -0,0 +1,61 @@ +From d76f3351cea2d927fdf70dd7c06898235035e84e Mon Sep 17 00:00:00 2001 +From: Tim Froidcoeur +Date: Tue, 11 Aug 2020 20:33:24 +0200 +Subject: net: initialize fastreuse on inet_inherit_port + +From: Tim Froidcoeur + +commit d76f3351cea2d927fdf70dd7c06898235035e84e upstream. + +In the case of TPROXY, bind_conflict optimizations for SO_REUSEADDR or +SO_REUSEPORT are broken, possibly resulting in O(n) instead of O(1) bind +behaviour or in the incorrect reuse of a bind. 
+ +the kernel keeps track for each bind_bucket if all sockets in the +bind_bucket support SO_REUSEADDR or SO_REUSEPORT in two fastreuse flags. +These flags allow skipping the costly bind_conflict check when possible +(meaning when all sockets have the proper SO_REUSE option). + +For every socket added to a bind_bucket, these flags need to be updated. +As soon as a socket that does not support reuse is added, the flag is +set to false and will never go back to true, unless the bind_bucket is +deleted. + +Note that there is no mechanism to re-evaluate these flags when a socket +is removed (this might make sense when removing a socket that would not +allow reuse; this leaves room for a future patch). + +For this optimization to work, it is mandatory that these flags are +properly initialized and updated. + +When a child socket is created from a listen socket in +__inet_inherit_port, the TPROXY case could create a new bind bucket +without properly initializing these flags, thus preventing the +optimization to work. Alternatively, a socket not allowing reuse could +be added to an existing bind bucket without updating the flags, causing +bind_conflict to never be called as it should. + +Call inet_csk_update_fastreuse when __inet_inherit_port decides to create +a new bind_bucket or use a different bind_bucket than the one of the +listen socket. + +Fixes: 093d282321da ("tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()") +Acked-by: Matthieu Baerts +Signed-off-by: Tim Froidcoeur +Signed-off-by: David S. Miller +Signed-off-by: Tim Froidcoeur +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_hashtables.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -163,6 +163,7 @@ int __inet_inherit_port(const struct soc + return -ENOMEM; + } + } ++ inet_csk_update_fastreuse(tb, child); + } + inet_bind_hash(child, tb, port); + spin_unlock(&head->lock); diff --git a/queue-4.9/net-refactor-bind_bucket-fastreuse-into-helper.patch b/queue-4.9/net-refactor-bind_bucket-fastreuse-into-helper.patch new file mode 100644 index 00000000000..e8ea2a20f62 --- /dev/null +++ b/queue-4.9/net-refactor-bind_bucket-fastreuse-into-helper.patch @@ -0,0 +1,88 @@ +From 62ffc589abb176821662efc4525ee4ac0b9c3894 Mon Sep 17 00:00:00 2001 +From: Tim Froidcoeur +Date: Tue, 11 Aug 2020 20:33:23 +0200 +Subject: net: refactor bind_bucket fastreuse into helper + +From: Tim Froidcoeur + +commit 62ffc589abb176821662efc4525ee4ac0b9c3894 upstream. + +Refactor the fastreuse update code in inet_csk_get_port into a small +helper function that can be called from other places. + +Acked-by: Matthieu Baerts +Signed-off-by: Tim Froidcoeur +Signed-off-by: David S. 
Miller +Signed-off-by: Tim Froidcoeur +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_connection_sock.h | 4 ++++ + net/ipv4/inet_connection_sock.c | 37 +++++++++++++++++++++++++------------ + 2 files changed, 29 insertions(+), 12 deletions(-) + +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -319,5 +319,9 @@ int inet_csk_compat_getsockopt(struct so + int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); + ++/* update the fast reuse flag when adding a socket */ ++void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, ++ struct sock *sk); ++ + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); + #endif /* _INET_CONNECTION_SOCK_H */ +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -89,6 +89,28 @@ int inet_csk_bind_conflict(const struct + } + EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); + ++void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, ++ struct sock *sk) ++{ ++ kuid_t uid = sock_i_uid(sk); ++ bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; ++ ++ if (!hlist_empty(&tb->owners)) { ++ if (!reuse) ++ tb->fastreuse = 0; ++ if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)) ++ tb->fastreuseport = 0; ++ } else { ++ tb->fastreuse = reuse; ++ if (sk->sk_reuseport) { ++ tb->fastreuseport = 1; ++ tb->fastuid = uid; ++ } else { ++ tb->fastreuseport = 0; ++ } ++ } ++} ++ + /* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. + * We try to allocate an odd port (and leave even ports for connect()) +@@ -218,19 +240,10 @@ tb_found: + } + goto fail_unlock; + } +- if (!reuse) +- tb->fastreuse = 0; +- if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)) +- tb->fastreuseport = 0; +- } else { +- tb->fastreuse = reuse; +- if (sk->sk_reuseport) { +- tb->fastreuseport = 1; +- tb->fastuid = uid; +- } else { +- tb->fastreuseport = 0; +- } + } ++ ++ inet_csk_update_fastreuse(tb, sk); ++ + success: + if (!inet_csk(sk)->icsk_bind_hash) + inet_bind_hash(sk, tb, port); diff --git a/queue-4.9/series b/queue-4.9/series index 0bc68038964..8771d1285a4 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -50,3 +50,9 @@ mm-slub-fix-conversion-of-freelist_corrupted.patch vfio-type1-support-faulting-pfnmap-vmas.patch vfio-pci-fault-mmaps-to-enable-vma-tracking.patch vfio-pci-invalidate-mmaps-and-block-mmio-access-on-disabled-memory.patch +kvm-arm64-add-kvm_extable-for-vaxorcism-code.patch +kvm-arm64-defer-guest-entry-when-an-asynchronous-exception-is-pending.patch +kvm-arm64-survive-synchronous-exceptions-caused-by-at-instructions.patch +kvm-arm64-set-hcr_el2.ptw-to-prevent-at-taking-synchronous-exception.patch +net-refactor-bind_bucket-fastreuse-into-helper.patch +net-initialize-fastreuse-on-inet_inherit_port.patch
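
Editor's note (illustration only, not part of any queued patch): the four KVM patches above all rely on the relative-offset exception-table encoding that _kvm_extable and __KVM_EXTABLE emit with ".long (from - .), (to - .)" and that kvm_unexpected_el2_exception() walks. The stand-alone user-space sketch below is an assumption-labelled mock-up of that idea: rel_extable_entry merely mirrors the two 32-bit relative fields of arm64's exception_table_entry, and faulting_insn/fixup_handler are hypothetical stand-ins for code labels. Because each field stores "target minus the address of the field itself", the table stays valid wherever it is mapped, which is what lets __kvm_ex_table be mapped to EL2 on nVHE systems without any address fix-ups.

/*
 * Minimal sketch of a position-independent ("relative") exception table.
 * Not kernel code: it only demonstrates the encoding and the lookup loop
 * that kvm_unexpected_el2_exception() performs in the patches above.
 */
#include <stdio.h>
#include <stdint.h>

struct rel_extable_entry {
	int32_t insn;	/* faulting address minus the address of this field */
	int32_t fixup;	/* fixup address minus the address of this field */
};

/* Turn a stored relative offset back into an absolute address. */
static uintptr_t resolve(const int32_t *field)
{
	return (uintptr_t)field + *field;
}

/* Mirrors the while() loop in kvm_unexpected_el2_exception(). */
static uintptr_t find_fixup(const struct rel_extable_entry *start,
			    const struct rel_extable_entry *stop,
			    uintptr_t fault)
{
	const struct rel_extable_entry *e;

	for (e = start; e < stop; e++)
		if (resolve(&e->insn) == fault)
			return resolve(&e->fixup);
	return 0;	/* no match: the real code falls through to hyp_panic() */
}

/* Stand-ins for code labels; static so they sit near the table in .bss. */
static char faulting_insn, fixup_handler;
static struct rel_extable_entry table[1];

int main(void)
{
	/* What the assembler's "(\from - .), (\to - .)" computes at build time. */
	table[0].insn  = (int32_t)((uintptr_t)&faulting_insn -
				   (uintptr_t)&table[0].insn);
	table[0].fixup = (int32_t)((uintptr_t)&fixup_handler -
				   (uintptr_t)&table[0].fixup);

	printf("fault at %p resolves to fixup %p (expected %p)\n",
	       (void *)&faulting_insn,
	       (void *)find_fixup(table, table + 1, (uintptr_t)&faulting_insn),
	       (void *)&fixup_handler);
	return 0;
}

Running the sketch prints a fixup address equal to the expected one, and the same program would keep working if the whole table were copied to a different mapping, which is the property the unsorted __kvm_ex_table section depends on at EL2.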