From d33cec3830c568ce066369e32207b89758942d89 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Apr 2019 18:42:42 +0200 Subject: [PATCH] 4.19-stable patches added patches: arm-dts-am335x-evm-correct-the-regulators-for-the-audio-codec.patch arm-dts-am335x-evmsk-correct-the-regulators-for-the-audio-codec.patch arm-dts-at91-fix-typo-in-isc_d0-on-pc9.patch arm-dts-rockchip-fix-rk3288-cpu-opp-node-reference.patch arm64-backtrace-don-t-bother-trying-to-unwind-the-userspace-stack.patch arm64-dts-rockchip-fix-rk3328-rgmii-high-tx-error-rate.patch arm64-futex-fix-futex_wake_op-atomic-ops-with-non-zero-result-value.patch dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch dm-integrity-fix-deadlock-with-overlapping-i-o.patch dm-revert-8f50e358153d-dm-limit-the-max-bio-size-as-bio_max_pages-page_size.patch dm-table-propagate-bdi_cap_stable_writes-to-fix-sporadic-checksum-errors.patch pci-add-function-1-dma-alias-quirk-for-marvell-9170-sata-controller.patch pci-pciehp-ignore-link-state-changes-after-powering-off-a-slot.patch sched-fair-do-not-re-read-h_load_next-during-hierarchical-load-calculation.patch x86-asm-remove-dead-__gnuc__-conditionals.patch x86-asm-use-stricter-assembly-constraints-in-bitops.patch x86-perf-amd-remove-need-to-check-running-bit-in-nmi-handler.patch x86-perf-amd-resolve-nmi-latency-issues-for-active-pmcs.patch x86-perf-amd-resolve-race-condition-when-disabling-pmc.patch xen-prevent-buffer-overflow-in-privcmd-ioctl.patch xtensa-fix-return_address.patch --- ...t-the-regulators-for-the-audio-codec.patch | 63 +++++ ...t-the-regulators-for-the-audio-codec.patch | 63 +++++ ...m-dts-at91-fix-typo-in-isc_d0-on-pc9.patch | 34 +++ ...ip-fix-rk3288-cpu-opp-node-reference.patch | 54 +++++ ...trying-to-unwind-the-userspace-stack.patch | 72 ++++++ ...-fix-rk3328-rgmii-high-tx-error-rate.patch | 121 ++++++++++ ...tomic-ops-with-non-zero-result-value.patch | 92 +++++++ ...emcmp-to-strncmp-in-dm_integrity_ctr.patch | 57 +++++ 
...ty-fix-deadlock-with-overlapping-i-o.patch | 49 ++++ ...-bio-size-as-bio_max_pages-page_size.patch | 53 ++++ ...ites-to-fix-sporadic-checksum-errors.patch | 80 ++++++ ...irk-for-marvell-9170-sata-controller.patch | 37 +++ ...te-changes-after-powering-off-a-slot.patch | 50 ++++ ...during-hierarchical-load-calculation.patch | 82 +++++++ queue-4.19/series | 21 ++ ...sm-remove-dead-__gnuc__-conditionals.patch | 116 +++++++++ ...icter-assembly-constraints-in-bitops.patch | 228 ++++++++++++++++++ ...-to-check-running-bit-in-nmi-handler.patch | 127 ++++++++++ ...e-nmi-latency-issues-for-active-pmcs.patch | 142 +++++++++++ ...ve-race-condition-when-disabling-pmc.patch | 152 ++++++++++++ ...ent-buffer-overflow-in-privcmd-ioctl.patch | 37 +++ queue-4.19/xtensa-fix-return_address.patch | 42 ++++ 22 files changed, 1772 insertions(+) create mode 100644 queue-4.19/arm-dts-am335x-evm-correct-the-regulators-for-the-audio-codec.patch create mode 100644 queue-4.19/arm-dts-am335x-evmsk-correct-the-regulators-for-the-audio-codec.patch create mode 100644 queue-4.19/arm-dts-at91-fix-typo-in-isc_d0-on-pc9.patch create mode 100644 queue-4.19/arm-dts-rockchip-fix-rk3288-cpu-opp-node-reference.patch create mode 100644 queue-4.19/arm64-backtrace-don-t-bother-trying-to-unwind-the-userspace-stack.patch create mode 100644 queue-4.19/arm64-dts-rockchip-fix-rk3328-rgmii-high-tx-error-rate.patch create mode 100644 queue-4.19/arm64-futex-fix-futex_wake_op-atomic-ops-with-non-zero-result-value.patch create mode 100644 queue-4.19/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch create mode 100644 queue-4.19/dm-integrity-fix-deadlock-with-overlapping-i-o.patch create mode 100644 queue-4.19/dm-revert-8f50e358153d-dm-limit-the-max-bio-size-as-bio_max_pages-page_size.patch create mode 100644 queue-4.19/dm-table-propagate-bdi_cap_stable_writes-to-fix-sporadic-checksum-errors.patch create mode 100644 queue-4.19/pci-add-function-1-dma-alias-quirk-for-marvell-9170-sata-controller.patch 
create mode 100644 queue-4.19/pci-pciehp-ignore-link-state-changes-after-powering-off-a-slot.patch create mode 100644 queue-4.19/sched-fair-do-not-re-read-h_load_next-during-hierarchical-load-calculation.patch create mode 100644 queue-4.19/x86-asm-remove-dead-__gnuc__-conditionals.patch create mode 100644 queue-4.19/x86-asm-use-stricter-assembly-constraints-in-bitops.patch create mode 100644 queue-4.19/x86-perf-amd-remove-need-to-check-running-bit-in-nmi-handler.patch create mode 100644 queue-4.19/x86-perf-amd-resolve-nmi-latency-issues-for-active-pmcs.patch create mode 100644 queue-4.19/x86-perf-amd-resolve-race-condition-when-disabling-pmc.patch create mode 100644 queue-4.19/xen-prevent-buffer-overflow-in-privcmd-ioctl.patch create mode 100644 queue-4.19/xtensa-fix-return_address.patch diff --git a/queue-4.19/arm-dts-am335x-evm-correct-the-regulators-for-the-audio-codec.patch b/queue-4.19/arm-dts-am335x-evm-correct-the-regulators-for-the-audio-codec.patch new file mode 100644 index 0000000000..e7222830fc --- /dev/null +++ b/queue-4.19/arm-dts-am335x-evm-correct-the-regulators-for-the-audio-codec.patch @@ -0,0 +1,63 @@ +From 4f96dc0a3e79ec257a2b082dab3ee694ff88c317 Mon Sep 17 00:00:00 2001 +From: Peter Ujfalusi +Date: Fri, 15 Mar 2019 12:59:09 +0200 +Subject: ARM: dts: am335x-evm: Correct the regulators for the audio codec + +From: Peter Ujfalusi + +commit 4f96dc0a3e79ec257a2b082dab3ee694ff88c317 upstream. + +Correctly map the regulators used by tlv320aic3106. +Both 1.8V and 3.3V for the codec is derived from VBAT via fixed regulators. 
+ +Cc: # v4.14+ +Signed-off-by: Peter Ujfalusi +Signed-off-by: Tony Lindgren +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/boot/dts/am335x-evm.dts | 26 ++++++++++++++++++++++---- + 1 file changed, 22 insertions(+), 4 deletions(-) + +--- a/arch/arm/boot/dts/am335x-evm.dts ++++ b/arch/arm/boot/dts/am335x-evm.dts +@@ -57,6 +57,24 @@ + enable-active-high; + }; + ++ /* TPS79501 */ ++ v1_8d_reg: fixedregulator-v1_8d { ++ compatible = "regulator-fixed"; ++ regulator-name = "v1_8d"; ++ vin-supply = <&vbat>; ++ regulator-min-microvolt = <1800000>; ++ regulator-max-microvolt = <1800000>; ++ }; ++ ++ /* TPS79501 */ ++ v3_3d_reg: fixedregulator-v3_3d { ++ compatible = "regulator-fixed"; ++ regulator-name = "v3_3d"; ++ vin-supply = <&vbat>; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; ++ }; ++ + matrix_keypad: matrix_keypad0 { + compatible = "gpio-matrix-keypad"; + debounce-delay-ms = <5>; +@@ -499,10 +517,10 @@ + status = "okay"; + + /* Regulators */ +- AVDD-supply = <&vaux2_reg>; +- IOVDD-supply = <&vaux2_reg>; +- DRVDD-supply = <&vaux2_reg>; +- DVDD-supply = <&vbat>; ++ AVDD-supply = <&v3_3d_reg>; ++ IOVDD-supply = <&v3_3d_reg>; ++ DRVDD-supply = <&v3_3d_reg>; ++ DVDD-supply = <&v1_8d_reg>; + }; + }; + diff --git a/queue-4.19/arm-dts-am335x-evmsk-correct-the-regulators-for-the-audio-codec.patch b/queue-4.19/arm-dts-am335x-evmsk-correct-the-regulators-for-the-audio-codec.patch new file mode 100644 index 0000000000..aaef28c73d --- /dev/null +++ b/queue-4.19/arm-dts-am335x-evmsk-correct-the-regulators-for-the-audio-codec.patch @@ -0,0 +1,63 @@ +From 6691370646e844be98bb6558c024269791d20bd7 Mon Sep 17 00:00:00 2001 +From: Peter Ujfalusi +Date: Fri, 15 Mar 2019 12:59:17 +0200 +Subject: ARM: dts: am335x-evmsk: Correct the regulators for the audio codec + +From: Peter Ujfalusi + +commit 6691370646e844be98bb6558c024269791d20bd7 upstream. + +Correctly map the regulators used by tlv320aic3106. 
+Both 1.8V and 3.3V for the codec is derived from VBAT via fixed regulators. + +Cc: # v4.14+ +Signed-off-by: Peter Ujfalusi +Signed-off-by: Tony Lindgren +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/boot/dts/am335x-evmsk.dts | 26 ++++++++++++++++++++++---- + 1 file changed, 22 insertions(+), 4 deletions(-) + +--- a/arch/arm/boot/dts/am335x-evmsk.dts ++++ b/arch/arm/boot/dts/am335x-evmsk.dts +@@ -73,6 +73,24 @@ + enable-active-high; + }; + ++ /* TPS79518 */ ++ v1_8d_reg: fixedregulator-v1_8d { ++ compatible = "regulator-fixed"; ++ regulator-name = "v1_8d"; ++ vin-supply = <&vbat>; ++ regulator-min-microvolt = <1800000>; ++ regulator-max-microvolt = <1800000>; ++ }; ++ ++ /* TPS78633 */ ++ v3_3d_reg: fixedregulator-v3_3d { ++ compatible = "regulator-fixed"; ++ regulator-name = "v3_3d"; ++ vin-supply = <&vbat>; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; ++ }; ++ + leds { + pinctrl-names = "default"; + pinctrl-0 = <&user_leds_s0>; +@@ -501,10 +519,10 @@ + status = "okay"; + + /* Regulators */ +- AVDD-supply = <&vaux2_reg>; +- IOVDD-supply = <&vaux2_reg>; +- DRVDD-supply = <&vaux2_reg>; +- DVDD-supply = <&vbat>; ++ AVDD-supply = <&v3_3d_reg>; ++ IOVDD-supply = <&v3_3d_reg>; ++ DRVDD-supply = <&v3_3d_reg>; ++ DVDD-supply = <&v1_8d_reg>; + }; + }; + diff --git a/queue-4.19/arm-dts-at91-fix-typo-in-isc_d0-on-pc9.patch b/queue-4.19/arm-dts-at91-fix-typo-in-isc_d0-on-pc9.patch new file mode 100644 index 0000000000..b48374e3fa --- /dev/null +++ b/queue-4.19/arm-dts-at91-fix-typo-in-isc_d0-on-pc9.patch @@ -0,0 +1,34 @@ +From e7dfb6d04e4715be1f3eb2c60d97b753fd2e4516 Mon Sep 17 00:00:00 2001 +From: David Engraf +Date: Mon, 11 Mar 2019 08:57:42 +0100 +Subject: ARM: dts: at91: Fix typo in ISC_D0 on PC9 + +From: David Engraf + +commit e7dfb6d04e4715be1f3eb2c60d97b753fd2e4516 upstream. + +The function argument for the ISC_D0 on PC9 was incorrect. According to +the documentation it should be 'C' aka 3. 
+ +Signed-off-by: David Engraf +Reviewed-by: Nicolas Ferre +Signed-off-by: Ludovic Desroches +Fixes: 7f16cb676c00 ("ARM: at91/dt: add sama5d2 pinmux") +Cc: # v4.4+ +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/boot/dts/sama5d2-pinfunc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/boot/dts/sama5d2-pinfunc.h ++++ b/arch/arm/boot/dts/sama5d2-pinfunc.h +@@ -518,7 +518,7 @@ + #define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0) + #define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3) + #define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1) +-#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1) ++#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1) + #define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2) + #define PIN_PC10 74 + #define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0) diff --git a/queue-4.19/arm-dts-rockchip-fix-rk3288-cpu-opp-node-reference.patch b/queue-4.19/arm-dts-rockchip-fix-rk3288-cpu-opp-node-reference.patch new file mode 100644 index 0000000000..e75b272f28 --- /dev/null +++ b/queue-4.19/arm-dts-rockchip-fix-rk3288-cpu-opp-node-reference.patch @@ -0,0 +1,54 @@ +From 6b2fde3dbfab6ebc45b0cd605e17ca5057ff9a3b Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Sun, 24 Feb 2019 21:51:22 +0000 +Subject: ARM: dts: rockchip: fix rk3288 cpu opp node reference + +From: Jonas Karlman + +commit 6b2fde3dbfab6ebc45b0cd605e17ca5057ff9a3b upstream. + +The following error can be seen during boot: + + of: /cpus/cpu@501: Couldn't find opp node + +Change cpu nodes to use operating-points-v2 in order to fix this. 
+ +Fixes: ce76de984649 ("ARM: dts: rockchip: convert rk3288 to operating-points-v2") +Cc: stable@vger.kernel.org +Signed-off-by: Jonas Karlman +Signed-off-by: Heiko Stuebner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/boot/dts/rk3288.dtsi | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm/boot/dts/rk3288.dtsi ++++ b/arch/arm/boot/dts/rk3288.dtsi +@@ -70,7 +70,7 @@ + compatible = "arm,cortex-a12"; + reg = <0x501>; + resets = <&cru SRST_CORE1>; +- operating-points = <&cpu_opp_table>; ++ operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; /* min followed by max */ + clock-latency = <40000>; + clocks = <&cru ARMCLK>; +@@ -80,7 +80,7 @@ + compatible = "arm,cortex-a12"; + reg = <0x502>; + resets = <&cru SRST_CORE2>; +- operating-points = <&cpu_opp_table>; ++ operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; /* min followed by max */ + clock-latency = <40000>; + clocks = <&cru ARMCLK>; +@@ -90,7 +90,7 @@ + compatible = "arm,cortex-a12"; + reg = <0x503>; + resets = <&cru SRST_CORE3>; +- operating-points = <&cpu_opp_table>; ++ operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; /* min followed by max */ + clock-latency = <40000>; + clocks = <&cru ARMCLK>; diff --git a/queue-4.19/arm64-backtrace-don-t-bother-trying-to-unwind-the-userspace-stack.patch b/queue-4.19/arm64-backtrace-don-t-bother-trying-to-unwind-the-userspace-stack.patch new file mode 100644 index 0000000000..59f1209f82 --- /dev/null +++ b/queue-4.19/arm64-backtrace-don-t-bother-trying-to-unwind-the-userspace-stack.patch @@ -0,0 +1,72 @@ +From 1e6f5440a6814d28c32d347f338bfef68bc3e69d Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Mon, 8 Apr 2019 17:56:34 +0100 +Subject: arm64: backtrace: Don't bother trying to unwind the userspace stack + +From: Will Deacon + +commit 1e6f5440a6814d28c32d347f338bfef68bc3e69d upstream. 
+ +Calling dump_backtrace() with a pt_regs argument corresponding to +userspace doesn't make any sense and our unwinder will simply print +"Call trace:" before unwinding the stack looking for user frames. + +Rather than go through this song and dance, just return early if we're +passed a user register state. + +Cc: +Fixes: 1149aad10b1e ("arm64: Add dump_backtrace() in show_regs") +Reported-by: Kefeng Wang +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/traps.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/arch/arm64/kernel/traps.c ++++ b/arch/arm64/kernel/traps.c +@@ -101,10 +101,16 @@ static void dump_instr(const char *lvl, + void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) + { + struct stackframe frame; +- int skip; ++ int skip = 0; + + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + ++ if (regs) { ++ if (user_mode(regs)) ++ return; ++ skip = 1; ++ } ++ + if (!tsk) + tsk = current; + +@@ -125,7 +131,6 @@ void dump_backtrace(struct pt_regs *regs + frame.graph = tsk->curr_ret_stack; + #endif + +- skip = !!regs; + printk("Call trace:\n"); + do { + /* skip until specified stack frame */ +@@ -175,15 +180,13 @@ static int __die(const char *str, int er + return ret; + + print_modules(); +- __show_regs(regs); + pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), + end_of_stack(tsk)); ++ show_regs(regs); + +- if (!user_mode(regs)) { +- dump_backtrace(regs, tsk); ++ if (!user_mode(regs)) + dump_instr(KERN_EMERG, regs); +- } + + return ret; + } diff --git a/queue-4.19/arm64-dts-rockchip-fix-rk3328-rgmii-high-tx-error-rate.patch b/queue-4.19/arm64-dts-rockchip-fix-rk3328-rgmii-high-tx-error-rate.patch new file mode 100644 index 0000000000..134679edea --- /dev/null +++ b/queue-4.19/arm64-dts-rockchip-fix-rk3328-rgmii-high-tx-error-rate.patch @@ -0,0 +1,121 @@ +From 6fd8b9780ec1a49ac46e0aaf8775247205e66231 Mon Sep 17 
00:00:00 2001 +From: Peter Geis +Date: Wed, 13 Mar 2019 18:45:36 +0000 +Subject: arm64: dts: rockchip: fix rk3328 rgmii high tx error rate + +From: Peter Geis + +commit 6fd8b9780ec1a49ac46e0aaf8775247205e66231 upstream. + +Several rk3328 based boards experience high rgmii tx error rates. +This is due to several pins in the rk3328.dtsi rgmii pinmux that are +missing a defined pull strength setting. +This causes the pinmux driver to default to 2ma (bit mask 00). + +These pins are only defined in the rk3328.dtsi, and are not listed in +the rk3328 specification. +The TRM only lists them as "Reserved" +(RK3328 TRM V1.1, 3.3.3 Detail Register Description, GRF_GPIO0B_IOMUX, +GRF_GPIO0C_IOMUX, GRF_GPIO0D_IOMUX). +However, removal of these pins from the rgmii pinmux definition causes +the interface to fail to transmit. + +Also, the rgmii tx and rx pins defined in the dtsi are not consistent +with the rk3328 specification, with tx pins currently set to 12ma and +rx pins set to 2ma. + +Fix this by setting tx pins to 8ma and the rx pins to 4ma, consistent +with the specification. +Defining the drive strength for the undefined pins eliminated the high +tx packet error rate observed under heavy data transfers. +Aligning the drive strength to the TRM values eliminated the occasional +packet retry errors under iperf3 testing. +This allows much higher data rates with no recorded tx errors. + +Tested on the rk3328-roc-cc board. 
+ +Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") +Cc: stable@vger.kernel.org +Signed-off-by: Peter Geis +Signed-off-by: Heiko Stuebner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/boot/dts/rockchip/rk3328.dtsi | 44 +++++++++++++++---------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi ++++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi +@@ -1553,50 +1553,50 @@ + rgmiim1_pins: rgmiim1-pins { + rockchip,pins = + /* mac_txclk */ +- <1 RK_PB4 2 &pcfg_pull_none_12ma>, ++ <1 RK_PB4 2 &pcfg_pull_none_8ma>, + /* mac_rxclk */ +- <1 RK_PB5 2 &pcfg_pull_none_2ma>, ++ <1 RK_PB5 2 &pcfg_pull_none_4ma>, + /* mac_mdio */ +- <1 RK_PC3 2 &pcfg_pull_none_2ma>, ++ <1 RK_PC3 2 &pcfg_pull_none_4ma>, + /* mac_txen */ +- <1 RK_PD1 2 &pcfg_pull_none_12ma>, ++ <1 RK_PD1 2 &pcfg_pull_none_8ma>, + /* mac_clk */ +- <1 RK_PC5 2 &pcfg_pull_none_2ma>, ++ <1 RK_PC5 2 &pcfg_pull_none_4ma>, + /* mac_rxdv */ +- <1 RK_PC6 2 &pcfg_pull_none_2ma>, ++ <1 RK_PC6 2 &pcfg_pull_none_4ma>, + /* mac_mdc */ +- <1 RK_PC7 2 &pcfg_pull_none_2ma>, ++ <1 RK_PC7 2 &pcfg_pull_none_4ma>, + /* mac_rxd1 */ +- <1 RK_PB2 2 &pcfg_pull_none_2ma>, ++ <1 RK_PB2 2 &pcfg_pull_none_4ma>, + /* mac_rxd0 */ +- <1 RK_PB3 2 &pcfg_pull_none_2ma>, ++ <1 RK_PB3 2 &pcfg_pull_none_4ma>, + /* mac_txd1 */ +- <1 RK_PB0 2 &pcfg_pull_none_12ma>, ++ <1 RK_PB0 2 &pcfg_pull_none_8ma>, + /* mac_txd0 */ +- <1 RK_PB1 2 &pcfg_pull_none_12ma>, ++ <1 RK_PB1 2 &pcfg_pull_none_8ma>, + /* mac_rxd3 */ +- <1 RK_PB6 2 &pcfg_pull_none_2ma>, ++ <1 RK_PB6 2 &pcfg_pull_none_4ma>, + /* mac_rxd2 */ +- <1 RK_PB7 2 &pcfg_pull_none_2ma>, ++ <1 RK_PB7 2 &pcfg_pull_none_4ma>, + /* mac_txd3 */ +- <1 RK_PC0 2 &pcfg_pull_none_12ma>, ++ <1 RK_PC0 2 &pcfg_pull_none_8ma>, + /* mac_txd2 */ +- <1 RK_PC1 2 &pcfg_pull_none_12ma>, ++ <1 RK_PC1 2 &pcfg_pull_none_8ma>, + + /* mac_txclk */ +- <0 RK_PB0 1 &pcfg_pull_none>, ++ <0 RK_PB0 1 &pcfg_pull_none_8ma>, + /* mac_txen */ +- <0 
RK_PB4 1 &pcfg_pull_none>, ++ <0 RK_PB4 1 &pcfg_pull_none_8ma>, + /* mac_clk */ +- <0 RK_PD0 1 &pcfg_pull_none>, ++ <0 RK_PD0 1 &pcfg_pull_none_4ma>, + /* mac_txd1 */ +- <0 RK_PC0 1 &pcfg_pull_none>, ++ <0 RK_PC0 1 &pcfg_pull_none_8ma>, + /* mac_txd0 */ +- <0 RK_PC1 1 &pcfg_pull_none>, ++ <0 RK_PC1 1 &pcfg_pull_none_8ma>, + /* mac_txd3 */ +- <0 RK_PC7 1 &pcfg_pull_none>, ++ <0 RK_PC7 1 &pcfg_pull_none_8ma>, + /* mac_txd2 */ +- <0 RK_PC6 1 &pcfg_pull_none>; ++ <0 RK_PC6 1 &pcfg_pull_none_8ma>; + }; + + rmiim1_pins: rmiim1-pins { diff --git a/queue-4.19/arm64-futex-fix-futex_wake_op-atomic-ops-with-non-zero-result-value.patch b/queue-4.19/arm64-futex-fix-futex_wake_op-atomic-ops-with-non-zero-result-value.patch new file mode 100644 index 0000000000..dd098eb64d --- /dev/null +++ b/queue-4.19/arm64-futex-fix-futex_wake_op-atomic-ops-with-non-zero-result-value.patch @@ -0,0 +1,92 @@ +From 045afc24124d80c6998d9c770844c67912083506 Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Mon, 8 Apr 2019 12:45:09 +0100 +Subject: arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value + +From: Will Deacon + +commit 045afc24124d80c6998d9c770844c67912083506 upstream. + +Rather embarrassingly, our futex() FUTEX_WAKE_OP implementation doesn't +explicitly set the return value on the non-faulting path and instead +leaves it holding the result of the underlying atomic operation. This +means that any FUTEX_WAKE_OP atomic operation which computes a non-zero +value will be reported as having failed. Regrettably, I wrote the buggy +code back in 2011 and it was upstreamed as part of the initial arm64 +support in 2012. + +The reasons we appear to get away with this are: + + 1. FUTEX_WAKE_OP is rarely used and therefore doesn't appear to get + exercised by futex() test applications + + 2. If the result of the atomic operation is zero, the system call + behaves correctly + + 3. 
Prior to version 2.25, the only operation used by GLIBC set the + futex to zero, and therefore worked as expected. From 2.25 onwards, + FUTEX_WAKE_OP is not used by GLIBC at all. + +Fix the implementation by ensuring that the return value is either 0 +to indicate that the atomic operation completed successfully, or -EFAULT +if we encountered a fault when accessing the user mapping. + +Cc: +Fixes: 6170a97460db ("arm64: Atomic operations") +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/futex.h | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/arch/arm64/include/asm/futex.h ++++ b/arch/arm64/include/asm/futex.h +@@ -30,8 +30,8 @@ do { \ + " prfm pstl1strm, %2\n" \ + "1: ldxr %w1, %2\n" \ + insn "\n" \ +-"2: stlxr %w3, %w0, %2\n" \ +-" cbnz %w3, 1b\n" \ ++"2: stlxr %w0, %w3, %2\n" \ ++" cbnz %w0, 1b\n" \ + " dmb ish\n" \ + "3:\n" \ + " .pushsection .fixup,\"ax\"\n" \ +@@ -50,30 +50,30 @@ do { \ + static inline int + arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) + { +- int oldval = 0, ret, tmp; ++ int oldval, ret, tmp; + u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: +- __futex_atomic_op("mov %w0, %w4", ++ __futex_atomic_op("mov %w3, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_ADD: +- __futex_atomic_op("add %w0, %w1, %w4", ++ __futex_atomic_op("add %w3, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_OR: +- __futex_atomic_op("orr %w0, %w1, %w4", ++ __futex_atomic_op("orr %w3, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_ANDN: +- __futex_atomic_op("and %w0, %w1, %w4", ++ __futex_atomic_op("and %w3, %w1, %w4", + ret, oldval, uaddr, tmp, ~oparg); + break; + case FUTEX_OP_XOR: +- __futex_atomic_op("eor %w0, %w1, %w4", ++ __futex_atomic_op("eor %w3, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + default: diff --git 
a/queue-4.19/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch b/queue-4.19/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch new file mode 100644 index 0000000000..39a016f070 --- /dev/null +++ b/queue-4.19/dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch @@ -0,0 +1,57 @@ +From 0d74e6a3b6421d98eeafbed26f29156d469bc0b5 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Wed, 13 Mar 2019 07:56:02 -0400 +Subject: dm integrity: change memcmp to strncmp in dm_integrity_ctr + +From: Mikulas Patocka + +commit 0d74e6a3b6421d98eeafbed26f29156d469bc0b5 upstream. + +If the string opt_string is small, the function memcmp can access bytes +that are beyond the terminating nul character. In theory, it could cause +segfault, if opt_string were located just below some unmapped memory. + +Change from memcmp to strncmp so that we don't read bytes beyond the end +of the string. + +Cc: stable@vger.kernel.org # v4.12+ +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-integrity.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -3174,7 +3174,7 @@ static int dm_integrity_ctr(struct dm_ta + journal_watermark = val; + else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) + sync_msec = val; +- else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) { ++ else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { + if (ic->meta_dev) { + dm_put_device(ti, ic->meta_dev); + ic->meta_dev = NULL; +@@ -3193,17 +3193,17 @@ static int dm_integrity_ctr(struct dm_ta + goto bad; + } + ic->sectors_per_block = val >> SECTOR_SHIFT; +- } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { ++ } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { + r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, + 
"Invalid internal_hash argument"); + if (r) + goto bad; +- } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { ++ } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { + r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, + "Invalid journal_crypt argument"); + if (r) + goto bad; +- } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { ++ } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { + r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, + "Invalid journal_mac argument"); + if (r) diff --git a/queue-4.19/dm-integrity-fix-deadlock-with-overlapping-i-o.patch b/queue-4.19/dm-integrity-fix-deadlock-with-overlapping-i-o.patch new file mode 100644 index 0000000000..791f3507bd --- /dev/null +++ b/queue-4.19/dm-integrity-fix-deadlock-with-overlapping-i-o.patch @@ -0,0 +1,49 @@ +From 4ed319c6ac08e9a28fca7ac188181ac122f4de84 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 5 Apr 2019 15:26:39 -0400 +Subject: dm integrity: fix deadlock with overlapping I/O + +From: Mikulas Patocka + +commit 4ed319c6ac08e9a28fca7ac188181ac122f4de84 upstream. + +dm-integrity will deadlock if overlapping I/O is issued to it, the bug +was introduced by commit 724376a04d1a ("dm integrity: implement fair +range locks"). Users rarely use overlapping I/O so this bug went +undetected until now. + +Fix this bug by correcting, likely cut-n-paste, typos in +ranges_overlap() and also remove a flawed ranges_overlap() check in +remove_range_unlocked(). This condition could leave unprocessed bios +hanging on wait_list forever. 
+ +Cc: stable@vger.kernel.org # v4.19+ +Fixes: 724376a04d1a ("dm integrity: implement fair range locks") +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-integrity.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -908,7 +908,7 @@ static void copy_from_journal(struct dm_ + static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) + { + return range1->logical_sector < range2->logical_sector + range2->n_sectors && +- range2->logical_sector + range2->n_sectors > range2->logical_sector; ++ range1->logical_sector + range1->n_sectors > range2->logical_sector; + } + + static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) +@@ -954,8 +954,6 @@ static void remove_range_unlocked(struct + struct dm_integrity_range *last_range = + list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); + struct task_struct *last_range_task; +- if (!ranges_overlap(range, last_range)) +- break; + last_range_task = last_range->task; + list_del(&last_range->wait_entry); + if (!add_new_range(ic, last_range, false)) { diff --git a/queue-4.19/dm-revert-8f50e358153d-dm-limit-the-max-bio-size-as-bio_max_pages-page_size.patch b/queue-4.19/dm-revert-8f50e358153d-dm-limit-the-max-bio-size-as-bio_max_pages-page_size.patch new file mode 100644 index 0000000000..c84a379b6a --- /dev/null +++ b/queue-4.19/dm-revert-8f50e358153d-dm-limit-the-max-bio-size-as-bio_max_pages-page_size.patch @@ -0,0 +1,53 @@ +From 75ae193626de3238ca5fb895868ec91c94e63b1b Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Thu, 21 Mar 2019 16:46:12 -0400 +Subject: dm: revert 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") + +From: Mikulas Patocka + +commit 75ae193626de3238ca5fb895868ec91c94e63b1b upstream. 
+ +The limit was already incorporated to dm-crypt with commit 4e870e948fba +("dm crypt: fix error with too large bios"), so we don't need to apply +it globally to all targets. The quantity BIO_MAX_PAGES * PAGE_SIZE is +wrong anyway because the variable ti->max_io_len is supposed to be in +the units of 512-byte sectors not in bytes. + +Reduction of the limit to 1048576 sectors could even cause data +corruption in rare cases - suppose that we have a dm-striped device with +stripe size 768MiB. The target will call dm_set_target_max_io_len with +the value 1572864. The buggy code would reduce it to 1048576. Now, the +dm-core will erroneously split the bios on 1048576-sector boundary +instead of 1572864-sector boundary and pass these stripe-crossing bios +to the striped target. + +Cc: stable@vger.kernel.org # v4.16+ +Fixes: 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") +Signed-off-by: Mikulas Patocka +Acked-by: Ming Lei +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm.c | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -1007,15 +1007,7 @@ int dm_set_target_max_io_len(struct dm_t + return -EINVAL; + } + +- /* +- * BIO based queue uses its own splitting. When multipage bvecs +- * is switched on, size of the incoming bio may be too big to +- * be handled in some targets, such as crypt. +- * +- * When these targets are ready for the big bio, we can remove +- * the limit. 
+- */ +- ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE); ++ ti->max_io_len = (uint32_t) len; + + return 0; + } diff --git a/queue-4.19/dm-table-propagate-bdi_cap_stable_writes-to-fix-sporadic-checksum-errors.patch b/queue-4.19/dm-table-propagate-bdi_cap_stable_writes-to-fix-sporadic-checksum-errors.patch new file mode 100644 index 0000000000..49f88e3618 --- /dev/null +++ b/queue-4.19/dm-table-propagate-bdi_cap_stable_writes-to-fix-sporadic-checksum-errors.patch @@ -0,0 +1,80 @@ +From eb40c0acdc342b815d4d03ae6abb09e80c0f2988 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Tue, 26 Mar 2019 20:20:58 +0100 +Subject: dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors + +From: Ilya Dryomov + +commit eb40c0acdc342b815d4d03ae6abb09e80c0f2988 upstream. + +Some devices don't use blk_integrity but still want stable pages +because they do their own checksumming. Examples include rbd and iSCSI +when data digests are negotiated. Stacking DM (and thus LVM) on top of +these devices results in sporadic checksum errors. + +Set BDI_CAP_STABLE_WRITES if any underlying device has it set. + +Cc: stable@vger.kernel.org +Signed-off-by: Ilya Dryomov +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-table.c | 39 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 39 insertions(+) + +--- a/drivers/md/dm-table.c ++++ b/drivers/md/dm-table.c +@@ -1872,6 +1872,36 @@ static bool dm_table_supports_secure_era + return true; + } + ++static int device_requires_stable_pages(struct dm_target *ti, ++ struct dm_dev *dev, sector_t start, ++ sector_t len, void *data) ++{ ++ struct request_queue *q = bdev_get_queue(dev->bdev); ++ ++ return q && bdi_cap_stable_pages_required(q->backing_dev_info); ++} ++ ++/* ++ * If any underlying device requires stable pages, a table must require ++ * them as well. Only targets that support iterate_devices are considered: ++ * don't want error, zero, etc to require stable pages. 
++ */ ++static bool dm_table_requires_stable_pages(struct dm_table *t) ++{ ++ struct dm_target *ti; ++ unsigned i; ++ ++ for (i = 0; i < dm_table_get_num_targets(t); i++) { ++ ti = dm_table_get_target(t, i); ++ ++ if (ti->type->iterate_devices && ++ ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) ++ return true; ++ } ++ ++ return false; ++} ++ + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, + struct queue_limits *limits) + { +@@ -1930,6 +1960,15 @@ void dm_table_set_restrictions(struct dm + dm_table_verify_integrity(t); + + /* ++ * Some devices don't use blk_integrity but still want stable pages ++ * because they do their own checksumming. ++ */ ++ if (dm_table_requires_stable_pages(t)) ++ q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; ++ else ++ q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; ++ ++ /* + * Determine whether or not this queue's I/O timings contribute + * to the entropy pool, Only request-based targets use this. + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not diff --git a/queue-4.19/pci-add-function-1-dma-alias-quirk-for-marvell-9170-sata-controller.patch b/queue-4.19/pci-add-function-1-dma-alias-quirk-for-marvell-9170-sata-controller.patch new file mode 100644 index 0000000000..c5e925d123 --- /dev/null +++ b/queue-4.19/pci-add-function-1-dma-alias-quirk-for-marvell-9170-sata-controller.patch @@ -0,0 +1,37 @@ +From 9cde402a59770a0669d895399c13407f63d7d209 Mon Sep 17 00:00:00 2001 +From: Andre Przywara +Date: Fri, 5 Apr 2019 16:20:47 +0100 +Subject: PCI: Add function 1 DMA alias quirk for Marvell 9170 SATA controller + +From: Andre Przywara + +commit 9cde402a59770a0669d895399c13407f63d7d209 upstream. + +There is a Marvell 88SE9170 PCIe SATA controller I found on a board here. +Some quick testing with the ARM SMMU enabled reveals that it suffers from +the same requester ID mixup problems as the other Marvell chips listed +already. 
+ +Add the PCI vendor/device ID to the list of chips which need the +workaround. + +Signed-off-by: Andre Przywara +Signed-off-by: Bjorn Helgaas +CC: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/quirks.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/pci/quirks.c ++++ b/drivers/pci/quirks.c +@@ -3852,6 +3852,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_M + /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */ + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130, + quirk_dma_func1_alias); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170, ++ quirk_dma_func1_alias); + /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */ + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172, + quirk_dma_func1_alias); diff --git a/queue-4.19/pci-pciehp-ignore-link-state-changes-after-powering-off-a-slot.patch b/queue-4.19/pci-pciehp-ignore-link-state-changes-after-powering-off-a-slot.patch new file mode 100644 index 0000000000..9e22a4159d --- /dev/null +++ b/queue-4.19/pci-pciehp-ignore-link-state-changes-after-powering-off-a-slot.patch @@ -0,0 +1,50 @@ +From 3943af9d01e94330d0cfac6fccdbc829aad50c92 Mon Sep 17 00:00:00 2001 +From: Sergey Miroshnichenko +Date: Tue, 12 Mar 2019 15:05:48 +0300 +Subject: PCI: pciehp: Ignore Link State Changes after powering off a slot + +From: Sergey Miroshnichenko + +commit 3943af9d01e94330d0cfac6fccdbc829aad50c92 upstream. + +During a safe hot remove, the OS powers off the slot, which may cause a +Data Link Layer State Changed event. The slot has already been set to +OFF_STATE, so that event results in re-enabling the device, making it +impossible to safely remove it. + +Clear out the Presence Detect Changed and Data Link Layer State Changed +events when the disabled slot has settled down. + +It is still possible to re-enable the device if it remains in the slot +after pressing the Attention Button by pressing it again. 
+ +Fixes the problem that Micah reported below: an NVMe drive power button may +not actually turn off the drive. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=203237 +Reported-by: Micah Parrish +Tested-by: Micah Parrish +Signed-off-by: Sergey Miroshnichenko +[bhelgaas: changelog, add bugzilla URL] +Signed-off-by: Bjorn Helgaas +Reviewed-by: Lukas Wunner +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/hotplug/pciehp_ctrl.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/pci/hotplug/pciehp_ctrl.c ++++ b/drivers/pci/hotplug/pciehp_ctrl.c +@@ -117,6 +117,10 @@ static void remove_board(struct slot *p_ + * removed from the slot/adapter. + */ + msleep(1000); ++ ++ /* Ignore link or presence changes caused by power off */ ++ atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC), ++ &ctrl->pending_events); + } + + /* turn off Green LED */ diff --git a/queue-4.19/sched-fair-do-not-re-read-h_load_next-during-hierarchical-load-calculation.patch b/queue-4.19/sched-fair-do-not-re-read-h_load_next-during-hierarchical-load-calculation.patch new file mode 100644 index 0000000000..7699443c62 --- /dev/null +++ b/queue-4.19/sched-fair-do-not-re-read-h_load_next-during-hierarchical-load-calculation.patch @@ -0,0 +1,82 @@ +From 0e9f02450da07fc7b1346c8c32c771555173e397 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Tue, 19 Mar 2019 12:36:10 +0000 +Subject: sched/fair: Do not re-read ->h_load_next during hierarchical load calculation + +From: Mel Gorman + +commit 0e9f02450da07fc7b1346c8c32c771555173e397 upstream. + +A NULL pointer dereference bug was reported on a distribution kernel but +the same issue should be present on mainline kernel. It occurred on s390 +but should not be arch-specific. A partial oops looks like: + + Unable to handle kernel pointer dereference in virtual kernel address space + ... + Call Trace: + ...
+ try_to_wake_up+0xfc/0x450 + vhost_poll_wakeup+0x3a/0x50 [vhost] + __wake_up_common+0xbc/0x178 + __wake_up_common_lock+0x9e/0x160 + __wake_up_sync_key+0x4e/0x60 + sock_def_readable+0x5e/0x98 + +The bug hits any time between 1 hour and 3 days. The dereference occurs +in update_cfs_rq_h_load when accumulating h_load. The problem is that +cfs_rq->h_load_next is not protected by any locking and can be updated +by parallel calls to task_h_load. Depending on the compiler, code may be +generated that re-reads cfs_rq->h_load_next after the check for NULL and +then oops when reading se->avg.load_avg. The disassembly showed that it +was possible to reread h_load_next after the check for NULL. + +While this does not appear to be an issue for later compilers, it's still +an accident if the correct code is generated. Full locking in this path +would have high overhead so this patch uses READ_ONCE to read h_load_next +only once and check for NULL before dereferencing. It was confirmed that +there were no further oops after 10 days of testing. + +As Peter pointed out, it is also necessary to use WRITE_ONCE() to avoid any +potential problems with store tearing.
+ +Signed-off-by: Mel Gorman +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Valentin Schneider +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: +Fixes: 685207963be9 ("sched: Move h_load calculation to task_h_load()") +Link: https://lkml.kernel.org/r/20190319123610.nsivgf3mjbjjesxb@techsingularity.net +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/fair.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7437,10 +7437,10 @@ static void update_cfs_rq_h_load(struct + if (cfs_rq->last_h_load_update == now) + return; + +- cfs_rq->h_load_next = NULL; ++ WRITE_ONCE(cfs_rq->h_load_next, NULL); + for_each_sched_entity(se) { + cfs_rq = cfs_rq_of(se); +- cfs_rq->h_load_next = se; ++ WRITE_ONCE(cfs_rq->h_load_next, se); + if (cfs_rq->last_h_load_update == now) + break; + } +@@ -7450,7 +7450,7 @@ static void update_cfs_rq_h_load(struct + cfs_rq->last_h_load_update = now; + } + +- while ((se = cfs_rq->h_load_next) != NULL) { ++ while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) { + load = cfs_rq->h_load; + load = div64_ul(load * se->avg.load_avg, + cfs_rq_load_avg(cfs_rq) + 1); diff --git a/queue-4.19/series b/queue-4.19/series index b84ad70ee1..06c6b885fc 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -73,3 +73,24 @@ block-fix-the-return-errno-for-direct-io.patch genirq-respect-irqchip_skip_set_wake-in-irq_chip_set_wake_parent.patch genirq-initialize-request_mutex-if-config_sparse_irq-n.patch virtio-honour-may_reduce_num-in-vring_create_virtqueue.patch +arm-dts-rockchip-fix-rk3288-cpu-opp-node-reference.patch +arm-dts-am335x-evmsk-correct-the-regulators-for-the-audio-codec.patch +arm-dts-am335x-evm-correct-the-regulators-for-the-audio-codec.patch +arm-dts-at91-fix-typo-in-isc_d0-on-pc9.patch +arm64-futex-fix-futex_wake_op-atomic-ops-with-non-zero-result-value.patch 
+arm64-dts-rockchip-fix-rk3328-rgmii-high-tx-error-rate.patch +arm64-backtrace-don-t-bother-trying-to-unwind-the-userspace-stack.patch +xen-prevent-buffer-overflow-in-privcmd-ioctl.patch +sched-fair-do-not-re-read-h_load_next-during-hierarchical-load-calculation.patch +xtensa-fix-return_address.patch +x86-asm-remove-dead-__gnuc__-conditionals.patch +x86-asm-use-stricter-assembly-constraints-in-bitops.patch +x86-perf-amd-resolve-race-condition-when-disabling-pmc.patch +x86-perf-amd-resolve-nmi-latency-issues-for-active-pmcs.patch +x86-perf-amd-remove-need-to-check-running-bit-in-nmi-handler.patch +pci-add-function-1-dma-alias-quirk-for-marvell-9170-sata-controller.patch +pci-pciehp-ignore-link-state-changes-after-powering-off-a-slot.patch +dm-integrity-change-memcmp-to-strncmp-in-dm_integrity_ctr.patch +dm-revert-8f50e358153d-dm-limit-the-max-bio-size-as-bio_max_pages-page_size.patch +dm-table-propagate-bdi_cap_stable_writes-to-fix-sporadic-checksum-errors.patch +dm-integrity-fix-deadlock-with-overlapping-i-o.patch diff --git a/queue-4.19/x86-asm-remove-dead-__gnuc__-conditionals.patch b/queue-4.19/x86-asm-remove-dead-__gnuc__-conditionals.patch new file mode 100644 index 0000000000..4f499e6c19 --- /dev/null +++ b/queue-4.19/x86-asm-remove-dead-__gnuc__-conditionals.patch @@ -0,0 +1,116 @@ +From 88ca66d8540ca26119b1428cddb96b37925bdf01 Mon Sep 17 00:00:00 2001 +From: Rasmus Villemoes +Date: Fri, 11 Jan 2019 09:49:30 +0100 +Subject: x86/asm: Remove dead __GNUC__ conditionals + +From: Rasmus Villemoes + +commit 88ca66d8540ca26119b1428cddb96b37925bdf01 upstream. + +The minimum supported gcc version is >= 4.6, so these can be removed. + +Signed-off-by: Rasmus Villemoes +Signed-off-by: Borislav Petkov +Cc: "H. 
Peter Anvin" +Cc: Dan Williams +Cc: Geert Uytterhoeven +Cc: Ingo Molnar +Cc: Matthew Wilcox +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: x86-ml +Link: https://lkml.kernel.org/r/20190111084931.24601-1-linux@rasmusvillemoes.dk +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/bitops.h | 6 ------ + arch/x86/include/asm/string_32.h | 20 -------------------- + arch/x86/include/asm/string_64.h | 15 --------------- + 3 files changed, 41 deletions(-) + +--- a/arch/x86/include/asm/bitops.h ++++ b/arch/x86/include/asm/bitops.h +@@ -36,13 +36,7 @@ + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) +-/* Technically wrong, but this avoids compilation errors on some gcc +- versions. */ +-#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) +-#else + #define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) +-#endif + + #define ADDR BITOP_ADDR(addr) + +--- a/arch/x86/include/asm/string_32.h ++++ b/arch/x86/include/asm/string_32.h +@@ -179,14 +179,7 @@ static inline void *__memcpy3d(void *to, + * No 3D Now! + */ + +-#if (__GNUC__ >= 4) + #define memcpy(t, f, n) __builtin_memcpy(t, f, n) +-#else +-#define memcpy(t, f, n) \ +- (__builtin_constant_p((n)) \ +- ? __constant_memcpy((t), (f), (n)) \ +- : __memcpy((t), (f), (n))) +-#endif + + #endif + #endif /* !CONFIG_FORTIFY_SOURCE */ +@@ -282,12 +275,7 @@ void *__constant_c_and_count_memset(void + + { + int d0, d1; +-#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 +- /* Workaround for broken gcc 4.0 */ +- register unsigned long eax asm("%eax") = pattern; +-#else + unsigned long eax = pattern; +-#endif + + switch (count % 4) { + case 0: +@@ -321,15 +309,7 @@ void *__constant_c_and_count_memset(void + #define __HAVE_ARCH_MEMSET + extern void *memset(void *, int, size_t); + #ifndef CONFIG_FORTIFY_SOURCE +-#if (__GNUC__ >= 4) + #define memset(s, c, count) __builtin_memset(s, c, count) +-#else +-#define memset(s, c, count) \ +- (__builtin_constant_p(c) \ +- ? 
__constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ +- (count)) \ +- : __memset((s), (c), (count))) +-#endif + #endif /* !CONFIG_FORTIFY_SOURCE */ + + #define __HAVE_ARCH_MEMSET16 +--- a/arch/x86/include/asm/string_64.h ++++ b/arch/x86/include/asm/string_64.h +@@ -32,21 +32,6 @@ static __always_inline void *__inline_me + extern void *memcpy(void *to, const void *from, size_t len); + extern void *__memcpy(void *to, const void *from, size_t len); + +-#ifndef CONFIG_FORTIFY_SOURCE +-#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 +-#define memcpy(dst, src, len) \ +-({ \ +- size_t __len = (len); \ +- void *__ret; \ +- if (__builtin_constant_p(len) && __len >= 64) \ +- __ret = __memcpy((dst), (src), __len); \ +- else \ +- __ret = __builtin_memcpy((dst), (src), __len); \ +- __ret; \ +-}) +-#endif +-#endif /* !CONFIG_FORTIFY_SOURCE */ +- + #define __HAVE_ARCH_MEMSET + void *memset(void *s, int c, size_t n); + void *__memset(void *s, int c, size_t n); diff --git a/queue-4.19/x86-asm-use-stricter-assembly-constraints-in-bitops.patch b/queue-4.19/x86-asm-use-stricter-assembly-constraints-in-bitops.patch new file mode 100644 index 0000000000..02db159a85 --- /dev/null +++ b/queue-4.19/x86-asm-use-stricter-assembly-constraints-in-bitops.patch @@ -0,0 +1,228 @@ +From 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 Mon Sep 17 00:00:00 2001 +From: Alexander Potapenko +Date: Tue, 2 Apr 2019 13:28:13 +0200 +Subject: x86/asm: Use stricter assembly constraints in bitops + +From: Alexander Potapenko + +commit 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 upstream. + +There's a number of problems with how arch/x86/include/asm/bitops.h +is currently using assembly constraints for the memory region +bitops are modifying: + +1) Use memory clobber in bitops that touch arbitrary memory + +Certain bit operations that read/write bits take a base pointer and an +arbitrarily large offset to address the bit relative to that base. 
+Inline assembly constraints aren't expressive enough to tell the +compiler that the assembly directive is going to touch a specific memory +location of unknown size, therefore we have to use the "memory" clobber +to indicate that the assembly is going to access memory locations other +than those listed in the inputs/outputs. + +To indicate that BTR/BTS instructions don't necessarily touch the first +sizeof(long) bytes of the argument, we also move the address to assembly +inputs. + +This particular change leads to a size increase of 124 kernel functions in +a defconfig build. For some of them the diff is in NOP operations, others +end up re-reading values from memory and may potentially slow down the +execution. But without these clobbers the compiler is free to cache +the contents of the bitmaps and use them as if they weren't changed by +the inline assembly. + +2) Use byte-sized arguments for operations touching single bytes. + +Passing a long value to ANDB/ORB/XORB instructions makes the compiler +treat sizeof(long) bytes as being clobbered, which isn't the case. This +may theoretically lead to worse code in the case of heavy optimization. + +Practical impact: + +I've built a defconfig kernel and looked through some of the functions +generated by GCC 7.3.0 with and without this clobber, and didn't spot +any miscompilations. + +However there is a (trivial) theoretical case where this code leads to +miscompilation: + + https://lkml.org/lkml/2019/3/28/393 + +using just GCC 8.3.0 with -O2. It isn't hard to imagine someone writes +such a function in the kernel someday. + +So the primary motivation is to fix an existing misuse of the asm +directive, which happens to work in certain configurations now, but +isn't guaranteed to work under different circumstances. + +[ --mingo: Added -stable tag because defconfig only builds a fraction + of the kernel and the trivial testcase looks normal enough to + be used in existing or in-development code.
] + +Signed-off-by: Alexander Potapenko +Cc: +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: Dmitry Vyukov +Cc: H. Peter Anvin +Cc: James Y Knight +Cc: Linus Torvalds +Cc: Paul E. McKenney +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20190402112813.193378-1-glider@google.com +[ Edited the changelog, tidied up one of the defines. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/bitops.h | 41 ++++++++++++++++++----------------------- + 1 file changed, 18 insertions(+), 23 deletions(-) + +--- a/arch/x86/include/asm/bitops.h ++++ b/arch/x86/include/asm/bitops.h +@@ -36,16 +36,17 @@ + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) ++#define RLONG_ADDR(x) "m" (*(volatile long *) (x)) ++#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) + +-#define ADDR BITOP_ADDR(addr) ++#define ADDR RLONG_ADDR(addr) + + /* + * We do the locked ops that don't return the old value as + * a mask operation on a byte. 
+ */ + #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) +-#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) ++#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) + #define CONST_MASK(nr) (1 << ((nr) & 7)) + + /** +@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long + : "memory"); + } else { + asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" +- : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); ++ : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); + } + } + +@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long + */ + static __always_inline void __set_bit(long nr, volatile unsigned long *addr) + { +- asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); ++ asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); + } + + /** +@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned lon + : "iq" ((u8)~CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" +- : BITOP_ADDR(addr) +- : "Ir" (nr)); ++ : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); + } + } + +@@ -131,7 +131,7 @@ static __always_inline void clear_bit_un + + static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) + { +- asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); ++ asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); + } + + static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_un + bool negative; + asm volatile(LOCK_PREFIX "andb %2,%1" + CC_SET(s) +- : CC_OUT(s) (negative), ADDR ++ : CC_OUT(s) (negative), WBYTE_ADDR(addr) + : "ir" ((char) ~(1 << nr)) : "memory"); + return negative; + } +@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_un + * __clear_bit() is non-atomic and implies release semantics before the memory + * operation. It can be used for an unlock if no other CPUs can concurrently + * modify other bits in the word. 
+- * +- * No memory barrier is required here, because x86 cannot reorder stores past +- * older loads. Same principle as spin_unlock. + */ + static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) + { +- barrier(); + __clear_bit(nr, addr); + } + +@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_ + */ + static __always_inline void __change_bit(long nr, volatile unsigned long *addr) + { +- asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); ++ asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); + } + + /** +@@ -196,8 +192,7 @@ static __always_inline void change_bit(l + : "iq" ((u8)CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" +- : BITOP_ADDR(addr) +- : "Ir" (nr)); ++ : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); + } + } + +@@ -243,8 +238,8 @@ static __always_inline bool __test_and_s + + asm(__ASM_SIZE(bts) " %2,%1" + CC_SET(c) +- : CC_OUT(c) (oldbit), ADDR +- : "Ir" (nr)); ++ : CC_OUT(c) (oldbit) ++ : ADDR, "Ir" (nr) : "memory"); + return oldbit; + } + +@@ -284,8 +279,8 @@ static __always_inline bool __test_and_c + + asm volatile(__ASM_SIZE(btr) " %2,%1" + CC_SET(c) +- : CC_OUT(c) (oldbit), ADDR +- : "Ir" (nr)); ++ : CC_OUT(c) (oldbit) ++ : ADDR, "Ir" (nr) : "memory"); + return oldbit; + } + +@@ -296,8 +291,8 @@ static __always_inline bool __test_and_c + + asm volatile(__ASM_SIZE(btc) " %2,%1" + CC_SET(c) +- : CC_OUT(c) (oldbit), ADDR +- : "Ir" (nr) : "memory"); ++ : CC_OUT(c) (oldbit) ++ : ADDR, "Ir" (nr) : "memory"); + + return oldbit; + } +@@ -329,7 +324,7 @@ static __always_inline bool variable_tes + asm volatile(__ASM_SIZE(bt) " %2,%1" + CC_SET(c) + : CC_OUT(c) (oldbit) +- : "m" (*(unsigned long *)addr), "Ir" (nr)); ++ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); + + return oldbit; + } diff --git a/queue-4.19/x86-perf-amd-remove-need-to-check-running-bit-in-nmi-handler.patch b/queue-4.19/x86-perf-amd-remove-need-to-check-running-bit-in-nmi-handler.patch 
new file mode 100644 index 0000000000..211f39db71 --- /dev/null +++ b/queue-4.19/x86-perf-amd-remove-need-to-check-running-bit-in-nmi-handler.patch @@ -0,0 +1,127 @@ +From 3966c3feca3fd10b2935caa0b4a08c7dd59469e5 Mon Sep 17 00:00:00 2001 +From: "Lendacky, Thomas" +Date: Tue, 2 Apr 2019 15:21:18 +0000 +Subject: x86/perf/amd: Remove need to check "running" bit in NMI handler + +From: Lendacky, Thomas + +commit 3966c3feca3fd10b2935caa0b4a08c7dd59469e5 upstream. + +Spurious interrupt support was added to perf in the following commit, almost +a decade ago: + + 63e6be6d98e1 ("perf, x86: Catch spurious interrupts after disabling counters") + +The two previous patches (resolving the race condition when disabling a +PMC and NMI latency mitigation) allow for the removal of this older +spurious interrupt support. + +Currently in x86_pmu_stop(), the bit for the PMC in the active_mask bitmap +is cleared before disabling the PMC, which sets up a race condition. This +race condition was mitigated by introducing the running bitmap. That race +condition can be eliminated by first disabling the PMC, waiting for PMC +reset on overflow and then clearing the bit for the PMC in the active_mask +bitmap. The NMI handler will not re-enable a disabled counter. + +If x86_pmu_stop() is called from the perf NMI handler, the NMI latency +mitigation support will guard against any unhandled NMI messages. 
+ +Signed-off-by: Tom Lendacky +Signed-off-by: Peter Zijlstra (Intel) +Cc: # 4.14.x- +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Arnaldo Carvalho de Melo +Cc: Borislav Petkov +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lkml.kernel.org/r/Message-ID: +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/amd/core.c | 21 +++++++++++++++++++-- + arch/x86/events/core.c | 13 +++---------- + 2 files changed, 22 insertions(+), 12 deletions(-) + +--- a/arch/x86/events/amd/core.c ++++ b/arch/x86/events/amd/core.c +@@ -4,8 +4,8 @@ + #include + #include + #include +-#include + #include ++#include + + #include "../perf_event.h" + +@@ -491,6 +491,23 @@ static void amd_pmu_disable_all(void) + } + } + ++static void amd_pmu_disable_event(struct perf_event *event) ++{ ++ x86_pmu_disable_event(event); ++ ++ /* ++ * This can be called from NMI context (via x86_pmu_stop). The counter ++ * may have overflowed, but either way, we'll never see it get reset ++ * by the NMI if we're already in the NMI. And the NMI latency support ++ * below will take care of any pending NMI that might have been ++ * generated by the overflow. 
++ */ ++ if (in_nmi()) ++ return; ++ ++ amd_pmu_wait_on_overflow(event->hw.idx); ++} ++ + /* + * Because of NMI latency, if multiple PMC counters are active or other sources + * of NMIs are received, the perf NMI handler can handle one or more overflowed +@@ -738,7 +755,7 @@ static __initconst const struct x86_pmu + .disable_all = amd_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, +- .disable = x86_pmu_disable_event, ++ .disable = amd_pmu_disable_event, + .hw_config = amd_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_K7_EVNTSEL0, +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -1328,8 +1328,9 @@ void x86_pmu_stop(struct perf_event *eve + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + +- if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { ++ if (test_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); ++ __clear_bit(hwc->idx, cpuc->active_mask); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; +@@ -1426,16 +1427,8 @@ int x86_pmu_handle_irq(struct pt_regs *r + apic_write(APIC_LVTPC, APIC_DM_NMI); + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { +- if (!test_bit(idx, cpuc->active_mask)) { +- /* +- * Though we deactivated the counter some cpus +- * might still deliver spurious interrupts still +- * in flight. 
Catch them: +- */ +- if (__test_and_clear_bit(idx, cpuc->running)) +- handled++; ++ if (!test_bit(idx, cpuc->active_mask)) + continue; +- } + + event = cpuc->events[idx]; + diff --git a/queue-4.19/x86-perf-amd-resolve-nmi-latency-issues-for-active-pmcs.patch b/queue-4.19/x86-perf-amd-resolve-nmi-latency-issues-for-active-pmcs.patch new file mode 100644 index 0000000000..49aba7d892 --- /dev/null +++ b/queue-4.19/x86-perf-amd-resolve-nmi-latency-issues-for-active-pmcs.patch @@ -0,0 +1,142 @@ +From 6d3edaae16c6c7d238360f2841212c2b26774d5e Mon Sep 17 00:00:00 2001 +From: "Lendacky, Thomas" +Date: Tue, 2 Apr 2019 15:21:16 +0000 +Subject: x86/perf/amd: Resolve NMI latency issues for active PMCs + +From: Lendacky, Thomas + +commit 6d3edaae16c6c7d238360f2841212c2b26774d5e upstream. + +On AMD processors, the detection of an overflowed PMC counter in the NMI +handler relies on the current value of the PMC. So, for example, to check +for overflow on a 48-bit counter, bit 47 is checked to see if it is 1 (not +overflowed) or 0 (overflowed). + +When the perf NMI handler executes it does not know in advance which PMC +counters have overflowed. As such, the NMI handler will process all active +PMC counters that have overflowed. NMI latency in newer AMD processors can +result in multiple overflowed PMC counters being processed in one NMI and +then a subsequent NMI, that does not appear to be a back-to-back NMI, not +finding any PMC counters that have overflowed. This may appear to be an +unhandled NMI resulting in either a panic or a series of messages, +depending on how the kernel was configured. + +To mitigate this issue, add an AMD handle_irq callback function, +amd_pmu_handle_irq(), that will invoke the common x86_pmu_handle_irq() +function and upon return perform some additional processing that will +indicate if the NMI has been handled or would have been handled had an +earlier NMI not handled the overflowed PMC. 
Using a per-CPU variable, a +minimum value of the number of active PMCs or 2 will be set whenever a +PMC is active. This is used to indicate the possible number of NMIs that +can still occur. The value of 2 is used for when an NMI does not arrive +at the LAPIC in time to be collapsed into an already pending NMI. Each +time the function is called without having handled an overflowed counter, +the per-CPU value is checked. If the value is non-zero, it is decremented +and the NMI indicates that it handled the NMI. If the value is zero, then +the NMI indicates that it did not handle the NMI. + +Signed-off-by: Tom Lendacky +Signed-off-by: Peter Zijlstra (Intel) +Cc: # 4.14.x- +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Arnaldo Carvalho de Melo +Cc: Borislav Petkov +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lkml.kernel.org/r/Message-ID: +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/amd/core.c | 56 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 55 insertions(+), 1 deletion(-) + +--- a/arch/x86/events/amd/core.c ++++ b/arch/x86/events/amd/core.c +@@ -4,10 +4,13 @@ + #include + #include + #include ++#include + #include + + #include "../perf_event.h" + ++static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); ++ + static __initconst const u64 amd_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] +@@ -488,6 +491,57 @@ static void amd_pmu_disable_all(void) + } + } + ++/* ++ * Because of NMI latency, if multiple PMC counters are active or other sources ++ * of NMIs are received, the perf NMI handler can handle one or more overflowed ++ * PMC counters outside of the NMI associated with the PMC overflow. If the NMI ++ * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel ++ * back-to-back NMI support won't be active. 
This PMC handler needs to take into ++ * account that this can occur, otherwise this could result in unknown NMI ++ * messages being issued. Examples of this is PMC overflow while in the NMI ++ * handler when multiple PMCs are active or PMC overflow while handling some ++ * other source of an NMI. ++ * ++ * Attempt to mitigate this by using the number of active PMCs to determine ++ * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset ++ * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the ++ * number of active PMCs or 2. The value of 2 is used in case an NMI does not ++ * arrive at the LAPIC in time to be collapsed into an already pending NMI. ++ */ ++static int amd_pmu_handle_irq(struct pt_regs *regs) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int active, handled; ++ ++ /* ++ * Obtain the active count before calling x86_pmu_handle_irq() since ++ * it is possible that x86_pmu_handle_irq() may make a counter ++ * inactive (through x86_pmu_stop). ++ */ ++ active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX); ++ ++ /* Process any counter overflows */ ++ handled = x86_pmu_handle_irq(regs); ++ ++ /* ++ * If a counter was handled, record the number of possible remaining ++ * NMIs that can occur. 
++ */ ++ if (handled) { ++ this_cpu_write(perf_nmi_counter, ++ min_t(unsigned int, 2, active)); ++ ++ return handled; ++ } ++ ++ if (!this_cpu_read(perf_nmi_counter)) ++ return NMI_DONE; ++ ++ this_cpu_dec(perf_nmi_counter); ++ ++ return NMI_HANDLED; ++} ++ + static struct event_constraint * + amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +@@ -680,7 +734,7 @@ static ssize_t amd_event_sysfs_show(char + + static __initconst const struct x86_pmu amd_pmu = { + .name = "AMD", +- .handle_irq = x86_pmu_handle_irq, ++ .handle_irq = amd_pmu_handle_irq, + .disable_all = amd_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, diff --git a/queue-4.19/x86-perf-amd-resolve-race-condition-when-disabling-pmc.patch b/queue-4.19/x86-perf-amd-resolve-race-condition-when-disabling-pmc.patch new file mode 100644 index 0000000000..b725e9675b --- /dev/null +++ b/queue-4.19/x86-perf-amd-resolve-race-condition-when-disabling-pmc.patch @@ -0,0 +1,152 @@ +From 914123fa39042e651d79eaf86bbf63a1b938dddf Mon Sep 17 00:00:00 2001 +From: "Lendacky, Thomas" +Date: Tue, 2 Apr 2019 15:21:14 +0000 +Subject: x86/perf/amd: Resolve race condition when disabling PMC + +From: Lendacky, Thomas + +commit 914123fa39042e651d79eaf86bbf63a1b938dddf upstream. + +On AMD processors, the detection of an overflowed counter in the NMI +handler relies on the current value of the counter. So, for example, to +check for overflow on a 48 bit counter, bit 47 is checked to see if it +is 1 (not overflowed) or 0 (overflowed). + +There is currently a race condition present when disabling and then +updating the PMC. Increased NMI latency in newer AMD processors makes this +race condition more pronounced. If the counter value has overflowed, it is +possible to update the PMC value before the NMI handler can run. The +updated PMC value is not an overflowed value, so when the perf NMI handler +does run, it will not find an overflowed counter. 
This may appear as an +unknown NMI resulting in either a panic or a series of messages, depending +on how the kernel is configured. + +To eliminate this race condition, the PMC value must be checked after +disabling the counter. Add an AMD function, amd_pmu_disable_all(), that +will wait for the NMI handler to reset any active and overflowed counter +after calling x86_pmu_disable_all(). + +Signed-off-by: Tom Lendacky +Signed-off-by: Peter Zijlstra (Intel) +Cc: # 4.14.x- +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Arnaldo Carvalho de Melo +Cc: Borislav Petkov +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lkml.kernel.org/r/Message-ID: +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/amd/core.c | 65 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 62 insertions(+), 3 deletions(-) + +--- a/arch/x86/events/amd/core.c ++++ b/arch/x86/events/amd/core.c +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include + #include + + #include "../perf_event.h" +@@ -429,6 +430,64 @@ static void amd_pmu_cpu_dead(int cpu) + } + } + ++/* ++ * When a PMC counter overflows, an NMI is used to process the event and ++ * reset the counter. NMI latency can result in the counter being updated ++ * before the NMI can run, which can result in what appear to be spurious ++ * NMIs. This function is intended to wait for the NMI to run and reset ++ * the counter to avoid possible unhandled NMI messages. ++ */ ++#define OVERFLOW_WAIT_COUNT 50 ++ ++static void amd_pmu_wait_on_overflow(int idx) ++{ ++ unsigned int i; ++ u64 counter; ++ ++ /* ++ * Wait for the counter to be reset if it has overflowed. This loop ++ * should exit very, very quickly, but just in case, don't wait ++ * forever... 
++ */ ++ for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { ++ rdmsrl(x86_pmu_event_addr(idx), counter); ++ if (counter & (1ULL << (x86_pmu.cntval_bits - 1))) ++ break; ++ ++ /* Might be in IRQ context, so can't sleep */ ++ udelay(1); ++ } ++} ++ ++static void amd_pmu_disable_all(void) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int idx; ++ ++ x86_pmu_disable_all(); ++ ++ /* ++ * This shouldn't be called from NMI context, but add a safeguard here ++ * to return, since if we're in NMI context we can't wait for an NMI ++ * to reset an overflowed counter value. ++ */ ++ if (in_nmi()) ++ return; ++ ++ /* ++ * Check each counter for overflow and wait for it to be reset by the ++ * NMI if it has overflowed. This relies on the fact that all active ++ * counters are always enabled when this function is caled and ++ * ARCH_PERFMON_EVENTSEL_INT is always set. ++ */ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ if (!test_bit(idx, cpuc->active_mask)) ++ continue; ++ ++ amd_pmu_wait_on_overflow(idx); ++ } ++} ++ + static struct event_constraint * + amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +@@ -622,7 +681,7 @@ static ssize_t amd_event_sysfs_show(char + static __initconst const struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = x86_pmu_handle_irq, +- .disable_all = x86_pmu_disable_all, ++ .disable_all = amd_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, +@@ -728,7 +787,7 @@ void amd_pmu_enable_virt(void) + cpuc->perf_ctr_virt_mask = 0; + + /* Reload all events */ +- x86_pmu_disable_all(); ++ amd_pmu_disable_all(); + x86_pmu_enable_all(0); + } + EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); +@@ -746,7 +805,7 @@ void amd_pmu_disable_virt(void) + cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; + + /* Reload all events */ +- x86_pmu_disable_all(); ++ amd_pmu_disable_all(); + x86_pmu_enable_all(0); + } + 
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); diff --git a/queue-4.19/xen-prevent-buffer-overflow-in-privcmd-ioctl.patch b/queue-4.19/xen-prevent-buffer-overflow-in-privcmd-ioctl.patch new file mode 100644 index 0000000000..265d3b9b50 --- /dev/null +++ b/queue-4.19/xen-prevent-buffer-overflow-in-privcmd-ioctl.patch @@ -0,0 +1,37 @@ +From 42d8644bd77dd2d747e004e367cb0c895a606f39 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Thu, 4 Apr 2019 18:12:17 +0300 +Subject: xen: Prevent buffer overflow in privcmd ioctl + +From: Dan Carpenter + +commit 42d8644bd77dd2d747e004e367cb0c895a606f39 upstream. + +The "call" variable comes from the user in privcmd_ioctl_hypercall(). +It's an offset into the hypercall_page[] which has (PAGE_SIZE / 32) +elements. We need to put an upper bound on it to prevent an out of +bounds access. + +Cc: stable@vger.kernel.org +Fixes: 1246ae0bb992 ("xen: add variable hypercall caller") +Signed-off-by: Dan Carpenter +Reviewed-by: Boris Ostrovsky +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/xen/hypercall.h | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/include/asm/xen/hypercall.h ++++ b/arch/x86/include/asm/xen/hypercall.h +@@ -206,6 +206,9 @@ xen_single_call(unsigned int call, + __HYPERCALL_DECLS; + __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + ++ if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) ++ return -EINVAL; ++ + asm volatile(CALL_NOSPEC + : __HYPERCALL_5PARAM + : [thunk_target] "a" (&hypercall_page[call]) diff --git a/queue-4.19/xtensa-fix-return_address.patch b/queue-4.19/xtensa-fix-return_address.patch new file mode 100644 index 0000000000..f05f33aa59 --- /dev/null +++ b/queue-4.19/xtensa-fix-return_address.patch @@ -0,0 +1,42 @@ +From ada770b1e74a77fff2d5f539bf6c42c25f4784db Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Thu, 4 Apr 2019 11:08:40 -0700 +Subject: xtensa: fix return_address + +From: Max Filippov + +commit ada770b1e74a77fff2d5f539bf6c42c25f4784db upstream. 
+ +return_address returns the address that is one level higher in the call +stack than requested in its argument, because level 0 corresponds to its +caller's return address. Use requested level as the number of stack +frames to skip. + +This fixes the address reported by might_sleep and friends. + +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + arch/xtensa/kernel/stacktrace.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/xtensa/kernel/stacktrace.c ++++ b/arch/xtensa/kernel/stacktrace.c +@@ -253,10 +253,14 @@ static int return_address_cb(struct stac + return 1; + } + ++/* ++ * level == 0 is for the return address from the caller of this function, ++ * not from this function itself. ++ */ + unsigned long return_address(unsigned level) + { + struct return_addr_data r = { +- .skip = level + 1, ++ .skip = level, + }; + walk_stackframe(stack_pointer(NULL), return_address_cb, &r); + return r.addr; -- 2.39.2