From da4f76bfb1e4ab4ccf95e1dd9c27ad21676a89a5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 8 Oct 2019 09:44:23 +0200 Subject: [PATCH] 5.3-stable patches added patches: asoc-define-a-set-of-dapm-pre-post-up-events.patch asoc-sgtl5000-improve-vag-power-and-mute-control.patch can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch crypto-caam-qi-fix-error-handling-in-ern-handler.patch crypto-cavium-zip-add-missing-single_release.patch crypto-ccree-account-for-tee-not-ready-to-report.patch crypto-ccree-use-the-full-crypt-length-value.patch crypto-qat-silence-smp_processor_id-warning.patch crypto-skcipher-unmap-pages-after-an-external-error.patch kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch kvm-s390-fix-__insn32_query-inline-assembly.patch kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch kvm-x86-fix-userspace-set-invalid-cr4.patch mips-treat-loongson-extensions-as-ases.patch nbd-fix-max-number-of-supported-devs.patch pm-devfreq-tegra-fix-khz-to-hz-conversion.patch power-supply-sbs-battery-only-return-health-when-battery-present.patch power-supply-sbs-battery-use-correct-flags-field.patch powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch powerpc-603-fix-handling-of-the-dirty-flag.patch powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch powerpc-kasan-fix-parallel-loading-of-modules.patch 
powerpc-kasan-fix-shadow-area-set-up-for-modules.patch powerpc-mce-fix-mce-handling-for-huge-pages.patch powerpc-mce-schedule-work-from-irq_work.patch powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch powerpc-ptdump-fix-addresses-display-on-ppc32.patch powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch revert-s390-dasd-add-discard-support-for-ese-volumes.patch s390-cio-avoid-calling-strlen-on-null-pointer.patch s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch s390-dasd-fix-error-handling-during-online-processing.patch s390-process-avoid-potential-reading-of-freed-stack.patch s390-sclp-fix-bit-checked-for-has_sipl.patch s390-topology-avoid-firing-events-before-kobjs-are-created.patch timer-read-jiffies-once-when-forwarding-base-clk.patch tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch usercopy-avoid-highmem-pfn-warning.patch --- ...ine-a-set-of-dapm-pre-post-up-events.patch | 36 ++ ...0-improve-vag-power-and-mute-control.patch | 332 +++++++++++++++ ..._reset-allow-more-time-after-a-reset.patch | 57 +++ ...rency-issue-in-givencrypt-descriptor.patch | 95 +++++ ...qi-fix-error-handling-in-ern-handler.patch | 73 ++++ ...avium-zip-add-missing-single_release.patch | 48 +++ ...-account-for-tee-not-ready-to-report.patch | 42 ++ ...cree-use-the-full-crypt-length-value.patch | 34 ++ ...qat-silence-smp_processor_id-warning.patch | 68 +++ ...-unmap-pages-after-an-external-error.patch | 121 ++++++ 
...-only-if-opal-has-required-functions.patch | 122 ++++++ ...u-ready-on-piggybacked-virtual-cores.patch | 71 ++++ ...-doorbell-request-on-migration-on-p9.patch | 52 +++ ...e-context-when-not-using-xive-device.patch | 89 ++++ ...-enabling-xive-escalation-interrupts.patch | 111 +++++ ...n-interrupts-before-disabling-the-vp.patch | 111 +++++ ...0-fix-__insn32_query-inline-assembly.patch | 49 +++ ...and-size-at-the-start-of-s390_mem_op.patch | 50 +++ ...vm-x86-fix-userspace-set-invalid-cr4.patch | 123 ++++++ ...ps-treat-loongson-extensions-as-ases.patch | 107 +++++ ...nbd-fix-max-number-of-supported-devs.patch | 159 +++++++ ...vfreq-tegra-fix-khz-to-hz-conversion.patch | 75 ++++ ...y-return-health-when-battery-present.patch | 74 ++++ ...-sbs-battery-use-correct-flags-field.patch | 37 ++ ...e-with-debug_pagealloc-without-kasan.patch | 61 +++ ...c-603-fix-handling-of-the-dirty-flag.patch | 50 +++ ...ie-fixup-for-some-hardware-revisions.patch | 81 ++++ ...me-cpu_ftr_p9_tlbie_bug-feature-flag.patch | 110 +++++ ...asan-fix-parallel-loading-of-modules.patch | 74 ++++ ...n-fix-shadow-area-set-up-for-modules.patch | 40 ++ ...-mce-fix-mce-handling-for-huge-pages.patch | 81 ++++ ...erpc-mce-schedule-work-from-irq_work.patch | 67 +++ ...lect-page_kernel_ro-or-page_readonly.patch | 76 ++++ ...rpc-mm-fix-an-oops-in-kasan_mmu_init.patch | 85 ++++ ...idr-mtlpidr-ordering-issue-on-power9.patch | 347 ++++++++++++++++ ...oda-fix-race-in-tce-level-allocation.patch | 72 ++++ ...mbol-map-to-only-be-readable-by-root.patch | 54 +++ ...tplug_lock-acquisition-in-resize_hpt.patch | 170 ++++++++ ...tdump-fix-addresses-display-on-ppc32.patch | 38 ++ ...method-for-xive-to-fix-shutdown-race.patch | 390 ++++++++++++++++++ ...-add-discard-support-for-ese-volumes.patch | 139 +++++++ ...avoid-calling-strlen-on-null-pointer.patch | 55 +++ ...els-with-no-parent-from-pseudo-check.patch | 54 +++ ...or-handling-during-online-processing.patch | 94 +++++ ...oid-potential-reading-of-freed-stack.patch | 62 +++ 
...90-sclp-fix-bit-checked-for-has_sipl.patch | 31 ++ ...ring-events-before-kobjs-are-created.patch | 61 +++ queue-5.3/series | 52 +++ ...iffies-once-when-forwarding-base-clk.patch | 78 ++++ ...-cmdlines-in-add_new_comm-on-failure.patch | 52 +++ ...est-of-do_generate_dynamic_list_file.patch | 55 +++ ...erence-alias-has-correct-var_ref_idx.patch | 97 +++++ .../usercopy-avoid-highmem-pfn-warning.patch | 88 ++++ 53 files changed, 4850 insertions(+) create mode 100644 queue-5.3/asoc-define-a-set-of-dapm-pre-post-up-events.patch create mode 100644 queue-5.3/asoc-sgtl5000-improve-vag-power-and-mute-control.patch create mode 100644 queue-5.3/can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch create mode 100644 queue-5.3/crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch create mode 100644 queue-5.3/crypto-caam-qi-fix-error-handling-in-ern-handler.patch create mode 100644 queue-5.3/crypto-cavium-zip-add-missing-single_release.patch create mode 100644 queue-5.3/crypto-ccree-account-for-tee-not-ready-to-report.patch create mode 100644 queue-5.3/crypto-ccree-use-the-full-crypt-length-value.patch create mode 100644 queue-5.3/crypto-qat-silence-smp_processor_id-warning.patch create mode 100644 queue-5.3/crypto-skcipher-unmap-pages-after-an-external-error.patch create mode 100644 queue-5.3/kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch create mode 100644 queue-5.3/kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch create mode 100644 queue-5.3/kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch create mode 100644 queue-5.3/kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch create mode 100644 queue-5.3/kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch create mode 100644 queue-5.3/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch create mode 100644 
queue-5.3/kvm-s390-fix-__insn32_query-inline-assembly.patch create mode 100644 queue-5.3/kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch create mode 100644 queue-5.3/kvm-x86-fix-userspace-set-invalid-cr4.patch create mode 100644 queue-5.3/mips-treat-loongson-extensions-as-ases.patch create mode 100644 queue-5.3/nbd-fix-max-number-of-supported-devs.patch create mode 100644 queue-5.3/pm-devfreq-tegra-fix-khz-to-hz-conversion.patch create mode 100644 queue-5.3/power-supply-sbs-battery-only-return-health-when-battery-present.patch create mode 100644 queue-5.3/power-supply-sbs-battery-use-correct-flags-field.patch create mode 100644 queue-5.3/powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch create mode 100644 queue-5.3/powerpc-603-fix-handling-of-the-dirty-flag.patch create mode 100644 queue-5.3/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch create mode 100644 queue-5.3/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch create mode 100644 queue-5.3/powerpc-kasan-fix-parallel-loading-of-modules.patch create mode 100644 queue-5.3/powerpc-kasan-fix-shadow-area-set-up-for-modules.patch create mode 100644 queue-5.3/powerpc-mce-fix-mce-handling-for-huge-pages.patch create mode 100644 queue-5.3/powerpc-mce-schedule-work-from-irq_work.patch create mode 100644 queue-5.3/powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch create mode 100644 queue-5.3/powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch create mode 100644 queue-5.3/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch create mode 100644 queue-5.3/powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch create mode 100644 queue-5.3/powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch create mode 100644 queue-5.3/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch create mode 100644 queue-5.3/powerpc-ptdump-fix-addresses-display-on-ppc32.patch 
create mode 100644 queue-5.3/powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch create mode 100644 queue-5.3/revert-s390-dasd-add-discard-support-for-ese-volumes.patch create mode 100644 queue-5.3/s390-cio-avoid-calling-strlen-on-null-pointer.patch create mode 100644 queue-5.3/s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch create mode 100644 queue-5.3/s390-dasd-fix-error-handling-during-online-processing.patch create mode 100644 queue-5.3/s390-process-avoid-potential-reading-of-freed-stack.patch create mode 100644 queue-5.3/s390-sclp-fix-bit-checked-for-has_sipl.patch create mode 100644 queue-5.3/s390-topology-avoid-firing-events-before-kobjs-are-created.patch create mode 100644 queue-5.3/series create mode 100644 queue-5.3/timer-read-jiffies-once-when-forwarding-base-clk.patch create mode 100644 queue-5.3/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch create mode 100644 queue-5.3/tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch create mode 100644 queue-5.3/tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch create mode 100644 queue-5.3/usercopy-avoid-highmem-pfn-warning.patch diff --git a/queue-5.3/asoc-define-a-set-of-dapm-pre-post-up-events.patch b/queue-5.3/asoc-define-a-set-of-dapm-pre-post-up-events.patch new file mode 100644 index 00000000000..1ebc789e207 --- /dev/null +++ b/queue-5.3/asoc-define-a-set-of-dapm-pre-post-up-events.patch @@ -0,0 +1,36 @@ +From cfc8f568aada98f9608a0a62511ca18d647613e2 Mon Sep 17 00:00:00 2001 +From: Oleksandr Suvorov +Date: Fri, 19 Jul 2019 10:05:30 +0000 +Subject: ASoC: Define a set of DAPM pre/post-up events + +From: Oleksandr Suvorov + +commit cfc8f568aada98f9608a0a62511ca18d647613e2 upstream. + +Prepare to use SND_SOC_DAPM_PRE_POST_PMU definition to +reduce coming code size and make it more readable. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Oleksandr Suvorov +Reviewed-by: Marcel Ziswiler +Reviewed-by: Igor Opaniuk +Reviewed-by: Fabio Estevam +Link: https://lore.kernel.org/r/20190719100524.23300-2-oleksandr.suvorov@toradex.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + include/sound/soc-dapm.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/sound/soc-dapm.h ++++ b/include/sound/soc-dapm.h +@@ -353,6 +353,8 @@ struct device; + #define SND_SOC_DAPM_WILL_PMD 0x80 /* called at start of sequence */ + #define SND_SOC_DAPM_PRE_POST_PMD \ + (SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD) ++#define SND_SOC_DAPM_PRE_POST_PMU \ ++ (SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU) + + /* convenience event type detection */ + #define SND_SOC_DAPM_EVENT_ON(e) \ diff --git a/queue-5.3/asoc-sgtl5000-improve-vag-power-and-mute-control.patch b/queue-5.3/asoc-sgtl5000-improve-vag-power-and-mute-control.patch new file mode 100644 index 00000000000..159d31a1def --- /dev/null +++ b/queue-5.3/asoc-sgtl5000-improve-vag-power-and-mute-control.patch @@ -0,0 +1,332 @@ +From b1f373a11d25fc9a5f7679c9b85799fe09b0dc4a Mon Sep 17 00:00:00 2001 +From: Oleksandr Suvorov +Date: Fri, 19 Jul 2019 10:05:31 +0000 +Subject: ASoC: sgtl5000: Improve VAG power and mute control + +From: Oleksandr Suvorov + +commit b1f373a11d25fc9a5f7679c9b85799fe09b0dc4a upstream. + +VAG power control is improved to fit the manual [1]. This patch fixes as +minimum one bug: if customer muxes Headphone to Line-In right after boot, +the VAG power remains off that leads to poor sound quality from line-in. + +I.e. after boot: + - Connect sound source to Line-In jack; + - Connect headphone to HP jack; + - Run following commands: + $ amixer set 'Headphone' 80% + $ amixer set 'Headphone Mux' LINE_IN + +Change VAG power on/off control according to the following algorithm: + - turn VAG power ON on the 1st incoming event. + - keep it ON if there is any active VAG consumer (ADC/DAC/HP/Line-In). 
+ - turn VAG power OFF when there is the latest consumer's pre-down event + come. + - always delay after VAG power OFF to avoid pop. + - delay after VAG power ON if the initiative consumer is Line-In, this + prevents pop during line-in muxing. + +According to the data sheet [1], to avoid any pops/clicks, +the outputs should be muted during input/output +routing changes. + +[1] https://www.nxp.com/docs/en/data-sheet/SGTL5000.pdf + +Cc: stable@vger.kernel.org +Fixes: 9b34e6cc3bc2 ("ASoC: Add Freescale SGTL5000 codec support") +Signed-off-by: Oleksandr Suvorov +Reviewed-by: Marcel Ziswiler +Reviewed-by: Fabio Estevam +Reviewed-by: Cezary Rojewski +Link: https://lore.kernel.org/r/20190719100524.23300-3-oleksandr.suvorov@toradex.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + sound/soc/codecs/sgtl5000.c | 224 ++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 194 insertions(+), 30 deletions(-) + +--- a/sound/soc/codecs/sgtl5000.c ++++ b/sound/soc/codecs/sgtl5000.c +@@ -31,6 +31,13 @@ + #define SGTL5000_DAP_REG_OFFSET 0x0100 + #define SGTL5000_MAX_REG_OFFSET 0x013A + ++/* Delay for the VAG ramp up */ ++#define SGTL5000_VAG_POWERUP_DELAY 500 /* ms */ ++/* Delay for the VAG ramp down */ ++#define SGTL5000_VAG_POWERDOWN_DELAY 500 /* ms */ ++ ++#define SGTL5000_OUTPUTS_MUTE (SGTL5000_HP_MUTE | SGTL5000_LINE_OUT_MUTE) ++ + /* default value of sgtl5000 registers */ + static const struct reg_default sgtl5000_reg_defaults[] = { + { SGTL5000_CHIP_DIG_POWER, 0x0000 }, +@@ -123,6 +130,13 @@ enum { + I2S_SCLK_STRENGTH_HIGH, + }; + ++enum { ++ HP_POWER_EVENT, ++ DAC_POWER_EVENT, ++ ADC_POWER_EVENT, ++ LAST_POWER_EVENT = ADC_POWER_EVENT ++}; ++ + /* sgtl5000 private structure in codec */ + struct sgtl5000_priv { + int sysclk; /* sysclk rate */ +@@ -137,8 +151,109 @@ struct sgtl5000_priv { + u8 micbias_voltage; + u8 lrclk_strength; + u8 sclk_strength; ++ u16 mute_state[LAST_POWER_EVENT + 1]; + }; + ++static inline int hp_sel_input(struct 
snd_soc_component *component) ++{ ++ return (snd_soc_component_read32(component, SGTL5000_CHIP_ANA_CTRL) & ++ SGTL5000_HP_SEL_MASK) >> SGTL5000_HP_SEL_SHIFT; ++} ++ ++static inline u16 mute_output(struct snd_soc_component *component, ++ u16 mute_mask) ++{ ++ u16 mute_reg = snd_soc_component_read32(component, ++ SGTL5000_CHIP_ANA_CTRL); ++ ++ snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_CTRL, ++ mute_mask, mute_mask); ++ return mute_reg; ++} ++ ++static inline void restore_output(struct snd_soc_component *component, ++ u16 mute_mask, u16 mute_reg) ++{ ++ snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_CTRL, ++ mute_mask, mute_reg); ++} ++ ++static void vag_power_on(struct snd_soc_component *component, u32 source) ++{ ++ if (snd_soc_component_read32(component, SGTL5000_CHIP_ANA_POWER) & ++ SGTL5000_VAG_POWERUP) ++ return; ++ ++ snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER, ++ SGTL5000_VAG_POWERUP, SGTL5000_VAG_POWERUP); ++ ++ /* When VAG powering on to get local loop from Line-In, the sleep ++ * is required to avoid loud pop. ++ */ ++ if (hp_sel_input(component) == SGTL5000_HP_SEL_LINE_IN && ++ source == HP_POWER_EVENT) ++ msleep(SGTL5000_VAG_POWERUP_DELAY); ++} ++ ++static int vag_power_consumers(struct snd_soc_component *component, ++ u16 ana_pwr_reg, u32 source) ++{ ++ int consumers = 0; ++ ++ /* count dac/adc consumers unconditional */ ++ if (ana_pwr_reg & SGTL5000_DAC_POWERUP) ++ consumers++; ++ if (ana_pwr_reg & SGTL5000_ADC_POWERUP) ++ consumers++; ++ ++ /* ++ * If the event comes from HP and Line-In is selected, ++ * current action is 'DAC to be powered down'. ++ * As HP_POWERUP is not set when HP muxed to line-in, ++ * we need to keep VAG power ON. 
++ */ ++ if (source == HP_POWER_EVENT) { ++ if (hp_sel_input(component) == SGTL5000_HP_SEL_LINE_IN) ++ consumers++; ++ } else { ++ if (ana_pwr_reg & SGTL5000_HP_POWERUP) ++ consumers++; ++ } ++ ++ return consumers; ++} ++ ++static void vag_power_off(struct snd_soc_component *component, u32 source) ++{ ++ u16 ana_pwr = snd_soc_component_read32(component, ++ SGTL5000_CHIP_ANA_POWER); ++ ++ if (!(ana_pwr & SGTL5000_VAG_POWERUP)) ++ return; ++ ++ /* ++ * This function calls when any of VAG power consumers is disappearing. ++ * Thus, if there is more than one consumer at the moment, as minimum ++ * one consumer will definitely stay after the end of the current ++ * event. ++ * Don't clear VAG_POWERUP if 2 or more consumers of VAG present: ++ * - LINE_IN (for HP events) / HP (for DAC/ADC events) ++ * - DAC ++ * - ADC ++ * (the current consumer is disappearing right now) ++ */ ++ if (vag_power_consumers(component, ana_pwr, source) >= 2) ++ return; ++ ++ snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER, ++ SGTL5000_VAG_POWERUP, 0); ++ /* In power down case, we need wait 400-1000 ms ++ * when VAG fully ramped down. ++ * As longer we wait, as smaller pop we've got. ++ */ ++ msleep(SGTL5000_VAG_POWERDOWN_DELAY); ++} ++ + /* + * mic_bias power on/off share the same register bits with + * output impedance of mic bias, when power on mic bias, we +@@ -170,36 +285,46 @@ static int mic_bias_event(struct snd_soc + return 0; + } + +-/* +- * As manual described, ADC/DAC only works when VAG powerup, +- * So enabled VAG before ADC/DAC up. +- * In power down case, we need wait 400ms when vag fully ramped down. 
+- */ +-static int power_vag_event(struct snd_soc_dapm_widget *w, +- struct snd_kcontrol *kcontrol, int event) ++static int vag_and_mute_control(struct snd_soc_component *component, ++ int event, int event_source) + { +- struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); +- const u32 mask = SGTL5000_DAC_POWERUP | SGTL5000_ADC_POWERUP; ++ static const u16 mute_mask[] = { ++ /* ++ * Mask for HP_POWER_EVENT. ++ * Muxing Headphones have to be wrapped with mute/unmute ++ * headphones only. ++ */ ++ SGTL5000_HP_MUTE, ++ /* ++ * Masks for DAC_POWER_EVENT/ADC_POWER_EVENT. ++ * Muxing DAC or ADC block have to wrapped with mute/unmute ++ * both headphones and line-out. ++ */ ++ SGTL5000_OUTPUTS_MUTE, ++ SGTL5000_OUTPUTS_MUTE ++ }; ++ ++ struct sgtl5000_priv *sgtl5000 = ++ snd_soc_component_get_drvdata(component); + + switch (event) { ++ case SND_SOC_DAPM_PRE_PMU: ++ sgtl5000->mute_state[event_source] = ++ mute_output(component, mute_mask[event_source]); ++ break; + case SND_SOC_DAPM_POST_PMU: +- snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER, +- SGTL5000_VAG_POWERUP, SGTL5000_VAG_POWERUP); +- msleep(400); ++ vag_power_on(component, event_source); ++ restore_output(component, mute_mask[event_source], ++ sgtl5000->mute_state[event_source]); + break; +- + case SND_SOC_DAPM_PRE_PMD: +- /* +- * Don't clear VAG_POWERUP, when both DAC and ADC are +- * operational to prevent inadvertently starving the +- * other one of them. 
+- */ +- if ((snd_soc_component_read32(component, SGTL5000_CHIP_ANA_POWER) & +- mask) != mask) { +- snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER, +- SGTL5000_VAG_POWERUP, 0); +- msleep(400); +- } ++ sgtl5000->mute_state[event_source] = ++ mute_output(component, mute_mask[event_source]); ++ vag_power_off(component, event_source); ++ break; ++ case SND_SOC_DAPM_POST_PMD: ++ restore_output(component, mute_mask[event_source], ++ sgtl5000->mute_state[event_source]); + break; + default: + break; +@@ -208,6 +333,41 @@ static int power_vag_event(struct snd_so + return 0; + } + ++/* ++ * Mute Headphone when power it up/down. ++ * Control VAG power on HP power path. ++ */ ++static int headphone_pga_event(struct snd_soc_dapm_widget *w, ++ struct snd_kcontrol *kcontrol, int event) ++{ ++ struct snd_soc_component *component = ++ snd_soc_dapm_to_component(w->dapm); ++ ++ return vag_and_mute_control(component, event, HP_POWER_EVENT); ++} ++ ++/* As manual describes, ADC/DAC powering up/down requires ++ * to mute outputs to avoid pops. ++ * Control VAG power on ADC/DAC power path. 
++ */ ++static int adc_updown_depop(struct snd_soc_dapm_widget *w, ++ struct snd_kcontrol *kcontrol, int event) ++{ ++ struct snd_soc_component *component = ++ snd_soc_dapm_to_component(w->dapm); ++ ++ return vag_and_mute_control(component, event, ADC_POWER_EVENT); ++} ++ ++static int dac_updown_depop(struct snd_soc_dapm_widget *w, ++ struct snd_kcontrol *kcontrol, int event) ++{ ++ struct snd_soc_component *component = ++ snd_soc_dapm_to_component(w->dapm); ++ ++ return vag_and_mute_control(component, event, DAC_POWER_EVENT); ++} ++ + /* input sources for ADC */ + static const char *adc_mux_text[] = { + "MIC_IN", "LINE_IN" +@@ -280,7 +440,10 @@ static const struct snd_soc_dapm_widget + mic_bias_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + +- SND_SOC_DAPM_PGA("HP", SGTL5000_CHIP_ANA_POWER, 4, 0, NULL, 0), ++ SND_SOC_DAPM_PGA_E("HP", SGTL5000_CHIP_ANA_POWER, 4, 0, NULL, 0, ++ headphone_pga_event, ++ SND_SOC_DAPM_PRE_POST_PMU | ++ SND_SOC_DAPM_PRE_POST_PMD), + SND_SOC_DAPM_PGA("LO", SGTL5000_CHIP_ANA_POWER, 0, 0, NULL, 0), + + SND_SOC_DAPM_MUX("Capture Mux", SND_SOC_NOPM, 0, 0, &adc_mux), +@@ -301,11 +464,12 @@ static const struct snd_soc_dapm_widget + 0, SGTL5000_CHIP_DIG_POWER, + 1, 0), + +- SND_SOC_DAPM_ADC("ADC", "Capture", SGTL5000_CHIP_ANA_POWER, 1, 0), +- SND_SOC_DAPM_DAC("DAC", "Playback", SGTL5000_CHIP_ANA_POWER, 3, 0), +- +- SND_SOC_DAPM_PRE("VAG_POWER_PRE", power_vag_event), +- SND_SOC_DAPM_POST("VAG_POWER_POST", power_vag_event), ++ SND_SOC_DAPM_ADC_E("ADC", "Capture", SGTL5000_CHIP_ANA_POWER, 1, 0, ++ adc_updown_depop, SND_SOC_DAPM_PRE_POST_PMU | ++ SND_SOC_DAPM_PRE_POST_PMD), ++ SND_SOC_DAPM_DAC_E("DAC", "Playback", SGTL5000_CHIP_ANA_POWER, 3, 0, ++ dac_updown_depop, SND_SOC_DAPM_PRE_POST_PMU | ++ SND_SOC_DAPM_PRE_POST_PMD), + }; + + /* routes for sgtl5000 */ diff --git a/queue-5.3/can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch b/queue-5.3/can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch new file mode 
100644 index 00000000000..9f58cd84000 --- /dev/null +++ b/queue-5.3/can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch @@ -0,0 +1,57 @@ +From d84ea2123f8d27144e3f4d58cd88c9c6ddc799de Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Tue, 13 Aug 2019 16:01:02 +0200 +Subject: can: mcp251x: mcp251x_hw_reset(): allow more time after a reset + +From: Marc Kleine-Budde + +commit d84ea2123f8d27144e3f4d58cd88c9c6ddc799de upstream. + +Some boards take longer than 5ms to power up after a reset, so allow +some retries attempts before giving up. + +Fixes: ff06d611a31c ("can: mcp251x: Improve mcp251x_hw_reset()") +Cc: linux-stable +Tested-by: Sean Nyekjaer +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/can/spi/mcp251x.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +--- a/drivers/net/can/spi/mcp251x.c ++++ b/drivers/net/can/spi/mcp251x.c +@@ -612,7 +612,7 @@ static int mcp251x_setup(struct net_devi + static int mcp251x_hw_reset(struct spi_device *spi) + { + struct mcp251x_priv *priv = spi_get_drvdata(spi); +- u8 reg; ++ unsigned long timeout; + int ret; + + /* Wait for oscillator startup timer after power up */ +@@ -626,10 +626,19 @@ static int mcp251x_hw_reset(struct spi_d + /* Wait for oscillator startup timer after reset */ + mdelay(MCP251X_OST_DELAY_MS); + +- reg = mcp251x_read_reg(spi, CANSTAT); +- if ((reg & CANCTRL_REQOP_MASK) != CANCTRL_REQOP_CONF) +- return -ENODEV; +- ++ /* Wait for reset to finish */ ++ timeout = jiffies + HZ; ++ while ((mcp251x_read_reg(spi, CANSTAT) & CANCTRL_REQOP_MASK) != ++ CANCTRL_REQOP_CONF) { ++ usleep_range(MCP251X_OST_DELAY_MS * 1000, ++ MCP251X_OST_DELAY_MS * 1000 * 2); ++ ++ if (time_after(jiffies, timeout)) { ++ dev_err(&spi->dev, ++ "MCP251x didn't enter in conf mode after reset\n"); ++ return -EBUSY; ++ } ++ } + return 0; + } + diff --git a/queue-5.3/crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch 
b/queue-5.3/crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch new file mode 100644 index 00000000000..2e2f88257cd --- /dev/null +++ b/queue-5.3/crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch @@ -0,0 +1,95 @@ +From 48f89d2a2920166c35b1c0b69917dbb0390ebec7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Horia=20Geant=C4=83?= +Date: Tue, 30 Jul 2019 08:48:33 +0300 +Subject: crypto: caam - fix concurrency issue in givencrypt descriptor +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Horia Geantă + +commit 48f89d2a2920166c35b1c0b69917dbb0390ebec7 upstream. + +IV transfer from ofifo to class2 (set up at [29][30]) is not guaranteed +to be scheduled before the data transfer from ofifo to external memory +(set up at [38]: + +[29] 10FA0004 ld: ind-nfifo (len=4) imm +[30] 81F00010 <nfifo_entry: ofifo->class2 type=msg len=16> +[31] 14820004 ld: ccb2-datasz len=4 offs=0 imm +[32] 00000010 data:0x00000010 +[33] 8210010D operation: cls1-op aes cbc init-final enc +[34] A8080B04 math: (seqin + math0)->vseqout len=4 +[35] 28000010 seqfifold: skip len=16 +[36] A8080A04 math: (seqin + math0)->vseqin len=4 +[37] 2F1E0000 seqfifold: both msg1->2-last2-last1 len=vseqinsz +[38] 69300000 seqfifostr: msg len=vseqoutsz +[39] 5C20000C seqstr: ccb2 ctx len=12 offs=0 + +If ofifo -> external memory transfer happens first, DECO will hang +(issuing a Watchdog Timeout error, if WDOG is enabled) waiting for +data availability in ofifo for the ofifo -> c2 ififo transfer. + +Make sure IV transfer happens first by waiting for all CAAM internal +transfers to end before starting payload transfer. + +New descriptor with jump command inserted at [37]: + +[..] 
+[36] A8080A04 math: (seqin + math0)->vseqin len=4 +[37] A1000401 jump: jsl1 all-match[!nfifopend] offset=[01] local->[38] +[38] 2F1E0000 seqfifold: both msg1->2-last2-last1 len=vseqinsz +[39] 69300000 seqfifostr: msg len=vseqoutsz +[40] 5C20000C seqstr: ccb2 ctx len=12 offs=0 + +[Note: the issue is present in the descriptor from the very beginning +(cf. Fixes tag). However I've marked it v4.19+ since it's the oldest +maintained kernel that the patch applies clean against.] + +Cc: <stable@vger.kernel.org> # v4.19+ +Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation") +Signed-off-by: Horia Geantă +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/caam/caamalg_desc.c | 9 +++++++++ + drivers/crypto/caam/caamalg_desc.h | 2 +- + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/caam/caamalg_desc.c ++++ b/drivers/crypto/caam/caamalg_desc.c +@@ -503,6 +503,7 @@ void cnstr_shdsc_aead_givencap(u32 * con + const bool is_qi, int era) + { + u32 geniv, moveiv; ++ u32 *wait_cmd; + + /* Note: Context registers are saved. */ + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era); +@@ -598,6 +599,14 @@ copy_iv: + + /* Will read cryptlen */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); ++ ++ /* ++ * Wait for IV transfer (ofifo -> class2) to finish before starting ++ * ciphertext transfer (ofifo -> external memory). 
++ */ ++ wait_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NIFP); ++ set_jump_tgt_here(desc, wait_cmd); ++ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | + FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); +--- a/drivers/crypto/caam/caamalg_desc.h ++++ b/drivers/crypto/caam/caamalg_desc.h +@@ -12,7 +12,7 @@ + #define DESC_AEAD_BASE (4 * CAAM_CMD_SZ) + #define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ) + #define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) +-#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ) ++#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 8 * CAAM_CMD_SZ) + #define DESC_QI_AEAD_ENC_LEN (DESC_AEAD_ENC_LEN + 3 * CAAM_CMD_SZ) + #define DESC_QI_AEAD_DEC_LEN (DESC_AEAD_DEC_LEN + 3 * CAAM_CMD_SZ) + #define DESC_QI_AEAD_GIVENC_LEN (DESC_AEAD_GIVENC_LEN + 3 * CAAM_CMD_SZ) diff --git a/queue-5.3/crypto-caam-qi-fix-error-handling-in-ern-handler.patch b/queue-5.3/crypto-caam-qi-fix-error-handling-in-ern-handler.patch new file mode 100644 index 00000000000..c42aba956eb --- /dev/null +++ b/queue-5.3/crypto-caam-qi-fix-error-handling-in-ern-handler.patch @@ -0,0 +1,73 @@ +From 51fab3d73054ca5b06b26e20edac0486b052c6f4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Horia=20Geant=C4=83?= +Date: Wed, 31 Jul 2019 16:08:02 +0300 +Subject: crypto: caam/qi - fix error handling in ERN handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Horia Geantă + +commit 51fab3d73054ca5b06b26e20edac0486b052c6f4 upstream. + +ERN handler calls the caam/qi frontend "done" callback with a status +of -EIO. 
This is incorrect, since the callback expects a status value +meaningful for the crypto engine - hence the cryptic messages +like the one below: +platform caam_qi: 15: unknown error source + +Fix this by providing the callback with: +-the status returned by the crypto engine (fd[status]) in case +it contains an error, OR +-a QI "No error" code otherwise; this will trigger the message: +platform caam_qi: 50000000: Queue Manager Interface: No error +which is fine, since QMan driver provides details about the cause of +failure + +Cc: <stable@vger.kernel.org> # v5.1+ +Fixes: 67c2315def06 ("crypto: caam - add Queue Interface (QI) backend support") +Signed-off-by: Horia Geantă +Reviewed-by: Iuliana Prodan +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/caam/error.c | 1 + + drivers/crypto/caam/qi.c | 5 ++++- + drivers/crypto/caam/regs.h | 1 + + 3 files changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/caam/error.c ++++ b/drivers/crypto/caam/error.c +@@ -118,6 +118,7 @@ static const struct { + u8 value; + const char *error_text; + } qi_error_list[] = { ++ { 0x00, "No error" }, + { 0x1F, "Job terminated by FQ or ICID flush" }, + { 0x20, "FD format error"}, + { 0x21, "FD command format error"}, +--- a/drivers/crypto/caam/qi.c ++++ b/drivers/crypto/caam/qi.c +@@ -163,7 +163,10 @@ static void caam_fq_ern_cb(struct qman_p + dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), + sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); + +- drv_req->cbk(drv_req, -EIO); ++ if (fd->status) ++ drv_req->cbk(drv_req, be32_to_cpu(fd->status)); ++ else ++ drv_req->cbk(drv_req, JRSTA_SSRC_QI); + } + + static struct qman_fq *create_caam_req_fq(struct device *qidev, +--- a/drivers/crypto/caam/regs.h ++++ b/drivers/crypto/caam/regs.h +@@ -641,6 +641,7 @@ struct caam_job_ring { + #define JRSTA_SSRC_CCB_ERROR 0x20000000 + #define JRSTA_SSRC_JUMP_HALT_USER 0x30000000 + #define JRSTA_SSRC_DECO 0x40000000 ++#define JRSTA_SSRC_QI 0x50000000 + #define JRSTA_SSRC_JRERROR 
0x60000000 + #define JRSTA_SSRC_JUMP_HALT_CC 0x70000000 + diff --git a/queue-5.3/crypto-cavium-zip-add-missing-single_release.patch b/queue-5.3/crypto-cavium-zip-add-missing-single_release.patch new file mode 100644 index 00000000000..69a166eebf2 --- /dev/null +++ b/queue-5.3/crypto-cavium-zip-add-missing-single_release.patch @@ -0,0 +1,48 @@ +From c552ffb5c93d9d65aaf34f5f001c4e7e8484ced1 Mon Sep 17 00:00:00 2001 +From: Wei Yongjun +Date: Wed, 4 Sep 2019 14:18:09 +0000 +Subject: crypto: cavium/zip - Add missing single_release() + +From: Wei Yongjun + +commit c552ffb5c93d9d65aaf34f5f001c4e7e8484ced1 upstream. + +When using single_open() for opening, single_release() should be +used instead of seq_release(), otherwise there is a memory leak. + +Fixes: 09ae5d37e093 ("crypto: zip - Add Compression/Decompression statistics") +Cc: +Signed-off-by: Wei Yongjun +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/cavium/zip/zip_main.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/crypto/cavium/zip/zip_main.c ++++ b/drivers/crypto/cavium/zip/zip_main.c +@@ -593,6 +593,7 @@ static const struct file_operations zip_ + .owner = THIS_MODULE, + .open = zip_stats_open, + .read = seq_read, ++ .release = single_release, + }; + + static int zip_clear_open(struct inode *inode, struct file *file) +@@ -604,6 +605,7 @@ static const struct file_operations zip_ + .owner = THIS_MODULE, + .open = zip_clear_open, + .read = seq_read, ++ .release = single_release, + }; + + static int zip_regs_open(struct inode *inode, struct file *file) +@@ -615,6 +617,7 @@ static const struct file_operations zip_ + .owner = THIS_MODULE, + .open = zip_regs_open, + .read = seq_read, ++ .release = single_release, + }; + + /* Root directory for thunderx_zip debugfs entry */ diff --git a/queue-5.3/crypto-ccree-account-for-tee-not-ready-to-report.patch b/queue-5.3/crypto-ccree-account-for-tee-not-ready-to-report.patch new file mode 100644 index 00000000000..4602d6ca2f8 
--- /dev/null +++ b/queue-5.3/crypto-ccree-account-for-tee-not-ready-to-report.patch @@ -0,0 +1,42 @@ +From 76a95bd8f9e10cade9c4c8df93b5c20ff45dc0f5 Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Tue, 2 Jul 2019 14:39:19 +0300 +Subject: crypto: ccree - account for TEE not ready to report + +From: Gilad Ben-Yossef + +commit 76a95bd8f9e10cade9c4c8df93b5c20ff45dc0f5 upstream. + +When ccree driver runs it checks the state of the Trusted Execution +Environment CryptoCell driver before proceeding. We did not account +for cases where the TEE side is not ready or not available at all. +Fix it by only considering TEE error state after sync with the TEE +side driver. + +Signed-off-by: Gilad Ben-Yossef +Fixes: ab8ec9658f5a ("crypto: ccree - add FIPS support") +CC: stable@vger.kernel.org # v4.17+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_fips.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/ccree/cc_fips.c ++++ b/drivers/crypto/ccree/cc_fips.c +@@ -21,7 +21,13 @@ static bool cc_get_tee_fips_status(struc + u32 reg; + + reg = cc_ioread(drvdata, CC_REG(GPR_HOST)); +- return (reg == (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK)); ++ /* Did the TEE report status? */ ++ if (reg & CC_FIPS_SYNC_TEE_STATUS) ++ /* Yes. Is it OK? */ ++ return (reg & CC_FIPS_SYNC_MODULE_OK); ++ ++ /* No. 
It's either not in use or will be reported later */ ++ return true; + } + + /* diff --git a/queue-5.3/crypto-ccree-use-the-full-crypt-length-value.patch b/queue-5.3/crypto-ccree-use-the-full-crypt-length-value.patch new file mode 100644 index 00000000000..a93338bbe4e --- /dev/null +++ b/queue-5.3/crypto-ccree-use-the-full-crypt-length-value.patch @@ -0,0 +1,34 @@ +From 7a4be6c113c1f721818d1e3722a9015fe393295c Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Mon, 29 Jul 2019 13:40:18 +0300 +Subject: crypto: ccree - use the full crypt length value + +From: Gilad Ben-Yossef + +commit 7a4be6c113c1f721818d1e3722a9015fe393295c upstream. + +In case of AEAD decryption verification error we were using the +wrong value to zero out the plaintext buffer leaving the end of +the buffer with the false plaintext. + +Signed-off-by: Gilad Ben-Yossef +Fixes: ff27e85a85bb ("crypto: ccree - add AEAD support") +CC: stable@vger.kernel.org # v4.17+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_aead.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/crypto/ccree/cc_aead.c ++++ b/drivers/crypto/ccree/cc_aead.c +@@ -236,7 +236,7 @@ static void cc_aead_complete(struct devi + /* In case of payload authentication failure, MUST NOT + * revealed the decrypted message --> zero its memory.
+ */ +- cc_zero_sgl(areq->dst, areq_ctx->cryptlen); ++ cc_zero_sgl(areq->dst, areq->cryptlen); + err = -EBADMSG; + } + } else { /*ENCRYPT*/ diff --git a/queue-5.3/crypto-qat-silence-smp_processor_id-warning.patch b/queue-5.3/crypto-qat-silence-smp_processor_id-warning.patch new file mode 100644 index 00000000000..569785b995c --- /dev/null +++ b/queue-5.3/crypto-qat-silence-smp_processor_id-warning.patch @@ -0,0 +1,68 @@ +From 1b82feb6c5e1996513d0fb0bbb475417088b4954 Mon Sep 17 00:00:00 2001 +From: Alexander Sverdlin +Date: Tue, 23 Jul 2019 07:24:01 +0000 +Subject: crypto: qat - Silence smp_processor_id() warning + +From: Alexander Sverdlin + +commit 1b82feb6c5e1996513d0fb0bbb475417088b4954 upstream. + +It seems that smp_processor_id() is only used for a best-effort +load-balancing, refer to qat_crypto_get_instance_node(). It's not feasible +to disable preemption for the duration of the crypto requests. Therefore, +just silence the warning. This commit is similar to e7a9b05ca4 +("crypto: cavium - Fix smp_processor_id() warnings"). + +Silences the following splat: +BUG: using smp_processor_id() in preemptible [00000000] code: cryptomgr_test/2904 +caller is qat_alg_ablkcipher_setkey+0x300/0x4a0 [intel_qat] +CPU: 1 PID: 2904 Comm: cryptomgr_test Tainted: P O 4.14.69 #1 +... +Call Trace: + dump_stack+0x5f/0x86 + check_preemption_disabled+0xd3/0xe0 + qat_alg_ablkcipher_setkey+0x300/0x4a0 [intel_qat] + skcipher_setkey_ablkcipher+0x2b/0x40 + __test_skcipher+0x1f3/0xb20 + ? cpumask_next_and+0x26/0x40 + ? find_busiest_group+0x10e/0x9d0 + ? preempt_count_add+0x49/0xa0 + ? try_module_get+0x61/0xf0 + ? crypto_mod_get+0x15/0x30 + ? __kmalloc+0x1df/0x1f0 + ? __crypto_alloc_tfm+0x116/0x180 + ? crypto_skcipher_init_tfm+0xa6/0x180 + ? crypto_create_tfm+0x4b/0xf0 + test_skcipher+0x21/0xa0 + alg_test_skcipher+0x3f/0xa0 + alg_test.part.6+0x126/0x2a0 + ? finish_task_switch+0x21b/0x260 + ? __schedule+0x1e9/0x800 + ? 
__wake_up_common+0x8d/0x140 + cryptomgr_test+0x40/0x50 + kthread+0xff/0x130 + ? cryptomgr_notify+0x540/0x540 + ? kthread_create_on_node+0x70/0x70 + ret_from_fork+0x24/0x50 + +Fixes: ed8ccaef52 ("crypto: qat - Add support for SRIOV") +Cc: stable@vger.kernel.org +Signed-off-by: Alexander Sverdlin +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/qat/qat_common/adf_common_drv.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/crypto/qat/qat_common/adf_common_drv.h ++++ b/drivers/crypto/qat/qat_common/adf_common_drv.h +@@ -95,7 +95,7 @@ struct service_hndl { + + static inline int get_current_node(void) + { +- return topology_physical_package_id(smp_processor_id()); ++ return topology_physical_package_id(raw_smp_processor_id()); + } + + int adf_service_register(struct service_hndl *service); diff --git a/queue-5.3/crypto-skcipher-unmap-pages-after-an-external-error.patch b/queue-5.3/crypto-skcipher-unmap-pages-after-an-external-error.patch new file mode 100644 index 00000000000..499a8b5ed8c --- /dev/null +++ b/queue-5.3/crypto-skcipher-unmap-pages-after-an-external-error.patch @@ -0,0 +1,121 @@ +From 0ba3c026e685573bd3534c17e27da7c505ac99c4 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Fri, 6 Sep 2019 13:13:06 +1000 +Subject: crypto: skcipher - Unmap pages after an external error + +From: Herbert Xu + +commit 0ba3c026e685573bd3534c17e27da7c505ac99c4 upstream. + +skcipher_walk_done may be called with an error by internal or +external callers. For those internal callers we shouldn't unmap +pages but for external callers we must unmap any pages that are +in use. + +This patch distinguishes between the two cases by checking whether +walk->nbytes is zero or not. For internal callers, we now set +walk->nbytes to zero prior to the call. For external callers, +walk->nbytes has always been non-zero (as zero is used to indicate +the termination of a walk). 
+ +Reported-by: Ard Biesheuvel +Fixes: 5cde0af2a982 ("[CRYPTO] cipher: Added block cipher type") +Cc: +Signed-off-by: Herbert Xu +Tested-by: Ard Biesheuvel +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + crypto/skcipher.c | 42 +++++++++++++++++++++++------------------- + 1 file changed, 23 insertions(+), 19 deletions(-) + +--- a/crypto/skcipher.c ++++ b/crypto/skcipher.c +@@ -90,7 +90,7 @@ static inline u8 *skcipher_get_spot(u8 * + return max(start, end_page); + } + +-static void skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) ++static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) + { + u8 *addr; + +@@ -98,19 +98,21 @@ static void skcipher_done_slow(struct sk + addr = skcipher_get_spot(addr, bsize); + scatterwalk_copychunks(addr, &walk->out, bsize, + (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1); ++ return 0; + } + + int skcipher_walk_done(struct skcipher_walk *walk, int err) + { +- unsigned int n; /* bytes processed */ +- bool more; ++ unsigned int n = walk->nbytes; ++ unsigned int nbytes = 0; + +- if (unlikely(err < 0)) ++ if (!n) + goto finish; + +- n = walk->nbytes - err; +- walk->total -= n; +- more = (walk->total != 0); ++ if (likely(err >= 0)) { ++ n -= err; ++ nbytes = walk->total - n; ++ } + + if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | + SKCIPHER_WALK_SLOW | +@@ -126,7 +128,7 @@ unmap_src: + memcpy(walk->dst.virt.addr, walk->page, n); + skcipher_unmap_dst(walk); + } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) { +- if (err) { ++ if (err > 0) { + /* + * Didn't process all bytes. Either the algorithm is + * broken, or this was the last step and it turned out +@@ -134,27 +136,29 @@ unmap_src: + * the algorithm requires it. 
+ */ + err = -EINVAL; +- goto finish; +- } +- skcipher_done_slow(walk, n); +- goto already_advanced; ++ nbytes = 0; ++ } else ++ n = skcipher_done_slow(walk, n); + } + ++ if (err > 0) ++ err = 0; ++ ++ walk->total = nbytes; ++ walk->nbytes = 0; ++ + scatterwalk_advance(&walk->in, n); + scatterwalk_advance(&walk->out, n); +-already_advanced: +- scatterwalk_done(&walk->in, 0, more); +- scatterwalk_done(&walk->out, 1, more); ++ scatterwalk_done(&walk->in, 0, nbytes); ++ scatterwalk_done(&walk->out, 1, nbytes); + +- if (more) { ++ if (nbytes) { + crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ? + CRYPTO_TFM_REQ_MAY_SLEEP : 0); + return skcipher_walk_next(walk); + } +- err = 0; +-finish: +- walk->nbytes = 0; + ++finish: + /* Short-circuit for the common/fast path. */ + if (!((unsigned long)walk->buffer | (unsigned long)walk->page)) + goto out; diff --git a/queue-5.3/kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch b/queue-5.3/kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch new file mode 100644 index 00000000000..90aa872f9a9 --- /dev/null +++ b/queue-5.3/kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch @@ -0,0 +1,122 @@ +From 2ad7a27deaf6d78545d97ab80874584f6990360e Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Mon, 26 Aug 2019 16:21:21 +1000 +Subject: KVM: PPC: Book3S: Enable XIVE native capability only if OPAL has required functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paul Mackerras + +commit 2ad7a27deaf6d78545d97ab80874584f6990360e upstream. + +There are some POWER9 machines where the OPAL firmware does not support +the OPAL_XIVE_GET_QUEUE_STATE and OPAL_XIVE_SET_QUEUE_STATE calls. +The impact of this is that a guest using XIVE natively will not be able +to be migrated successfully. 
On the source side, the get_attr operation +on the KVM native device for the KVM_DEV_XIVE_GRP_EQ_CONFIG attribute +will fail; on the destination side, the set_attr operation for the same +attribute will fail. + +This adds tests for the existence of the OPAL get/set queue state +functions, and if they are not supported, the XIVE-native KVM device +is not created and the KVM_CAP_PPC_IRQ_XIVE capability returns false. +Userspace can then either provide a software emulation of XIVE, or +else tell the guest that it does not have a XIVE controller available +to it. + +Cc: stable@vger.kernel.org # v5.2+ +Fixes: 3fab2d10588e ("KVM: PPC: Book3S HV: XIVE: Activate XIVE exploitation mode") +Reviewed-by: David Gibson +Reviewed-by: Cédric Le Goater +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/kvm_ppc.h | 1 + + arch/powerpc/include/asm/xive.h | 1 + + arch/powerpc/kvm/book3s.c | 8 +++++--- + arch/powerpc/kvm/book3s_xive_native.c | 5 +++++ + arch/powerpc/kvm/powerpc.c | 3 ++- + arch/powerpc/sysdev/xive/native.c | 7 +++++++ + 6 files changed, 21 insertions(+), 4 deletions(-) + +--- a/arch/powerpc/include/asm/kvm_ppc.h ++++ b/arch/powerpc/include/asm/kvm_ppc.h +@@ -598,6 +598,7 @@ extern int kvmppc_xive_native_get_vp(str + union kvmppc_one_reg *val); + extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, + union kvmppc_one_reg *val); ++extern bool kvmppc_xive_native_supported(void); + + #else + static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, +--- a/arch/powerpc/include/asm/xive.h ++++ b/arch/powerpc/include/asm/xive.h +@@ -127,6 +127,7 @@ extern int xive_native_get_queue_state(u + extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, + u32 qindex); + extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state); ++extern bool xive_native_has_queue_state_support(void); + + #else + +--- a/arch/powerpc/kvm/book3s.c ++++ b/arch/powerpc/kvm/book3s.c +@@ -1083,9 +1083,11 @@ 
static int kvmppc_book3s_init(void) + if (xics_on_xive()) { + kvmppc_xive_init_module(); + kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); +- kvmppc_xive_native_init_module(); +- kvm_register_device_ops(&kvm_xive_native_ops, +- KVM_DEV_TYPE_XIVE); ++ if (kvmppc_xive_native_supported()) { ++ kvmppc_xive_native_init_module(); ++ kvm_register_device_ops(&kvm_xive_native_ops, ++ KVM_DEV_TYPE_XIVE); ++ } + } else + #endif + kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); +--- a/arch/powerpc/kvm/book3s_xive_native.c ++++ b/arch/powerpc/kvm/book3s_xive_native.c +@@ -1171,6 +1171,11 @@ int kvmppc_xive_native_set_vp(struct kvm + return 0; + } + ++bool kvmppc_xive_native_supported(void) ++{ ++ return xive_native_has_queue_state_support(); ++} ++ + static int xive_native_debug_show(struct seq_file *m, void *private) + { + struct kvmppc_xive *xive = m->private; +--- a/arch/powerpc/kvm/powerpc.c ++++ b/arch/powerpc/kvm/powerpc.c +@@ -561,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct + * a POWER9 processor) and the PowerNV platform, as + * nested is not yet supported. 
+ */ +- r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE); ++ r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) && ++ kvmppc_xive_native_supported(); + break; + #endif + +--- a/arch/powerpc/sysdev/xive/native.c ++++ b/arch/powerpc/sysdev/xive/native.c +@@ -811,6 +811,13 @@ int xive_native_set_queue_state(u32 vp_i + } + EXPORT_SYMBOL_GPL(xive_native_set_queue_state); + ++bool xive_native_has_queue_state_support(void) ++{ ++ return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) && ++ opal_check_token(OPAL_XIVE_SET_QUEUE_STATE); ++} ++EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support); ++ + int xive_native_get_vp_state(u32 vp_id, u64 *out_state) + { + __be64 state; diff --git a/queue-5.3/kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch b/queue-5.3/kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch new file mode 100644 index 00000000000..6483604fcc7 --- /dev/null +++ b/queue-5.3/kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch @@ -0,0 +1,71 @@ +From d28eafc5a64045c78136162af9d4ba42f8230080 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Tue, 27 Aug 2019 11:31:37 +1000 +Subject: KVM: PPC: Book3S HV: Check for MMU ready on piggybacked virtual cores + +From: Paul Mackerras + +commit d28eafc5a64045c78136162af9d4ba42f8230080 upstream. + +When we are running multiple vcores on the same physical core, they +could be from different VMs and so it is possible that one of the +VMs could have its arch.mmu_ready flag cleared (for example by a +concurrent HPT resize) when we go to run it on a physical core. +We currently check the arch.mmu_ready flag for the primary vcore +but not the flags for the other vcores that will be run alongside +it. This adds that check, and also a check when we select the +secondary vcores from the preempted vcores list. 
+ +Cc: stable@vger.kernel.org # v4.14+ +Fixes: 38c53af85306 ("KVM: PPC: Book3S HV: Fix exclusion between HPT resizing and other HPT updates") +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -2860,7 +2860,7 @@ static void collect_piggybacks(struct co + if (!spin_trylock(&pvc->lock)) + continue; + prepare_threads(pvc); +- if (!pvc->n_runnable) { ++ if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) { + list_del_init(&pvc->preempt_list); + if (pvc->runner == NULL) { + pvc->vcore_state = VCORE_INACTIVE; +@@ -2881,15 +2881,20 @@ static void collect_piggybacks(struct co + spin_unlock(&lp->lock); + } + +-static bool recheck_signals(struct core_info *cip) ++static bool recheck_signals_and_mmu(struct core_info *cip) + { + int sub, i; + struct kvm_vcpu *vcpu; ++ struct kvmppc_vcore *vc; + +- for (sub = 0; sub < cip->n_subcores; ++sub) +- for_each_runnable_thread(i, vcpu, cip->vc[sub]) ++ for (sub = 0; sub < cip->n_subcores; ++sub) { ++ vc = cip->vc[sub]; ++ if (!vc->kvm->arch.mmu_ready) ++ return true; ++ for_each_runnable_thread(i, vcpu, vc) + if (signal_pending(vcpu->arch.run_task)) + return true; ++ } + return false; + } + +@@ -3119,7 +3124,7 @@ static noinline void kvmppc_run_core(str + local_irq_disable(); + hard_irq_disable(); + if (lazy_irq_pending() || need_resched() || +- recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) { ++ recheck_signals_and_mmu(&core_info)) { + local_irq_enable(); + vc->vcore_state = VCORE_INACTIVE; + /* Unlock all except the primary vcore */ diff --git a/queue-5.3/kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch b/queue-5.3/kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch new file mode 100644 index 00000000000..9259466b6d0 --- /dev/null +++ 
b/queue-5.3/kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch @@ -0,0 +1,52 @@ +From ff42df49e75f053a8a6b4c2533100cdcc23afe69 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Tue, 27 Aug 2019 11:35:40 +1000 +Subject: KVM: PPC: Book3S HV: Don't lose pending doorbell request on migration on P9 + +From: Paul Mackerras + +commit ff42df49e75f053a8a6b4c2533100cdcc23afe69 upstream. + +On POWER9, when userspace reads the value of the DPDES register on a +vCPU, it is possible for 0 to be returned although there is a doorbell +interrupt pending for the vCPU. This can lead to a doorbell interrupt +being lost across migration. If the guest kernel uses doorbell +interrupts for IPIs, then it could malfunction because of the lost +interrupt. + +This happens because a newly-generated doorbell interrupt is signalled +by setting vcpu->arch.doorbell_request to 1; the DPDES value in +vcpu->arch.vcore->dpdes is not updated, because it can only be updated +when holding the vcpu mutex, in order to avoid races. + +To fix this, we OR in vcpu->arch.doorbell_request when reading the +DPDES value. + +Cc: stable@vger.kernel.org # v4.13+ +Fixes: 579006944e0d ("KVM: PPC: Book3S HV: Virtualize doorbell facility on POWER9") +Signed-off-by: Paul Mackerras +Tested-by: Alexey Kardashevskiy +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct + *val = get_reg_val(id, vcpu->arch.pspb); + break; + case KVM_REG_PPC_DPDES: +- *val = get_reg_val(id, vcpu->arch.vcore->dpdes); ++ /* ++ * On POWER9, where we are emulating msgsndp etc., ++ * we return 1 bit for each vcpu, which can come from ++ * either vcore->dpdes or doorbell_request. ++ * On POWER8, doorbell_request is 0. 
++ */ ++ *val = get_reg_val(id, vcpu->arch.vcore->dpdes | ++ vcpu->arch.doorbell_request); + break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vcore->vtb); diff --git a/queue-5.3/kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch b/queue-5.3/kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch new file mode 100644 index 00000000000..93729702e4c --- /dev/null +++ b/queue-5.3/kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch @@ -0,0 +1,89 @@ +From 8d4ba9c931bc384bcc6889a43915aaaf19d3e499 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Tue, 13 Aug 2019 20:01:00 +1000 +Subject: KVM: PPC: Book3S HV: Don't push XIVE context when not using XIVE device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paul Mackerras + +commit 8d4ba9c931bc384bcc6889a43915aaaf19d3e499 upstream. + +At present, when running a guest on POWER9 using HV KVM but not using +an in-kernel interrupt controller (XICS or XIVE), for example if QEMU +is run with the kernel_irqchip=off option, the guest entry code goes +ahead and tries to load the guest context into the XIVE hardware, even +though no context has been set up. + +To fix this, we check that the "CAM word" is non-zero before pushing +it to the hardware. The CAM word is initialized to a non-zero value +in kvmppc_xive_connect_vcpu() and kvmppc_xive_native_connect_vcpu(), +and is now cleared in kvmppc_xive_{,native_}cleanup_vcpu. 
+ +Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") +Cc: stable@vger.kernel.org # v4.12+ +Reported-by: Cédric Le Goater +Signed-off-by: Paul Mackerras +Reviewed-by: Cédric Le Goater +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190813100100.GC9567@blackberry +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 ++ + arch/powerpc/kvm/book3s_xive.c | 11 ++++++++++- + arch/powerpc/kvm/book3s_xive_native.c | 3 +++ + 3 files changed, 15 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S ++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S +@@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_3 + ld r11, VCPU_XIVE_SAVED_STATE(r4) + li r9, TM_QW1_OS + lwz r8, VCPU_XIVE_CAM_WORD(r4) ++ cmpwi r8, 0 ++ beq no_xive + li r7, TM_QW1_OS + TM_WORD2 + mfmsr r0 + andi. r0, r0, MSR_DR /* in real mode? */ +--- a/arch/powerpc/kvm/book3s_xive.c ++++ b/arch/powerpc/kvm/book3s_xive.c +@@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vc + void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt; + u64 pq; + +- if (!tima) ++ /* ++ * Nothing to do if the platform doesn't have a XIVE ++ * or this vCPU doesn't have its own XIVE context ++ * (e.g. because it's not using an in-kernel interrupt controller). 
++ */ ++ if (!tima || !vcpu->arch.xive_cam_word) + return; ++ + eieio(); + __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); + __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2); +@@ -1146,6 +1152,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); + ++ /* Clear the cam word so guest entry won't try to push context */ ++ vcpu->arch.xive_cam_word = 0; ++ + /* Free the queues */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; +--- a/arch/powerpc/kvm/book3s_xive_native.c ++++ b/arch/powerpc/kvm/book3s_xive_native.c +@@ -81,6 +81,9 @@ void kvmppc_xive_native_cleanup_vcpu(str + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); + ++ /* Clear the cam word so guest entry won't try to push context */ ++ vcpu->arch.xive_cam_word = 0; ++ + /* Free the queues */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + kvmppc_xive_native_cleanup_queue(vcpu, i); diff --git a/queue-5.3/kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch b/queue-5.3/kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch new file mode 100644 index 00000000000..21df931431c --- /dev/null +++ b/queue-5.3/kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch @@ -0,0 +1,111 @@ +From 959c5d5134786b4988b6fdd08e444aa67d1667ed Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Tue, 13 Aug 2019 20:03:49 +1000 +Subject: KVM: PPC: Book3S HV: Fix race in re-enabling XIVE escalation interrupts + +From: Paul Mackerras + +commit 959c5d5134786b4988b6fdd08e444aa67d1667ed upstream. + +Escalation interrupts are interrupts sent to the host by the XIVE +hardware when it has an interrupt to deliver to a guest VCPU but that +VCPU is not running anywhere in the system. Hence we disable the +escalation interrupt for the VCPU being run when we enter the guest +and re-enable it when the guest does an H_CEDE hypercall indicating +it is idle. 
+ +It is possible that an escalation interrupt gets generated just as we +are entering the guest. In that case the escalation interrupt may be +using a queue entry in one of the interrupt queues, and that queue +entry may not have been processed when the guest exits with an H_CEDE. +The existing entry code detects this situation and does not clear the +vcpu->arch.xive_esc_on flag as an indication that there is a pending +queue entry (if the queue entry gets processed, xive_esc_irq() will +clear the flag). There is a comment in the code saying that if the +flag is still set on H_CEDE, we have to abort the cede rather than +re-enabling the escalation interrupt, lest we end up with two +occurrences of the escalation interrupt in the interrupt queue. + +However, the exit code doesn't do that; it aborts the cede in the sense +that vcpu->arch.ceded gets cleared, but it still enables the escalation +interrupt by setting the source's PQ bits to 00. Instead we need to +set the PQ bits to 10, indicating that an interrupt has been triggered. +We also need to avoid setting vcpu->arch.xive_esc_on in this case +(i.e. vcpu->arch.xive_esc_on seen to be set on H_CEDE) because +xive_esc_irq() will run at some point and clear it, and if we race with +that we may end up with an incorrect result (i.e. xive_esc_on set when +the escalation interrupt has just been handled). + +It is extremely unlikely that having two queue entries would cause +observable problems; theoretically it could cause queue overflow, but +the CPU would have to have thousands of interrupts targeted to it for +that to be possible. However, this fix will also make it possible to +determine accurately whether there is an unhandled escalation +interrupt in the queue, which will be needed by the following patch.
+ +Fixes: 9b9b13a6d153 ("KVM: PPC: Book3S HV: Keep XIVE escalation interrupt masked unless ceded") +Cc: stable@vger.kernel.org # v4.16+ +Signed-off-by: Paul Mackerras +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190813100349.GD9567@blackberry +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 36 ++++++++++++++++++++------------ + 1 file changed, 23 insertions(+), 13 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S ++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S +@@ -2833,29 +2833,39 @@ kvm_cede_prodded: + kvm_cede_exit: + ld r9, HSTATE_KVM_VCPU(r13) + #ifdef CONFIG_KVM_XICS +- /* Abort if we still have a pending escalation */ ++ /* are we using XIVE with single escalation? */ ++ ld r10, VCPU_XIVE_ESC_VADDR(r9) ++ cmpdi r10, 0 ++ beq 3f ++ li r6, XIVE_ESB_SET_PQ_00 ++ /* ++ * If we still have a pending escalation, abort the cede, ++ * and we must set PQ to 10 rather than 00 so that we don't ++ * potentially end up with two entries for the escalation ++ * interrupt in the XIVE interrupt queue. In that case ++ * we also don't want to set xive_esc_on to 1 here in ++ * case we race with xive_esc_irq(). ++ */ + lbz r5, VCPU_XIVE_ESC_ON(r9) + cmpwi r5, 0 +- beq 1f ++ beq 4f + li r0, 0 + stb r0, VCPU_CEDED(r9) +-1: /* Enable XIVE escalation */ +- li r5, XIVE_ESB_SET_PQ_00 ++ li r6, XIVE_ESB_SET_PQ_10 ++ b 5f ++4: li r0, 1 ++ stb r0, VCPU_XIVE_ESC_ON(r9) ++ /* make sure store to xive_esc_on is seen before xive_esc_irq runs */ ++ sync ++5: /* Enable XIVE escalation */ + mfmsr r0 + andi. r0, r0, MSR_DR /* in real mode? 
*/ + beq 1f +- ld r10, VCPU_XIVE_ESC_VADDR(r9) +- cmpdi r10, 0 +- beq 3f +- ldx r0, r10, r5 ++ ldx r0, r10, r6 + b 2f + 1: ld r10, VCPU_XIVE_ESC_RADDR(r9) +- cmpdi r10, 0 +- beq 3f +- ldcix r0, r10, r5 ++ ldcix r0, r10, r6 + 2: sync +- li r0, 1 +- stb r0, VCPU_XIVE_ESC_ON(r9) + #endif /* CONFIG_KVM_XICS */ + 3: b guest_exit_cont + diff --git a/queue-5.3/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch b/queue-5.3/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch new file mode 100644 index 00000000000..8108b6a6978 --- /dev/null +++ b/queue-5.3/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch @@ -0,0 +1,111 @@ +From 237aed48c642328ff0ab19b63423634340224a06 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 6 Aug 2019 19:25:38 +0200 +Subject: KVM: PPC: Book3S HV: XIVE: Free escalation interrupts before disabling the VP +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Cédric Le Goater + +commit 237aed48c642328ff0ab19b63423634340224a06 upstream. + +When a vCPU is brought down, the XIVE VP (Virtual Processor) is first +disabled and then the event notification queues are freed. When freeing +the queues, we check for possible escalation interrupts and free them +also. + +But when a XIVE VP is disabled, the underlying XIVE ENDs also are +disabled in OPAL. When an END (Event Notification Descriptor) is +disabled, its ESB pages (ESn and ESe) are disabled and loads return all +1s. Which means that any access on the ESB page of the escalation +interrupt will return invalid values. + +When an interrupt is freed, the shutdown handler computes a 'saved_p' +field from the value returned by a load in xive_do_source_set_mask(). +This value is incorrect for escalation interrupts for the reason +described above.
+ +This has no impact on Linux/KVM today because we don't make use of it +but we will introduce in future changes a xive_get_irqchip_state() +handler. This handler will use the 'saved_p' field to return the state +of an interrupt and 'saved_p' being incorrect, softlockup will occur. + +Fix the vCPU cleanup sequence by first freeing the escalation interrupts +if any, then disable the XIVE VP and last free the queues. + +Fixes: 90c73795afa2 ("KVM: PPC: Book3S HV: Add a new KVM device for the XIVE native exploitation mode") +Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") +Cc: stable@vger.kernel.org # v4.12+ +Signed-off-by: Cédric Le Goater +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190806172538.5087-1-clg@kaod.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_xive.c | 18 ++++++++++-------- + arch/powerpc/kvm/book3s_xive_native.c | 12 +++++++----- + 2 files changed, 17 insertions(+), 13 deletions(-) + +--- a/arch/powerpc/kvm/book3s_xive.c ++++ b/arch/powerpc/kvm/book3s_xive.c +@@ -1134,20 +1134,22 @@ void kvmppc_xive_cleanup_vcpu(struct kvm + /* Mask the VP IPI */ + xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01); + +- /* Disable the VP */ +- xive_native_disable_vp(xc->vp_id); +- +- /* Free the queues & associated interrupts */ ++ /* Free escalations */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { +- struct xive_q *q = &xc->queues[i]; +- +- /* Free the escalation irq */ + if (xc->esc_virq[i]) { + free_irq(xc->esc_virq[i], vcpu); + irq_dispose_mapping(xc->esc_virq[i]); + kfree(xc->esc_virq_names[i]); + } +- /* Free the queue */ ++ } ++ ++ /* Disable the VP */ ++ xive_native_disable_vp(xc->vp_id); ++ ++ /* Free the queues */ ++ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { ++ struct xive_q *q = &xc->queues[i]; ++ + xive_native_disable_queue(xc->vp_id, q, i); + if (q->qpage) { + free_pages((unsigned long)q->qpage, +--- a/arch/powerpc/kvm/book3s_xive_native.c ++++ 
b/arch/powerpc/kvm/book3s_xive_native.c +@@ -67,10 +67,7 @@ void kvmppc_xive_native_cleanup_vcpu(str + xc->valid = false; + kvmppc_xive_disable_vcpu_interrupts(vcpu); + +- /* Disable the VP */ +- xive_native_disable_vp(xc->vp_id); +- +- /* Free the queues & associated interrupts */ ++ /* Free escalations */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + /* Free the escalation irq */ + if (xc->esc_virq[i]) { +@@ -79,8 +76,13 @@ void kvmppc_xive_native_cleanup_vcpu(str + kfree(xc->esc_virq_names[i]); + xc->esc_virq[i] = 0; + } ++ } + +- /* Free the queue */ ++ /* Disable the VP */ ++ xive_native_disable_vp(xc->vp_id); ++ ++ /* Free the queues */ ++ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + kvmppc_xive_native_cleanup_queue(vcpu, i); + } + diff --git a/queue-5.3/kvm-s390-fix-__insn32_query-inline-assembly.patch b/queue-5.3/kvm-s390-fix-__insn32_query-inline-assembly.patch new file mode 100644 index 00000000000..25d962a6963 --- /dev/null +++ b/queue-5.3/kvm-s390-fix-__insn32_query-inline-assembly.patch @@ -0,0 +1,49 @@ +From b1c41ac3ce569b04644bb1e3fd28926604637da3 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Wed, 2 Oct 2019 14:24:47 +0200 +Subject: KVM: s390: fix __insn32_query() inline assembly + +From: Heiko Carstens + +commit b1c41ac3ce569b04644bb1e3fd28926604637da3 upstream. + +The inline assembly constraints of __insn32_query() tell the compiler +that only the first byte of "query" is being written to. Intended was +probably that 32 bytes are written to. + +Fix and simplify the code and just use a "memory" clobber. 
+ +Fixes: d668139718a9 ("KVM: s390: provide query function for instructions returning 32 byte") +Cc: stable@vger.kernel.org # v5.2+ +Acked-by: Christian Borntraeger +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/kvm-s390.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -332,7 +332,7 @@ static inline int plo_test_bit(unsigned + return cc == 0; + } + +-static inline void __insn32_query(unsigned int opcode, u8 query[32]) ++static inline void __insn32_query(unsigned int opcode, u8 *query) + { + register unsigned long r0 asm("0") = 0; /* query function */ + register unsigned long r1 asm("1") = (unsigned long) query; +@@ -340,9 +340,9 @@ static inline void __insn32_query(unsign + asm volatile( + /* Parameter regs are ignored */ + " .insn rrf,%[opc] << 16,2,4,6,0\n" +- : "=m" (*query) ++ : + : "d" (r0), "a" (r1), [opc] "i" (opcode) +- : "cc"); ++ : "cc", "memory"); + } + + #define INSN_SORTL 0xb938 diff --git a/queue-5.3/kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch b/queue-5.3/kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch new file mode 100644 index 00000000000..b890b7aa8dd --- /dev/null +++ b/queue-5.3/kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch @@ -0,0 +1,50 @@ +From a13b03bbb4575b350b46090af4dfd30e735aaed1 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 29 Aug 2019 14:25:17 +0200 +Subject: KVM: s390: Test for bad access register and size at the start of S390_MEM_OP + +From: Thomas Huth + +commit a13b03bbb4575b350b46090af4dfd30e735aaed1 upstream. + +If the KVM_S390_MEM_OP ioctl is called with an access register >= 16, +then there is certainly a bug in the calling userspace application. +We check for wrong access registers, but only if the vCPU was already +in the access register mode before (i.e. 
the SIE block has recorded +it). The check is also buried somewhere deep in the calling chain (in +the function ar_translation()), so this is somewhat hard to find. + +It's better to always report an error to the userspace in case this +field is set wrong, and it's safer in the KVM code if we block wrong +values here early instead of relying on a check somewhere deep down +the calling chain, so let's add another check to kvm_s390_guest_mem_op() +directly. + +We also should check that the "size" is non-zero here (thanks to Janosch +Frank for the hint!). If we do not check the size, we could call vmalloc() +with this 0 value, and this will cause a kernel warning. + +Signed-off-by: Thomas Huth +Link: https://lkml.kernel.org/r/20190829122517.31042-1-thuth@redhat.com +Reviewed-by: Cornelia Huck +Reviewed-by: Janosch Frank +Reviewed-by: David Hildenbrand +Cc: stable@vger.kernel.org +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/kvm-s390.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -4257,7 +4257,7 @@ static long kvm_s390_guest_mem_op(struct + const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION + | KVM_S390_MEMOP_F_CHECK_ONLY; + +- if (mop->flags & ~supported_flags) ++ if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) + return -EINVAL; + + if (mop->size > MEM_OP_MAX_SIZE) diff --git a/queue-5.3/kvm-x86-fix-userspace-set-invalid-cr4.patch b/queue-5.3/kvm-x86-fix-userspace-set-invalid-cr4.patch new file mode 100644 index 00000000000..c4675157cf2 --- /dev/null +++ b/queue-5.3/kvm-x86-fix-userspace-set-invalid-cr4.patch @@ -0,0 +1,123 @@ +From 3ca94192278ca8de169d78c085396c424be123b3 Mon Sep 17 00:00:00 2001 +From: Wanpeng Li +Date: Wed, 18 Sep 2019 17:50:10 +0800 +Subject: KVM: X86: Fix userspace set invalid CR4 + +From: Wanpeng Li + +commit 3ca94192278ca8de169d78c085396c424be123b3 upstream. 
+ +Reported by syzkaller: + + WARNING: CPU: 0 PID: 6544 at /home/kernel/data/kvm/arch/x86/kvm//vmx/vmx.c:4689 handle_desc+0x37/0x40 [kvm_intel] + CPU: 0 PID: 6544 Comm: a.out Tainted: G OE 5.3.0-rc4+ #4 + RIP: 0010:handle_desc+0x37/0x40 [kvm_intel] + Call Trace: + vmx_handle_exit+0xbe/0x6b0 [kvm_intel] + vcpu_enter_guest+0x4dc/0x18d0 [kvm] + kvm_arch_vcpu_ioctl_run+0x407/0x660 [kvm] + kvm_vcpu_ioctl+0x3ad/0x690 [kvm] + do_vfs_ioctl+0xa2/0x690 + ksys_ioctl+0x6d/0x80 + __x64_sys_ioctl+0x1a/0x20 + do_syscall_64+0x74/0x720 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +When CR4.UMIP is set, guest should have UMIP cpuid flag. Current +kvm set_sregs function doesn't have such check when userspace inputs +sregs values. SECONDARY_EXEC_DESC is enabled on writes to CR4.UMIP +in vmx_set_cr4 though guest doesn't have UMIP cpuid flag. The testcase +triggers handle_desc warning when executing ltr instruction since +guest architectural CR4 doesn't set UMIP. This patch fixes it by +adding valid CR4 and CPUID combination checking in __set_sregs.
+ +syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=138efb99600000 + +Reported-by: syzbot+0f1819555fbdce992df9@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Signed-off-by: Wanpeng Li +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 38 +++++++++++++++++++++----------------- + 1 file changed, 21 insertions(+), 17 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -884,34 +884,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u + } + EXPORT_SYMBOL_GPL(kvm_set_xcr); + +-int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) + { +- unsigned long old_cr4 = kvm_read_cr4(vcpu); +- unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | +- X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; +- + if (cr4 & CR4_RESERVED_BITS) +- return 1; ++ return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) +- return 1; ++ return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) +- return 1; ++ return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) +- return 1; ++ return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) +- return 1; ++ return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) +- return 1; ++ return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) +- return 1; ++ return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ unsigned long old_cr4 = kvm_read_cr4(vcpu); ++ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | ++ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; ++ ++ if (kvm_valid_cr4(vcpu, cr4)) + return 1; + + if 
(is_long_mode(vcpu)) { +@@ -8598,10 +8606,6 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); + + static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) + { +- if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && +- (sregs->cr4 & X86_CR4_OSXSAVE)) +- return -EINVAL; +- + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { + /* + * When EFER.LME and CR0.PG are set, the processor is in +@@ -8620,7 +8624,7 @@ static int kvm_valid_sregs(struct kvm_vc + return -EINVAL; + } + +- return 0; ++ return kvm_valid_cr4(vcpu, sregs->cr4); + } + + static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) diff --git a/queue-5.3/mips-treat-loongson-extensions-as-ases.patch b/queue-5.3/mips-treat-loongson-extensions-as-ases.patch new file mode 100644 index 00000000000..eab654e54bf --- /dev/null +++ b/queue-5.3/mips-treat-loongson-extensions-as-ases.patch @@ -0,0 +1,107 @@ +From d2f965549006acb865c4638f1f030ebcefdc71f6 Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Wed, 29 May 2019 16:42:59 +0800 +Subject: MIPS: Treat Loongson Extensions as ASEs + +From: Jiaxun Yang + +commit d2f965549006acb865c4638f1f030ebcefdc71f6 upstream. + +Recently, binutils had split Loongson-3 Extensions into four ASEs: +MMI, CAM, EXT, EXT2. This patch does the same thing in kernel and expose +them in cpuinfo so applications can probe supported ASEs at runtime.
+ +Signed-off-by: Jiaxun Yang +Cc: Huacai Chen +Cc: Yunqiang Su +Cc: stable@vger.kernel.org # v4.14+ +Signed-off-by: Paul Burton +Cc: linux-mips@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/include/asm/cpu-features.h | 16 ++++++++++++++++ + arch/mips/include/asm/cpu.h | 4 ++++ + arch/mips/kernel/cpu-probe.c | 6 ++++++ + arch/mips/kernel/proc.c | 4 ++++ + 4 files changed, 30 insertions(+) + +--- a/arch/mips/include/asm/cpu-features.h ++++ b/arch/mips/include/asm/cpu-features.h +@@ -397,6 +397,22 @@ + #define cpu_has_dsp3 __ase(MIPS_ASE_DSP3) + #endif + ++#ifndef cpu_has_loongson_mmi ++#define cpu_has_loongson_mmi __ase(MIPS_ASE_LOONGSON_MMI) ++#endif ++ ++#ifndef cpu_has_loongson_cam ++#define cpu_has_loongson_cam __ase(MIPS_ASE_LOONGSON_CAM) ++#endif ++ ++#ifndef cpu_has_loongson_ext ++#define cpu_has_loongson_ext __ase(MIPS_ASE_LOONGSON_EXT) ++#endif ++ ++#ifndef cpu_has_loongson_ext2 ++#define cpu_has_loongson_ext2 __ase(MIPS_ASE_LOONGSON_EXT2) ++#endif ++ + #ifndef cpu_has_mipsmt + #define cpu_has_mipsmt __isa_lt_and_ase(6, MIPS_ASE_MIPSMT) + #endif +--- a/arch/mips/include/asm/cpu.h ++++ b/arch/mips/include/asm/cpu.h +@@ -433,5 +433,9 @@ enum cpu_type_enum { + #define MIPS_ASE_MSA 0x00000100 /* MIPS SIMD Architecture */ + #define MIPS_ASE_DSP3 0x00000200 /* Signal Processing ASE Rev 3*/ + #define MIPS_ASE_MIPS16E2 0x00000400 /* MIPS16e2 */ ++#define MIPS_ASE_LOONGSON_MMI 0x00000800 /* Loongson MultiMedia extensions Instructions */ ++#define MIPS_ASE_LOONGSON_CAM 0x00001000 /* Loongson CAM */ ++#define MIPS_ASE_LOONGSON_EXT 0x00002000 /* Loongson EXTensions */ ++#define MIPS_ASE_LOONGSON_EXT2 0x00004000 /* Loongson EXTensions R2 */ + + #endif /* _ASM_CPU_H */ +--- a/arch/mips/kernel/cpu-probe.c ++++ b/arch/mips/kernel/cpu-probe.c +@@ -1573,6 +1573,8 @@ static inline void cpu_probe_legacy(stru + __cpu_name[cpu] = "ICT Loongson-3"; + set_elf_platform(cpu, "loongson3a"); + set_isa(c, MIPS_CPU_ISA_M64R1); ++ c->ases |= (MIPS_ASE_LOONGSON_MMI 
| MIPS_ASE_LOONGSON_CAM | ++ MIPS_ASE_LOONGSON_EXT); + break; + case PRID_REV_LOONGSON3B_R1: + case PRID_REV_LOONGSON3B_R2: +@@ -1580,6 +1582,8 @@ static inline void cpu_probe_legacy(stru + __cpu_name[cpu] = "ICT Loongson-3"; + set_elf_platform(cpu, "loongson3b"); + set_isa(c, MIPS_CPU_ISA_M64R1); ++ c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | ++ MIPS_ASE_LOONGSON_EXT); + break; + } + +@@ -1946,6 +1950,8 @@ static inline void cpu_probe_loongson(st + decode_configs(c); + c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; + c->writecombine = _CACHE_UNCACHED_ACCELERATED; ++ c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | ++ MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); + break; + default: + panic("Unknown Loongson Processor ID!"); +--- a/arch/mips/kernel/proc.c ++++ b/arch/mips/kernel/proc.c +@@ -124,6 +124,10 @@ static int show_cpuinfo(struct seq_file + if (cpu_has_eva) seq_printf(m, "%s", " eva"); + if (cpu_has_htw) seq_printf(m, "%s", " htw"); + if (cpu_has_xpa) seq_printf(m, "%s", " xpa"); ++ if (cpu_has_loongson_mmi) seq_printf(m, "%s", " loongson-mmi"); ++ if (cpu_has_loongson_cam) seq_printf(m, "%s", " loongson-cam"); ++ if (cpu_has_loongson_ext) seq_printf(m, "%s", " loongson-ext"); ++ if (cpu_has_loongson_ext2) seq_printf(m, "%s", " loongson-ext2"); + seq_printf(m, "\n"); + + if (cpu_has_mmips) { diff --git a/queue-5.3/nbd-fix-max-number-of-supported-devs.patch b/queue-5.3/nbd-fix-max-number-of-supported-devs.patch new file mode 100644 index 00000000000..89a14de3bbe --- /dev/null +++ b/queue-5.3/nbd-fix-max-number-of-supported-devs.patch @@ -0,0 +1,159 @@ +From e9e006f5fcf2bab59149cb38a48a4817c1b538b4 Mon Sep 17 00:00:00 2001 +From: Mike Christie +Date: Sun, 4 Aug 2019 14:10:06 -0500 +Subject: nbd: fix max number of supported devs + +From: Mike Christie + +commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4 upstream. 
+ +This fixes a bug added in 4.10 with commit: + +commit 9561a7ade0c205bc2ee035a2ac880478dcc1a024 +Author: Josef Bacik +Date: Tue Nov 22 14:04:40 2016 -0500 + + nbd: add multi-connection support + +that limited the number of devices to 256. Before the patch we could +create 1000s of devices, but the patch switched us from using our +own thread to using a work queue which has a default limit of 256 +active works. + +The problem is that our recv_work function sits in a loop until +disconnection but only handles IO for one connection. The work is +started when the connection is started/restarted, but if we end up +creating 257 or more connections, the queue_work call just queues +connection257+'s recv_work and that waits for connection 1 - 256's +recv_work to be disconnected and that work instance completing. + +Instead of reverting back to kthreads, this has us allocate a +workqueue_struct per device, so we can block in the work. + +Cc: stable@vger.kernel.org +Reviewed-by: Josef Bacik +Signed-off-by: Mike Christie +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/nbd.c | 39 +++++++++++++++++++++++++-------------- + 1 file changed, 25 insertions(+), 14 deletions(-) + +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -108,6 +108,7 @@ struct nbd_device { + struct nbd_config *config; + struct mutex config_lock; + struct gendisk *disk; ++ struct workqueue_struct *recv_workq; + + struct list_head list; + struct task_struct *task_recv; +@@ -138,7 +139,6 @@ static struct dentry *nbd_dbg_dir; + + static unsigned int nbds_max = 16; + static int max_part = 16; +-static struct workqueue_struct *recv_workqueue; + static int part_shift; + + static int nbd_dev_dbg_init(struct nbd_device *nbd); +@@ -1038,7 +1038,7 @@ static int nbd_reconnect_socket(struct n + /* We take the tx_mutex in an error path in the recv_work, so we + * need to queue_work outside of the tx_mutex. 
+ */ +- queue_work(recv_workqueue, &args->work); ++ queue_work(nbd->recv_workq, &args->work); + + atomic_inc(&config->live_connections); + wake_up(&config->conn_wait); +@@ -1139,6 +1139,10 @@ static void nbd_config_put(struct nbd_de + kfree(nbd->config); + nbd->config = NULL; + ++ if (nbd->recv_workq) ++ destroy_workqueue(nbd->recv_workq); ++ nbd->recv_workq = NULL; ++ + nbd->tag_set.timeout = 0; + nbd->disk->queue->limits.discard_granularity = 0; + nbd->disk->queue->limits.discard_alignment = 0; +@@ -1167,6 +1171,14 @@ static int nbd_start_device(struct nbd_d + return -EINVAL; + } + ++ nbd->recv_workq = alloc_workqueue("knbd%d-recv", ++ WQ_MEM_RECLAIM | WQ_HIGHPRI | ++ WQ_UNBOUND, 0, nbd->index); ++ if (!nbd->recv_workq) { ++ dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n"); ++ return -ENOMEM; ++ } ++ + blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); + nbd->task_recv = current; + +@@ -1197,7 +1209,7 @@ static int nbd_start_device(struct nbd_d + INIT_WORK(&args->work, recv_work); + args->nbd = nbd; + args->index = i; +- queue_work(recv_workqueue, &args->work); ++ queue_work(nbd->recv_workq, &args->work); + } + nbd_size_update(nbd); + return error; +@@ -1217,8 +1229,10 @@ static int nbd_start_device_ioctl(struct + mutex_unlock(&nbd->config_lock); + ret = wait_event_interruptible(config->recv_wq, + atomic_read(&config->recv_threads) == 0); +- if (ret) ++ if (ret) { + sock_shutdown(nbd); ++ flush_workqueue(nbd->recv_workq); ++ } + mutex_lock(&nbd->config_lock); + nbd_bdev_reset(bdev); + /* user requested, ignore socket errors */ +@@ -1877,6 +1891,12 @@ static void nbd_disconnect_and_put(struc + nbd_disconnect(nbd); + nbd_clear_sock(nbd); + mutex_unlock(&nbd->config_lock); ++ /* ++ * Make sure recv thread has finished, so it does not drop the last ++ * config ref and try to destroy the workqueue from inside the work ++ * queue. 
++ */ ++ flush_workqueue(nbd->recv_workq); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); +@@ -2263,20 +2283,12 @@ static int __init nbd_init(void) + + if (nbds_max > 1UL << (MINORBITS - part_shift)) + return -EINVAL; +- recv_workqueue = alloc_workqueue("knbd-recv", +- WQ_MEM_RECLAIM | WQ_HIGHPRI | +- WQ_UNBOUND, 0); +- if (!recv_workqueue) +- return -ENOMEM; + +- if (register_blkdev(NBD_MAJOR, "nbd")) { +- destroy_workqueue(recv_workqueue); ++ if (register_blkdev(NBD_MAJOR, "nbd")) + return -EIO; +- } + + if (genl_register_family(&nbd_genl_family)) { + unregister_blkdev(NBD_MAJOR, "nbd"); +- destroy_workqueue(recv_workqueue); + return -EINVAL; + } + nbd_dbg_init(); +@@ -2318,7 +2330,6 @@ static void __exit nbd_cleanup(void) + + idr_destroy(&nbd_index_idr); + genl_unregister_family(&nbd_genl_family); +- destroy_workqueue(recv_workqueue); + unregister_blkdev(NBD_MAJOR, "nbd"); + } + diff --git a/queue-5.3/pm-devfreq-tegra-fix-khz-to-hz-conversion.patch b/queue-5.3/pm-devfreq-tegra-fix-khz-to-hz-conversion.patch new file mode 100644 index 00000000000..ee1908b016e --- /dev/null +++ b/queue-5.3/pm-devfreq-tegra-fix-khz-to-hz-conversion.patch @@ -0,0 +1,75 @@ +From 62bacb06b9f08965c4ef10e17875450490c948c0 Mon Sep 17 00:00:00 2001 +From: Dmitry Osipenko +Date: Thu, 2 May 2019 02:38:00 +0300 +Subject: PM / devfreq: tegra: Fix kHz to Hz conversion + +From: Dmitry Osipenko + +commit 62bacb06b9f08965c4ef10e17875450490c948c0 upstream. + +The kHz to Hz is incorrectly converted in a few places in the code, +this results in a wrong frequency being calculated because devfreq core +uses OPP frequencies that are given in Hz to clamp the rate, while +tegra-devfreq gives to the core value in kHz and then it also expects to +receive value in kHz from the core. In a result memory freq is always set +to a value which is close to ULONG_MAX because of the bug. 
Hence the EMC +frequency is always capped to the maximum and the driver doesn't do +anything useful. This patch was tested on Tegra30 and Tegra124 SoC's, EMC +frequency scaling works properly now. + +Cc: # 4.14+ +Tested-by: Steev Klimaszewski +Reviewed-by: Chanwoo Choi +Signed-off-by: Dmitry Osipenko +Acked-by: Thierry Reding +Signed-off-by: MyungJoo Ham +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/devfreq/tegra-devfreq.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/devfreq/tegra-devfreq.c ++++ b/drivers/devfreq/tegra-devfreq.c +@@ -474,11 +474,11 @@ static int tegra_devfreq_target(struct d + { + struct tegra_devfreq *tegra = dev_get_drvdata(dev); + struct dev_pm_opp *opp; +- unsigned long rate = *freq * KHZ; ++ unsigned long rate; + +- opp = devfreq_recommended_opp(dev, &rate, flags); ++ opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) { +- dev_err(dev, "Failed to find opp for %lu KHz\n", *freq); ++ dev_err(dev, "Failed to find opp for %lu Hz\n", *freq); + return PTR_ERR(opp); + } + rate = dev_pm_opp_get_freq(opp); +@@ -487,8 +487,6 @@ static int tegra_devfreq_target(struct d + clk_set_min_rate(tegra->emc_clock, rate); + clk_set_rate(tegra->emc_clock, 0); + +- *freq = rate; +- + return 0; + } + +@@ -498,7 +496,7 @@ static int tegra_devfreq_get_dev_status( + struct tegra_devfreq *tegra = dev_get_drvdata(dev); + struct tegra_devfreq_device *actmon_dev; + +- stat->current_frequency = tegra->cur_freq; ++ stat->current_frequency = tegra->cur_freq * KHZ; + + /* To be used by the tegra governor */ + stat->private_data = tegra; +@@ -553,7 +551,7 @@ static int tegra_governor_get_target(str + target_freq = max(target_freq, dev->target_freq); + } + +- *freq = target_freq; ++ *freq = target_freq * KHZ; + + return 0; + } diff --git a/queue-5.3/power-supply-sbs-battery-only-return-health-when-battery-present.patch b/queue-5.3/power-supply-sbs-battery-only-return-health-when-battery-present.patch new file mode 
100644 index 00000000000..e583b82650a --- /dev/null +++ b/queue-5.3/power-supply-sbs-battery-only-return-health-when-battery-present.patch @@ -0,0 +1,74 @@ +From fe55e770327363304c4111423e6f7ff3c650136d Mon Sep 17 00:00:00 2001 +From: Michael Nosthoff +Date: Fri, 16 Aug 2019 09:58:42 +0200 +Subject: power: supply: sbs-battery: only return health when battery present + +From: Michael Nosthoff + +commit fe55e770327363304c4111423e6f7ff3c650136d upstream. + +when the battery is set to sbs-mode and no gpio detection is enabled +"health" is always returning a value even when the battery is not present. +All other fields return "not present". +This leads to a scenario where the driver is constantly switching between +"present" and "not present" state. This generates a lot of constant +traffic on the i2c. + +This commit changes the response of "health" to an error when the battery +is not responding leading to a consistent "not present" state. + +Fixes: 76b16f4cdfb8 ("power: supply: sbs-battery: don't assume MANUFACTURER_DATA formats") +Cc: +Signed-off-by: Michael Nosthoff +Reviewed-by: Brian Norris +Tested-by: Brian Norris +Signed-off-by: Sebastian Reichel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/power/supply/sbs-battery.c | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + +--- a/drivers/power/supply/sbs-battery.c ++++ b/drivers/power/supply/sbs-battery.c +@@ -314,17 +314,22 @@ static int sbs_get_battery_presence_and_ + { + int ret; + +- if (psp == POWER_SUPPLY_PROP_PRESENT) { +- /* Dummy command; if it succeeds, battery is present. */ +- ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr); +- if (ret < 0) +- val->intval = 0; /* battery disconnected */ +- else +- val->intval = 1; /* battery present */ +- } else { /* POWER_SUPPLY_PROP_HEALTH */ ++ /* Dummy command; if it succeeds, battery is present. 
*/ ++ ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr); ++ ++ if (ret < 0) { /* battery not present*/ ++ if (psp == POWER_SUPPLY_PROP_PRESENT) { ++ val->intval = 0; ++ return 0; ++ } ++ return ret; ++ } ++ ++ if (psp == POWER_SUPPLY_PROP_PRESENT) ++ val->intval = 1; /* battery present */ ++ else /* POWER_SUPPLY_PROP_HEALTH */ + /* SBS spec doesn't have a general health command. */ + val->intval = POWER_SUPPLY_HEALTH_UNKNOWN; +- } + + return 0; + } +@@ -626,6 +631,8 @@ static int sbs_get_property(struct power + else + ret = sbs_get_battery_presence_and_health(client, psp, + val); ++ ++ /* this can only be true if no gpio is used */ + if (psp == POWER_SUPPLY_PROP_PRESENT) + return 0; + break; diff --git a/queue-5.3/power-supply-sbs-battery-use-correct-flags-field.patch b/queue-5.3/power-supply-sbs-battery-use-correct-flags-field.patch new file mode 100644 index 00000000000..e4270a860e6 --- /dev/null +++ b/queue-5.3/power-supply-sbs-battery-use-correct-flags-field.patch @@ -0,0 +1,37 @@ +From 99956a9e08251a1234434b492875b1eaff502a12 Mon Sep 17 00:00:00 2001 +From: Michael Nosthoff +Date: Fri, 16 Aug 2019 09:37:42 +0200 +Subject: power: supply: sbs-battery: use correct flags field + +From: Michael Nosthoff + +commit 99956a9e08251a1234434b492875b1eaff502a12 upstream. + +the type flag is stored in the chip->flags field not in the +client->flags field. This currently leads to never using the ti +specific health function as client->flags doesn't use that bit. +So it's always falling back to the general one. 
+ +Fixes: 76b16f4cdfb8 ("power: supply: sbs-battery: don't assume MANUFACTURER_DATA formats") +Cc: +Signed-off-by: Michael Nosthoff +Reviewed-by: Brian Norris +Reviewed-by: Enric Balletbo i Serra +Signed-off-by: Sebastian Reichel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/power/supply/sbs-battery.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/power/supply/sbs-battery.c ++++ b/drivers/power/supply/sbs-battery.c +@@ -620,7 +620,7 @@ static int sbs_get_property(struct power + switch (psp) { + case POWER_SUPPLY_PROP_PRESENT: + case POWER_SUPPLY_PROP_HEALTH: +- if (client->flags & SBS_FLAGS_TI_BQ20Z75) ++ if (chip->flags & SBS_FLAGS_TI_BQ20Z75) + ret = sbs_get_ti_battery_presence_and_health(client, + psp, val); + else diff --git a/queue-5.3/powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch b/queue-5.3/powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch new file mode 100644 index 00000000000..271b26f513f --- /dev/null +++ b/queue-5.3/powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch @@ -0,0 +1,61 @@ +From 9d6d712fbf7766f21c838940eebcd7b4d476c5e6 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Wed, 14 Aug 2019 10:02:20 +0000 +Subject: powerpc/32s: Fix boot failure with DEBUG_PAGEALLOC without KASAN. + +From: Christophe Leroy + +commit 9d6d712fbf7766f21c838940eebcd7b4d476c5e6 upstream. + +When KASAN is selected, the definitive hash table has to be +set up later, but there is already an early temporary one. + +When KASAN is not selected, there is no early hash table, +so the setup of the definitive hash table cannot be delayed. 
+ +Fixes: 72f208c6a8f7 ("powerpc/32s: move hash code patching out of MMU_init_hw()") +Cc: stable@vger.kernel.org # v5.2+ +Reported-by: Jonathan Neuschafer +Tested-by: Jonathan Neuschafer +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/b7860c5e1e784d6b96ba67edf47dd6cbc2e78ab6.1565776892.git.christophe.leroy@c-s.fr +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kernel/head_32.S | 2 ++ + arch/powerpc/mm/book3s32/mmu.c | 9 +++++++++ + 2 files changed, 11 insertions(+) + +--- a/arch/powerpc/kernel/head_32.S ++++ b/arch/powerpc/kernel/head_32.S +@@ -897,9 +897,11 @@ start_here: + bl machine_init + bl __save_cpu_setup + bl MMU_init ++#ifdef CONFIG_KASAN + BEGIN_MMU_FTR_SECTION + bl MMU_init_hw_patch + END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) ++#endif + + /* + * Go back to running unmapped so we can load up new values +--- a/arch/powerpc/mm/book3s32/mmu.c ++++ b/arch/powerpc/mm/book3s32/mmu.c +@@ -358,6 +358,15 @@ void __init MMU_init_hw(void) + hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + hash_mb2 = 16 - LG_HPTEG_SIZE; ++ ++ /* ++ * When KASAN is selected, there is already an early temporary hash ++ * table and the switch to the final hash table is done later. ++ */ ++ if (IS_ENABLED(CONFIG_KASAN)) ++ return; ++ ++ MMU_init_hw_patch(); + } + + void __init MMU_init_hw_patch(void) diff --git a/queue-5.3/powerpc-603-fix-handling-of-the-dirty-flag.patch b/queue-5.3/powerpc-603-fix-handling-of-the-dirty-flag.patch new file mode 100644 index 00000000000..c48e454d1e5 --- /dev/null +++ b/queue-5.3/powerpc-603-fix-handling-of-the-dirty-flag.patch @@ -0,0 +1,50 @@ +From 415480dce2ef03bb8335deebd2f402f475443ce0 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Mon, 19 Aug 2019 06:40:25 +0000 +Subject: powerpc/603: Fix handling of the DIRTY flag + +From: Christophe Leroy + +commit 415480dce2ef03bb8335deebd2f402f475443ce0 upstream. 
+ +If a page is already mapped RW without the DIRTY flag, the DIRTY +flag is never set and a TLB store miss exception is taken forever. + +This is easily reproduced with the following app: + +void main(void) +{ + volatile char *ptr = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + *ptr = *ptr; +} + +When DIRTY flag is not set, bail out of TLB miss handler and take +a minor page fault which will set the DIRTY flag. + +Fixes: f8b58c64eaef ("powerpc/603: let's handle PAGE_DIRTY directly") +Cc: stable@vger.kernel.org # v5.1+ +Reported-by: Doug Crawford +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/80432f71194d7ee75b2f5043ecf1501cf1cca1f3.1566196646.git.christophe.leroy@c-s.fr +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kernel/head_32.S | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/powerpc/kernel/head_32.S ++++ b/arch/powerpc/kernel/head_32.S +@@ -557,9 +557,9 @@ DataStoreTLBMiss: + cmplw 0,r1,r3 + mfspr r2, SPRN_SPRG_PGDIR + #ifdef CONFIG_SWAP +- li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED ++ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + #else +- li r1, _PAGE_RW | _PAGE_PRESENT ++ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT + #endif + bge- 112f + lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ diff --git a/queue-5.3/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch b/queue-5.3/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch new file mode 100644 index 00000000000..d5691096a72 --- /dev/null +++ b/queue-5.3/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch @@ -0,0 +1,81 @@ +From 677733e296b5c7a37c47da391fc70a43dc40bd67 Mon Sep 17 00:00:00 2001 +From: "Aneesh Kumar K.V" +Date: Tue, 24 Sep 2019 09:22:51 +0530 +Subject: powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions + +From: Aneesh Kumar K.V + +commit 
677733e296b5c7a37c47da391fc70a43dc40bd67 upstream. + +The store ordering vs tlbie issue mentioned in commit +a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on +POWER9") is fixed for Nimbus 2.3 and Cumulus 1.3 revisions. We don't +need to apply the fixup if we are running on them. + +We can only do this on PowerNV. On pseries guest with KVM we still +don't support redoing the feature fixup after migration. So we should +be enabling all the workarounds needed, because we can possibly +migrate between DD 2.3 and DD 2.2. + +Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9") +Cc: stable@vger.kernel.org # v4.16+ +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190924035254.24612-1-aneesh.kumar@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kernel/dt_cpu_ftrs.c | 30 ++++++++++++++++++++++++++++-- + 1 file changed, 28 insertions(+), 2 deletions(-) + +--- a/arch/powerpc/kernel/dt_cpu_ftrs.c ++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c +@@ -691,9 +691,35 @@ static bool __init cpufeatures_process_f + return true; + } + ++/* ++ * Handle POWER9 broadcast tlbie invalidation issue using ++ * cpu feature flag.
++ */ ++static __init void update_tlbie_feature_flag(unsigned long pvr) ++{ ++ if (PVR_VER(pvr) == PVR_POWER9) { ++ /* ++ * Set the tlbie feature flag for anything below ++ * Nimbus DD 2.3 and Cumulus DD 1.3 ++ */ ++ if ((pvr & 0xe000) == 0) { ++ /* Nimbus */ ++ if ((pvr & 0xfff) < 0x203) ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ } else if ((pvr & 0xc000) == 0) { ++ /* Cumulus */ ++ if ((pvr & 0xfff) < 0x103) ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ } else { ++ WARN_ONCE(1, "Unknown PVR"); ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ } ++ } ++} ++ + static __init void cpufeatures_cpu_quirks(void) + { +- int version = mfspr(SPRN_PVR); ++ unsigned long version = mfspr(SPRN_PVR); + + /* + * Not all quirks can be derived from the cpufeatures device tree. +@@ -712,10 +738,10 @@ static __init void cpufeatures_cpu_quirk + + if ((version & 0xffff0000) == 0x004e0000) { + cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR; + } + ++ update_tlbie_feature_flag(version); + /* + * PKEY was not in the initial base or feature node + * specification, but it should become optional in the next diff --git a/queue-5.3/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch b/queue-5.3/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch new file mode 100644 index 00000000000..91852cc3717 --- /dev/null +++ b/queue-5.3/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch @@ -0,0 +1,110 @@ +From 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 Mon Sep 17 00:00:00 2001 +From: "Aneesh Kumar K.V" +Date: Tue, 24 Sep 2019 09:22:52 +0530 +Subject: powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag + +From: Aneesh Kumar K.V + +commit 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 upstream. + +Rename the #define to indicate this is related to store vs tlbie +ordering issue. 
In the next patch, we will be adding another feature +flag that is used to handle the ERAT flush vs tlbie ordering issue. + +Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9") +Cc: stable@vger.kernel.org # v4.16+ +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190924035254.24612-2-aneesh.kumar@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/cputable.h | 4 ++-- + arch/powerpc/kernel/dt_cpu_ftrs.c | 6 +++--- + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- + arch/powerpc/mm/book3s64/hash_native.c | 2 +- + arch/powerpc/mm/book3s64/radix_tlb.c | 4 ++-- + 5 files changed, 9 insertions(+), 9 deletions(-) + +--- a/arch/powerpc/include/asm/cputable.h ++++ b/arch/powerpc/include/asm/cputable.h +@@ -213,7 +213,7 @@ static inline void cpu_feature_keys_init + #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000) + #define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000) + #define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000) +-#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000) ++#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) + #define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000) + + #ifndef __ASSEMBLY__ +@@ -461,7 +461,7 @@ static inline void cpu_feature_keys_init + CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ +- CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR) ++ CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR) + #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 + #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1) + #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ +--- a/arch/powerpc/kernel/dt_cpu_ftrs.c ++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c +@@ -705,14 +705,14 @@ static __init void update_tlbie_feature_ + if ((pvr & 0xe000) == 0) { + /* Nimbus
*/ + if ((pvr & 0xfff) < 0x203) +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } else if ((pvr & 0xc000) == 0) { + /* Cumulus */ + if ((pvr & 0xfff) < 0x103) +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } else { + WARN_ONCE(1, "Unknown PVR"); +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } + } + } +--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c ++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c +@@ -451,7 +451,7 @@ static void do_tlbies(struct kvm *kvm, u + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + } + +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie +--- a/arch/powerpc/mm/book3s64/hash_native.c ++++ b/arch/powerpc/mm/book3s64/hash_native.c +@@ -199,7 +199,7 @@ static inline unsigned long ___tlbie(un + + static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) + { +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); +--- a/arch/powerpc/mm/book3s64/radix_tlb.c ++++ b/arch/powerpc/mm/book3s64/radix_tlb.c +@@ -216,7 +216,7 @@ static inline void fixup_tlbie(void) + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +@@ -226,7 +226,7 @@ static inline void fixup_tlbie_lpid(unsi + { + unsigned long va = ((1UL << 52) - 1); + +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 
+ asm volatile("ptesync": : :"memory"); + __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } diff --git a/queue-5.3/powerpc-kasan-fix-parallel-loading-of-modules.patch b/queue-5.3/powerpc-kasan-fix-parallel-loading-of-modules.patch new file mode 100644 index 00000000000..ced38bdd667 --- /dev/null +++ b/queue-5.3/powerpc-kasan-fix-parallel-loading-of-modules.patch @@ -0,0 +1,74 @@ +From 45ff3c55958542c3b76075d59741297b8cb31cbb Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Fri, 9 Aug 2019 14:58:09 +0000 +Subject: powerpc/kasan: Fix parallel loading of modules. + +From: Christophe Leroy + +commit 45ff3c55958542c3b76075d59741297b8cb31cbb upstream. + +Parallel loading of modules may lead to bad setup of shadow page table +entries. + +First, lets align modules so that two modules never share the same +shadow page. + +Second, ensure that two modules cannot allocate two page tables for +the same PMD entry at the same time. This is done by using +init_mm.page_table_lock in the same way as __pte_alloc_kernel() + +Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") +Cc: stable@vger.kernel.org # v5.2+ +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/c97284f912128cbc3f2fe09d68e90e65fb3e6026.1565361876.git.christophe.leroy@c-s.fr +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/kasan/kasan_init_32.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/arch/powerpc/mm/kasan/kasan_init_32.c ++++ b/arch/powerpc/mm/kasan/kasan_init_32.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -46,7 +47,19 @@ static int __ref kasan_init_shadow_page_ + kasan_populate_pte(new, PAGE_READONLY); + else + kasan_populate_pte(new, PAGE_KERNEL_RO); +- pmd_populate_kernel(&init_mm, pmd, new); ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ spin_lock(&init_mm.page_table_lock); ++ /* Has another populated it ? 
*/ ++ if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) { ++ pmd_populate_kernel(&init_mm, pmd, new); ++ new = NULL; ++ } ++ spin_unlock(&init_mm.page_table_lock); ++ ++ if (new && slab_is_available()) ++ pte_free_kernel(&init_mm, new); + } + return 0; + } +@@ -137,7 +150,11 @@ void __init kasan_init(void) + #ifdef CONFIG_MODULES + void *module_alloc(unsigned long size) + { +- void *base = vmalloc_exec(size); ++ void *base; ++ ++ base = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, VMALLOC_END, ++ GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, ++ NUMA_NO_NODE, __builtin_return_address(0)); + + if (!base) + return NULL; diff --git a/queue-5.3/powerpc-kasan-fix-shadow-area-set-up-for-modules.patch b/queue-5.3/powerpc-kasan-fix-shadow-area-set-up-for-modules.patch new file mode 100644 index 00000000000..51c6b7cf4d3 --- /dev/null +++ b/queue-5.3/powerpc-kasan-fix-shadow-area-set-up-for-modules.patch @@ -0,0 +1,40 @@ +From 663c0c9496a69f80011205ba3194049bcafd681d Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Fri, 9 Aug 2019 14:58:10 +0000 +Subject: powerpc/kasan: Fix shadow area set up for modules. + +From: Christophe Leroy + +commit 663c0c9496a69f80011205ba3194049bcafd681d upstream. + +When loading modules, from time to time an Oops is encountered during +the init of shadow area for globals. This is due to the last page not +always being mapped depending on the exact distance between the start +and the end of the shadow area and the alignment with the page +addresses. + +Fix this by aligning the starting address with the page address. + +Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") +Cc: stable@vger.kernel.org # v5.2+ +Reported-by: Erhard F. 
+Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/4f887e9b77d0d725cbb52035c7ece485c1c5fc14.1565361881.git.christophe.leroy@c-s.fr +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/kasan/kasan_init_32.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/powerpc/mm/kasan/kasan_init_32.c ++++ b/arch/powerpc/mm/kasan/kasan_init_32.c +@@ -87,7 +87,7 @@ static int __ref kasan_init_region(void + if (!slab_is_available()) + block = memblock_alloc(k_end - k_start, PAGE_SIZE); + +- for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) { ++ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + void *va = block ? block + k_cur - k_start : kasan_get_one_page(); + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); diff --git a/queue-5.3/powerpc-mce-fix-mce-handling-for-huge-pages.patch b/queue-5.3/powerpc-mce-fix-mce-handling-for-huge-pages.patch new file mode 100644 index 00000000000..2a2d31fe6d7 --- /dev/null +++ b/queue-5.3/powerpc-mce-fix-mce-handling-for-huge-pages.patch @@ -0,0 +1,81 @@ +From 99ead78afd1128bfcebe7f88f3b102fb2da09aee Mon Sep 17 00:00:00 2001 +From: Balbir Singh +Date: Tue, 20 Aug 2019 13:43:47 +0530 +Subject: powerpc/mce: Fix MCE handling for huge pages + +From: Balbir Singh + +commit 99ead78afd1128bfcebe7f88f3b102fb2da09aee upstream. + +The current code would fail on huge pages addresses, since the shift would +be incorrect. Use the correct page shift value returned by +__find_linux_pte() to get the correct physical address. The code is more +generic and can handle both regular and compound pages. 
+ +Fixes: ba41e1e1ccb9 ("powerpc/mce: Hookup derror (load/store) UE errors") +Signed-off-by: Balbir Singh +[arbab@linux.ibm.com: Fixup pseries_do_memory_failure()] +Signed-off-by: Reza Arbab +Tested-by: Mahesh Salgaonkar +Signed-off-by: Santosh Sivaraj +Cc: stable@vger.kernel.org # v4.15+ +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190820081352.8641-3-santosh@fossix.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kernel/mce_power.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +--- a/arch/powerpc/kernel/mce_power.c ++++ b/arch/powerpc/kernel/mce_power.c +@@ -26,6 +26,7 @@ + unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) + { + pte_t *ptep; ++ unsigned int shift; + unsigned long flags; + struct mm_struct *mm; + +@@ -35,13 +36,18 @@ unsigned long addr_to_pfn(struct pt_regs + mm = &init_mm; + + local_irq_save(flags); +- if (mm == current->mm) +- ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL); +- else +- ptep = find_init_mm_pte(addr, NULL); ++ ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift); + local_irq_restore(flags); ++ + if (!ptep || pte_special(*ptep)) + return ULONG_MAX; ++ ++ if (shift > PAGE_SHIFT) { ++ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; ++ ++ return pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask))); ++ } ++ + return pte_pfn(*ptep); + } + +@@ -344,7 +350,7 @@ static const struct mce_derror_table mce + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, + { 0, false, 0, 0, 0, 0, 0 } }; + +-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, ++static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, + uint64_t *phys_addr) + { + /* +@@ -541,7 +547,8 @@ static int mce_handle_derror(struct pt_r + * kernel/exception-64s.h + */ + if (get_paca()->in_mce < MAX_MCE_DEPTH) +- mce_find_instr_ea_and_pfn(regs, addr, phys_addr); ++ mce_find_instr_ea_and_phys(regs, addr, ++ phys_addr); + } + found = 1; + } diff --git 
a/queue-5.3/powerpc-mce-schedule-work-from-irq_work.patch b/queue-5.3/powerpc-mce-schedule-work-from-irq_work.patch new file mode 100644 index 00000000000..7ea39e7e386 --- /dev/null +++ b/queue-5.3/powerpc-mce-schedule-work-from-irq_work.patch @@ -0,0 +1,67 @@ +From b5bda6263cad9a927e1a4edb7493d542da0c1410 Mon Sep 17 00:00:00 2001 +From: Santosh Sivaraj +Date: Tue, 20 Aug 2019 13:43:46 +0530 +Subject: powerpc/mce: Schedule work from irq_work + +From: Santosh Sivaraj + +commit b5bda6263cad9a927e1a4edb7493d542da0c1410 upstream. + +schedule_work() cannot be called from MCE exception context as MCE can +interrupt even in interrupt disabled context. + +Fixes: 733e4a4c4467 ("powerpc/mce: hookup memory_failure for UE errors") +Cc: stable@vger.kernel.org # v4.15+ +Reviewed-by: Mahesh Salgaonkar +Reviewed-by: Nicholas Piggin +Acked-by: Balbir Singh +Signed-off-by: Santosh Sivaraj +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190820081352.8641-2-santosh@fossix.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kernel/mce.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/kernel/mce.c ++++ b/arch/powerpc/kernel/mce.c +@@ -33,6 +33,7 @@ static DEFINE_PER_CPU(struct machine_che + mce_ue_event_queue); + + static void machine_check_process_queued_event(struct irq_work *work); ++static void machine_check_ue_irq_work(struct irq_work *work); + void machine_check_ue_event(struct machine_check_event *evt); + static void machine_process_ue_event(struct work_struct *work); + +@@ -40,6 +41,10 @@ static struct irq_work mce_event_process + .func = machine_check_process_queued_event, + }; + ++static struct irq_work mce_ue_event_irq_work = { ++ .func = machine_check_ue_irq_work, ++}; ++ + DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); + + static void mce_set_error_info(struct machine_check_event *mce, +@@ -199,6 +204,10 @@ void release_mce_event(void) + get_mce_event(NULL, true); + } + ++static void 
machine_check_ue_irq_work(struct irq_work *work) ++{ ++ schedule_work(&mce_ue_event_work); ++} + + /* + * Queue up the MCE event which then can be handled later. +@@ -216,7 +225,7 @@ void machine_check_ue_event(struct machi + memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); + + /* Queue work to process this event later. */ +- schedule_work(&mce_ue_event_work); ++ irq_work_queue(&mce_ue_event_irq_work); + } + + /* diff --git a/queue-5.3/powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch b/queue-5.3/powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch new file mode 100644 index 00000000000..e75b849cd8b --- /dev/null +++ b/queue-5.3/powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch @@ -0,0 +1,76 @@ +From 4c0f5d1eb4072871c34530358df45f05ab80edd6 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Wed, 21 Aug 2019 10:20:00 +0000 +Subject: powerpc/mm: Add a helper to select PAGE_KERNEL_RO or PAGE_READONLY + +From: Christophe Leroy + +commit 4c0f5d1eb4072871c34530358df45f05ab80edd6 upstream. + +In a couple of places there is a need to select whether read-only +protection of shadow pages is performed with PAGE_KERNEL_RO or with +PAGE_READONLY. + +Add a helper to avoid duplicating the choice. 
+ +Signed-off-by: Christophe Leroy +Cc: stable@vger.kernel.org +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/9f33f44b9cd741c4a02b3dce7b8ef9438fe2cd2a.1566382750.git.christophe.leroy@c-s.fr +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/kasan/kasan_init_32.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +--- a/arch/powerpc/mm/kasan/kasan_init_32.c ++++ b/arch/powerpc/mm/kasan/kasan_init_32.c +@@ -12,6 +12,14 @@ + #include + #include + ++static pgprot_t kasan_prot_ro(void) ++{ ++ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) ++ return PAGE_READONLY; ++ ++ return PAGE_KERNEL_RO; ++} ++ + static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) + { + unsigned long va = (unsigned long)kasan_early_shadow_page; +@@ -26,6 +34,7 @@ static int __ref kasan_init_shadow_page_ + { + pmd_t *pmd; + unsigned long k_cur, k_next; ++ pgprot_t prot = kasan_prot_ro(); + + pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + +@@ -43,10 +52,7 @@ static int __ref kasan_init_shadow_page_ + + if (!new) + return -ENOMEM; +- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) +- kasan_populate_pte(new, PAGE_READONLY); +- else +- kasan_populate_pte(new, PAGE_KERNEL_RO); ++ kasan_populate_pte(new, prot); + + smp_wmb(); /* See comment in __pte_alloc */ + +@@ -103,10 +109,9 @@ static int __ref kasan_init_region(void + + static void __init kasan_remap_early_shadow_ro(void) + { +- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) +- kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY); +- else +- kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO); ++ pgprot_t prot = kasan_prot_ro(); ++ ++ kasan_populate_pte(kasan_early_shadow_pte, prot); + + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + } diff --git a/queue-5.3/powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch b/queue-5.3/powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch new file mode 100644 index 00000000000..efe07f86e06 --- /dev/null +++ 
b/queue-5.3/powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch @@ -0,0 +1,85 @@ +From cbd18991e24fea2c31da3bb117c83e4a3538cd11 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Wed, 21 Aug 2019 10:20:11 +0000 +Subject: powerpc/mm: Fix an Oops in kasan_mmu_init() + +From: Christophe Leroy + +commit cbd18991e24fea2c31da3bb117c83e4a3538cd11 upstream. + +Uncompressing Kernel Image ... OK + Loading Device Tree to 01ff7000, end 01fff74f ... OK +[ 0.000000] printk: bootconsole [udbg0] enabled +[ 0.000000] BUG: Unable to handle kernel data access at 0xf818c000 +[ 0.000000] Faulting instruction address: 0xc0013c7c +[ 0.000000] Thread overran stack, or stack corrupted +[ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1] +[ 0.000000] BE PAGE_SIZE=16K PREEMPT +[ 0.000000] Modules linked in: +[ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.3.0-rc4-s3k-dev-00743-g5abe4a3e8fd3-dirty #2080 +[ 0.000000] NIP: c0013c7c LR: c0013310 CTR: 00000000 +[ 0.000000] REGS: c0c5ff38 TRAP: 0300 Not tainted (5.3.0-rc4-s3k-dev-00743-g5abe4a3e8fd3-dirty) +[ 0.000000] MSR: 00001032 CR: 99033955 XER: 80002100 +[ 0.000000] DAR: f818c000 DSISR: 82000000 +[ 0.000000] GPR00: c0013310 c0c5fff0 c0ad6ac0 c0c600c0 f818c031 82000000 00000000 ffffffff +[ 0.000000] GPR08: 00000000 f1f1f1f1 c0013c2c c0013304 99033955 00400008 00000000 07ff9598 +[ 0.000000] GPR16: 00000000 07ffb94c 00000000 00000000 00000000 00000000 00000000 f818cfb2 +[ 0.000000] GPR24: 00000000 00000000 00001000 ffffffff 00000000 c07dbf80 00000000 f818c000 +[ 0.000000] NIP [c0013c7c] do_page_fault+0x50/0x904 +[ 0.000000] LR [c0013310] handle_page_fault+0xc/0x38 +[ 0.000000] Call Trace: +[ 0.000000] Instruction dump: +[ 0.000000] be010080 91410014 553fe8fe 3d40c001 3d20f1f1 7d800026 394a3c2c 3fffe000 +[ 0.000000] 6129f1f1 900100c4 9181007c 91410018 <913f0000> 3d2001f4 6129f4f4 913f0004 + +Don't map the early shadow page read-only yet when creating the new +page tables for the real shadow memory, otherwise the memblock +allocations 
that immediately follow to create the real shadow pages +that are about to replace the early shadow page trigger a page fault +if they fall into the region being worked on at the moment. + +Signed-off-by: Christophe Leroy +Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") +Cc: stable@vger.kernel.org +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/fe86886fb8db44360417cee0dc515ad47ca6ef72.1566382750.git.christophe.leroy@c-s.fr +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/kasan/kasan_init_32.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/mm/kasan/kasan_init_32.c ++++ b/arch/powerpc/mm/kasan/kasan_init_32.c +@@ -34,7 +34,7 @@ static int __ref kasan_init_shadow_page_ + { + pmd_t *pmd; + unsigned long k_cur, k_next; +- pgprot_t prot = kasan_prot_ro(); ++ pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL; + + pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + +@@ -110,9 +110,22 @@ static int __ref kasan_init_region(void + static void __init kasan_remap_early_shadow_ro(void) + { + pgprot_t prot = kasan_prot_ro(); ++ unsigned long k_start = KASAN_SHADOW_START; ++ unsigned long k_end = KASAN_SHADOW_END; ++ unsigned long k_cur; ++ phys_addr_t pa = __pa(kasan_early_shadow_page); + + kasan_populate_pte(kasan_early_shadow_pte, prot); + ++ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { ++ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); ++ pte_t *ptep = pte_offset_kernel(pmd, k_cur); ++ ++ if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) ++ continue; ++ ++ __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); ++ } + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + } + diff --git a/queue-5.3/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch b/queue-5.3/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch new file mode 100644 index
00000000000..54194969bce --- /dev/null +++ b/queue-5.3/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch @@ -0,0 +1,347 @@ +From 047e6575aec71d75b765c22111820c4776cd1c43 Mon Sep 17 00:00:00 2001 +From: "Aneesh Kumar K.V" +Date: Tue, 24 Sep 2019 09:22:53 +0530 +Subject: powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9 + +From: Aneesh Kumar K.V + +commit 047e6575aec71d75b765c22111820c4776cd1c43 upstream. + +On POWER9, under some circumstances, a broadcast TLB invalidation will +fail to invalidate the ERAT cache on some threads when there are +parallel mtpidr/mtlpidr happening on other threads of the same core. +This can cause stores to continue to go to a page after it's unmapped. + +The workaround is to force an ERAT flush using PID=0 or LPID=0 tlbie +flush. This additional TLB flush will cause the ERAT cache +invalidation. Since we are using PID=0 or LPID=0, we don't get +filtered out by the TLB snoop filtering logic. + +We need to still follow this up with another tlbie to take care of +store vs tlbie ordering issue explained in commit: +a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on +POWER9"). The presence of ERAT cache implies we can still get new +stores and they may miss store queue marking flush. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190924035254.24612-3-aneesh.kumar@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/cputable.h | 3 - + arch/powerpc/kernel/dt_cpu_ftrs.c | 2 + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 42 +++++++++++++---- + arch/powerpc/mm/book3s64/hash_native.c | 29 ++++++++++- + arch/powerpc/mm/book3s64/radix_tlb.c | 80 +++++++++++++++++++++++++++++---- + 5 files changed, 134 insertions(+), 22 deletions(-) + +--- a/arch/powerpc/include/asm/cputable.h ++++ b/arch/powerpc/include/asm/cputable.h +@@ -215,6 +215,7 @@ static inline void cpu_feature_keys_init + #define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000) + #define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) + #define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000) ++#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) + + #ifndef __ASSEMBLY__ + +@@ -461,7 +462,7 @@ static inline void cpu_feature_keys_init + CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ +- CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR) ++ CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) + #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 + #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1) + #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ +--- a/arch/powerpc/kernel/dt_cpu_ftrs.c ++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c +@@ -714,6 +714,8 @@ static __init void update_tlbie_feature_ + WARN_ONCE(1, "Unknown PVR"); + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } ++ ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG; + } + } + +--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c ++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c +@@ -433,6 +433,37 @@ static inline int 
is_mmio_hpte(unsigned + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); + } + ++static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid) ++{ ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ /* Radix flush for a hash guest */ ++ ++ unsigned long rb,rs,prs,r,ric; ++ ++ rb = PPC_BIT(52); /* IS = 2 */ ++ rs = 0; /* lpid = 0 */ ++ prs = 0; /* partition scoped */ ++ r = 1; /* radix format */ ++ ric = 0; /* RIC_FLSUH_TLB */ ++ ++ /* ++ * Need the extra ptesync to make sure we don't ++ * re-order the tlbie ++ */ ++ asm volatile("ptesync": : :"memory"); ++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) ++ : : "r"(rb), "i"(r), "i"(prs), ++ "i"(ric), "r"(rs) : "memory"); ++ } ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : ++ "r" (rb_value), "r" (lpid)); ++ } ++} ++ + static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) + { +@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, u + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + } + +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { +- /* +- * Need the extra ptesync to make sure we don't +- * re-order the tlbie +- */ +- asm volatile("ptesync": : :"memory"); +- asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : +- "r" (rbvalues[0]), "r" (kvm->arch.lpid)); +- } +- ++ fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + } else { + if (need_sync) +--- a/arch/powerpc/mm/book3s64/hash_native.c ++++ b/arch/powerpc/mm/book3s64/hash_native.c +@@ -197,8 +197,31 @@ static inline unsigned long ___tlbie(un + return va; + } + +-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) ++static inline void fixup_tlbie_vpn(unsigned long vpn, int psize, ++ int apsize, int ssize) + { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ /* Radix flush for a hash guest */ ++ ++ unsigned long 
rb,rs,prs,r,ric; ++ ++ rb = PPC_BIT(52); /* IS = 2 */ ++ rs = 0; /* lpid = 0 */ ++ prs = 0; /* partition scoped */ ++ r = 1; /* radix format */ ++ ric = 0; /* RIC_FLSUH_TLB */ ++ ++ /* ++ * Need the extra ptesync to make sure we don't ++ * re-order the tlbie ++ */ ++ asm volatile("ptesync": : :"memory"); ++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) ++ : : "r"(rb), "i"(r), "i"(prs), ++ "i"(ric), "r"(rs) : "memory"); ++ } ++ ++ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); +@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long v + asm volatile("ptesync": : :"memory"); + } else { + __tlbie(vpn, psize, apsize, ssize); +- fixup_tlbie(vpn, psize, apsize, ssize); ++ fixup_tlbie_vpn(vpn, psize, apsize, ssize); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + if (lock_tlbie && !use_local) +@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsi + /* + * Just do one more with the last used values. 
+ */ +- fixup_tlbie(vpn, psize, psize, ssize); ++ fixup_tlbie_vpn(vpn, psize, psize, ssize); + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + if (lock_tlbie) +--- a/arch/powerpc/mm/book3s64/radix_tlb.c ++++ b/arch/powerpc/mm/book3s64/radix_tlb.c +@@ -211,21 +211,82 @@ static __always_inline void __tlbie_lpid + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); + } + +-static inline void fixup_tlbie(void) ++ ++static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, ++ unsigned long ap) + { +- unsigned long pid = 0; ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); ++ } ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); ++ } ++} ++ ++static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, ++ unsigned long ap) ++{ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_pid(0, RIC_FLUSH_TLB); ++ } ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); ++ } ++} ++ ++static inline void fixup_tlbie_pid(unsigned long pid) ++{ ++ /* ++ * We can use any address for the invalidation, pick one which is ++ * probably unused as an optimisation. 
++ */ + unsigned long va = ((1UL << 52) - 1); + ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_pid(0, RIC_FLUSH_TLB); ++ } ++ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } + } + ++ ++static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, ++ unsigned long ap) ++{ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB); ++ } ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB); ++ } ++} ++ + static inline void fixup_tlbie_lpid(unsigned long lpid) + { ++ /* ++ * We can use any address for the invalidation, pick one which is ++ * probably unused as an optimisation. ++ */ + unsigned long va = ((1UL << 52) - 1); + ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_lpid(0, RIC_FLUSH_TLB); ++ } ++ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); +@@ -273,6 +334,7 @@ static inline void _tlbie_pid(unsigned l + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); ++ fixup_tlbie_pid(pid); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); +@@ -280,8 +342,8 @@ static inline void _tlbie_pid(unsigned l + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); ++ fixup_tlbie_pid(pid); + } +- fixup_tlbie(); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + +@@ -325,6 +387,7 @@ static inline void _tlbie_lpid(unsigned + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_lpid(lpid, RIC_FLUSH_TLB); ++ fixup_tlbie_lpid(lpid); + break; + case RIC_FLUSH_PWC: + __tlbie_lpid(lpid, RIC_FLUSH_PWC); +@@ -332,8 +395,8 @@ 
static inline void _tlbie_lpid(unsigned + case RIC_FLUSH_ALL: + default: + __tlbie_lpid(lpid, RIC_FLUSH_ALL); ++ fixup_tlbie_lpid(lpid); + } +- fixup_tlbie_lpid(lpid); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + +@@ -407,6 +470,8 @@ static inline void __tlbie_va_range(unsi + + for (addr = start; addr < end; addr += page_size) + __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); ++ ++ fixup_tlbie_va_range(addr - page_size, pid, ap); + } + + static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, +@@ -416,7 +481,7 @@ static __always_inline void _tlbie_va(un + + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, ap, ric); +- fixup_tlbie(); ++ fixup_tlbie_va(va, pid, ap); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + +@@ -427,7 +492,7 @@ static __always_inline void _tlbie_lpid_ + + asm volatile("ptesync": : :"memory"); + __tlbie_lpid_va(va, lpid, ap, ric); +- fixup_tlbie_lpid(lpid); ++ fixup_tlbie_lpid_va(va, lpid, ap); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + +@@ -439,7 +504,6 @@ static inline void _tlbie_va_range(unsig + if (also_pwc) + __tlbie_pid(pid, RIC_FLUSH_PWC); + __tlbie_va_range(start, end, pid, page_size, psize); +- fixup_tlbie(); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + +@@ -775,7 +839,7 @@ is_local: + if (gflush) + __tlbie_va_range(gstart, gend, pid, + PUD_SIZE, MMU_PAGE_1G); +- fixup_tlbie(); ++ + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + } diff --git a/queue-5.3/powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch b/queue-5.3/powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch new file mode 100644 index 00000000000..a1b441f664d --- /dev/null +++ b/queue-5.3/powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch @@ -0,0 +1,72 @@ +From 56090a3902c80c296e822d11acdb6a101b322c52 Mon Sep 17 00:00:00 2001 +From: Alexey Kardashevskiy +Date: Thu, 18 Jul 2019 15:11:36 +1000 +Subject: powerpc/powernv/ioda: Fix race in TCE level 
allocation + +From: Alexey Kardashevskiy + +commit 56090a3902c80c296e822d11acdb6a101b322c52 upstream. + +pnv_tce() returns a pointer to a TCE entry and originally a TCE table +would be pre-allocated. For the default case of 2GB window the table +needs only a single level and that is fine. However if more levels are +requested, it is possible to get a race when 2 threads want a pointer +to a TCE entry from the same page of TCEs. + +This adds cmpxchg to handle the race. Note that once TCE is non-zero, +it cannot become zero again. + +Fixes: a68bd1267b72 ("powerpc/powernv/ioda: Allocate indirect TCE levels on demand") +CC: stable@vger.kernel.org # v4.19+ +Signed-off-by: Alexey Kardashevskiy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190718051139.74787-2-aik@ozlabs.ru +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/platforms/powernv/pci-ioda-tce.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c ++++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c +@@ -49,6 +49,9 @@ static __be64 *pnv_alloc_tce_level(int n + return addr; + } + ++static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, ++ unsigned long size, unsigned int levels); ++ + static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) + { + __be64 *tmp = user ? 
tbl->it_userspace : (__be64 *) tbl->it_base; +@@ -58,9 +61,9 @@ static __be64 *pnv_tce(struct iommu_tabl + + while (level) { + int n = (idx & mask) >> (level * shift); +- unsigned long tce; ++ unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n])); + +- if (tmp[n] == 0) { ++ if (!tce) { + __be64 *tmp2; + + if (!alloc) +@@ -71,10 +74,15 @@ static __be64 *pnv_tce(struct iommu_tabl + if (!tmp2) + return NULL; + +- tmp[n] = cpu_to_be64(__pa(tmp2) | +- TCE_PCI_READ | TCE_PCI_WRITE); ++ tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE; ++ oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0, ++ cpu_to_be64(tce))); ++ if (oldtce) { ++ pnv_pci_ioda2_table_do_free_pages(tmp2, ++ ilog2(tbl->it_level_size) + 3, 1); ++ tce = oldtce; ++ } + } +- tce = be64_to_cpu(tmp[n]); + + tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); + idx &= ~mask; diff --git a/queue-5.3/powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch b/queue-5.3/powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch new file mode 100644 index 00000000000..81133a2151c --- /dev/null +++ b/queue-5.3/powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch @@ -0,0 +1,54 @@ +From e7de4f7b64c23e503a8c42af98d56f2a7462bd6d Mon Sep 17 00:00:00 2001 +From: Andrew Donnellan +Date: Fri, 3 May 2019 17:52:53 +1000 +Subject: powerpc/powernv: Restrict OPAL symbol map to only be readable by root + +From: Andrew Donnellan + +commit e7de4f7b64c23e503a8c42af98d56f2a7462bd6d upstream. + +Currently the OPAL symbol map is globally readable, which seems bad as +it contains physical addresses. + +Restrict it to root. 
+ +Fixes: c8742f85125d ("powerpc/powernv: Expose OPAL firmware symbol map") +Cc: stable@vger.kernel.org # v3.19+ +Suggested-by: Michael Ellerman +Signed-off-by: Andrew Donnellan +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190503075253.22798-1-ajd@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/platforms/powernv/opal.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/arch/powerpc/platforms/powernv/opal.c ++++ b/arch/powerpc/platforms/powernv/opal.c +@@ -705,7 +705,10 @@ static ssize_t symbol_map_read(struct fi + bin_attr->size); + } + +-static BIN_ATTR_RO(symbol_map, 0); ++static struct bin_attribute symbol_map_attr = { ++ .attr = {.name = "symbol_map", .mode = 0400}, ++ .read = symbol_map_read ++}; + + static void opal_export_symmap(void) + { +@@ -722,10 +725,10 @@ static void opal_export_symmap(void) + return; + + /* Setup attributes */ +- bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0])); +- bin_attr_symbol_map.size = be64_to_cpu(syms[1]); ++ symbol_map_attr.private = __va(be64_to_cpu(syms[0])); ++ symbol_map_attr.size = be64_to_cpu(syms[1]); + +- rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map); ++ rc = sysfs_create_bin_file(opal_kobj, &symbol_map_attr); + if (rc) + pr_warn("Error %d creating OPAL symbols file\n", rc); + } diff --git a/queue-5.3/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch b/queue-5.3/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch new file mode 100644 index 00000000000..caa3b53a08d --- /dev/null +++ b/queue-5.3/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch @@ -0,0 +1,170 @@ +From c784be435d5dae28d3b03db31753dd7a18733f0c Mon Sep 17 00:00:00 2001 +From: "Gautham R. Shenoy" +Date: Wed, 15 May 2019 13:15:52 +0530 +Subject: powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt() + +From: Gautham R. Shenoy + +commit c784be435d5dae28d3b03db31753dd7a18733f0c upstream. 
+ +The calls to arch_add_memory()/arch_remove_memory() are always made +with the read-side cpu_hotplug_lock acquired via mem_hotplug_begin(). +On pSeries, arch_add_memory()/arch_remove_memory() eventually call +resize_hpt() which in turn calls stop_machine() which acquires the +read-side cpu_hotplug_lock again, thereby resulting in the recursive +acquisition of this lock. + +In the absence of CONFIG_PROVE_LOCKING, we hadn't observed a system +lockup during a memory hotplug operation because cpus_read_lock() is a +per-cpu rwsem read, which, in the fast-path (in the absence of the +writer, which in our case is a CPU-hotplug operation) simply +increments the read_count on the semaphore. Thus a recursive read in +the fast-path doesn't cause any problems. + +However, we can hit this problem in practice if there is a concurrent +CPU-Hotplug operation in progress which is waiting to acquire the +write-side of the lock. This will cause the second recursive read to +block until the writer finishes, while the writer is itself blocked because +the first read still holds the lock. Thus neither the reader nor the writer +can make any progress, thereby blocking both CPU-Hotplug as well as +Memory Hotplug operations. + +Memory-Hotplug CPU-Hotplug +CPU 0 CPU 1 +------ ------ + +1. down_read(cpu_hotplug_lock.rw_sem) + [mem_hotplug_begin] + 2. down_write(cpu_hotplug_lock.rw_sem) + [cpu_up/cpu_down] +3. down_read(cpu_hotplug_lock.rw_sem) + [stop_machine()] + +Lockdep complains as follows in these code-paths.
+ + swapper/0/1 is trying to acquire lock: + (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: stop_machine+0x2c/0x60 + +but task is already holding lock: +(____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50 + + other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(cpu_hotplug_lock.rw_sem); + lock(cpu_hotplug_lock.rw_sem); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + + 3 locks held by swapper/0/1: + #0: (____ptrval____) (&dev->mutex){....}, at: __driver_attach+0x12c/0x1b0 + #1: (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50 + #2: (____ptrval____) (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x54/0x1a0 + +stack backtrace: + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc5-58373-gbc99402235f3-dirty #166 + Call Trace: + dump_stack+0xe8/0x164 (unreliable) + __lock_acquire+0x1110/0x1c70 + lock_acquire+0x240/0x290 + cpus_read_lock+0x64/0xf0 + stop_machine+0x2c/0x60 + pseries_lpar_resize_hpt+0x19c/0x2c0 + resize_hpt_for_hotplug+0x70/0xd0 + arch_add_memory+0x58/0xfc + devm_memremap_pages+0x5e8/0x8f0 + pmem_attach_disk+0x764/0x830 + nvdimm_bus_probe+0x118/0x240 + really_probe+0x230/0x4b0 + driver_probe_device+0x16c/0x1e0 + __driver_attach+0x148/0x1b0 + bus_for_each_dev+0x90/0x130 + driver_attach+0x34/0x50 + bus_add_driver+0x1a8/0x360 + driver_register+0x108/0x170 + __nd_driver_register+0xd0/0xf0 + nd_pmem_driver_init+0x34/0x48 + do_one_initcall+0x1e0/0x45c + kernel_init_freeable+0x540/0x64c + kernel_init+0x2c/0x160 + ret_from_kernel_thread+0x5c/0x68 + +Fix this issue by + 1) Requiring all the calls to pseries_lpar_resize_hpt() be made + with cpu_hotplug_lock held. + + 2) In pseries_lpar_resize_hpt() invoke stop_machine_cpuslocked() + as a consequence of 1) + + 3) To satisfy 1), in hpt_order_set(), call mmu_hash_ops.resize_hpt() + with cpu_hotplug_lock held. 
+ +Fixes: dbcf929c0062 ("powerpc/pseries: Add support for hash table resizing") +Cc: stable@vger.kernel.org # v4.11+ +Reported-by: Aneesh Kumar K.V +Signed-off-by: Gautham R. Shenoy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/1557906352-29048-1-git-send-email-ego@linux.vnet.ibm.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/book3s64/hash_utils.c | 9 ++++++++- + arch/powerpc/platforms/pseries/lpar.c | 8 ++++++-- + 2 files changed, 14 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/mm/book3s64/hash_utils.c ++++ b/arch/powerpc/mm/book3s64/hash_utils.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1931,10 +1932,16 @@ static int hpt_order_get(void *data, u64 + + static int hpt_order_set(void *data, u64 val) + { ++ int ret; ++ + if (!mmu_hash_ops.resize_hpt) + return -ENODEV; + +- return mmu_hash_ops.resize_hpt(val); ++ cpus_read_lock(); ++ ret = mmu_hash_ops.resize_hpt(val); ++ cpus_read_unlock(); ++ ++ return ret; + } + + DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n"); +--- a/arch/powerpc/platforms/pseries/lpar.c ++++ b/arch/powerpc/platforms/pseries/lpar.c +@@ -1413,7 +1413,10 @@ static int pseries_lpar_resize_hpt_commi + return 0; + } + +-/* Must be called in user context */ ++/* ++ * Must be called in process context. The caller must hold the ++ * cpus_lock. 
++ */ + static int pseries_lpar_resize_hpt(unsigned long shift) + { + struct hpt_resize_state state = { +@@ -1467,7 +1470,8 @@ static int pseries_lpar_resize_hpt(unsig + + t1 = ktime_get(); + +- rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL); ++ rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit, ++ &state, NULL); + + t2 = ktime_get(); + diff --git a/queue-5.3/powerpc-ptdump-fix-addresses-display-on-ppc32.patch b/queue-5.3/powerpc-ptdump-fix-addresses-display-on-ppc32.patch new file mode 100644 index 00000000000..5eec2c766e0 --- /dev/null +++ b/queue-5.3/powerpc-ptdump-fix-addresses-display-on-ppc32.patch @@ -0,0 +1,38 @@ +From 7c7a532ba3fc51bf9527d191fb410786c1fdc73c Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Wed, 14 Aug 2019 12:36:09 +0000 +Subject: powerpc/ptdump: Fix addresses display on PPC32 + +From: Christophe Leroy + +commit 7c7a532ba3fc51bf9527d191fb410786c1fdc73c upstream. + +Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") +wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a +shift in the displayed addresses. + +Let's revert that change to resync walk_pagetables()'s addr val and +pgd_t pointer for PPC32.
+ +Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") +Cc: stable@vger.kernel.org # v5.2+ +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/eb4d626514e22f85814830012642329018ef6af9.1565786091.git.christophe.leroy@c-s.fr +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/ptdump/ptdump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/powerpc/mm/ptdump/ptdump.c ++++ b/arch/powerpc/mm/ptdump/ptdump.c +@@ -27,7 +27,7 @@ + #include "ptdump.h" + + #ifdef CONFIG_PPC32 +-#define KERN_VIRT_START PAGE_OFFSET ++#define KERN_VIRT_START 0 + #endif + + /* diff --git a/queue-5.3/powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch b/queue-5.3/powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch new file mode 100644 index 00000000000..b521d007903 --- /dev/null +++ b/queue-5.3/powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch @@ -0,0 +1,390 @@ +From da15c03b047dca891d37b9f4ef9ca14d84a6484f Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Tue, 13 Aug 2019 20:06:48 +1000 +Subject: powerpc/xive: Implement get_irqchip_state method for XIVE to fix shutdown race + +From: Paul Mackerras + +commit da15c03b047dca891d37b9f4ef9ca14d84a6484f upstream. + +Testing has revealed the existence of a race condition where a XIVE +interrupt being shut down can be in one of the XIVE interrupt queues +(of which there are up to 8 per CPU, one for each priority) at the +point where free_irq() is called. If this happens, xive_scan_interrupts() +can return an interrupt number which has been shut down. This can lead +to various symptoms: + +- irq_to_desc(irq) can be NULL. In this case, no end-of-interrupt + function gets called, resulting in the CPU's elevated interrupt + priority (numerically lowered CPPR) never getting reset.
That then + means that the CPU stops processing interrupts, causing device + timeouts and other errors in various device drivers. + +- The irq descriptor or related data structures can be in the process + of being freed as the interrupt code is using them. This typically + leads to crashes due to bad pointer dereferences. + +This race is basically what commit 62e0468650c3 ("genirq: Add optional +hardware synchronization for shutdown", 2019-06-28) is intended to +fix, given a get_irqchip_state() method for the interrupt controller +being used. It works by polling the interrupt controller when an +interrupt is being freed until the controller says it is not pending. + +With XIVE, the PQ bits of the interrupt source indicate the state of +the interrupt source, and in particular the P bit goes from 0 to 1 at +the point where the hardware writes an entry into the interrupt queue +that this interrupt is directed towards. Normally, the code will then +process the interrupt and do an end-of-interrupt (EOI) operation which +will reset PQ to 00 (assuming another interrupt hasn't been generated +in the meantime). However, there are situations where the code resets +P even though a queue entry exists (for example, by setting PQ to 01, +which disables the interrupt source), and also situations where the +code leaves P at 1 after removing the queue entry (for example, this +is done for escalation interrupts so they cannot fire again until +they are explicitly re-enabled). + +The code already has a 'saved_p' flag for the interrupt source which +indicates that a queue entry exists, although it isn't maintained +consistently. This patch adds a 'stale_p' flag to indicate that +P has been left at 1 after processing a queue entry, and adds code +to set and clear saved_p and stale_p as necessary to maintain a +consistent indication of whether a queue entry may or may not exist. 
+ +With this, we can implement xive_get_irqchip_state() by looking at +stale_p, saved_p and the ESB PQ bits for the interrupt. + +There is some additional code to handle escalation interrupts +properly; because they are enabled and disabled in KVM assembly code, +which does not have access to the xive_irq_data struct for the +escalation interrupt. Hence, stale_p may be incorrect when the +escalation interrupt is freed in kvmppc_xive_{,native_}cleanup_vcpu(). +Fortunately, we can fix it up by looking at vcpu->arch.xive_esc_on, +with some careful attention to barriers in order to ensure the correct +result if xive_esc_irq() races with kvmppc_xive_cleanup_vcpu(). + +Finally, this adds code to make noise on the console (pr_crit and +WARN_ON(1)) if we find an interrupt queue entry for an interrupt +which does not have a descriptor. While this won't catch the race +reliably, if it does get triggered it will be an indication that +the race is occurring and needs to be debugged. + +Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller") +Cc: stable@vger.kernel.org # v4.12+ +Signed-off-by: Paul Mackerras +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190813100648.GE9567@blackberry +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/xive.h | 8 +++ + arch/powerpc/kvm/book3s_xive.c | 31 ++++++++++++ + arch/powerpc/kvm/book3s_xive.h | 2 + arch/powerpc/kvm/book3s_xive_native.c | 3 + + arch/powerpc/sysdev/xive/common.c | 87 +++++++++++++++++++++++++--------- + 5 files changed, 108 insertions(+), 23 deletions(-) + +--- a/arch/powerpc/include/asm/xive.h ++++ b/arch/powerpc/include/asm/xive.h +@@ -46,7 +46,15 @@ struct xive_irq_data { + + /* Setup/used by frontend */ + int target; ++ /* ++ * saved_p means that there is a queue entry for this interrupt ++ * in some CPU's queue (not including guest vcpu queues), even ++ * if P is not set in the source ESB. 
++ * stale_p means that there is no queue entry for this interrupt ++ * in some CPU's queue, even if P is set in the source ESB. ++ */ + bool saved_p; ++ bool stale_p; + }; + #define XIVE_IRQ_FLAG_STORE_EOI 0x01 + #define XIVE_IRQ_FLAG_LSI 0x02 +--- a/arch/powerpc/kvm/book3s_xive.c ++++ b/arch/powerpc/kvm/book3s_xive.c +@@ -166,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, + */ + vcpu->arch.xive_esc_on = false; + ++ /* This orders xive_esc_on = false vs. subsequent stale_p = true */ ++ smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */ ++ + return IRQ_HANDLED; + } + +@@ -1119,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts + vcpu->arch.xive_esc_raddr = 0; + } + ++/* ++ * In single escalation mode, the escalation interrupt is marked so ++ * that EOI doesn't re-enable it, but just sets the stale_p flag to ++ * indicate that the P bit has already been dealt with. However, the ++ * assembly code that enters the guest sets PQ to 00 without clearing ++ * stale_p (because it has no easy way to address it). Hence we have ++ * to adjust stale_p before shutting down the interrupt. ++ */ ++void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, ++ struct kvmppc_xive_vcpu *xc, int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); ++ ++ /* ++ * This slightly odd sequence gives the right result ++ * (i.e. stale_p set if xive_esc_on is false) even if ++ * we race with xive_esc_irq() and xive_irq_eoi(). 
++ */ ++ xd->stale_p = false; ++ smp_mb(); /* paired with smb_wmb in xive_esc_irq */ ++ if (!vcpu->arch.xive_esc_on) ++ xd->stale_p = true; ++} ++ + void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) + { + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; +@@ -1143,6 +1171,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm + /* Free escalations */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + if (xc->esc_virq[i]) { ++ if (xc->xive->single_escalation) ++ xive_cleanup_single_escalation(vcpu, xc, ++ xc->esc_virq[i]); + free_irq(xc->esc_virq[i], vcpu); + irq_dispose_mapping(xc->esc_virq[i]); + kfree(xc->esc_virq_names[i]); +--- a/arch/powerpc/kvm/book3s_xive.h ++++ b/arch/powerpc/kvm/book3s_xive.h +@@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm + int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, + bool single_escalation); + struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); ++void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, ++ struct kvmppc_xive_vcpu *xc, int irq); + + #endif /* CONFIG_KVM_XICS */ + #endif /* _KVM_PPC_BOOK3S_XICS_H */ +--- a/arch/powerpc/kvm/book3s_xive_native.c ++++ b/arch/powerpc/kvm/book3s_xive_native.c +@@ -71,6 +71,9 @@ void kvmppc_xive_native_cleanup_vcpu(str + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + /* Free the escalation irq */ + if (xc->esc_virq[i]) { ++ if (xc->xive->single_escalation) ++ xive_cleanup_single_escalation(vcpu, xc, ++ xc->esc_virq[i]); + free_irq(xc->esc_virq[i], vcpu); + irq_dispose_mapping(xc->esc_virq[i]); + kfree(xc->esc_virq_names[i]); +--- a/arch/powerpc/sysdev/xive/common.c ++++ b/arch/powerpc/sysdev/xive/common.c +@@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q + static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) + { + u32 irq = 0; +- u8 prio; ++ u8 prio = 0; + + /* Find highest pending priority */ + while (xc->pending_prio != 0) { +@@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct x + irq = 
xive_read_eq(&xc->queue[prio], just_peek); + + /* Found something ? That's it */ +- if (irq) +- break; ++ if (irq) { ++ if (just_peek || irq_to_desc(irq)) ++ break; ++ /* ++ * We should never get here; if we do then we must ++ * have failed to synchronize the interrupt properly ++ * when shutting it down. ++ */ ++ pr_crit("xive: got interrupt %d without descriptor, dropping\n", ++ irq); ++ WARN_ON(1); ++ continue; ++ } + + /* Clear pending bits */ + xc->pending_prio &= ~(1 << prio); +@@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xiv + */ + static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) + { ++ xd->stale_p = false; + /* If the XIVE supports the new "store EOI facility, use it */ + if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); +@@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_ir + } + } + +-/* irq_chip eoi callback */ ++/* irq_chip eoi callback, called with irq descriptor lock held */ + static void xive_irq_eoi(struct irq_data *d) + { + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); +@@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data + if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && + !(xd->flags & XIVE_IRQ_NO_EOI)) + xive_do_source_eoi(irqd_to_hwirq(d), xd); ++ else ++ xd->stale_p = true; + + /* + * Clear saved_p to indicate that it's no longer occupying +@@ -397,11 +411,16 @@ static void xive_do_source_set_mask(stru + */ + if (mask) { + val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01); +- xd->saved_p = !!(val & XIVE_ESB_VAL_P); +- } else if (xd->saved_p) ++ if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P)) ++ xd->saved_p = true; ++ xd->stale_p = false; ++ } else if (xd->saved_p) { + xive_esb_read(xd, XIVE_ESB_SET_PQ_10); +- else ++ xd->saved_p = false; ++ } else { + xive_esb_read(xd, XIVE_ESB_SET_PQ_00); ++ xd->stale_p = false; ++ } + } + + /* +@@ -541,6 +560,8 @@ static unsigned int xive_irq_startup(str + unsigned int hw_irq = (unsigned 
int)irqd_to_hwirq(d); + int target, rc; + ++ xd->saved_p = false; ++ xd->stale_p = false; + pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", + d->irq, hw_irq, d); + +@@ -587,6 +608,7 @@ static unsigned int xive_irq_startup(str + return 0; + } + ++/* called with irq descriptor lock held */ + static void xive_irq_shutdown(struct irq_data *d) + { + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); +@@ -602,16 +624,6 @@ static void xive_irq_shutdown(struct irq + xive_do_source_set_mask(xd, true); + + /* +- * The above may have set saved_p. We clear it otherwise it +- * will prevent re-enabling later on. It is ok to forget the +- * fact that the interrupt might be in a queue because we are +- * accounting that already in xive_dec_target_count() and will +- * be re-routing it to a new queue with proper accounting when +- * it's started up again +- */ +- xd->saved_p = false; +- +- /* + * Mask the interrupt in HW in the IVT/EAS and set the number + * to be the "bad" IRQ number + */ +@@ -797,6 +809,10 @@ static int xive_irq_retrigger(struct irq + return 1; + } + ++/* ++ * Caller holds the irq descriptor lock, so this won't be called ++ * concurrently with xive_get_irqchip_state on the same interrupt. ++ */ + static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) + { + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); +@@ -820,6 +836,10 @@ static int xive_irq_set_vcpu_affinity(st + + /* Set it to PQ=10 state to prevent further sends */ + pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10); ++ if (!xd->stale_p) { ++ xd->saved_p = !!(pq & XIVE_ESB_VAL_P); ++ xd->stale_p = !xd->saved_p; ++ } + + /* No target ? 
nothing to do */ + if (xd->target == XIVE_INVALID_TARGET) { +@@ -827,7 +847,7 @@ static int xive_irq_set_vcpu_affinity(st + * An untargetted interrupt should have been + * also masked at the source + */ +- WARN_ON(pq & 2); ++ WARN_ON(xd->saved_p); + + return 0; + } +@@ -847,9 +867,8 @@ static int xive_irq_set_vcpu_affinity(st + * This saved_p is cleared by the host EOI, when we know + * for sure the queue slot is no longer in use. + */ +- if (pq & 2) { +- pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11); +- xd->saved_p = true; ++ if (xd->saved_p) { ++ xive_esb_read(xd, XIVE_ESB_SET_PQ_11); + + /* + * Sync the XIVE source HW to ensure the interrupt +@@ -862,8 +881,7 @@ static int xive_irq_set_vcpu_affinity(st + */ + if (xive_ops->sync_source) + xive_ops->sync_source(hw_irq); +- } else +- xd->saved_p = false; ++ } + } else { + irqd_clr_forwarded_to_vcpu(d); + +@@ -914,6 +932,23 @@ static int xive_irq_set_vcpu_affinity(st + return 0; + } + ++/* Called with irq descriptor lock held. */ ++static int xive_get_irqchip_state(struct irq_data *data, ++ enum irqchip_irq_state which, bool *state) ++{ ++ struct xive_irq_data *xd = irq_data_get_irq_handler_data(data); ++ ++ switch (which) { ++ case IRQCHIP_STATE_ACTIVE: ++ *state = !xd->stale_p && ++ (xd->saved_p || ++ !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P)); ++ return 0; ++ default: ++ return -EINVAL; ++ } ++} ++ + static struct irq_chip xive_irq_chip = { + .name = "XIVE-IRQ", + .irq_startup = xive_irq_startup, +@@ -925,6 +960,7 @@ static struct irq_chip xive_irq_chip = { + .irq_set_type = xive_irq_set_type, + .irq_retrigger = xive_irq_retrigger, + .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity, ++ .irq_get_irqchip_state = xive_get_irqchip_state, + }; + + bool is_xive_irq(struct irq_chip *chip) +@@ -1338,6 +1374,11 @@ static void xive_flush_cpu_queue(unsigne + xd = irq_desc_get_handler_data(desc); + + /* ++ * Clear saved_p to indicate that it's no longer pending ++ */ ++ xd->saved_p = false; ++ ++ /* + * For LSIs, 
we EOI, this will cause a resend if it's + * still asserted. Otherwise do an MSI retrigger. + */ diff --git a/queue-5.3/revert-s390-dasd-add-discard-support-for-ese-volumes.patch b/queue-5.3/revert-s390-dasd-add-discard-support-for-ese-volumes.patch new file mode 100644 index 00000000000..9b9849395ed --- /dev/null +++ b/queue-5.3/revert-s390-dasd-add-discard-support-for-ese-volumes.patch @@ -0,0 +1,139 @@ +From 964ce509e2ded52c1a61ad86044cc4d70abd9eb8 Mon Sep 17 00:00:00 2001 +From: Stefan Haberland +Date: Tue, 1 Oct 2019 17:34:39 +0200 +Subject: Revert "s390/dasd: Add discard support for ESE volumes" + +From: Stefan Haberland + +commit 964ce509e2ded52c1a61ad86044cc4d70abd9eb8 upstream. + +This reverts commit 7e64db1597fe114b83fe17d0ba96c6aa5fca419a. + +The thin provisioning feature introduces an IOCTL and the discard support +to allow userspace tools and filesystems to release unused and previously +allocated space respectively. + +During some internal performance improvements and further tests, the +release of allocated space revealed some issues that may lead to data +corruption in some configurations when filesystems are mounted with +discard support enabled. + +While we're working on a fix and trying to clarify the situation, +this commit reverts the discard support for ESE volumes to prevent +potential data corruption. 
+ +Cc: stable@vger.kernel.org # 5.3 +Signed-off-by: Stefan Haberland +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/block/dasd_eckd.c | 57 ++--------------------------------------- + 1 file changed, 3 insertions(+), 54 deletions(-) + +--- a/drivers/s390/block/dasd_eckd.c ++++ b/drivers/s390/block/dasd_eckd.c +@@ -2055,9 +2055,6 @@ dasd_eckd_check_characteristics(struct d + if (readonly) + set_bit(DASD_FLAG_DEVICE_RO, &device->flags); + +- if (dasd_eckd_is_ese(device)) +- dasd_set_feature(device->cdev, DASD_FEATURE_DISCARD, 1); +- + dev_info(&device->cdev->dev, "New DASD %04X/%02X (CU %04X/%02X) " + "with %d cylinders, %d heads, %d sectors%s\n", + private->rdc_data.dev_type, +@@ -3691,14 +3688,6 @@ static int dasd_eckd_release_space(struc + return -EINVAL; + } + +-static struct dasd_ccw_req * +-dasd_eckd_build_cp_discard(struct dasd_device *device, struct dasd_block *block, +- struct request *req, sector_t first_trk, +- sector_t last_trk) +-{ +- return dasd_eckd_dso_ras(device, block, req, first_trk, last_trk, 1); +-} +- + static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( + struct dasd_device *startdev, + struct dasd_block *block, +@@ -4443,10 +4432,6 @@ static struct dasd_ccw_req *dasd_eckd_bu + cmdwtd = private->features.feature[12] & 0x40; + use_prefix = private->features.feature[8] & 0x01; + +- if (req_op(req) == REQ_OP_DISCARD) +- return dasd_eckd_build_cp_discard(startdev, block, req, +- first_trk, last_trk); +- + cqr = NULL; + if (cdlspecial || dasd_page_cache) { + /* do nothing, just fall through to the cmd mode single case */ +@@ -4725,14 +4710,12 @@ static struct dasd_ccw_req *dasd_eckd_bu + struct dasd_block *block, + struct request *req) + { +- struct dasd_device *startdev = NULL; + struct dasd_eckd_private *private; +- struct dasd_ccw_req *cqr; ++ struct dasd_device *startdev; + unsigned long flags; ++ struct dasd_ccw_req *cqr; + +- /* Discard requests can only be processed on base devices */ +- if (req_op(req) != REQ_OP_DISCARD)
+- startdev = dasd_alias_get_start_dev(base); ++ startdev = dasd_alias_get_start_dev(base); + if (!startdev) + startdev = base; + private = startdev->private; +@@ -6513,20 +6496,8 @@ static void dasd_eckd_setup_blk_queue(st + unsigned int logical_block_size = block->bp_block; + struct request_queue *q = block->request_queue; + struct dasd_device *device = block->base; +- struct dasd_eckd_private *private; +- unsigned int max_discard_sectors; +- unsigned int max_bytes; +- unsigned int ext_bytes; /* Extent Size in Bytes */ +- int recs_per_trk; +- int trks_per_cyl; +- int ext_limit; +- int ext_size; /* Extent Size in Cylinders */ + int max; + +- private = device->private; +- trks_per_cyl = private->rdc_data.trk_per_cyl; +- recs_per_trk = recs_per_track(&private->rdc_data, 0, logical_block_size); +- + if (device->features & DASD_FEATURE_USERAW) { + /* + * the max_blocks value for raw_track access is 256 +@@ -6547,28 +6518,6 @@ static void dasd_eckd_setup_blk_queue(st + /* With page sized segments each segment can be translated into one idaw/tidaw */ + blk_queue_max_segment_size(q, PAGE_SIZE); + blk_queue_segment_boundary(q, PAGE_SIZE - 1); +- +- if (dasd_eckd_is_ese(device)) { +- /* +- * Depending on the extent size, up to UINT_MAX bytes can be +- * accepted. However, neither DASD_ECKD_RAS_EXTS_MAX nor the +- * device limits should be exceeded. 
+- */ +- ext_size = dasd_eckd_ext_size(device); +- ext_limit = min(private->real_cyl / ext_size, DASD_ECKD_RAS_EXTS_MAX); +- ext_bytes = ext_size * trks_per_cyl * recs_per_trk * +- logical_block_size; +- max_bytes = UINT_MAX - (UINT_MAX % ext_bytes); +- if (max_bytes / ext_bytes > ext_limit) +- max_bytes = ext_bytes * ext_limit; +- +- max_discard_sectors = max_bytes / 512; +- +- blk_queue_max_discard_sectors(q, max_discard_sectors); +- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); +- q->limits.discard_granularity = ext_bytes; +- q->limits.discard_alignment = ext_bytes; +- } + } + + static struct ccw_driver dasd_eckd_driver = { diff --git a/queue-5.3/s390-cio-avoid-calling-strlen-on-null-pointer.patch b/queue-5.3/s390-cio-avoid-calling-strlen-on-null-pointer.patch new file mode 100644 index 00000000000..e8c44130814 --- /dev/null +++ b/queue-5.3/s390-cio-avoid-calling-strlen-on-null-pointer.patch @@ -0,0 +1,55 @@ +From ea298e6ee8b34b3ed4366be7eb799d0650ebe555 Mon Sep 17 00:00:00 2001 +From: Vasily Gorbik +Date: Tue, 17 Sep 2019 20:04:04 +0200 +Subject: s390/cio: avoid calling strlen on null pointer + +From: Vasily Gorbik + +commit ea298e6ee8b34b3ed4366be7eb799d0650ebe555 upstream. 
+ +Fix the following kasan finding: +BUG: KASAN: global-out-of-bounds in ccwgroup_create_dev+0x850/0x1140 +Read of size 1 at addr 0000000000000000 by task systemd-udevd.r/561 + +CPU: 30 PID: 561 Comm: systemd-udevd.r Tainted: G B +Hardware name: IBM 3906 M04 704 (LPAR) +Call Trace: +([<0000000231b3db7e>] show_stack+0x14e/0x1a8) + [<0000000233826410>] dump_stack+0x1d0/0x218 + [<000000023216fac4>] print_address_description+0x64/0x380 + [<000000023216f5a8>] __kasan_report+0x138/0x168 + [<00000002331b8378>] ccwgroup_create_dev+0x850/0x1140 + [<00000002332b618a>] group_store+0x3a/0x50 + [<00000002323ac706>] kernfs_fop_write+0x246/0x3b8 + [<00000002321d409a>] vfs_write+0x132/0x450 + [<00000002321d47da>] ksys_write+0x122/0x208 + [<0000000233877102>] system_call+0x2a6/0x2c8 + +Triggered by: +openat(AT_FDCWD, "/sys/bus/ccwgroup/drivers/qeth/group", + O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = 16 +write(16, "0.0.bd00,0.0.bd01,0.0.bd02", 26) = 26 + +The problem is that __get_next_id in ccwgroup_create_dev might set "buf" +buffer pointer to NULL and explicit check for that is required. + +Cc: stable@vger.kernel.org +Reviewed-by: Sebastian Ott +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/cio/ccwgroup.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/s390/cio/ccwgroup.c ++++ b/drivers/s390/cio/ccwgroup.c +@@ -372,7 +372,7 @@ int ccwgroup_create_dev(struct device *p + goto error; + } + /* Check for trailing stuff. 
*/ +- if (i == num_devices && strlen(buf) > 0) { ++ if (i == num_devices && buf && strlen(buf) > 0) { + rc = -EINVAL; + goto error; + } diff --git a/queue-5.3/s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch b/queue-5.3/s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch new file mode 100644 index 00000000000..f1ae804bae2 --- /dev/null +++ b/queue-5.3/s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch @@ -0,0 +1,54 @@ +From ab5758848039de9a4b249d46e4ab591197eebaf2 Mon Sep 17 00:00:00 2001 +From: Vasily Gorbik +Date: Thu, 19 Sep 2019 15:55:17 +0200 +Subject: s390/cio: exclude subchannels with no parent from pseudo check + +From: Vasily Gorbik + +commit ab5758848039de9a4b249d46e4ab591197eebaf2 upstream. + +ccw console is created early in start_kernel and used before css is +initialized or ccw console subchannel is registered. Until then console +subchannel does not have a parent. For that reason assume subchannels +with no parent are not pseudo subchannels. 
This fixes the following +kasan finding: + +BUG: KASAN: global-out-of-bounds in sch_is_pseudo_sch+0x8e/0x98 +Read of size 8 at addr 00000000000005e8 by task swapper/0/0 + +CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-rc8-07370-g6ac43dd12538 #2 +Hardware name: IBM 2964 NC9 702 (z/VM 6.4.0) +Call Trace: +([<000000000012cd76>] show_stack+0x14e/0x1e0) + [<0000000001f7fb44>] dump_stack+0x1a4/0x1f8 + [<00000000007d7afc>] print_address_description+0x64/0x3c8 + [<00000000007d75f6>] __kasan_report+0x14e/0x180 + [<00000000018a2986>] sch_is_pseudo_sch+0x8e/0x98 + [<000000000189b950>] cio_enable_subchannel+0x1d0/0x510 + [<00000000018cac7c>] ccw_device_recognition+0x12c/0x188 + [<0000000002ceb1a8>] ccw_device_enable_console+0x138/0x340 + [<0000000002cf1cbe>] con3215_init+0x25e/0x300 + [<0000000002c8770a>] console_init+0x68a/0x9b8 + [<0000000002c6a3d6>] start_kernel+0x4fe/0x728 + [<0000000000100070>] startup_continue+0x70/0xd0 + +Cc: stable@vger.kernel.org +Reviewed-by: Sebastian Ott +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/cio/css.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/s390/cio/css.c ++++ b/drivers/s390/cio/css.c +@@ -1388,6 +1388,8 @@ device_initcall(cio_settle_init); + + int sch_is_pseudo_sch(struct subchannel *sch) + { ++ if (!sch->dev.parent) ++ return 0; + return sch == to_css(sch->dev.parent)->pseudo_subchannel; + } + diff --git a/queue-5.3/s390-dasd-fix-error-handling-during-online-processing.patch b/queue-5.3/s390-dasd-fix-error-handling-during-online-processing.patch new file mode 100644 index 00000000000..a12e1b1410f --- /dev/null +++ b/queue-5.3/s390-dasd-fix-error-handling-during-online-processing.patch @@ -0,0 +1,94 @@ +From dd45483981ac62f432e073fea6e5e11200b9070d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20H=C3=B6ppner?= +Date: Tue, 1 Oct 2019 17:34:38 +0200 +Subject: s390/dasd: Fix error handling during online processing +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +From: Jan Höppner + +commit dd45483981ac62f432e073fea6e5e11200b9070d upstream. + +It is possible that the CCW commands for reading volume and extent pool +information are not supported, either by the storage server (for +dedicated DASDs) or by z/VM (for virtual devices, such as MDISKs). + +As a command reject will occur in such a case, the current error +handling leads to a failing online processing and thus the DASD can't be +used at all. + +Since the data being read is not essential for a fully operational +DASD, the error handling can be removed. Information about the failing +command is sent to the s390dbf debug feature. + +Fixes: c729696bcf8b ("s390/dasd: Recognise data for ESE volumes") +Cc: # 5.3 +Reported-by: Frank Heimes +Signed-off-by: Jan Höppner +Signed-off-by: Stefan Haberland +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/block/dasd_eckd.c | 24 ++++++++---------------- + 1 file changed, 8 insertions(+), 16 deletions(-) + +--- a/drivers/s390/block/dasd_eckd.c ++++ b/drivers/s390/block/dasd_eckd.c +@@ -1553,8 +1553,8 @@ static int dasd_eckd_read_vol_info(struc + if (rc == 0) { + memcpy(&private->vsq, vsq, sizeof(*vsq)); + } else { +- dev_warn(&device->cdev->dev, +- "Reading the volume storage information failed with rc=%d\n", rc); ++ DBF_EVENT_DEVID(DBF_WARNING, device->cdev, ++ "Reading the volume storage information failed with rc=%d", rc); + } + + if (useglobal) +@@ -1737,8 +1737,8 @@ static int dasd_eckd_read_ext_pool_info( + if (rc == 0) { + dasd_eckd_cpy_ext_pool_data(device, lcq); + } else { +- dev_warn(&device->cdev->dev, +- "Reading the logical configuration failed with rc=%d\n", rc); ++ DBF_EVENT_DEVID(DBF_WARNING, device->cdev, ++ "Reading the logical configuration failed with rc=%d", rc); + } + + dasd_sfree_request(cqr, cqr->memdev); +@@ -2020,14 +2020,10 @@ dasd_eckd_check_characteristics(struct d + dasd_eckd_read_features(device); + + /* Read Volume Information */ +- 
rc = dasd_eckd_read_vol_info(device); +- if (rc) +- goto out_err3; ++ dasd_eckd_read_vol_info(device); + + /* Read Extent Pool Information */ +- rc = dasd_eckd_read_ext_pool_info(device); +- if (rc) +- goto out_err3; ++ dasd_eckd_read_ext_pool_info(device); + + /* Read Device Characteristics */ + rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, +@@ -5663,14 +5659,10 @@ static int dasd_eckd_restore_device(stru + dasd_eckd_read_features(device); + + /* Read Volume Information */ +- rc = dasd_eckd_read_vol_info(device); +- if (rc) +- goto out_err2; ++ dasd_eckd_read_vol_info(device); + + /* Read Extent Pool Information */ +- rc = dasd_eckd_read_ext_pool_info(device); +- if (rc) +- goto out_err2; ++ dasd_eckd_read_ext_pool_info(device); + + /* Read Device Characteristics */ + rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, diff --git a/queue-5.3/s390-process-avoid-potential-reading-of-freed-stack.patch b/queue-5.3/s390-process-avoid-potential-reading-of-freed-stack.patch new file mode 100644 index 00000000000..72a4a9721ee --- /dev/null +++ b/queue-5.3/s390-process-avoid-potential-reading-of-freed-stack.patch @@ -0,0 +1,62 @@ +From 8769f610fe6d473e5e8e221709c3ac402037da6c Mon Sep 17 00:00:00 2001 +From: Vasily Gorbik +Date: Tue, 13 Aug 2019 20:11:08 +0200 +Subject: s390/process: avoid potential reading of freed stack + +From: Vasily Gorbik + +commit 8769f610fe6d473e5e8e221709c3ac402037da6c upstream. + +With THREAD_INFO_IN_TASK (which is selected on s390) task's stack usage +is refcounted and should always be protected by get/put when touching +other task's stack to avoid race conditions with task's destruction code. 
+ +Fixes: d5c352cdd022 ("s390: move thread_info into task_struct") +Cc: stable@vger.kernel.org # v4.10+ +Acked-by: Ilya Leoshkevich +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kernel/process.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +--- a/arch/s390/kernel/process.c ++++ b/arch/s390/kernel/process.c +@@ -184,20 +184,30 @@ unsigned long get_wchan(struct task_stru + + if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p)) + return 0; ++ ++ if (!try_get_task_stack(p)) ++ return 0; ++ + low = task_stack_page(p); + high = (struct stack_frame *) task_pt_regs(p); + sf = (struct stack_frame *) p->thread.ksp; +- if (sf <= low || sf > high) +- return 0; ++ if (sf <= low || sf > high) { ++ return_address = 0; ++ goto out; ++ } + for (count = 0; count < 16; count++) { + sf = (struct stack_frame *) sf->back_chain; +- if (sf <= low || sf > high) +- return 0; ++ if (sf <= low || sf > high) { ++ return_address = 0; ++ goto out; ++ } + return_address = sf->gprs[8]; + if (!in_sched_functions(return_address)) +- return return_address; ++ goto out; + } +- return 0; ++out: ++ put_task_stack(p); ++ return return_address; + } + + unsigned long arch_align_stack(unsigned long sp) diff --git a/queue-5.3/s390-sclp-fix-bit-checked-for-has_sipl.patch b/queue-5.3/s390-sclp-fix-bit-checked-for-has_sipl.patch new file mode 100644 index 00000000000..578674c6802 --- /dev/null +++ b/queue-5.3/s390-sclp-fix-bit-checked-for-has_sipl.patch @@ -0,0 +1,31 @@ +From 4df9a82549cfed5b52da21e7d007b79b2ea1769a Mon Sep 17 00:00:00 2001 +From: Philipp Rudo +Date: Thu, 29 Aug 2019 15:38:37 +0200 +Subject: s390/sclp: Fix bit checked for has_sipl + +From: Philipp Rudo + +commit 4df9a82549cfed5b52da21e7d007b79b2ea1769a upstream. 
+ +Fixes: c9896acc7851 ("s390/ipl: Provide has_secure sysfs attribute") +Cc: stable@vger.kernel.org # 5.2+ +Reviewed-by: Heiko Carstens +Signed-off-by: Philipp Rudo +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/char/sclp_early.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/s390/char/sclp_early.c ++++ b/drivers/s390/char/sclp_early.c +@@ -40,7 +40,7 @@ static void __init sclp_early_facilities + sclp.has_gisaf = !!(sccb->fac118 & 0x08); + sclp.has_hvs = !!(sccb->fac119 & 0x80); + sclp.has_kss = !!(sccb->fac98 & 0x01); +- sclp.has_sipl = !!(sccb->cbl & 0x02); ++ sclp.has_sipl = !!(sccb->cbl & 0x4000); + if (sccb->fac85 & 0x02) + S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; + if (sccb->fac91 & 0x40) diff --git a/queue-5.3/s390-topology-avoid-firing-events-before-kobjs-are-created.patch b/queue-5.3/s390-topology-avoid-firing-events-before-kobjs-are-created.patch new file mode 100644 index 00000000000..032e25fe5d1 --- /dev/null +++ b/queue-5.3/s390-topology-avoid-firing-events-before-kobjs-are-created.patch @@ -0,0 +1,61 @@ +From f3122a79a1b0a113d3aea748e0ec26f2cb2889de Mon Sep 17 00:00:00 2001 +From: Vasily Gorbik +Date: Tue, 17 Sep 2019 22:59:03 +0200 +Subject: s390/topology: avoid firing events before kobjs are created + +From: Vasily Gorbik + +commit f3122a79a1b0a113d3aea748e0ec26f2cb2889de upstream. + +arch_update_cpu_topology is first called from: +kernel_init_freeable->sched_init_smp->sched_init_domains + +even before cpus has been registered in: +kernel_init_freeable->do_one_initcall->s390_smp_init + +Do not trigger kobject_uevent change events until cpu devices are +actually created. 
Fixes the following kasan findings: + +BUG: KASAN: global-out-of-bounds in kobject_uevent_env+0xb40/0xee0 +Read of size 8 at addr 0000000000000020 by task swapper/0/1 + +BUG: KASAN: global-out-of-bounds in kobject_uevent_env+0xb36/0xee0 +Read of size 8 at addr 0000000000000018 by task swapper/0/1 + +CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B +Hardware name: IBM 3906 M04 704 (LPAR) +Call Trace: +([<0000000143c6db7e>] show_stack+0x14e/0x1a8) + [<0000000145956498>] dump_stack+0x1d0/0x218 + [<000000014429fb4c>] print_address_description+0x64/0x380 + [<000000014429f630>] __kasan_report+0x138/0x168 + [<0000000145960b96>] kobject_uevent_env+0xb36/0xee0 + [<0000000143c7c47c>] arch_update_cpu_topology+0x104/0x108 + [<0000000143df9e22>] sched_init_domains+0x62/0xe8 + [<000000014644c94a>] sched_init_smp+0x3a/0xc0 + [<0000000146433a20>] kernel_init_freeable+0x558/0x958 + [<000000014599002a>] kernel_init+0x22/0x160 + [<00000001459a71d4>] ret_from_fork+0x28/0x30 + [<00000001459a71dc>] kernel_thread_starter+0x0/0x10 + +Cc: stable@vger.kernel.org +Reviewed-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kernel/topology.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/s390/kernel/topology.c ++++ b/arch/s390/kernel/topology.c +@@ -311,7 +311,8 @@ int arch_update_cpu_topology(void) + on_each_cpu(__arch_update_dedicated_flag, NULL, 0); + for_each_online_cpu(cpu) { + dev = get_cpu_device(cpu); +- kobject_uevent(&dev->kobj, KOBJ_CHANGE); ++ if (dev) ++ kobject_uevent(&dev->kobj, KOBJ_CHANGE); + } + return rc; + } diff --git a/queue-5.3/series b/queue-5.3/series new file mode 100644 index 00000000000..b1c703d20e5 --- /dev/null +++ b/queue-5.3/series @@ -0,0 +1,52 @@ +s390-process-avoid-potential-reading-of-freed-stack.patch +s390-sclp-fix-bit-checked-for-has_sipl.patch +kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch 
+s390-topology-avoid-firing-events-before-kobjs-are-created.patch +s390-cio-avoid-calling-strlen-on-null-pointer.patch +s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch +s390-dasd-fix-error-handling-during-online-processing.patch +revert-s390-dasd-add-discard-support-for-ese-volumes.patch +kvm-s390-fix-__insn32_query-inline-assembly.patch +kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch +kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch +kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch +kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch +kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch +kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch +kvm-x86-fix-userspace-set-invalid-cr4.patch +nbd-fix-max-number-of-supported-devs.patch +pm-devfreq-tegra-fix-khz-to-hz-conversion.patch +asoc-define-a-set-of-dapm-pre-post-up-events.patch +asoc-sgtl5000-improve-vag-power-and-mute-control.patch +powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch +powerpc-mce-fix-mce-handling-for-huge-pages.patch +powerpc-mce-schedule-work-from-irq_work.patch +powerpc-603-fix-handling-of-the-dirty-flag.patch +powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch +powerpc-ptdump-fix-addresses-display-on-ppc32.patch +powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch +powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch +powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch +powerpc-kasan-fix-parallel-loading-of-modules.patch +powerpc-kasan-fix-shadow-area-set-up-for-modules.patch +powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch +powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch +powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch 
+powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch +powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch +can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch +tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch +tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch +crypto-qat-silence-smp_processor_id-warning.patch +crypto-skcipher-unmap-pages-after-an-external-error.patch +crypto-cavium-zip-add-missing-single_release.patch +crypto-caam-qi-fix-error-handling-in-ern-handler.patch +crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch +crypto-ccree-account-for-tee-not-ready-to-report.patch +crypto-ccree-use-the-full-crypt-length-value.patch +mips-treat-loongson-extensions-as-ases.patch +power-supply-sbs-battery-use-correct-flags-field.patch +power-supply-sbs-battery-only-return-health-when-battery-present.patch +tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch +usercopy-avoid-highmem-pfn-warning.patch +timer-read-jiffies-once-when-forwarding-base-clk.patch diff --git a/queue-5.3/timer-read-jiffies-once-when-forwarding-base-clk.patch b/queue-5.3/timer-read-jiffies-once-when-forwarding-base-clk.patch new file mode 100644 index 00000000000..86e1756f30c --- /dev/null +++ b/queue-5.3/timer-read-jiffies-once-when-forwarding-base-clk.patch @@ -0,0 +1,78 @@ +From e430d802d6a3aaf61bd3ed03d9404888a29b9bf9 Mon Sep 17 00:00:00 2001 +From: Li RongQing +Date: Thu, 19 Sep 2019 20:04:47 +0800 +Subject: timer: Read jiffies once when forwarding base clk + +From: Li RongQing + +commit e430d802d6a3aaf61bd3ed03d9404888a29b9bf9 upstream. + +The timer delayed for more than 3 seconds warning was triggered during +testing. + + Workqueue: events_unbound sched_tick_remote + RIP: 0010:sched_tick_remote+0xee/0x100 + ... 
+ Call Trace: + process_one_work+0x18c/0x3a0 + worker_thread+0x30/0x380 + kthread+0x113/0x130 + ret_from_fork+0x22/0x40 + +The reason is that the code in collect_expired_timers() uses jiffies +unprotected: + + if (next_event > jiffies) + base->clk = jiffies; + +As the compiler is allowed to reload the value base->clk can advance +between the check and the store and in the worst case advance farther than +next event. That causes the timer expiry to be delayed until the wheel +pointer wraps around. + +Convert the code to use READ_ONCE() + +Fixes: 236968383cf5 ("timers: Optimize collect_expired_timers() for NOHZ") +Signed-off-by: Li RongQing +Signed-off-by: Liang ZhiCheng +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1568894687-14499-1-git-send-email-lirongqing@baidu.com +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/timer.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1593,24 +1593,26 @@ void timer_clear_idle(void) + static int collect_expired_timers(struct timer_base *base, + struct hlist_head *heads) + { ++ unsigned long now = READ_ONCE(jiffies); ++ + /* + * NOHZ optimization. After a long idle sleep we need to forward the + * base to current jiffies. Avoid a loop by searching the bitfield for + * the next expiring timer. + */ +- if ((long)(jiffies - base->clk) > 2) { ++ if ((long)(now - base->clk) > 2) { + unsigned long next = __next_timer_interrupt(base); + + /* + * If the next timer is ahead of time forward to current + * jiffies, otherwise forward to the next expiry time: + */ +- if (time_after(next, jiffies)) { ++ if (time_after(next, now)) { + /* + * The call site will increment base->clk and then + * terminate the expiry loop immediately. 
+ */ +- base->clk = jiffies; ++ base->clk = now; + return 0; + } + base->clk = next; diff --git a/queue-5.3/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch b/queue-5.3/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch new file mode 100644 index 00000000000..47728a03d8e --- /dev/null +++ b/queue-5.3/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch @@ -0,0 +1,52 @@ +From b0215e2d6a18d8331b2d4a8b38ccf3eff783edb1 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 28 Aug 2019 15:05:28 -0400 +Subject: tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on failure + +From: Steven Rostedt (VMware) + +commit b0215e2d6a18d8331b2d4a8b38ccf3eff783edb1 upstream. + +If the re-allocation of tep->cmdlines succeeds, then the previous +allocation of tep->cmdlines will be freed. If we later fail in +add_new_comm(), we must not free cmdlines, and also should assign +tep->cmdlines to the new allocation. Otherwise when freeing tep, the +tep->cmdlines will be pointing to garbage. 
+ +Fixes: a6d2a61ac653a ("tools lib traceevent: Remove some die() calls") +Signed-off-by: Steven Rostedt (VMware) +Cc: Andrew Morton +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: linux-trace-devel@vger.kernel.org +Cc: stable@vger.kernel.org +Link: http://lkml.kernel.org/r/20190828191819.970121417@goodmis.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/lib/traceevent/event-parse.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/tools/lib/traceevent/event-parse.c ++++ b/tools/lib/traceevent/event-parse.c +@@ -269,10 +269,10 @@ static int add_new_comm(struct tep_handl + errno = ENOMEM; + return -1; + } ++ tep->cmdlines = cmdlines; + + cmdlines[tep->cmdline_count].comm = strdup(comm); + if (!cmdlines[tep->cmdline_count].comm) { +- free(cmdlines); + errno = ENOMEM; + return -1; + } +@@ -283,7 +283,6 @@ static int add_new_comm(struct tep_handl + tep->cmdline_count++; + + qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); +- tep->cmdlines = cmdlines; + + return 0; + } diff --git a/queue-5.3/tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch b/queue-5.3/tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch new file mode 100644 index 00000000000..65f18a53b93 --- /dev/null +++ b/queue-5.3/tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch @@ -0,0 +1,55 @@ +From 82a2f88458d70704be843961e10b5cef9a6e95d3 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Mon, 5 Aug 2019 13:01:50 -0400 +Subject: tools lib traceevent: Fix "robust" test of do_generate_dynamic_list_file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Steven Rostedt (VMware) + +commit 82a2f88458d70704be843961e10b5cef9a6e95d3 upstream. + +The tools/lib/traceevent/Makefile had a test added to it to detect a failure +of the "nm" when making the dynamic list file (whatever that is). 
The +problem is that the test sorts the values "U W w" and some versions of sort +will place "w" ahead of "W" (even though it has a higher ASCII value), and +break the test. + +Add 'tr "w" "W"' to merge the two and not worry about the ordering. + +Reported-by: Tzvetomir Stoyanov +Signed-off-by: Steven Rostedt (VMware) +Cc: Alexander Shishkin +Cc: David Carrillo-Cisneros +Cc: He Kuang +Cc: Jiri Olsa +Cc: Michal rarek +Cc: Paul Turner +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Uwe Kleine-König +Cc: Wang Nan +Cc: stable@vger.kernel.org +Fixes: 6467753d61399 ("tools lib traceevent: Robustify do_generate_dynamic_list_file") +Link: http://lkml.kernel.org/r/20190805130150.25acfeb1@gandalf.local.home +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/lib/traceevent/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/lib/traceevent/Makefile ++++ b/tools/lib/traceevent/Makefile +@@ -266,8 +266,8 @@ endef + + define do_generate_dynamic_list_file + symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ +- xargs echo "U W w" | tr ' ' '\n' | sort -u | xargs echo`;\ +- if [ "$$symbol_type" = "U W w" ];then \ ++ xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ ++ if [ "$$symbol_type" = "U W" ];then \ + (echo '{'; \ + $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\ + echo '};'; \ diff --git a/queue-5.3/tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch b/queue-5.3/tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch new file mode 100644 index 00000000000..f1dc504f104 --- /dev/null +++ b/queue-5.3/tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch @@ -0,0 +1,97 @@ +From 17f8607a1658a8e70415eef67909f990d13017b5 Mon Sep 17 00:00:00 2001 +From: Tom Zanussi +Date: Sun, 1 Sep 2019 17:02:01 -0500 +Subject: tracing: Make sure variable reference alias has correct var_ref_idx + +From: Tom Zanussi + +commit 
17f8607a1658a8e70415eef67909f990d13017b5 upstream. + +Original changelog from Steve Rostedt (except last sentence which +explains the problem, and the Fixes: tag): + +I performed a three way histogram with the following commands: + +echo 'irq_lat u64 lat pid_t pid' > synthetic_events +echo 'wake_lat u64 lat u64 irqlat pid_t pid' >> synthetic_events +echo 'hist:keys=common_pid:irqts=common_timestamp.usecs if function == 0xffffffff81200580' > events/timer/hrtimer_start/trigger +echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$irqts:onmatch(timer.hrtimer_start).irq_lat($lat,pid) if common_flags & 1' > events/sched/sched_waking/trigger +echo 'hist:keys=pid:wakets=common_timestamp.usecs,irqlat=lat' > events/synthetic/irq_lat/trigger +echo 'hist:keys=next_pid:lat=common_timestamp.usecs-$wakets,irqlat=$irqlat:onmatch(synthetic.irq_lat).wake_lat($lat,$irqlat,next_pid)' > events/sched/sched_switch/trigger +echo 1 > events/synthetic/wake_lat/enable + +Basically I wanted to see: + + hrtimer_start (calling function tick_sched_timer) + +Note: + + # grep tick_sched_timer /proc/kallsyms +ffffffff81200580 t tick_sched_timer + +And save the time of that, and then record sched_waking if it is called +in interrupt context and with the same pid as the hrtimer_start, it +will record the latency between that and the waking event. + +I then look at when the task that is woken is scheduled in, and record +the latency between the wakeup and the task running. + +At the end, the wake_lat synthetic event will show the wakeup to +scheduled latency, as well as the irq latency from hrtimer_start to +the wakeup. The problem is that I found this: + + -0 [007] d... 190.485261: wake_lat: lat=27 irqlat=190485230 pid=698 + -0 [005] d... 190.485283: wake_lat: lat=40 irqlat=190485239 pid=10 + -0 [002] d... 190.488327: wake_lat: lat=56 irqlat=190488266 pid=335 + -0 [005] d... 190.489330: wake_lat: lat=64 irqlat=190489262 pid=10 + -0 [003] d... 
190.490312: wake_lat: lat=43 irqlat=190490265 pid=77 + -0 [005] d... 190.493322: wake_lat: lat=54 irqlat=190493262 pid=10 + -0 [005] d... 190.497305: wake_lat: lat=35 irqlat=190497267 pid=10 + -0 [005] d... 190.501319: wake_lat: lat=50 irqlat=190501264 pid=10 + +The irqlat seemed quite large! Investigating this further, if I had +enabled the irq_lat synthetic event, I noticed this: + + -0 [002] d.s. 249.429308: irq_lat: lat=164968 pid=335 + -0 [002] d... 249.429369: wake_lat: lat=55 irqlat=249429308 pid=335 + +Notice that the timestamp of the irq_lat "249.429308" is awfully +similar to the reported irqlat variable. In fact, all instances were +like this. It appeared that: + + irqlat=$irqlat + +Wasn't assigning the old $irqlat to the new irqlat variable, but +instead was assigning the $irqts to it. + +The issue is that assigning the old $irqlat to the new irqlat variable +creates a variable reference alias, but the alias creation code +forgets to make sure the alias uses the same var_ref_idx to access the +reference. 
+ +Link: http://lkml.kernel.org/r/1567375321.5282.12.camel@kernel.org + +Cc: Linux Trace Devel +Cc: linux-rt-users +Cc: stable@vger.kernel.org +Fixes: 7e8b88a30b085 ("tracing: Add hist trigger support for variable reference aliases") +Reported-by: Steven Rostedt (VMware) +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace_events_hist.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -2785,6 +2785,8 @@ static struct hist_field *create_alias(s + return NULL; + } + ++ alias->var_ref_idx = var_ref->var_ref_idx; ++ + return alias; + } + diff --git a/queue-5.3/usercopy-avoid-highmem-pfn-warning.patch b/queue-5.3/usercopy-avoid-highmem-pfn-warning.patch new file mode 100644 index 00000000000..1039bba005f --- /dev/null +++ b/queue-5.3/usercopy-avoid-highmem-pfn-warning.patch @@ -0,0 +1,88 @@ +From 314eed30ede02fa925990f535652254b5bad6b65 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Tue, 17 Sep 2019 11:00:25 -0700 +Subject: usercopy: Avoid HIGHMEM pfn warning + +From: Kees Cook + +commit 314eed30ede02fa925990f535652254b5bad6b65 upstream. + +When running on a system with >512MB RAM with a 32-bit kernel built with: + + CONFIG_DEBUG_VIRTUAL=y + CONFIG_HIGHMEM=y + CONFIG_HARDENED_USERCOPY=y + +all execve()s will fail due to argv copying into kmap()ed pages, and on +usercopy checking the calls ultimately of virt_to_page() will be looking +for "bad" kmap (highmem) pointers due to CONFIG_DEBUG_VIRTUAL=y: + + ------------[ cut here ]------------ + kernel BUG at ../arch/x86/mm/physaddr.c:83! + invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC + CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.3.0-rc8 #6 + Hardware name: Dell Inc. Inspiron 1318/0C236D, BIOS A04 01/15/2009 + EIP: __phys_addr+0xaf/0x100 + ... + Call Trace: + __check_object_size+0xaf/0x3c0 + ? 
__might_sleep+0x80/0xa0 + copy_strings+0x1c2/0x370 + copy_strings_kernel+0x2b/0x40 + __do_execve_file+0x4ca/0x810 + ? kmem_cache_alloc+0x1c7/0x370 + do_execve+0x1b/0x20 + ... + +The check is from arch/x86/mm/physaddr.c: + + VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); + +Due to the kmap() in fs/exec.c: + + kaddr = kmap(kmapped_page); + ... + if (copy_from_user(kaddr+offset, str, bytes_to_copy)) ... + +Now we can fetch the correct page to avoid the pfn check. In both cases, +hardened usercopy will need to walk the page-span checker (if enabled) +to do sanity checking. + +Reported-by: Randy Dunlap +Tested-by: Randy Dunlap +Fixes: f5509cc18daa ("mm: Hardened usercopy") +Cc: Matthew Wilcox +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Reviewed-by: Matthew Wilcox (Oracle) +Link: https://lore.kernel.org/r/201909171056.7F2FFD17@keescook +Signed-off-by: Greg Kroah-Hartman + +--- + mm/usercopy.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/mm/usercopy.c ++++ b/mm/usercopy.c +@@ -11,6 +11,7 @@ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + + #include ++#include + #include + #include + #include +@@ -227,7 +228,12 @@ static inline void check_heap_object(con + if (!virt_addr_valid(ptr)) + return; + +- page = virt_to_head_page(ptr); ++ /* ++ * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the ++ * highmem page or fallback to virt_to_page(). The following ++ * is effectively a highmem-aware virt_to_head_page(). ++ */ ++ page = compound_head(kmap_to_page((void *)ptr)); + + if (PageSlab(page)) { + /* Check slab allocator for flags and size. */ -- 2.47.2