From d25154239599f37cf6d83d25a7b0ba34cc477cf8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 19 Nov 2018 12:05:33 +0100 Subject: [PATCH] 4.19-stable patches added patches: acpi-nfit-fix-ars-overflow-continuation.patch acpi-nfit-x86-mce-handle-only-uncorrectable-machine-checks.patch acpi-nfit-x86-mce-validate-a-mce-s-address-before-using-it.patch arm-8809-1-proc-v7-fix-thumb-annotation-of-cpu_v7_hvc_switch_mm.patch bonding-802.3ad-fix-link_failure_count-tracking.patch clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch clk-rockchip-fix-wrong-mmc-sample-phase-shift-for-rk3328.patch clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch clk-sunxi-ng-h6-fix-bus-clocks-divider-position.patch crypto-hisilicon-fix-null-dereference-for-same-dst-and-src.patch crypto-hisilicon-fix-reference-after-free-of-memories-on-error-path.patch hwmon-core-fix-double-free-in-__hwmon_device_register.patch libceph-bump-ceph_msg_max_data_len.patch mach64-fix-display-corruption-on-big-endian-machines.patch mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch memory_hotplug-cond_resched-in-__remove_pages.patch mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch mtd-nand-fix-nanddev_neraseblocks.patch mtd-spi-nor-cadence-quadspi-return-error-code-in-cqspi_direct_read_execute.patch netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch ocfs2-free-up-write-context-when-direct-io-failed.patch of-numa-validate-some-distance-map-rules.patch perf-callchain-honour-the-ordering-of-perf_context_-user-kernel-etc.patch perf-cs-etm-correct-cpu-mode-for-samples.patch perf-intel-pt-bts-calculate-cpumode-for-synthesized-samples.patch 
perf-intel-pt-insert-callchain-context-into-synthesized-callchains.patch perf-stat-handle-different-pmu-names-with-common-prefix.patch reset-hisilicon-fix-potential-null-pointer-dereference.patch revert-ceph-fix-dentry-leak-in-splice_dentry.patch revert-powerpc-8xx-use-l1-entry-apg-to-handle-_page_accessed-for-config_swap.patch scsi-fix-queue-cleanup-race-before-queue-initialization-is-done.patch scsi-qla2xxx-initialize-port-speed-to-avoid-setting-lower-speed.patch soc-ti-qmss-fix-usage-of-irq_set_affinity_hint.patch thermal-core-fix-use-after-free-in-thermal_cooling_device_destroy_sysfs.patch um-drop-own-definition-of-ptrace_sysemu-_singlestep.patch vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch xtensa-add-notes-section-to-the-linker-script.patch xtensa-fix-boot-parameters-address-translation.patch xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch --- ...i-nfit-fix-ars-overflow-continuation.patch | 40 +++ ...le-only-uncorrectable-machine-checks.patch | 100 ++++++++ ...date-a-mce-s-address-before-using-it.patch | 91 +++++++ ...b-annotation-of-cpu_v7_hvc_switch_mm.patch | 57 +++++ ....3ad-fix-link_failure_count-tracking.patch | 53 ++++ ...-division-by-zero-in-pll-recalc_rate.patch | 37 +++ ...g-in-rockchip_ddrclk_get_parent-call.patch | 45 ++++ ...ng-mmc-sample-phase-shift-for-rk3328.patch | 52 ++++ ...dule-and-dt-node-contains-compatible.patch | 77 ++++++ ...g-h6-fix-bus-clocks-divider-position.patch | 59 +++++ ...ull-dereference-for-same-dst-and-src.patch | 123 ++++++++++ ...after-free-of-memories-on-error-path.patch | 83 +++++++ ...uble-free-in-__hwmon_device_register.patch | 132 ++++++++++ .../libceph-bump-ceph_msg_max_data_len.patch | 42 ++++ ...ay-corruption-on-big-endian-machines.patch | 59 +++++ ...due-to-reading-accelerator-registers.patch | 114 +++++++++ ...tplug-cond_resched-in-__remove_pages.patch | 60 +++++ ..._thisnode-for-madv_hugepage-mappings.patch | 228 ++++++++++++++++++ ...-conflicting-bch_const_params-option.patch | 51 ++++ 
.../mtd-nand-fix-nanddev_neraseblocks.patch | 36 +++ ...or-code-in-cqspi_direct_read_execute.patch | 36 +++ ...-of-next-bucket-number-in-early_drop.patch | 59 +++++ ...-after-failing-ocfs2_check_dir_entry.patch | 54 +++++ ...-write-context-when-direct-io-failed.patch | 85 +++++++ ...uma-validate-some-distance-map-rules.patch | 80 ++++++ ...ing-of-perf_context_-user-kernel-etc.patch | 108 +++++++++ ...-cs-etm-correct-cpu-mode-for-samples.patch | 143 +++++++++++ ...late-cpumode-for-synthesized-samples.patch | 125 ++++++++++ ...-context-into-synthesized-callchains.patch | 120 +++++++++ ...fferent-pmu-names-with-common-prefix.patch | 155 ++++++++++++ ...x-potential-null-pointer-dereference.patch | 42 ++++ ...eph-fix-dentry-leak-in-splice_dentry.patch | 41 ++++ ...andle-_page_accessed-for-config_swap.patch | 202 ++++++++++++++++ ...-before-queue-initialization-is-done.patch | 87 +++++++ ...t-speed-to-avoid-setting-lower-speed.patch | 35 +++ queue-4.19/series | 42 ++++ ...s-fix-usage-of-irq_set_affinity_hint.patch | 128 ++++++++++ ...thermal_cooling_device_destroy_sysfs.patch | 40 +++ ...inition-of-ptrace_sysemu-_singlestep.patch | 55 +++++ ...uncate-t10-pi-iov_iter-to-prot_bytes.patch | 47 ++++ ...d-notes-section-to-the-linker-script.patch | 44 ++++ ...-boot-parameters-address-translation.patch | 43 ++++ ...e-sure-bflt-stack-is-16-byte-aligned.patch | 45 ++++ 43 files changed, 3355 insertions(+) create mode 100644 queue-4.19/acpi-nfit-fix-ars-overflow-continuation.patch create mode 100644 queue-4.19/acpi-nfit-x86-mce-handle-only-uncorrectable-machine-checks.patch create mode 100644 queue-4.19/acpi-nfit-x86-mce-validate-a-mce-s-address-before-using-it.patch create mode 100644 queue-4.19/arm-8809-1-proc-v7-fix-thumb-annotation-of-cpu_v7_hvc_switch_mm.patch create mode 100644 queue-4.19/bonding-802.3ad-fix-link_failure_count-tracking.patch create mode 100644 queue-4.19/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch create mode 100644 
queue-4.19/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch create mode 100644 queue-4.19/clk-rockchip-fix-wrong-mmc-sample-phase-shift-for-rk3328.patch create mode 100644 queue-4.19/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch create mode 100644 queue-4.19/clk-sunxi-ng-h6-fix-bus-clocks-divider-position.patch create mode 100644 queue-4.19/crypto-hisilicon-fix-null-dereference-for-same-dst-and-src.patch create mode 100644 queue-4.19/crypto-hisilicon-fix-reference-after-free-of-memories-on-error-path.patch create mode 100644 queue-4.19/hwmon-core-fix-double-free-in-__hwmon_device_register.patch create mode 100644 queue-4.19/libceph-bump-ceph_msg_max_data_len.patch create mode 100644 queue-4.19/mach64-fix-display-corruption-on-big-endian-machines.patch create mode 100644 queue-4.19/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch create mode 100644 queue-4.19/memory_hotplug-cond_resched-in-__remove_pages.patch create mode 100644 queue-4.19/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch create mode 100644 queue-4.19/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch create mode 100644 queue-4.19/mtd-nand-fix-nanddev_neraseblocks.patch create mode 100644 queue-4.19/mtd-spi-nor-cadence-quadspi-return-error-code-in-cqspi_direct_read_execute.patch create mode 100644 queue-4.19/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch create mode 100644 queue-4.19/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch create mode 100644 queue-4.19/ocfs2-free-up-write-context-when-direct-io-failed.patch create mode 100644 queue-4.19/of-numa-validate-some-distance-map-rules.patch create mode 100644 queue-4.19/perf-callchain-honour-the-ordering-of-perf_context_-user-kernel-etc.patch create mode 100644 queue-4.19/perf-cs-etm-correct-cpu-mode-for-samples.patch create mode 100644 
queue-4.19/perf-intel-pt-bts-calculate-cpumode-for-synthesized-samples.patch create mode 100644 queue-4.19/perf-intel-pt-insert-callchain-context-into-synthesized-callchains.patch create mode 100644 queue-4.19/perf-stat-handle-different-pmu-names-with-common-prefix.patch create mode 100644 queue-4.19/reset-hisilicon-fix-potential-null-pointer-dereference.patch create mode 100644 queue-4.19/revert-ceph-fix-dentry-leak-in-splice_dentry.patch create mode 100644 queue-4.19/revert-powerpc-8xx-use-l1-entry-apg-to-handle-_page_accessed-for-config_swap.patch create mode 100644 queue-4.19/scsi-fix-queue-cleanup-race-before-queue-initialization-is-done.patch create mode 100644 queue-4.19/scsi-qla2xxx-initialize-port-speed-to-avoid-setting-lower-speed.patch create mode 100644 queue-4.19/soc-ti-qmss-fix-usage-of-irq_set_affinity_hint.patch create mode 100644 queue-4.19/thermal-core-fix-use-after-free-in-thermal_cooling_device_destroy_sysfs.patch create mode 100644 queue-4.19/um-drop-own-definition-of-ptrace_sysemu-_singlestep.patch create mode 100644 queue-4.19/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch create mode 100644 queue-4.19/xtensa-add-notes-section-to-the-linker-script.patch create mode 100644 queue-4.19/xtensa-fix-boot-parameters-address-translation.patch create mode 100644 queue-4.19/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch diff --git a/queue-4.19/acpi-nfit-fix-ars-overflow-continuation.patch b/queue-4.19/acpi-nfit-fix-ars-overflow-continuation.patch new file mode 100644 index 00000000000..3fe8dcd7513 --- /dev/null +++ b/queue-4.19/acpi-nfit-fix-ars-overflow-continuation.patch @@ -0,0 +1,40 @@ +From 3fa58dcab50a0aa16817f16a8d38aee869eb3fb9 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 1 Nov 2018 00:30:22 -0700 +Subject: acpi, nfit: Fix ARS overflow continuation + +From: Dan Williams + +commit 3fa58dcab50a0aa16817f16a8d38aee869eb3fb9 upstream. 
+ +When the platform BIOS is unable to report all the media error records +it requires the OS to restart the scrub at a prescribed location. The +driver detects the overflow condition, but then fails to report it to +the ARS state machine after reaping the records. Propagate -ENOSPC +correctly to continue the ARS operation. + +Cc: +Fixes: 1cf03c00e7c1 ("nfit: scrub and register regions in a workqueue") +Reported-by: Jacek Zloch +Reviewed-by: Dave Jiang +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/nfit/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/acpi/nfit/core.c ++++ b/drivers/acpi/nfit/core.c +@@ -2845,9 +2845,9 @@ static int acpi_nfit_query_poison(struct + return rc; + + if (ars_status_process_records(acpi_desc)) +- return -ENOMEM; ++ dev_err(acpi_desc->dev, "Failed to process ARS records\n"); + +- return 0; ++ return rc; + } + + static int ars_register(struct acpi_nfit_desc *acpi_desc, diff --git a/queue-4.19/acpi-nfit-x86-mce-handle-only-uncorrectable-machine-checks.patch b/queue-4.19/acpi-nfit-x86-mce-handle-only-uncorrectable-machine-checks.patch new file mode 100644 index 00000000000..393abaf8713 --- /dev/null +++ b/queue-4.19/acpi-nfit-x86-mce-handle-only-uncorrectable-machine-checks.patch @@ -0,0 +1,100 @@ +From 5d96c9342c23ee1d084802dcf064caa67ecaa45b Mon Sep 17 00:00:00 2001 +From: Vishal Verma +Date: Thu, 25 Oct 2018 18:37:28 -0600 +Subject: acpi/nfit, x86/mce: Handle only uncorrectable machine checks + +From: Vishal Verma + +commit 5d96c9342c23ee1d084802dcf064caa67ecaa45b upstream. + +The MCE handler for nfit devices is called for memory errors on a +Non-Volatile DIMM and adds the error location to a 'badblocks' list. +This list is used by the various NVDIMM drivers to avoid consuming known +poison locations during IO. + +The MCE handler gets called for both corrected and uncorrectable errors. +Until now, both kinds of errors have been added to the badblocks list. 
+However, corrected memory errors indicate that the problem has already +been fixed by hardware, and the resulting interrupt is merely a +notification to Linux. + +As far as future accesses to that location are concerned, it is +perfectly fine to use, and thus doesn't need to be included in the above +badblocks list. + +Add a check in the nfit MCE handler to filter out corrected mce events, +and only process uncorrectable errors. + +Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") +Reported-by: Omar Avelar +Signed-off-by: Vishal Verma +Signed-off-by: Borislav Petkov +CC: Arnd Bergmann +CC: Dan Williams +CC: Dave Jiang +CC: elliott@hpe.com +CC: "H. Peter Anvin" +CC: Ingo Molnar +CC: Len Brown +CC: linux-acpi@vger.kernel.org +CC: linux-edac +CC: linux-nvdimm@lists.01.org +CC: Qiuxu Zhuo +CC: "Rafael J. Wysocki" +CC: Ross Zwisler +CC: stable +CC: Thomas Gleixner +CC: Tony Luck +CC: x86-ml +CC: Yazen Ghannam +Link: http://lkml.kernel.org/r/20181026003729.8420-1-vishal.l.verma@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mce.h | 1 + + arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- + drivers/acpi/nfit/mce.c | 4 ++-- + 3 files changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/x86/include/asm/mce.h ++++ b/arch/x86/include/asm/mce.h +@@ -216,6 +216,7 @@ static inline int umc_normaddr_to_sysadd + + int mce_available(struct cpuinfo_x86 *c); + bool mce_is_memory_error(struct mce *m); ++bool mce_is_correctable(struct mce *m); + + DECLARE_PER_CPU(unsigned, mce_exception_count); + DECLARE_PER_CPU(unsigned, mce_poll_count); +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -534,7 +534,7 @@ bool mce_is_memory_error(struct mce *m) + } + EXPORT_SYMBOL_GPL(mce_is_memory_error); + +-static bool mce_is_correctable(struct mce *m) ++bool mce_is_correctable(struct mce *m) + { + if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) + return false; +@@ -544,6 +544,7 @@ static bool 
mce_is_correctable(struct mc + + return true; + } ++EXPORT_SYMBOL_GPL(mce_is_correctable); + + static bool cec_add_mce(struct mce *m) + { +--- a/drivers/acpi/nfit/mce.c ++++ b/drivers/acpi/nfit/mce.c +@@ -25,8 +25,8 @@ static int nfit_handle_mce(struct notifi + struct acpi_nfit_desc *acpi_desc; + struct nfit_spa *nfit_spa; + +- /* We only care about memory errors */ +- if (!mce_is_memory_error(mce)) ++ /* We only care about uncorrectable memory errors */ ++ if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) + return NOTIFY_DONE; + + /* diff --git a/queue-4.19/acpi-nfit-x86-mce-validate-a-mce-s-address-before-using-it.patch b/queue-4.19/acpi-nfit-x86-mce-validate-a-mce-s-address-before-using-it.patch new file mode 100644 index 00000000000..6d2c64a57d0 --- /dev/null +++ b/queue-4.19/acpi-nfit-x86-mce-validate-a-mce-s-address-before-using-it.patch @@ -0,0 +1,91 @@ +From e8a308e5f47e545e0d41d0686c00f5f5217c5f61 Mon Sep 17 00:00:00 2001 +From: Vishal Verma +Date: Thu, 25 Oct 2018 18:37:29 -0600 +Subject: acpi/nfit, x86/mce: Validate a MCE's address before using it + +From: Vishal Verma + +commit e8a308e5f47e545e0d41d0686c00f5f5217c5f61 upstream. + +The NFIT machine check handler uses the physical address from the mce +structure, and compares it against information in the ACPI NFIT table +to determine whether that location lies on an NVDIMM. The mce->addr +field however may not always be valid, and this is indicated by the +MCI_STATUS_ADDRV bit in the status field. + +Export mce_usable_address() which already performs validation for the +address, and use it in the NFIT handler. + +Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") +Reported-by: Robert Elliott +Signed-off-by: Vishal Verma +Signed-off-by: Borislav Petkov +CC: Arnd Bergmann +Cc: Dan Williams +CC: Dave Jiang +CC: elliott@hpe.com +CC: "H. 
Peter Anvin" +CC: Ingo Molnar +CC: Len Brown +CC: linux-acpi@vger.kernel.org +CC: linux-edac +CC: linux-nvdimm@lists.01.org +CC: Qiuxu Zhuo +CC: "Rafael J. Wysocki" +CC: Ross Zwisler +CC: stable +CC: Thomas Gleixner +CC: Tony Luck +CC: x86-ml +CC: Yazen Ghannam +Link: http://lkml.kernel.org/r/20181026003729.8420-2-vishal.l.verma@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mce.h | 1 + + arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- + drivers/acpi/nfit/mce.c | 4 ++++ + 3 files changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/mce.h ++++ b/arch/x86/include/asm/mce.h +@@ -217,6 +217,7 @@ static inline int umc_normaddr_to_sysadd + int mce_available(struct cpuinfo_x86 *c); + bool mce_is_memory_error(struct mce *m); + bool mce_is_correctable(struct mce *m); ++int mce_usable_address(struct mce *m); + + DECLARE_PER_CPU(unsigned, mce_exception_count); + DECLARE_PER_CPU(unsigned, mce_poll_count); +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -485,7 +485,7 @@ static void mce_report_event(struct pt_r + * be somewhat complicated (e.g. segment offset would require an instruction + * parser). So only support physical addresses up to page granuality for now. + */ +-static int mce_usable_address(struct mce *m) ++int mce_usable_address(struct mce *m) + { + if (!(m->status & MCI_STATUS_ADDRV)) + return 0; +@@ -505,6 +505,7 @@ static int mce_usable_address(struct mce + + return 1; + } ++EXPORT_SYMBOL_GPL(mce_usable_address); + + bool mce_is_memory_error(struct mce *m) + { +--- a/drivers/acpi/nfit/mce.c ++++ b/drivers/acpi/nfit/mce.c +@@ -29,6 +29,10 @@ static int nfit_handle_mce(struct notifi + if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) + return NOTIFY_DONE; + ++ /* Verify the address reported in the MCE is valid. */ ++ if (!mce_usable_address(mce)) ++ return NOTIFY_DONE; ++ + /* + * mce->addr contains the physical addr accessed that caused the + * machine check. 
We need to walk through the list of NFITs, and see diff --git a/queue-4.19/arm-8809-1-proc-v7-fix-thumb-annotation-of-cpu_v7_hvc_switch_mm.patch b/queue-4.19/arm-8809-1-proc-v7-fix-thumb-annotation-of-cpu_v7_hvc_switch_mm.patch new file mode 100644 index 00000000000..f12c95ae8e9 --- /dev/null +++ b/queue-4.19/arm-8809-1-proc-v7-fix-thumb-annotation-of-cpu_v7_hvc_switch_mm.patch @@ -0,0 +1,57 @@ +From 6282e916f774e37845c65d1eae9f8c649004f033 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Mon, 5 Nov 2018 14:54:56 +0100 +Subject: ARM: 8809/1: proc-v7: fix Thumb annotation of cpu_v7_hvc_switch_mm + +From: Ard Biesheuvel + +commit 6282e916f774e37845c65d1eae9f8c649004f033 upstream. + +Due to what appears to be a copy/paste error, the opening ENTRY() +of cpu_v7_hvc_switch_mm() lacks a matching ENDPROC(), and instead, +the one for cpu_v7_smc_switch_mm() is duplicated. + +Given that it is ENDPROC() that emits the Thumb annotation, the +cpu_v7_hvc_switch_mm() routine will be called in ARM mode on a +Thumb2 kernel, resulting in the following splat: + + Internal error: Oops - undefined instruction: 0 [#1] SMP THUMB2 + Modules linked in: + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-rc1-00030-g4d28ad89189d-dirty #488 + Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 + PC is at cpu_v7_hvc_switch_mm+0x12/0x18 + LR is at flush_old_exec+0x31b/0x570 + pc : [] lr : [] psr: 00000013 + sp : ee899e50 ip : 00000000 fp : 00000001 + r10: eda28f34 r9 : eda31800 r8 : c12470e0 + r7 : eda1fc00 r6 : eda53000 r5 : 00000000 r4 : ee88c000 + r3 : c0316eec r2 : 00000001 r1 : eda53000 r0 : 6da6c000 + Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none + +Note the 'ISA ARM' in the last line. + +Fix this by using the correct name in ENDPROC(). 
+ +Cc: +Fixes: 10115105cb3a ("ARM: spectre-v2: add firmware based hardening") +Reviewed-by: Dave Martin +Acked-by: Marc Zyngier +Signed-off-by: Ard Biesheuvel +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/mm/proc-v7.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/mm/proc-v7.S ++++ b/arch/arm/mm/proc-v7.S +@@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm) + hvc #0 + ldmfd sp!, {r0 - r3} + b cpu_v7_switch_mm +-ENDPROC(cpu_v7_smc_switch_mm) ++ENDPROC(cpu_v7_hvc_switch_mm) + #endif + ENTRY(cpu_v7_iciallu_switch_mm) + mov r3, #0 diff --git a/queue-4.19/bonding-802.3ad-fix-link_failure_count-tracking.patch b/queue-4.19/bonding-802.3ad-fix-link_failure_count-tracking.patch new file mode 100644 index 00000000000..dfa1d7e1b2f --- /dev/null +++ b/queue-4.19/bonding-802.3ad-fix-link_failure_count-tracking.patch @@ -0,0 +1,53 @@ +From ea53abfab960909d622ca37bcfb8e1c5378d21cc Mon Sep 17 00:00:00 2001 +From: Jarod Wilson +Date: Sun, 4 Nov 2018 14:59:46 -0500 +Subject: bonding/802.3ad: fix link_failure_count tracking + +From: Jarod Wilson + +commit ea53abfab960909d622ca37bcfb8e1c5378d21cc upstream. + +Commit 4d2c0cda07448ea6980f00102dc3964eb25e241c set slave->link to +BOND_LINK_DOWN for 802.3ad bonds whenever invalid speed/duplex values +were read, to fix a problem with slaves getting into weird states, but +in the process, broke tracking of link failures, as going straight to +BOND_LINK_DOWN when a link is indeed down (cable pulled, switch rebooted) +means we broke out of bond_miimon_inspect()'s BOND_LINK_DOWN case because +!link_state was already true, we never incremented commit, and never got +a chance to call bond_miimon_commit(), where slave->link_failure_count +would be incremented. 
I believe the simple fix here is to mark the slave +as BOND_LINK_FAIL, and let bond_miimon_inspect() transition the link from +_FAIL to either _UP or _DOWN, and in the latter case, we now get proper +incrementing of link_failure_count again. + +Fixes: 4d2c0cda0744 ("bonding: speed/duplex update at NETDEV_UP event") +CC: Mahesh Bandewar +CC: David S. Miller +CC: netdev@vger.kernel.org +CC: stable@vger.kernel.org +Signed-off-by: Jarod Wilson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/bonding/bond_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3111,13 +3111,13 @@ static int bond_slave_netdev_event(unsig + case NETDEV_CHANGE: + /* For 802.3ad mode only: + * Getting invalid Speed/Duplex values here will put slave +- * in weird state. So mark it as link-down for the time ++ * in weird state. So mark it as link-fail for the time + * being and let link-monitoring (miimon) set it right when + * correct speeds/duplex are available. + */ + if (bond_update_speed_duplex(slave) && + BOND_MODE(bond) == BOND_MODE_8023AD) +- slave->link = BOND_LINK_DOWN; ++ slave->link = BOND_LINK_FAIL; + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + bond_3ad_adapter_speed_duplex_changed(slave); diff --git a/queue-4.19/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch b/queue-4.19/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch new file mode 100644 index 00000000000..4aa88a70e69 --- /dev/null +++ b/queue-4.19/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch @@ -0,0 +1,37 @@ +From 0f5cb0e6225cae2f029944cb8c74617aab6ddd49 Mon Sep 17 00:00:00 2001 +From: Ronald Wahl +Date: Wed, 10 Oct 2018 15:54:54 +0200 +Subject: clk: at91: Fix division by zero in PLL recalc_rate() + +From: Ronald Wahl + +commit 0f5cb0e6225cae2f029944cb8c74617aab6ddd49 upstream. 
+ +Commit a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached MUL +and DIV values") removed a check that prevents a division by zero. This +now causes a stacktrace when booting the kernel on an at91 platform if +the PLL DIV register contains zero. This commit reintroduces this check. + +Fixes: a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached...") +Cc: +Signed-off-by: Ronald Wahl +Acked-by: Ludovic Desroches +Signed-off-by: Stephen Boyd +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/at91/clk-pll.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/clk/at91/clk-pll.c ++++ b/drivers/clk/at91/clk-pll.c +@@ -133,6 +133,9 @@ static unsigned long clk_pll_recalc_rate + { + struct clk_pll *pll = to_clk_pll(hw); + ++ if (!pll->div || !pll->mul) ++ return 0; ++ + return (parent_rate / pll->div) * (pll->mul + 1); + } + diff --git a/queue-4.19/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch b/queue-4.19/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch new file mode 100644 index 00000000000..d6a28fcaad7 --- /dev/null +++ b/queue-4.19/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch @@ -0,0 +1,45 @@ +From 665636b2940d0897c4130253467f5e8c42eea392 Mon Sep 17 00:00:00 2001 +From: Enric Balletbo i Serra +Date: Tue, 16 Oct 2018 15:41:44 +0200 +Subject: clk: rockchip: Fix static checker warning in rockchip_ddrclk_get_parent call + +From: Enric Balletbo i Serra + +commit 665636b2940d0897c4130253467f5e8c42eea392 upstream. + +Fixes the signedness bug returning '(-22)' on the return type by removing the +sanity checker in rockchip_ddrclk_get_parent(). The function should return +an unsigned value only and it's safe to remove the sanity checker as the +core functions that call get_parent like clk_core_get_parent_by_index already +ensure the validity of the clk index returned (index >= core->num_parents). 
+ +Fixes: a4f182bf81f18 ("clk: rockchip: add new clock-type for the ddrclk") +Cc: stable@vger.kernel.org +Signed-off-by: Enric Balletbo i Serra +Reviewed-by: Stephen Boyd +Signed-off-by: Heiko Stuebner +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/rockchip/clk-ddr.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/drivers/clk/rockchip/clk-ddr.c ++++ b/drivers/clk/rockchip/clk-ddr.c +@@ -80,16 +80,12 @@ static long rockchip_ddrclk_sip_round_ra + static u8 rockchip_ddrclk_get_parent(struct clk_hw *hw) + { + struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw); +- int num_parents = clk_hw_get_num_parents(hw); + u32 val; + + val = clk_readl(ddrclk->reg_base + + ddrclk->mux_offset) >> ddrclk->mux_shift; + val &= GENMASK(ddrclk->mux_width - 1, 0); + +- if (val >= num_parents) +- return -EINVAL; +- + return val; + } + diff --git a/queue-4.19/clk-rockchip-fix-wrong-mmc-sample-phase-shift-for-rk3328.patch b/queue-4.19/clk-rockchip-fix-wrong-mmc-sample-phase-shift-for-rk3328.patch new file mode 100644 index 00000000000..df2ad56135a --- /dev/null +++ b/queue-4.19/clk-rockchip-fix-wrong-mmc-sample-phase-shift-for-rk3328.patch @@ -0,0 +1,52 @@ +From 82f4b67f018c88a7cc9337f0067ed3d6ec352648 Mon Sep 17 00:00:00 2001 +From: Ziyuan Xu +Date: Thu, 11 Oct 2018 15:26:43 +0800 +Subject: clk: rockchip: fix wrong mmc sample phase shift for rk3328 + +From: Ziyuan Xu + +commit 82f4b67f018c88a7cc9337f0067ed3d6ec352648 upstream. + +mmc sample shift is 0 for RK3328 referring to the TRM. +So fix them. 
+ +Fixes: fe3511ad8a1c ("clk: rockchip: add clock controller for rk3328") +Cc: stable@vger.kernel.org +Signed-off-by: Ziyuan Xu +Signed-off-by: Shawn Lin +Signed-off-by: Heiko Stuebner +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/rockchip/clk-rk3328.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/clk/rockchip/clk-rk3328.c ++++ b/drivers/clk/rockchip/clk-rk3328.c +@@ -813,22 +813,22 @@ static struct rockchip_clk_branch rk3328 + MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", + RK3328_SDMMC_CON0, 1), + MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", +- RK3328_SDMMC_CON1, 1), ++ RK3328_SDMMC_CON1, 0), + + MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", + RK3328_SDIO_CON0, 1), + MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", +- RK3328_SDIO_CON1, 1), ++ RK3328_SDIO_CON1, 0), + + MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", + RK3328_EMMC_CON0, 1), + MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", +- RK3328_EMMC_CON1, 1), ++ RK3328_EMMC_CON1, 0), + + MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "clk_sdmmc_ext", + RK3328_SDMMC_EXT_CON0, 1), + MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "clk_sdmmc_ext", +- RK3328_SDMMC_EXT_CON1, 1), ++ RK3328_SDMMC_EXT_CON1, 0), + }; + + static const char *const rk3328_critical_clocks[] __initconst = { diff --git a/queue-4.19/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch b/queue-4.19/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch new file mode 100644 index 00000000000..2c7ec1e29a4 --- /dev/null +++ b/queue-4.19/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch @@ -0,0 +1,77 @@ +From 8985167ecf57f97061599a155bb9652c84ea4913 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Wed, 29 Aug 2018 21:20:10 +0200 +Subject: clk: s2mps11: Fix matching when built as module and DT node contains compatible + +From: Krzysztof Kozlowski + +commit 8985167ecf57f97061599a155bb9652c84ea4913 upstream. 
+ +When driver is built as module and DT node contains clocks compatible +(e.g. "samsung,s2mps11-clk"), the module will not be autoloaded because +module aliases won't match. + +The modalias from uevent: of:NclocksTCsamsung,s2mps11-clk +The modalias from driver: platform:s2mps11-clk + +The devices are instantiated by parent's MFD. However both Device Tree +bindings and parent define the compatible for clocks devices. In case +of module matching this DT compatible will be used. + +The issue will not happen if this is a built-in (no need for module +matching) or when clocks DT node does not contain compatible (not +correct from bindings perspective but working for driver). + +Note when backporting to stable kernels: adjust the list of device ID +entries. + +Cc: +Fixes: 53c31b3437a6 ("mfd: sec-core: Add of_compatible strings for clock MFD cells") +Signed-off-by: Krzysztof Kozlowski +Acked-by: Stephen Boyd +Signed-off-by: Stephen Boyd +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/clk-s2mps11.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +--- a/drivers/clk/clk-s2mps11.c ++++ b/drivers/clk/clk-s2mps11.c +@@ -245,6 +245,36 @@ static const struct platform_device_id s + }; + MODULE_DEVICE_TABLE(platform, s2mps11_clk_id); + ++#ifdef CONFIG_OF ++/* ++ * Device is instantiated through parent MFD device and device matching is done ++ * through platform_device_id. ++ * ++ * However if device's DT node contains proper clock compatible and driver is ++ * built as a module, then the *module* matching will be done trough DT aliases. ++ * This requires of_device_id table. In the same time this will not change the ++ * actual *device* matching so do not add .of_match_table. 
++ */ ++static const struct of_device_id s2mps11_dt_match[] = { ++ { ++ .compatible = "samsung,s2mps11-clk", ++ .data = (void *)S2MPS11X, ++ }, { ++ .compatible = "samsung,s2mps13-clk", ++ .data = (void *)S2MPS13X, ++ }, { ++ .compatible = "samsung,s2mps14-clk", ++ .data = (void *)S2MPS14X, ++ }, { ++ .compatible = "samsung,s5m8767-clk", ++ .data = (void *)S5M8767X, ++ }, { ++ /* Sentinel */ ++ }, ++}; ++MODULE_DEVICE_TABLE(of, s2mps11_dt_match); ++#endif ++ + static struct platform_driver s2mps11_clk_driver = { + .driver = { + .name = "s2mps11-clk", diff --git a/queue-4.19/clk-sunxi-ng-h6-fix-bus-clocks-divider-position.patch b/queue-4.19/clk-sunxi-ng-h6-fix-bus-clocks-divider-position.patch new file mode 100644 index 00000000000..0cc17fd6edb --- /dev/null +++ b/queue-4.19/clk-sunxi-ng-h6-fix-bus-clocks-divider-position.patch @@ -0,0 +1,59 @@ +From 2852bfbf4f168fec27049ad9ed20941fc9e84b95 Mon Sep 17 00:00:00 2001 +From: Icenowy Zheng +Date: Thu, 9 Aug 2018 01:19:52 +0800 +Subject: clk: sunxi-ng: h6: fix bus clocks' divider position + +From: Icenowy Zheng + +commit 2852bfbf4f168fec27049ad9ed20941fc9e84b95 upstream. + +The bus clocks (AHB/APB) on Allwinner H6 have their second divider start +at bit 8, according to the user manual and the BSP code. However, +currently the divider offset is incorrectly set to 16, thus the divider +is not correctly read and the clock frequency is not correctly calculated. + +Fix this bit offset on all affected bus clocks in ccu-sun50i-h6. 
+ +Cc: stable@vger.kernel.org # v4.17.y +Signed-off-by: Icenowy Zheng +Signed-off-by: Chen-Yu Tsai +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c ++++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c +@@ -224,7 +224,7 @@ static SUNXI_CCU_MP_WITH_MUX(psi_ahb1_ah + psi_ahb1_ahb2_parents, + 0x510, + 0, 5, /* M */ +- 16, 2, /* P */ ++ 8, 2, /* P */ + 24, 2, /* mux */ + 0); + +@@ -233,19 +233,19 @@ static const char * const ahb3_apb1_apb2 + "pll-periph0" }; + static SUNXI_CCU_MP_WITH_MUX(ahb3_clk, "ahb3", ahb3_apb1_apb2_parents, 0x51c, + 0, 5, /* M */ +- 16, 2, /* P */ ++ 8, 2, /* P */ + 24, 2, /* mux */ + 0); + + static SUNXI_CCU_MP_WITH_MUX(apb1_clk, "apb1", ahb3_apb1_apb2_parents, 0x520, + 0, 5, /* M */ +- 16, 2, /* P */ ++ 8, 2, /* P */ + 24, 2, /* mux */ + 0); + + static SUNXI_CCU_MP_WITH_MUX(apb2_clk, "apb2", ahb3_apb1_apb2_parents, 0x524, + 0, 5, /* M */ +- 16, 2, /* P */ ++ 8, 2, /* P */ + 24, 2, /* mux */ + 0); + diff --git a/queue-4.19/crypto-hisilicon-fix-null-dereference-for-same-dst-and-src.patch b/queue-4.19/crypto-hisilicon-fix-null-dereference-for-same-dst-and-src.patch new file mode 100644 index 00000000000..3b1adf5a20b --- /dev/null +++ b/queue-4.19/crypto-hisilicon-fix-null-dereference-for-same-dst-and-src.patch @@ -0,0 +1,123 @@ +From 68a031d22c57b94870ba13513c9d93b8a8119ab2 Mon Sep 17 00:00:00 2001 +From: John Garry +Date: Mon, 5 Nov 2018 20:35:14 +0800 +Subject: crypto: hisilicon - Fix NULL dereference for same dst and src + +From: John Garry + +commit 68a031d22c57b94870ba13513c9d93b8a8119ab2 upstream. 
+ +When the source and destination addresses for the cipher are the same, we +will get a NULL dereference from accessing the split destination +scatterlist memories, as shown: + +[ 56.565719] tcrypt: +[ 56.565719] testing speed of async ecb(aes) (hisi_sec_aes_ecb) encryption +[ 56.574683] tcrypt: test 0 (128 bit key, 16 byte blocks): +[ 56.587585] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 +[ 56.596361] Mem abort info: +[ 56.599151] ESR = 0x96000006 +[ 56.602196] Exception class = DABT (current EL), IL = 32 bits +[ 56.608105] SET = 0, FnV = 0 +[ 56.611149] EA = 0, S1PTW = 0 +[ 56.614280] Data abort info: +[ 56.617151] ISV = 0, ISS = 0x00000006 +[ 56.620976] CM = 0, WnR = 0 +[ 56.623930] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) +[ 56.630533] [0000000000000000] pgd=0000041fc7e4d003, pud=0000041fcd9bf003, pmd=0000000000000000 +[ 56.639224] Internal error: Oops: 96000006 [#1] PREEMPT SMP +[ 56.644782] Modules linked in: tcrypt(+) +[ 56.648695] CPU: 21 PID: 2326 Comm: insmod Tainted: G W 4.19.0-rc6-00001-g3fabfb8-dirty #716 +[ 56.658420] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT17 Nemo 2.0 RC0 10/05/2018 +[ 56.667537] pstate: 20000005 (nzCv daif -PAN -UAO) +[ 56.672322] pc : sec_alg_skcipher_crypto+0x318/0x748 +[ 56.677274] lr : sec_alg_skcipher_crypto+0x178/0x748 +[ 56.682224] sp : ffff0000118e3840 +[ 56.685525] x29: ffff0000118e3840 x28: ffff841fbb3f8118 +[ 56.690825] x27: 0000000000000000 x26: 0000000000000000 +[ 56.696125] x25: ffff841fbb3f8080 x24: ffff841fbadc0018 +[ 56.701425] x23: ffff000009119000 x22: ffff841fbb24e280 +[ 56.706724] x21: ffff841ff212e780 x20: ffff841ff212e700 +[ 56.712023] x19: 0000000000000001 x18: ffffffffffffffff +[ 56.717322] x17: 0000000000000000 x16: 0000000000000000 +[ 56.722621] x15: ffff0000091196c8 x14: 72635f7265687069 +[ 56.727920] x13: 636b735f676c615f x12: ffff000009119940 +[ 56.733219] x11: 0000000000000000 x10: 00000000006080c0 +[ 56.738519] x9 : 
0000000000000000 x8 : ffff841fbb24e480 +[ 56.743818] x7 : ffff841fbb24e500 x6 : ffff841ff00cdcc0 +[ 56.749117] x5 : 0000000000000010 x4 : 0000000000000000 +[ 56.754416] x3 : ffff841fbb24e380 x2 : ffff841fbb24e480 +[ 56.759715] x1 : 0000000000000000 x0 : ffff000008f682c8 +[ 56.765016] Process insmod (pid: 2326, stack limit = 0x(____ptrval____)) +[ 56.771702] Call trace: +[ 56.774136] sec_alg_skcipher_crypto+0x318/0x748 +[ 56.778740] sec_alg_skcipher_encrypt+0x10/0x18 +[ 56.783259] test_skcipher_speed+0x2a0/0x700 [tcrypt] +[ 56.788298] do_test+0x18f8/0x48c8 [tcrypt] +[ 56.792469] tcrypt_mod_init+0x60/0x1000 [tcrypt] +[ 56.797161] do_one_initcall+0x5c/0x178 +[ 56.800985] do_init_module+0x58/0x1b4 +[ 56.804721] load_module+0x1da4/0x2150 +[ 56.808456] __se_sys_init_module+0x14c/0x1e8 +[ 56.812799] __arm64_sys_init_module+0x18/0x20 +[ 56.817231] el0_svc_common+0x60/0xe8 +[ 56.820880] el0_svc_handler+0x2c/0x80 +[ 56.824615] el0_svc+0x8/0xc +[ 56.827483] Code: a94c87a3 910b2000 f87b7842 f9004ba2 (b87b7821) +[ 56.833564] ---[ end trace 0f63290590e93d94 ]--- +Segmentation fault + +Fix this by only accessing these memories when we have different src and +dst. 
+ +Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver") +Reviewed-by: Jonathan Cameron +Cc: +Signed-off-by: John Garry +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/hisilicon/sec/sec_algs.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/crypto/hisilicon/sec/sec_algs.c ++++ b/drivers/crypto/hisilicon/sec/sec_algs.c +@@ -732,6 +732,7 @@ static int sec_alg_skcipher_crypto(struc + int *splits_in_nents; + int *splits_out_nents = NULL; + struct sec_request_el *el, *temp; ++ bool split = skreq->src != skreq->dst; + + mutex_init(&sec_req->lock); + sec_req->req_base = &skreq->base; +@@ -750,7 +751,7 @@ static int sec_alg_skcipher_crypto(struc + if (ret) + goto err_free_split_sizes; + +- if (skreq->src != skreq->dst) { ++ if (split) { + sec_req->len_out = sg_nents(skreq->dst); + ret = sec_map_and_split_sg(skreq->dst, split_sizes, steps, + &splits_out, &splits_out_nents, +@@ -785,8 +786,9 @@ static int sec_alg_skcipher_crypto(struc + split_sizes[i], + skreq->src != skreq->dst, + splits_in[i], splits_in_nents[i], +- splits_out[i], +- splits_out_nents[i], info); ++ split ? splits_out[i] : NULL, ++ split ? 
splits_out_nents[i] : 0, ++ info); + if (IS_ERR(el)) { + ret = PTR_ERR(el); + goto err_free_elements; +@@ -854,7 +856,7 @@ err_free_elements: + crypto_skcipher_ivsize(atfm), + DMA_BIDIRECTIONAL); + err_unmap_out_sg: +- if (skreq->src != skreq->dst) ++ if (split) + sec_unmap_sg_on_err(skreq->dst, steps, splits_out, + splits_out_nents, sec_req->len_out, + info->dev); diff --git a/queue-4.19/crypto-hisilicon-fix-reference-after-free-of-memories-on-error-path.patch b/queue-4.19/crypto-hisilicon-fix-reference-after-free-of-memories-on-error-path.patch new file mode 100644 index 00000000000..5a019172ea9 --- /dev/null +++ b/queue-4.19/crypto-hisilicon-fix-reference-after-free-of-memories-on-error-path.patch @@ -0,0 +1,83 @@ +From 0b0cf6af3f3151c26c27e8e51def5527091c3e69 Mon Sep 17 00:00:00 2001 +From: John Garry +Date: Mon, 5 Nov 2018 20:35:15 +0800 +Subject: crypto: hisilicon - Fix reference after free of memories on error path + +From: John Garry + +commit 0b0cf6af3f3151c26c27e8e51def5527091c3e69 upstream. + +coccicheck currently warns of the following issues in the driver: +drivers/crypto/hisilicon/sec/sec_algs.c:864:51-66: ERROR: reference preceded by free on line 812 +drivers/crypto/hisilicon/sec/sec_algs.c:864:40-49: ERROR: reference preceded by free on line 813 +drivers/crypto/hisilicon/sec/sec_algs.c:861:8-24: ERROR: reference preceded by free on line 814 +drivers/crypto/hisilicon/sec/sec_algs.c:860:41-51: ERROR: reference preceded by free on line 815 +drivers/crypto/hisilicon/sec/sec_algs.c:867:7-18: ERROR: reference preceded by free on line 816 + +It would appear than on certain error paths that we may attempt reference- +after-free some memories. + +This patch fixes those issues. The solution doesn't look perfect, but +having same memories free'd possibly from separate functions makes it +tricky. 
+ +Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver") +Reviewed-by: Jonathan Cameron +Cc: +Signed-off-by: John Garry +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/hisilicon/sec/sec_algs.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +--- a/drivers/crypto/hisilicon/sec/sec_algs.c ++++ b/drivers/crypto/hisilicon/sec/sec_algs.c +@@ -808,13 +808,6 @@ static int sec_alg_skcipher_crypto(struc + * more refined but this is unlikely to happen so no need. + */ + +- /* Cleanup - all elements in pointer arrays have been coppied */ +- kfree(splits_in_nents); +- kfree(splits_in); +- kfree(splits_out_nents); +- kfree(splits_out); +- kfree(split_sizes); +- + /* Grab a big lock for a long time to avoid concurrency issues */ + mutex_lock(&queue->queuelock); + +@@ -829,13 +822,13 @@ static int sec_alg_skcipher_crypto(struc + (!queue->havesoftqueue || + kfifo_avail(&queue->softqueue) > steps)) || + !list_empty(&ctx->backlog)) { ++ ret = -EBUSY; + if ((skreq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + list_add_tail(&sec_req->backlog_head, &ctx->backlog); + mutex_unlock(&queue->queuelock); +- return -EBUSY; ++ goto out; + } + +- ret = -EBUSY; + mutex_unlock(&queue->queuelock); + goto err_free_elements; + } +@@ -844,7 +837,15 @@ static int sec_alg_skcipher_crypto(struc + if (ret) + goto err_free_elements; + +- return -EINPROGRESS; ++ ret = -EINPROGRESS; ++out: ++ /* Cleanup - all elements in pointer arrays have been copied */ ++ kfree(splits_in_nents); ++ kfree(splits_in); ++ kfree(splits_out_nents); ++ kfree(splits_out); ++ kfree(split_sizes); ++ return ret; + + err_free_elements: + list_for_each_entry_safe(el, temp, &sec_req->elements, head) { diff --git a/queue-4.19/hwmon-core-fix-double-free-in-__hwmon_device_register.patch b/queue-4.19/hwmon-core-fix-double-free-in-__hwmon_device_register.patch new file mode 100644 index 00000000000..1b7eb2e8b79 --- /dev/null +++ 
b/queue-4.19/hwmon-core-fix-double-free-in-__hwmon_device_register.patch @@ -0,0 +1,132 @@ +From 74e3512731bd5c9673176425a76a7cc5efa8ddb6 Mon Sep 17 00:00:00 2001 +From: Dmitry Osipenko +Date: Wed, 24 Oct 2018 22:37:13 +0300 +Subject: hwmon: (core) Fix double-free in __hwmon_device_register() + +From: Dmitry Osipenko + +commit 74e3512731bd5c9673176425a76a7cc5efa8ddb6 upstream. + +Fix double-free that happens when thermal zone setup fails, see KASAN log +below. + +================================================================== +BUG: KASAN: double-free or invalid-free in __hwmon_device_register+0x5dc/0xa7c + +CPU: 0 PID: 132 Comm: kworker/0:2 Tainted: G B 4.19.0-rc8-next-20181016-00042-gb52cd80401e9-dirty #41 +Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) +Workqueue: events deferred_probe_work_func +Backtrace: +[] (dump_backtrace) from [] (show_stack+0x20/0x24) +[] (show_stack) from [] (dump_stack+0x9c/0xb0) +[] (dump_stack) from [] (print_address_description+0x68/0x250) +[] (print_address_description) from [] (kasan_report_invalid_free+0x68/0x88) +[] (kasan_report_invalid_free) from [] (__kasan_slab_free+0x1f4/0x200) +[] (__kasan_slab_free) from [] (kasan_slab_free+0x14/0x18) +[] (kasan_slab_free) from [] (kfree+0x90/0x294) +[] (kfree) from [] (__hwmon_device_register+0x5dc/0xa7c) +[] (__hwmon_device_register) from [] (hwmon_device_register_with_info+0xa0/0xa8) +[] (hwmon_device_register_with_info) from [] (devm_hwmon_device_register_with_info+0x74/0xb4) +[] (devm_hwmon_device_register_with_info) from [] (lm90_probe+0x414/0x578) +[] (lm90_probe) from [] (i2c_device_probe+0x35c/0x384) +[] (i2c_device_probe) from [] (really_probe+0x290/0x3e4) +[] (really_probe) from [] (driver_probe_device+0x80/0x1c4) +[] (driver_probe_device) from [] (__device_attach_driver+0x104/0x11c) +[] (__device_attach_driver) from [] (bus_for_each_drv+0xa4/0xc8) +[] (bus_for_each_drv) from [] (__device_attach+0xf0/0x15c) +[] (__device_attach) from [] 
(device_initial_probe+0x1c/0x20) +[] (device_initial_probe) from [] (bus_probe_device+0xdc/0xec) +[] (bus_probe_device) from [] (deferred_probe_work_func+0xa8/0xd4) +[] (deferred_probe_work_func) from [] (process_one_work+0x3dc/0x96c) +[] (process_one_work) from [] (worker_thread+0x4ec/0x8bc) +[] (worker_thread) from [] (kthread+0x230/0x240) +[] (kthread) from [] (ret_from_fork+0x14/0x38) +Exception stack(0xcf743fb0 to 0xcf743ff8) +3fa0: 00000000 00000000 00000000 00000000 +3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 + +Allocated by task 132: + kasan_kmalloc.part.1+0x58/0xf4 + kasan_kmalloc+0x90/0xa4 + kmem_cache_alloc_trace+0x90/0x2a0 + __hwmon_device_register+0xbc/0xa7c + hwmon_device_register_with_info+0xa0/0xa8 + devm_hwmon_device_register_with_info+0x74/0xb4 + lm90_probe+0x414/0x578 + i2c_device_probe+0x35c/0x384 + really_probe+0x290/0x3e4 + driver_probe_device+0x80/0x1c4 + __device_attach_driver+0x104/0x11c + bus_for_each_drv+0xa4/0xc8 + __device_attach+0xf0/0x15c + device_initial_probe+0x1c/0x20 + bus_probe_device+0xdc/0xec + deferred_probe_work_func+0xa8/0xd4 + process_one_work+0x3dc/0x96c + worker_thread+0x4ec/0x8bc + kthread+0x230/0x240 + ret_from_fork+0x14/0x38 + (null) + +Freed by task 132: + __kasan_slab_free+0x12c/0x200 + kasan_slab_free+0x14/0x18 + kfree+0x90/0x294 + hwmon_dev_release+0x1c/0x20 + device_release+0x4c/0xe8 + kobject_put+0xac/0x11c + device_unregister+0x2c/0x30 + __hwmon_device_register+0xa58/0xa7c + hwmon_device_register_with_info+0xa0/0xa8 + devm_hwmon_device_register_with_info+0x74/0xb4 + lm90_probe+0x414/0x578 + i2c_device_probe+0x35c/0x384 + really_probe+0x290/0x3e4 + driver_probe_device+0x80/0x1c4 + __device_attach_driver+0x104/0x11c + bus_for_each_drv+0xa4/0xc8 + __device_attach+0xf0/0x15c + device_initial_probe+0x1c/0x20 + bus_probe_device+0xdc/0xec + deferred_probe_work_func+0xa8/0xd4 + process_one_work+0x3dc/0x96c + 
worker_thread+0x4ec/0x8bc + kthread+0x230/0x240 + ret_from_fork+0x14/0x38 + (null) + +Cc: # v4.15+ +Fixes: 47c332deb8e8 ("hwmon: Deal with errors from the thermal subsystem") +Signed-off-by: Dmitry Osipenko +Signed-off-by: Guenter Roeck +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hwmon/hwmon.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/hwmon/hwmon.c ++++ b/drivers/hwmon/hwmon.c +@@ -635,8 +635,10 @@ __hwmon_device_register(struct device *d + if (info[i]->config[j] & HWMON_T_INPUT) { + err = hwmon_thermal_add_sensor(dev, + hwdev, j); +- if (err) +- goto free_device; ++ if (err) { ++ device_unregister(hdev); ++ goto ida_remove; ++ } + } + } + } +@@ -644,8 +646,6 @@ __hwmon_device_register(struct device *d + + return hdev; + +-free_device: +- device_unregister(hdev); + free_hwmon: + kfree(hwdev); + ida_remove: diff --git a/queue-4.19/libceph-bump-ceph_msg_max_data_len.patch b/queue-4.19/libceph-bump-ceph_msg_max_data_len.patch new file mode 100644 index 00000000000..a82ab560b2e --- /dev/null +++ b/queue-4.19/libceph-bump-ceph_msg_max_data_len.patch @@ -0,0 +1,42 @@ +From 94e6992bb560be8bffb47f287194adf070b57695 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Wed, 26 Sep 2018 18:03:16 +0200 +Subject: libceph: bump CEPH_MSG_MAX_DATA_LEN + +From: Ilya Dryomov + +commit 94e6992bb560be8bffb47f287194adf070b57695 upstream. + +If the read is large enough, we end up spinning in the messenger: + + libceph: osd0 192.168.122.1:6801 io error + libceph: osd0 192.168.122.1:6801 io error + libceph: osd0 192.168.122.1:6801 io error + +This is a receive side limit, so only reads were affected. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/ceph/libceph.h | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/include/linux/ceph/libceph.h ++++ b/include/linux/ceph/libceph.h +@@ -81,7 +81,13 @@ struct ceph_options { + + #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) + #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) +-#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) ++ ++/* ++ * Handle the largest possible rbd object in one message. ++ * There is no limit on the size of cephfs objects, but it has to obey ++ * rsize and wsize mount options anyway. ++ */ ++#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024) + + #define CEPH_AUTH_NAME_DEFAULT "guest" + diff --git a/queue-4.19/mach64-fix-display-corruption-on-big-endian-machines.patch b/queue-4.19/mach64-fix-display-corruption-on-big-endian-machines.patch new file mode 100644 index 00000000000..cc0e1552593 --- /dev/null +++ b/queue-4.19/mach64-fix-display-corruption-on-big-endian-machines.patch @@ -0,0 +1,59 @@ +From 3c6c6a7878d00a3ac997a779c5b9861ff25dfcc8 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 8 Oct 2018 12:57:34 +0200 +Subject: mach64: fix display corruption on big endian machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mikulas Patocka + +commit 3c6c6a7878d00a3ac997a779c5b9861ff25dfcc8 upstream. + +The code for manual bit triple is not endian-clean. It builds the variable +"hostdword" using byte accesses, therefore we must read the variable with +"le32_to_cpu". + +The patch also enables (hardware or software) bit triple only if the image +is monochrome (image->depth). If we want to blit full-color image, we +shouldn't use the triple code. 
+ +Signed-off-by: Mikulas Patocka +Reviewed-by: Ville Syrjälä +Cc: stable@vger.kernel.org +Signed-off-by: Bartlomiej Zolnierkiewicz +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/fbdev/aty/mach64_accel.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/video/fbdev/aty/mach64_accel.c ++++ b/drivers/video/fbdev/aty/mach64_accel.c +@@ -345,7 +345,7 @@ void atyfb_imageblit(struct fb_info *inf + * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit + * this hwaccelerated triple has an issue with not aligned data + */ +- if (M64_HAS(HW_TRIPLE) && image->width % 8 == 0) ++ if (image->depth == 1 && M64_HAS(HW_TRIPLE) && image->width % 8 == 0) + pix_width |= DP_HOST_TRIPLE_EN; + } + +@@ -382,7 +382,7 @@ void atyfb_imageblit(struct fb_info *inf + src_bytes = (((image->width * image->depth) + 7) / 8) * image->height; + + /* manual triple each pixel */ +- if (info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { ++ if (image->depth == 1 && info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { + int inbit, outbit, mult24, byte_id_in_dword, width; + u8 *pbitmapin = (u8*)image->data, *pbitmapout; + u32 hostdword; +@@ -415,7 +415,7 @@ void atyfb_imageblit(struct fb_info *inf + } + } + wait_for_fifo(1, par); +- aty_st_le32(HOST_DATA0, hostdword, par); ++ aty_st_le32(HOST_DATA0, le32_to_cpu(hostdword), par); + } + } else { + u32 *pbitmap, dwords = (src_bytes + 3) / 4; diff --git a/queue-4.19/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch b/queue-4.19/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch new file mode 100644 index 00000000000..ac20fa3171e --- /dev/null +++ b/queue-4.19/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch @@ -0,0 +1,114 @@ +From c09bcc91bb94ed91f1391bffcbe294963d605732 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 8 Oct 2018 12:57:35 +0200 +Subject: mach64: fix image corruption due to reading accelerator 
registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mikulas Patocka + +commit c09bcc91bb94ed91f1391bffcbe294963d605732 upstream. + +Reading the registers without waiting for engine idle returns +unpredictable values. These unpredictable values result in display +corruption - if atyfb_imageblit reads the content of DP_PIX_WIDTH with the +bit DP_HOST_TRIPLE_EN set (from previous invocation), the driver would +never ever clear the bit, resulting in display corruption. + +We don't want to wait for idle because it would degrade performance, so +this patch modifies the driver so that it never reads accelerator +registers. + +HOST_CNTL doesn't have to be read, we can just write it with +HOST_BYTE_ALIGN because no other part of the driver cares if +HOST_BYTE_ALIGN is set. + +DP_PIX_WIDTH is written in the functions atyfb_copyarea and atyfb_fillrect +with the default value and in atyfb_imageblit with the value set according +to the source image data. 
+ +Signed-off-by: Mikulas Patocka +Reviewed-by: Ville Syrjälä +Cc: stable@vger.kernel.org +Signed-off-by: Bartlomiej Zolnierkiewicz +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/fbdev/aty/mach64_accel.c | 22 +++++++++------------- + 1 file changed, 9 insertions(+), 13 deletions(-) + +--- a/drivers/video/fbdev/aty/mach64_accel.c ++++ b/drivers/video/fbdev/aty/mach64_accel.c +@@ -127,7 +127,7 @@ void aty_init_engine(struct atyfb_par *p + + /* set host attributes */ + wait_for_fifo(13, par); +- aty_st_le32(HOST_CNTL, 0, par); ++ aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); + + /* set pattern attributes */ + aty_st_le32(PAT_REG0, 0, par); +@@ -233,7 +233,8 @@ void atyfb_copyarea(struct fb_info *info + rotation = rotation24bpp(dx, direction); + } + +- wait_for_fifo(4, par); ++ wait_for_fifo(5, par); ++ aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); + aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par); + aty_st_le32(SRC_Y_X, (sx << 16) | sy, par); + aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | area->height, par); +@@ -269,7 +270,8 @@ void atyfb_fillrect(struct fb_info *info + rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); + } + +- wait_for_fifo(3, par); ++ wait_for_fifo(4, par); ++ aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); + aty_st_le32(DP_FRGD_CLR, color, par); + aty_st_le32(DP_SRC, + BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, +@@ -284,7 +286,7 @@ void atyfb_imageblit(struct fb_info *inf + { + struct atyfb_par *par = (struct atyfb_par *) info->par; + u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width; +- u32 pix_width_save, pix_width, host_cntl, rotation = 0, src, mix; ++ u32 pix_width, rotation = 0, src, mix; + + if (par->asleep) + return; +@@ -296,8 +298,7 @@ void atyfb_imageblit(struct fb_info *inf + return; + } + +- pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par); +- host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN; ++ pix_width = par->crtc.dp_pix_width; + + switch (image->depth) { + 
case 1: +@@ -370,12 +371,11 @@ void atyfb_imageblit(struct fb_info *inf + mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D; + } + +- wait_for_fifo(6, par); +- aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par); ++ wait_for_fifo(5, par); + aty_st_le32(DP_PIX_WIDTH, pix_width, par); + aty_st_le32(DP_MIX, mix, par); + aty_st_le32(DP_SRC, src, par); +- aty_st_le32(HOST_CNTL, host_cntl, par); ++ aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); + aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par); + + draw_rect(dx, dy, width, image->height, par); +@@ -424,8 +424,4 @@ void atyfb_imageblit(struct fb_info *inf + aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par); + } + } +- +- /* restore pix_width */ +- wait_for_fifo(1, par); +- aty_st_le32(DP_PIX_WIDTH, pix_width_save, par); + } diff --git a/queue-4.19/memory_hotplug-cond_resched-in-__remove_pages.patch b/queue-4.19/memory_hotplug-cond_resched-in-__remove_pages.patch new file mode 100644 index 00000000000..e3494367c92 --- /dev/null +++ b/queue-4.19/memory_hotplug-cond_resched-in-__remove_pages.patch @@ -0,0 +1,60 @@ +From dd33ad7b251f900481701b2a82d25de583867708 Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Fri, 2 Nov 2018 15:48:46 -0700 +Subject: memory_hotplug: cond_resched in __remove_pages + +From: Michal Hocko + +commit dd33ad7b251f900481701b2a82d25de583867708 upstream. + +We have received a bug report that unbinding a large pmem (>1TB) can +result in a soft lockup: + + NMI watchdog: BUG: soft lockup - CPU#9 stuck for 23s! [ndctl:4365] + [...] 
+ Supported: Yes + CPU: 9 PID: 4365 Comm: ndctl Not tainted 4.12.14-94.40-default #1 SLE12-SP4 + Hardware name: Intel Corporation S2600WFD/S2600WFD, BIOS SE5C620.86B.01.00.0833.051120182255 05/11/2018 + task: ffff9cce7d4410c0 task.stack: ffffbe9eb1bc4000 + RIP: 0010:__put_page+0x62/0x80 + Call Trace: + devm_memremap_pages_release+0x152/0x260 + release_nodes+0x18d/0x1d0 + device_release_driver_internal+0x160/0x210 + unbind_store+0xb3/0xe0 + kernfs_fop_write+0x102/0x180 + __vfs_write+0x26/0x150 + vfs_write+0xad/0x1a0 + SyS_write+0x42/0x90 + do_syscall_64+0x74/0x150 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + RIP: 0033:0x7fd13166b3d0 + +It has been reported on an older (4.12) kernel but the current upstream +code doesn't cond_resched in the hot remove code at all and the given +range to remove might be really large. Fix the issue by calling +cond_resched once per memory section. + +Link: http://lkml.kernel.org/r/20181031125840.23982-1-mhocko@kernel.org +Signed-off-by: Michal Hocko +Acked-by: Johannes Thumshirn +Cc: Dan Williams +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory_hotplug.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -587,6 +587,7 @@ int __remove_pages(struct zone *zone, un + for (i = 0; i < sections_to_remove; i++) { + unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; + ++ cond_resched(); + ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, + altmap); + map_offset = 0; diff --git a/queue-4.19/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch b/queue-4.19/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch new file mode 100644 index 00000000000..3c40eddec37 --- /dev/null +++ b/queue-4.19/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch @@ -0,0 +1,228 @@ +From ac5b2c18911ffe95c08d69273917f90212cf5659 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Fri, 2 Nov 2018 15:47:59 
-0700 +Subject: mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings + +From: Andrea Arcangeli + +commit ac5b2c18911ffe95c08d69273917f90212cf5659 upstream. + +THP allocation might be really disruptive when allocated on NUMA system +with the local node full or hard to reclaim. Stefan has posted an +allocation stall report on 4.12 based SLES kernel which suggests the +same issue: + + kvm: page allocation stalls for 194572ms, order:9, mode:0x4740ca(__GFP_HIGHMEM|__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|__GFP_MOVABLE|__GFP_DIRECT_RECLAIM), nodemask=(null) + kvm cpuset=/ mems_allowed=0-1 + CPU: 10 PID: 84752 Comm: kvm Tainted: G W 4.12.0+98-ph 0000001 SLE15 (unreleased) + Hardware name: Supermicro SYS-1029P-WTRT/X11DDW-NT, BIOS 2.0 12/05/2017 + Call Trace: + dump_stack+0x5c/0x84 + warn_alloc+0xe0/0x180 + __alloc_pages_slowpath+0x820/0xc90 + __alloc_pages_nodemask+0x1cc/0x210 + alloc_pages_vma+0x1e5/0x280 + do_huge_pmd_wp_page+0x83f/0xf00 + __handle_mm_fault+0x93d/0x1060 + handle_mm_fault+0xc6/0x1b0 + __do_page_fault+0x230/0x430 + do_page_fault+0x2a/0x70 + page_fault+0x7b/0x80 + [...] + Mem-Info: + active_anon:126315487 inactive_anon:1612476 isolated_anon:5 + active_file:60183 inactive_file:245285 isolated_file:0 + unevictable:15657 dirty:286 writeback:1 unstable:0 + slab_reclaimable:75543 slab_unreclaimable:2509111 + mapped:81814 shmem:31764 pagetables:370616 bounce:0 + free:32294031 free_pcp:6233 free_cma:0 + Node 0 active_anon:254680388kB inactive_anon:1112760kB active_file:240648kB inactive_file:981168kB unevictable:13368kB isolated(anon):0kB isolated(file):0kB mapped:280240kB dirty:1144kB writeback:0kB shmem:95832kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 81225728kB writeback_tmp:0kB unstable:0kB all_unreclaimable? 
no + Node 1 active_anon:250583072kB inactive_anon:5337144kB active_file:84kB inactive_file:0kB unevictable:49260kB isolated(anon):20kB isolated(file):0kB mapped:47016kB dirty:0kB writeback:4kB shmem:31224kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 31897600kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no + +The defrag mode is "madvise" and from the above report it is clear that +the THP has been allocated for MADV_HUGEPAGE vma. + +Andrea has identified that the main source of the problem is +__GFP_THISNODE usage: + +: The problem is that direct compaction combined with the NUMA +: __GFP_THISNODE logic in mempolicy.c is telling reclaim to swap very +: hard the local node, instead of failing the allocation if there's no +: THP available in the local node. +: +: Such logic was ok until __GFP_THISNODE was added to the THP allocation +: path even with MPOL_DEFAULT. +: +: The idea behind the __GFP_THISNODE addition, is that it is better to +: provide local memory in PAGE_SIZE units than to use remote NUMA THP +: backed memory. That largely depends on the remote latency though, on +: threadrippers for example the overhead is relatively low in my +: experience. +: +: The combination of __GFP_THISNODE and __GFP_DIRECT_RECLAIM results in +: extremely slow qemu startup with vfio, if the VM is larger than the +: size of one host NUMA node. This is because it will try very hard to +: unsuccessfully swapout get_user_pages pinned pages as result of the +: __GFP_THISNODE being set, instead of falling back to PAGE_SIZE +: allocations and instead of trying to allocate THP on other nodes (it +: would be even worse without vfio type1 GUP pins of course, except it'd +: be swapping heavily instead). + +Fix this by removing __GFP_THISNODE for THP requests which are +requesting the direct reclaim. This effectively reverts 5265047ac301 +on the grounds that the zone/node reclaim was known to be disruptive due +to premature reclaim when there was memory free.
While it made sense at +the time for HPC workloads without NUMA awareness on rare machines, it +was ultimately harmful in the majority of cases. The existing behaviour +is similar, if not as widespread as it applies to a corner case but +crucially, it cannot be tuned around like zone_reclaim_mode can. The +default behaviour should always be to cause the least harm for the +common case. + +If there are specialised use cases out there that want zone_reclaim_mode +in specific cases, then it can be built on top. Longterm we should +consider a memory policy which allows for the node reclaim like behavior +for the specific memory ranges which would allow a + +[1] http://lkml.kernel.org/r/20180820032204.9591-1-aarcange@redhat.com + +Mel said: + +: Both patches look correct to me but I'm responding to this one because +: it's the fix. The change makes sense and moves further away from the +: severe stalling behaviour we used to see with both THP and zone reclaim +: mode. +: +: I put together a basic experiment with usemem configured to reference a +: buffer multiple times that is 80% the size of main memory on a 2-socket +: box with symmetric node sizes and defrag set to "always". The defrag +: setting is not the default but it would be functionally similar to +: accessing a buffer with madvise(MADV_HUGEPAGE). Usemem is configured to +: reference the buffer multiple times and while it's not an interesting +: workload, it would be expected to complete reasonably quickly as it fits +: within memory. The results were; +: +: usemem +: vanilla noreclaim-v1 +: Amean Elapsd-1 42.78 ( 0.00%) 26.87 ( 37.18%) +: Amean Elapsd-3 27.55 ( 0.00%) 7.44 ( 73.00%) +: Amean Elapsd-4 5.72 ( 0.00%) 5.69 ( 0.45%) +: +: This shows the elapsed time in seconds for 1 thread, 3 threads and 4 +: threads referencing buffers 80% the size of memory. With the patches +: applied, it's 37.18% faster for the single thread and 73% faster with three +: threads.
Note that 4 threads showing little difference does not indicate +: the problem is related to thread counts. It's simply the case that 4 +: threads gets spread so their workload mostly fits in one node. +: +: The overall view from /proc/vmstats is more startling +: +: 4.19.0-rc1 4.19.0-rc1 +: vanillanoreclaim-v1r1 +: Minor Faults 35593425 708164 +: Major Faults 484088 36 +: Swap Ins 3772837 0 +: Swap Outs 3932295 0 +: +: Massive amounts of swap in/out without the patch +: +: Direct pages scanned 6013214 0 +: Kswapd pages scanned 0 0 +: Kswapd pages reclaimed 0 0 +: Direct pages reclaimed 4033009 0 +: +: Lots of reclaim activity without the patch +: +: Kswapd efficiency 100% 100% +: Kswapd velocity 0.000 0.000 +: Direct efficiency 67% 100% +: Direct velocity 11191.956 0.000 +: +: Mostly from direct reclaim context as you'd expect without the patch. +: +: Page writes by reclaim 3932314.000 0.000 +: Page writes file 19 0 +: Page writes anon 3932295 0 +: Page reclaim immediate 42336 0 +: +: Writes from reclaim context is never good but the patch eliminates it. +: +: We should never have default behaviour to thrash the system for such a +: basic workload. If zone reclaim mode behaviour is ever desired but on a +: single task instead of a global basis then the sensible option is to build +: a mempolicy that enforces that behaviour. + +This was a severe regression compared to previous kernels that made +important workloads unusable and it starts when __GFP_THISNODE was +added to THP allocations under MADV_HUGEPAGE. It is not a significant +risk to go to the previous behavior before __GFP_THISNODE was added, it +worked like that for years. + +This was simply an optimization to some lucky workloads that can fit in +a single node, but it ended up breaking the VM for others that can't +possibly fit in a single node, so going back is safe. 
+ +[mhocko@suse.com: rewrote the changelog based on the one from Andrea] +Link: http://lkml.kernel.org/r/20180925120326.24392-2-mhocko@kernel.org +Fixes: 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node") +Signed-off-by: Andrea Arcangeli +Signed-off-by: Michal Hocko +Reported-by: Stefan Priebe +Debugged-by: Andrea Arcangeli +Reported-by: Alex Williamson +Reviewed-by: Mel Gorman +Tested-by: Mel Gorman +Cc: Zi Yan +Cc: Vlastimil Babka +Cc: David Rientjes +Cc: "Kirill A. Shutemov" +Cc: [4.1+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 32 ++++++++++++++++++++++++++++++-- + 1 file changed, 30 insertions(+), 2 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -2046,8 +2046,36 @@ alloc_pages_vma(gfp_t gfp, int order, st + nmask = policy_nodemask(gfp, pol); + if (!nmask || node_isset(hpage_node, *nmask)) { + mpol_cond_put(pol); +- page = __alloc_pages_node(hpage_node, +- gfp | __GFP_THISNODE, order); ++ /* ++ * We cannot invoke reclaim if __GFP_THISNODE ++ * is set. Invoking reclaim with ++ * __GFP_THISNODE set, would cause THP ++ * allocations to trigger heavy swapping ++ * despite there may be tons of free memory ++ * (including potentially plenty of THP ++ * already available in the buddy) on all the ++ * other NUMA nodes. ++ * ++ * At most we could invoke compaction when ++ * __GFP_THISNODE is set (but we would need to ++ * refrain from invoking reclaim even if ++ * compaction returned COMPACT_SKIPPED because ++ * there wasn't not enough memory to succeed ++ * compaction). For now just avoid ++ * __GFP_THISNODE instead of limiting the ++ * allocation path to a strict and single ++ * compaction invocation. 
++ * ++ * Supposedly if direct reclaim was enabled by ++ * the caller, the app prefers THP regardless ++ * of the node it comes from so this would be ++ * more desiderable behavior than only ++ * providing THP originated from the local ++ * node in such case. ++ */ ++ if (!(gfp & __GFP_DIRECT_RECLAIM)) ++ gfp |= __GFP_THISNODE; ++ page = __alloc_pages_node(hpage_node, gfp, order); + goto out; + } + } diff --git a/queue-4.19/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch b/queue-4.19/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch new file mode 100644 index 00000000000..8448303a4a9 --- /dev/null +++ b/queue-4.19/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch @@ -0,0 +1,51 @@ +From be2e1c9dcf76886a83fb1c433a316e26d4ca2550 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Thu, 11 Oct 2018 13:06:16 +0200 +Subject: mtd: docg3: don't set conflicting BCH_CONST_PARAMS option + +From: Arnd Bergmann + +commit be2e1c9dcf76886a83fb1c433a316e26d4ca2550 upstream. + +I noticed during the creation of another bugfix that the BCH_CONST_PARAMS +option that is set by DOCG3 breaks setting variable parameters for any +other users of the BCH library code. + +The only other user we have today is the MTD_NAND software BCH +implementation (most flash controllers use hardware BCH these days +and are not affected). I considered removing BCH_CONST_PARAMS entirely +because of the inherent conflict, but according to the description in +lib/bch.c there is a significant performance benefit in keeping it. + +To avoid the immediate problem of the conflict between MTD_NAND_BCH +and DOCG3, this only sets the constant parameters if MTD_NAND_BCH +is disabled, which should fix the problem for all cases that +are affected. This should also work for all stable kernels. 
+ +Note that there is only one machine that actually seems to use the +DOCG3 driver (arch/arm/mach-pxa/mioa701.c), so most users should have +the driver disabled, but it almost certainly shows up if we wanted +to test random kernels on machines that use software BCH in MTD. + +Fixes: d13d19ece39f ("mtd: docg3: add ECC correction code") +Cc: stable@vger.kernel.org +Cc: Robert Jarzmik +Signed-off-by: Arnd Bergmann +Signed-off-by: Boris Brezillon +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/devices/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mtd/devices/Kconfig ++++ b/drivers/mtd/devices/Kconfig +@@ -207,7 +207,7 @@ comment "Disk-On-Chip Device Drivers" + config MTD_DOCG3 + tristate "M-Systems Disk-On-Chip G3" + select BCH +- select BCH_CONST_PARAMS ++ select BCH_CONST_PARAMS if !MTD_NAND_BCH + select BITREVERSE + help + This provides an MTD device driver for the M-Systems DiskOnChip diff --git a/queue-4.19/mtd-nand-fix-nanddev_neraseblocks.patch b/queue-4.19/mtd-nand-fix-nanddev_neraseblocks.patch new file mode 100644 index 00000000000..6e67dc44699 --- /dev/null +++ b/queue-4.19/mtd-nand-fix-nanddev_neraseblocks.patch @@ -0,0 +1,36 @@ +From d098093ba06eb032057d1aca1c2e45889e099d00 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Sun, 28 Oct 2018 12:29:55 +0100 +Subject: mtd: nand: Fix nanddev_neraseblocks() + +From: Boris Brezillon + +commit d098093ba06eb032057d1aca1c2e45889e099d00 upstream. + +nanddev_neraseblocks() currently returns the number pages per LUN +instead of the total number of eraseblocks. 
+ +Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices") +Cc: +Signed-off-by: Boris Brezillon +Reviewed-by: Miquel Raynal +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mtd/nand.h | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/include/linux/mtd/nand.h ++++ b/include/linux/mtd/nand.h +@@ -324,9 +324,8 @@ static inline unsigned int nanddev_ntarg + */ + static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand) + { +- return (u64)nand->memorg.luns_per_target * +- nand->memorg.eraseblocks_per_lun * +- nand->memorg.pages_per_eraseblock; ++ return nand->memorg.ntargets * nand->memorg.luns_per_target * ++ nand->memorg.eraseblocks_per_lun; + } + + /** diff --git a/queue-4.19/mtd-spi-nor-cadence-quadspi-return-error-code-in-cqspi_direct_read_execute.patch b/queue-4.19/mtd-spi-nor-cadence-quadspi-return-error-code-in-cqspi_direct_read_execute.patch new file mode 100644 index 00000000000..2ce1a47c8cc --- /dev/null +++ b/queue-4.19/mtd-spi-nor-cadence-quadspi-return-error-code-in-cqspi_direct_read_execute.patch @@ -0,0 +1,36 @@ +From 91d7b67000c6e9bd605624079fee5a084238ad92 Mon Sep 17 00:00:00 2001 +From: Christophe JAILLET +Date: Tue, 16 Oct 2018 09:13:46 +0200 +Subject: mtd: spi-nor: cadence-quadspi: Return error code in cqspi_direct_read_execute() + +From: Christophe JAILLET + +commit 91d7b67000c6e9bd605624079fee5a084238ad92 upstream. + +We return 0 unconditionally in 'cqspi_direct_read_execute()'. +However, 'ret' is set to some error codes in several error handling +paths. + +Return 'ret' instead to propagate the error code. 
+ +Fixes: ffa639e069fb ("mtd: spi-nor: cadence-quadspi: Add DMA support for direct mode reads") +Cc: +Signed-off-by: Christophe JAILLET +Signed-off-by: Boris Brezillon +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/spi-nor/cadence-quadspi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mtd/spi-nor/cadence-quadspi.c ++++ b/drivers/mtd/spi-nor/cadence-quadspi.c +@@ -996,7 +996,7 @@ static int cqspi_direct_read_execute(str + err_unmap: + dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM); + +- return 0; ++ return ret; + } + + static ssize_t cqspi_read(struct spi_nor *nor, loff_t from, diff --git a/queue-4.19/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch b/queue-4.19/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch new file mode 100644 index 00000000000..2eafb9c2d2c --- /dev/null +++ b/queue-4.19/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch @@ -0,0 +1,59 @@ +From f393808dc64149ccd0e5a8427505ba2974a59854 Mon Sep 17 00:00:00 2001 +From: Vasily Khoruzhick +Date: Thu, 25 Oct 2018 12:15:43 -0700 +Subject: netfilter: conntrack: fix calculation of next bucket number in early_drop + +From: Vasily Khoruzhick + +commit f393808dc64149ccd0e5a8427505ba2974a59854 upstream. + +If there's no entry to drop in bucket that corresponds to the hash, +early_drop() should look for it in other buckets. But since it increments +hash instead of bucket number, it actually looks in the same bucket 8 +times: hsize is 16k by default (14 bits) and hash is 32-bit value, so +reciprocal_scale(hash, hsize) returns the same value for hash..hash+7 in +most cases. + +Fix it by increasing bucket number instead of hash and rename _hash +to bucket to avoid future confusion. 
+ +Fixes: 3e86638e9a0b ("netfilter: conntrack: consider ct netns in early_drop logic") +Cc: # v4.7+ +Signed-off-by: Vasily Khoruzhick +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nf_conntrack_core.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(stru + return drops; + } + +-static noinline int early_drop(struct net *net, unsigned int _hash) ++static noinline int early_drop(struct net *net, unsigned int hash) + { +- unsigned int i; ++ unsigned int i, bucket; + + for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { + struct hlist_nulls_head *ct_hash; +- unsigned int hash, hsize, drops; ++ unsigned int hsize, drops; + + rcu_read_lock(); + nf_conntrack_get_ht(&ct_hash, &hsize); +- hash = reciprocal_scale(_hash++, hsize); ++ if (!i) ++ bucket = reciprocal_scale(hash, hsize); ++ else ++ bucket = (bucket + 1) % hsize; + +- drops = early_drop_list(net, &ct_hash[hash]); ++ drops = early_drop_list(net, &ct_hash[bucket]); + rcu_read_unlock(); + + if (drops) { diff --git a/queue-4.19/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch b/queue-4.19/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch new file mode 100644 index 00000000000..850078122bd --- /dev/null +++ b/queue-4.19/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch @@ -0,0 +1,54 @@ +From 29aa30167a0a2e6045a0d6d2e89d8168132333d5 Mon Sep 17 00:00:00 2001 +From: Changwei Ge +Date: Fri, 2 Nov 2018 15:48:15 -0700 +Subject: ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry + +From: Changwei Ge + +commit 29aa30167a0a2e6045a0d6d2e89d8168132333d5 upstream. + +Somehow, file system metadata was corrupted, which causes +ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el(). 
+ +According to the original design intention, if above happens we should +skip the problematic block and continue to retrieve dir entry. But +there is obvious misuse of brelse around related code. + +After failure of ocfs2_check_dir_entry(), current code just moves to +next position and uses the problematic buffer head again and again +during which the problematic buffer head is released for multiple times. +I suppose, this is a serious issue which is long-lived in ocfs2. This may +cause other file systems which are also used in the same host to go insane. + +So we should also consider backporting this patch into linux +-stable. + +Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB0452.apcprd06.prod.outlook.com +Signed-off-by: Changwei Ge +Suggested-by: Changkuo Shi +Reviewed-by: Andrew Morton +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Joseph Qi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/dir.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/ocfs2/dir.c ++++ b/fs/ocfs2/dir.c +@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(stru + /* On error, skip the f_pos to the + next block. */ + ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; +- brelse(bh); +- continue; ++ break; + } + if (le64_to_cpu(de->inode)) { + unsigned char d_type = DT_UNKNOWN; diff --git a/queue-4.19/ocfs2-free-up-write-context-when-direct-io-failed.patch b/queue-4.19/ocfs2-free-up-write-context-when-direct-io-failed.patch new file mode 100644 index 00000000000..65ed1c73ec7 --- /dev/null +++ b/queue-4.19/ocfs2-free-up-write-context-when-direct-io-failed.patch @@ -0,0 +1,85 @@ +From 5040f8df56fb90c7919f1c9b0b6e54c843437456 Mon Sep 17 00:00:00 2001 +From: Wengang Wang +Date: Fri, 16 Nov 2018 15:08:25 -0800 +Subject: ocfs2: free up write context when direct IO failed + +From: Wengang Wang + +commit 5040f8df56fb90c7919f1c9b0b6e54c843437456 upstream.
+ +The write context should also be freed even when direct IO failed. +Otherwise a memory leak is introduced and entries remain in +oi->ip_unwritten_list causing the following BUG later in unlink path: + + ERROR: bug expression: !list_empty(&oi->ip_unwritten_list) + ERROR: Clear inode of 215043, inode has unwritten extents + ... + Call Trace: + ? __set_current_blocked+0x42/0x68 + ocfs2_evict_inode+0x91/0x6a0 [ocfs2] + ? bit_waitqueue+0x40/0x33 + evict+0xdb/0x1af + iput+0x1a2/0x1f7 + do_unlinkat+0x194/0x28f + SyS_unlinkat+0x1b/0x2f + do_syscall_64+0x79/0x1ae + entry_SYSCALL_64_after_hwframe+0x151/0x0 + +This patch also logs, with frequency limit, direct IO failures. + +Link: http://lkml.kernel.org/r/20181102170632.25921-1-wen.gang.wang@oracle.com +Signed-off-by: Wengang Wang +Reviewed-by: Junxiao Bi +Reviewed-by: Changwei Ge +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/aops.c | 12 ++++++++++-- + fs/ocfs2/cluster/masklog.h | 9 +++++++++ + 2 files changed, 19 insertions(+), 2 deletions(-) + +--- a/fs/ocfs2/aops.c ++++ b/fs/ocfs2/aops.c +@@ -2412,8 +2412,16 @@ static int ocfs2_dio_end_io(struct kiocb + /* this io's submitter should not have unlocked this before we could */ + BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); + +- if (bytes > 0 && private) +- ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); ++ if (bytes <= 0) ++ mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld", ++ (long long)bytes); ++ if (private) { ++ if (bytes > 0) ++ ret = ocfs2_dio_end_io_write(inode, private, offset, ++ bytes); ++ else ++ ocfs2_dio_free_write_ctx(inode, private); ++ } + + ocfs2_iocb_clear_rw_locked(iocb); + +--- a/fs/ocfs2/cluster/masklog.h ++++ b/fs/ocfs2/cluster/masklog.h +@@ -178,6 +178,15 @@ do { \ + ##__VA_ARGS__); \ + } while (0) + ++#define mlog_ratelimited(mask, fmt, ...) 
\ ++do { \ ++ static DEFINE_RATELIMIT_STATE(_rs, \ ++ DEFAULT_RATELIMIT_INTERVAL, \ ++ DEFAULT_RATELIMIT_BURST); \ ++ if (__ratelimit(&_rs)) \ ++ mlog(mask, fmt, ##__VA_ARGS__); \ ++} while (0) ++ + #define mlog_errno(st) ({ \ + int _st = (st); \ + if (_st != -ERESTARTSYS && _st != -EINTR && \ diff --git a/queue-4.19/of-numa-validate-some-distance-map-rules.patch b/queue-4.19/of-numa-validate-some-distance-map-rules.patch new file mode 100644 index 00000000000..b82656cd724 --- /dev/null +++ b/queue-4.19/of-numa-validate-some-distance-map-rules.patch @@ -0,0 +1,80 @@ +From 89c38422e072bb453e3045b8f1b962a344c3edea Mon Sep 17 00:00:00 2001 +From: John Garry +Date: Thu, 8 Nov 2018 18:17:03 +0800 +Subject: of, numa: Validate some distance map rules + +From: John Garry + +commit 89c38422e072bb453e3045b8f1b962a344c3edea upstream. + +Currently the NUMA distance map parsing does not validate the distance +table for the distance-matrix rules 1-2 in [1]. + +However the arch NUMA code may enforce some of these rules, but not all. +Such is the case for the arm64 port, which does not enforce the rule that +the distance between separates nodes cannot equal LOCAL_DISTANCE. + +The patch adds the following rules validation: +- distance of node to self equals LOCAL_DISTANCE +- distance of separate nodes > LOCAL_DISTANCE + +This change avoids a yet-unresolved crash reported in [2]. + +A note on dealing with symmetrical distances between nodes: + +Validating symmetrical distances between nodes is difficult. If it were +mandated in the bindings that every distance must be recorded in the +table, then it would be easy. However, it isn't. + +In addition to this, it is also possible to record [b, a] distance only +(and not [a, b]). So, when processing the table for [b, a], we cannot +assert that current distance of [a, b] != [b, a] as invalid, as [a, b] +distance may not be present in the table and current distance would be +default at REMOTE_DISTANCE. 
+ +As such, we maintain the policy that we overwrite distance [a, b] = [b, a] +for b > a. This policy is different to kernel ACPI SLIT validation, which +allows non-symmetrical distances (ACPI spec SLIT rules allow it). However, +the distance debug message is dropped as it may be misleading (for a distance +which is later overwritten). + +Some final notes on semantics: + +- It is implied that it is the responsibility of the arch NUMA code to + reset the NUMA distance map for an error in distance map parsing. + +- It is the responsibility of the FW NUMA topology parsing (whether OF or + ACPI) to enforce NUMA distance rules, and not arch NUMA code. + +[1] Documentation/devicetree/bindings/numa.txt +[2] https://www.spinics.net/lists/arm-kernel/msg683304.html + +Cc: stable@vger.kernel.org # 4.7 +Signed-off-by: John Garry +Acked-by: Will Deacon +Signed-off-by: Rob Herring +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/of/of_numa.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/drivers/of/of_numa.c ++++ b/drivers/of/of_numa.c +@@ -115,9 +115,14 @@ static int __init of_numa_parse_distance + distance = of_read_number(matrix, 1); + matrix++; + ++ if ((nodea == nodeb && distance != LOCAL_DISTANCE) || ++ (nodea != nodeb && distance <= LOCAL_DISTANCE)) { ++ pr_err("Invalid distance[node%d -> node%d] = %d\n", ++ nodea, nodeb, distance); ++ return -EINVAL; ++ } ++ + numa_set_distance(nodea, nodeb, distance); +- pr_debug("distance[node%d -> node%d] = %d\n", +- nodea, nodeb, distance); + + /* Set default distance of node B->A same as A->B */ + if (nodeb > nodea) diff --git a/queue-4.19/perf-callchain-honour-the-ordering-of-perf_context_-user-kernel-etc.patch b/queue-4.19/perf-callchain-honour-the-ordering-of-perf_context_-user-kernel-etc.patch new file mode 100644 index 00000000000..5c0274cc160 --- /dev/null +++ b/queue-4.19/perf-callchain-honour-the-ordering-of-perf_context_-user-kernel-etc.patch @@ -0,0 +1,108 @@ +From
e9024d519d892b38176cafd46f68a7cdddd77412 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" +Date: Tue, 30 Oct 2018 12:12:26 -0300 +Subject: perf callchain: Honour the ordering of PERF_CONTEXT_{USER,KERNEL,etc} + +From: David S. Miller + +commit e9024d519d892b38176cafd46f68a7cdddd77412 upstream. + +When processing using 'perf report -g caller', which is the default, we +ended up reversing the callchain entries received from the kernel, but +simply reversing throws away the information that tells that from a +point onwards the addresses are for userspace, kernel, guest kernel, +guest user, hypervisor. + +The idea is that if we are walking backwards, for each cluster of +non-cpumode entries we have to first scan backwards for the next one and +use that for the cluster. + +This seems silly and more expensive than it needs to be but it is enough +for an initial fix. + +The code here is really complicated because it is intimately intertwined +with the lbr and branch handling, as well as this callchain order, +further fixes will be needed to properly take into account the cpumode +in those cases. + +Another problem with ORDER_CALLER is that the NULL "0" IP that is at the +end of most callchains shows up at the top of the histogram because +every callchain contains it and with ORDER_CALLER it is the first entry. + +Signed-off-by: David S.
Miller +Tested-by: Arnaldo Carvalho de Melo +Cc: Adrian Hunter +Cc: David Ahern +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Souvik Banerjee +Cc: Wang Nan +Cc: stable@vger.kernel.org # 4.19 +Link: https://lkml.kernel.org/n/tip-2wt3ayp6j2y2f2xowixa8y6y@git.kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/machine.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +--- a/tools/perf/util/machine.c ++++ b/tools/perf/util/machine.c +@@ -2140,6 +2140,27 @@ static int resolve_lbr_callchain_sample( + return 0; + } + ++static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, ++ struct callchain_cursor *cursor, ++ struct symbol **parent, ++ struct addr_location *root_al, ++ u8 *cpumode, int ent) ++{ ++ int err = 0; ++ ++ while (--ent >= 0) { ++ u64 ip = chain->ips[ent]; ++ ++ if (ip >= PERF_CONTEXT_MAX) { ++ err = add_callchain_ip(thread, cursor, parent, ++ root_al, cpumode, ip, ++ false, NULL, NULL, 0); ++ break; ++ } ++ } ++ return err; ++} ++ + static int thread__resolve_callchain_sample(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_evsel *evsel, +@@ -2246,6 +2267,12 @@ static int thread__resolve_callchain_sam + } + + check_calls: ++ if (callchain_param.order != ORDER_CALLEE) { ++ err = find_prev_cpumode(chain, thread, cursor, parent, root_al, ++ &cpumode, chain->nr - first_call); ++ if (err) ++ return (err < 0) ? err : 0; ++ } + for (i = first_call, nr_entries = 0; + i < chain_nr && nr_entries < max_stack; i++) { + u64 ip; +@@ -2260,9 +2287,15 @@ check_calls: + continue; + #endif + ip = chain->ips[j]; +- + if (ip < PERF_CONTEXT_MAX) + ++nr_entries; ++ else if (callchain_param.order != ORDER_CALLEE) { ++ err = find_prev_cpumode(chain, thread, cursor, parent, ++ root_al, &cpumode, j); ++ if (err) ++ return (err < 0) ? 
err : 0; ++ continue; ++ } + + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, diff --git a/queue-4.19/perf-cs-etm-correct-cpu-mode-for-samples.patch b/queue-4.19/perf-cs-etm-correct-cpu-mode-for-samples.patch new file mode 100644 index 00000000000..271d913d293 --- /dev/null +++ b/queue-4.19/perf-cs-etm-correct-cpu-mode-for-samples.patch @@ -0,0 +1,143 @@ +From d6c9c05fe1eb4b213b183d8a1e79416256dc833a Mon Sep 17 00:00:00 2001 +From: Leo Yan +Date: Tue, 30 Oct 2018 15:18:28 +0800 +Subject: perf cs-etm: Correct CPU mode for samples + +From: Leo Yan + +commit d6c9c05fe1eb4b213b183d8a1e79416256dc833a upstream. + +Since commit edeb0c90df35 ("perf tools: Stop fallbacking to kallsyms for +vdso symbols lookup"), the kernel address cannot be properly parsed to +kernel symbol with command 'perf script -k vmlinux'. The reason is +CoreSight samples is always to set CPU mode as PERF_RECORD_MISC_USER, +thus it fails to find corresponding map/dso in below flows: + + process_sample_event() + `-> machine__resolve() + `-> thread__find_map(thread, sample->cpumode, sample->ip, al); + +In this flow it needs to pass argument 'sample->cpumode' to tell what's +the CPU mode, before it always passed PERF_RECORD_MISC_USER but without +any failure until the commit edeb0c90df35 ("perf tools: Stop fallbacking +to kallsyms for vdso symbols lookup") has been merged. The reason is +even with the wrong CPU mode the function thread__find_map() firstly +fails to find map but it will rollback to find kernel map for vdso +symbols lookup. In the latest code it has removed the fallback code, +thus if CPU mode is PERF_RECORD_MISC_USER then it cannot find map +anymore with kernel address. 
+ +This patch is to correct samples CPU mode setting, it creates a new +helper function cs_etm__cpu_mode() to tell what's the CPU mode based on +the address with the info from machine structure; this patch has a bit +extension to check not only kernel and user mode, but also check for +host/guest and hypervisor mode. Finally this patch uses the function in +instruction and branch samples and also apply in cs_etm__mem_access() +for a minor polishing. + +Signed-off-by: Leo Yan +Cc: Adrian Hunter +Cc: Alexander Shishkin +Cc: David Miller +Cc: Jiri Olsa +Cc: Mathieu Poirier +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: coresight@lists.linaro.org +Cc: linux-arm-kernel@lists.infradead.org +Cc: stable@kernel.org # v4.19 +Link: http://lkml.kernel.org/r/1540883908-17018-1-git-send-email-leo.yan@linaro.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/cs-etm.c | 39 ++++++++++++++++++++++++++++++--------- + 1 file changed, 30 insertions(+), 9 deletions(-) + +--- a/tools/perf/util/cs-etm.c ++++ b/tools/perf/util/cs-etm.c +@@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_ses + zfree(&aux); + } + ++static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) ++{ ++ struct machine *machine; ++ ++ machine = etmq->etm->machine; ++ ++ if (address >= etmq->etm->kernel_start) { ++ if (machine__is_host(machine)) ++ return PERF_RECORD_MISC_KERNEL; ++ else ++ return PERF_RECORD_MISC_GUEST_KERNEL; ++ } else { ++ if (machine__is_host(machine)) ++ return PERF_RECORD_MISC_USER; ++ else if (perf_guest) ++ return PERF_RECORD_MISC_GUEST_USER; ++ else ++ return PERF_RECORD_MISC_HYPERVISOR; ++ } ++} ++ + static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, + size_t size, u8 *buffer) + { +@@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_ + return -1; + + machine = etmq->etm->machine; +- if (address >= etmq->etm->kernel_start) +- cpumode = PERF_RECORD_MISC_KERNEL; +- else +- cpumode = 
PERF_RECORD_MISC_USER; ++ cpumode = cs_etm__cpu_mode(etmq, address); + + thread = etmq->thread; + if (!thread) { +@@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sam + struct perf_sample sample = {.ip = 0,}; + + event->sample.header.type = PERF_RECORD_SAMPLE; +- event->sample.header.misc = PERF_RECORD_MISC_USER; ++ event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = addr; +@@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sam + sample.cpu = etmq->packet->cpu; + sample.flags = 0; + sample.insn_len = 1; +- sample.cpumode = event->header.misc; ++ sample.cpumode = event->sample.header.misc; + + if (etm->synth_opts.last_branch) { + cs_etm__copy_last_branch_rb(etmq); +@@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(s + u64 nr; + struct branch_entry entries; + } dummy_bs; ++ u64 ip; ++ ++ ip = cs_etm__last_executed_instr(etmq->prev_packet); + + event->sample.header.type = PERF_RECORD_SAMPLE; +- event->sample.header.misc = PERF_RECORD_MISC_USER; ++ event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); + event->sample.header.size = sizeof(struct perf_event_header); + +- sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); ++ sample.ip = ip; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.addr = cs_etm__first_executed_instr(etmq->packet); +@@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(s + sample.period = 1; + sample.cpu = etmq->packet->cpu; + sample.flags = 0; +- sample.cpumode = PERF_RECORD_MISC_USER; ++ sample.cpumode = event->sample.header.misc; + + /* + * perf report cannot handle events without a branch stack diff --git a/queue-4.19/perf-intel-pt-bts-calculate-cpumode-for-synthesized-samples.patch b/queue-4.19/perf-intel-pt-bts-calculate-cpumode-for-synthesized-samples.patch new file mode 100644 index 00000000000..2bc8e62c4e5 --- /dev/null +++ b/queue-4.19/perf-intel-pt-bts-calculate-cpumode-for-synthesized-samples.patch 
@@ -0,0 +1,125 @@ +From 5d4f0edaa3ac4f1844ed7c64cd2bae6f1912bac5 Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Wed, 31 Oct 2018 11:10:43 +0200 +Subject: perf intel-pt/bts: Calculate cpumode for synthesized samples + +From: Adrian Hunter + +commit 5d4f0edaa3ac4f1844ed7c64cd2bae6f1912bac5 upstream. + +In the absence of a fallback, samples must provide a correct cpumode for +the 'ip'. Do that now there is no fallback. + +Signed-off-by: Adrian Hunter +Reviewed-by: Jiri Olsa +Cc: Andi Kleen +Cc: David S. Miller +Cc: Leo Yan +Cc: Mathieu Poirier +Cc: stable@vger.kernel.org # 4.19 +Link: http://lkml.kernel.org/r/20181031091043.23465-6-adrian.hunter@intel.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/intel-bts.c | 17 ++++++++++++----- + tools/perf/util/intel-pt.c | 22 +++++++++++++--------- + 2 files changed, 25 insertions(+), 14 deletions(-) + +--- a/tools/perf/util/intel-bts.c ++++ b/tools/perf/util/intel-bts.c +@@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(stru + return 0; + } + ++static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip) ++{ ++ return machine__kernel_ip(bts->machine, ip) ? 
++ PERF_RECORD_MISC_KERNEL : ++ PERF_RECORD_MISC_USER; ++} ++ + static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, + struct branch *branch) + { +@@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample + bts->num_events++ <= bts->synth_opts.initial_skip) + return 0; + +- event.sample.header.type = PERF_RECORD_SAMPLE; +- event.sample.header.misc = PERF_RECORD_MISC_USER; +- event.sample.header.size = sizeof(struct perf_event_header); +- +- sample.cpumode = PERF_RECORD_MISC_USER; + sample.ip = le64_to_cpu(branch->from); ++ sample.cpumode = intel_bts_cpumode(bts, sample.ip); + sample.pid = btsq->pid; + sample.tid = btsq->tid; + sample.addr = le64_to_cpu(branch->to); +@@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample + sample.insn_len = btsq->intel_pt_insn.length; + memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ); + ++ event.sample.header.type = PERF_RECORD_SAMPLE; ++ event.sample.header.misc = sample.cpumode; ++ event.sample.header.size = sizeof(struct perf_event_header); ++ + if (bts->synth_opts.inject) { + event.sample.header.size = bts->branches_event_size; + ret = perf_event__synthesize_sample(&event, +--- a/tools/perf/util/intel-pt.c ++++ b/tools/perf/util/intel-pt.c +@@ -407,6 +407,13 @@ intel_pt_cache_lookup(struct dso *dso, s + return auxtrace_cache__lookup(dso->auxtrace_cache, offset); + } + ++static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) ++{ ++ return ip >= pt->kernel_start ? 
++ PERF_RECORD_MISC_KERNEL : ++ PERF_RECORD_MISC_USER; ++} ++ + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, + uint64_t to_ip, uint64_t max_insn_cnt, +@@ -429,10 +436,7 @@ static int intel_pt_walk_next_insn(struc + if (to_ip && *ip == to_ip) + goto out_no_cache; + +- if (*ip >= ptq->pt->kernel_start) +- cpumode = PERF_RECORD_MISC_KERNEL; +- else +- cpumode = PERF_RECORD_MISC_USER; ++ cpumode = intel_pt_cpumode(ptq->pt, *ip); + + thread = ptq->thread; + if (!thread) { +@@ -1053,15 +1057,11 @@ static void intel_pt_prep_b_sample(struc + union perf_event *event, + struct perf_sample *sample) + { +- event->sample.header.type = PERF_RECORD_SAMPLE; +- event->sample.header.misc = PERF_RECORD_MISC_USER; +- event->sample.header.size = sizeof(struct perf_event_header); +- + if (!pt->timeless_decoding) + sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + +- sample->cpumode = PERF_RECORD_MISC_USER; + sample->ip = ptq->state->from_ip; ++ sample->cpumode = intel_pt_cpumode(pt, sample->ip); + sample->pid = ptq->pid; + sample->tid = ptq->tid; + sample->addr = ptq->state->to_ip; +@@ -1070,6 +1070,10 @@ static void intel_pt_prep_b_sample(struc + sample->flags = ptq->flags; + sample->insn_len = ptq->insn_len; + memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); ++ ++ event->sample.header.type = PERF_RECORD_SAMPLE; ++ event->sample.header.misc = sample->cpumode; ++ event->sample.header.size = sizeof(struct perf_event_header); + } + + static int intel_pt_inject_event(union perf_event *event, diff --git a/queue-4.19/perf-intel-pt-insert-callchain-context-into-synthesized-callchains.patch b/queue-4.19/perf-intel-pt-insert-callchain-context-into-synthesized-callchains.patch new file mode 100644 index 00000000000..fbaac03f28a --- /dev/null +++ b/queue-4.19/perf-intel-pt-insert-callchain-context-into-synthesized-callchains.patch @@ -0,0 +1,120 @@ +From 242483068b4b9ad02f1653819b6e683577681e0e Mon Sep 17 00:00:00 
2001 +From: Adrian Hunter +Date: Wed, 31 Oct 2018 11:10:42 +0200 +Subject: perf intel-pt: Insert callchain context into synthesized callchains + +From: Adrian Hunter + +commit 242483068b4b9ad02f1653819b6e683577681e0e upstream. + +In the absence of a fallback, callchains must encode also the callchain +context. Do that now there is no fallback. + +Signed-off-by: Adrian Hunter +Reviewed-by: Jiri Olsa +Cc: Andi Kleen +Cc: David S. Miller +Cc: Leo Yan +Cc: Mathieu Poirier +Cc: stable@vger.kernel.org # 4.19 +Link: http://lkml.kernel.org/r/100ea2ec-ed14-b56d-d810-e0a6d2f4b069@intel.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/intel-pt.c | 6 +++-- + tools/perf/util/thread-stack.c | 44 ++++++++++++++++++++++++++++++++--------- + tools/perf/util/thread-stack.h | 2 - + 3 files changed, 40 insertions(+), 12 deletions(-) + +--- a/tools/perf/util/intel-pt.c ++++ b/tools/perf/util/intel-pt.c +@@ -763,7 +763,8 @@ static struct intel_pt_queue *intel_pt_a + if (pt->synth_opts.callchain) { + size_t sz = sizeof(struct ip_callchain); + +- sz += pt->synth_opts.callchain_sz * sizeof(u64); ++ /* Add 1 to callchain_sz for callchain context */ ++ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); + ptq->chain = zalloc(sz); + if (!ptq->chain) + goto out_free; +@@ -1159,7 +1160,8 @@ static void intel_pt_prep_sample(struct + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, +- pt->synth_opts.callchain_sz, sample->ip); ++ pt->synth_opts.callchain_sz + 1, ++ sample->ip, pt->kernel_start); + sample->callchain = ptq->chain; + } + +--- a/tools/perf/util/thread-stack.c ++++ b/tools/perf/util/thread-stack.c +@@ -285,20 +285,46 @@ void thread_stack__free(struct thread *t + } + } + ++static inline u64 callchain_context(u64 ip, u64 kernel_start) ++{ ++ return ip < kernel_start ? 
PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; ++} ++ + void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, +- size_t sz, u64 ip) ++ size_t sz, u64 ip, u64 kernel_start) + { +- size_t i; ++ u64 context = callchain_context(ip, kernel_start); ++ u64 last_context; ++ size_t i, j; ++ ++ if (sz < 2) { ++ chain->nr = 0; ++ return; ++ } + +- if (!thread || !thread->ts) +- chain->nr = 1; +- else +- chain->nr = min(sz, thread->ts->cnt + 1); ++ chain->ips[0] = context; ++ chain->ips[1] = ip; + +- chain->ips[0] = ip; ++ if (!thread || !thread->ts) { ++ chain->nr = 2; ++ return; ++ } ++ ++ last_context = context; ++ ++ for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) { ++ ip = thread->ts->stack[thread->ts->cnt - j].ret_addr; ++ context = callchain_context(ip, kernel_start); ++ if (context != last_context) { ++ if (i >= sz - 1) ++ break; ++ chain->ips[i++] = context; ++ last_context = context; ++ } ++ chain->ips[i] = ip; ++ } + +- for (i = 1; i < chain->nr; i++) +- chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; ++ chain->nr = i; + } + + struct call_return_processor * +--- a/tools/perf/util/thread-stack.h ++++ b/tools/perf/util/thread-stack.h +@@ -84,7 +84,7 @@ int thread_stack__event(struct thread *t + u64 to_ip, u16 insn_len, u64 trace_nr); + void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); + void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, +- size_t sz, u64 ip); ++ size_t sz, u64 ip, u64 kernel_start); + int thread_stack__flush(struct thread *thread); + void thread_stack__free(struct thread *thread); + size_t thread_stack__depth(struct thread *thread); diff --git a/queue-4.19/perf-stat-handle-different-pmu-names-with-common-prefix.patch b/queue-4.19/perf-stat-handle-different-pmu-names-with-common-prefix.patch new file mode 100644 index 00000000000..4de766e9af1 --- /dev/null +++ b/queue-4.19/perf-stat-handle-different-pmu-names-with-common-prefix.patch @@ -0,0 +1,155 @@ +From 
ea1fa48c055f833eb25f0c33188feecb7002ada5 Mon Sep 17 00:00:00 2001 +From: Thomas Richter +Date: Tue, 23 Oct 2018 17:16:16 +0200 +Subject: perf stat: Handle different PMU names with common prefix + +From: Thomas Richter + +commit ea1fa48c055f833eb25f0c33188feecb7002ada5 upstream. + +On s390 the CPU Measurement Facility for counters now supports +2 PMUs named cpum_cf (CPU Measurement Facility for counters) and +cpum_cf_diag (CPU Measurement Facility for diagnostic counters) +for one and the same CPU. + +Running command + + [root@s35lp76 perf]# ./perf stat -e tx_c_tend \ + -- ~/mytests/cf-tx-events 1 + + Measuring transactions + TX_C_TABORT_NO_SPECIAL: 0 expected:0 + TX_C_TABORT_SPECIAL: 0 expected:0 + TX_C_TEND: 1 expected:1 + TX_NC_TABORT: 11 expected:11 + TX_NC_TEND: 1 expected:1 + + Performance counter stats for '/root/mytests/cf-tx-events 1': + + 2 tx_c_tend + + 0.002120091 seconds time elapsed + + 0.000121000 seconds user + 0.002127000 seconds sys + + [root@s35lp76 perf]# + +displays output which is unexpected (and wrong): + + 2 tx_c_tend + +The test program definitely triggers only one transaction, as shown +in line 'TX_C_TEND: 1 expected:1'. + +This is caused by the following call sequence: + +pmu_lookup() scans and installs a PMU. ++--> pmu_aliases() parses all aliases in directory + ...//events/* which are file names. + +--> pmu_aliases_parse() Read each file in directory and create + an new alias entry. This is done with + +--> perf_pmu__new_alias() and + +--> __perf_pmu__new_alias() which also check for + identical alias names. + +After pmu_aliases() returns, a complete list of event names +for this pmu has been created. Now function + +pmu_add_cpu_aliases() is called to add the events listed in the json +| files to the alias list of the cpu. ++--> perf_pmu__find_map() Returns a pointer to the json events. + +Now function pmu_add_cpu_aliases() scans through all events listed +in the JSON files for this CPU. 
+Each json event pmu name is compared with the current PMU being +built up and if they match, the json event is added to the +current PMUs alias list. +To avoid duplicate entries the following comparison is done: + + if (!is_arm_pmu_core(name)) { + pname = pe->pmu ? pe->pmu : "cpu"; + if (strncmp(pname, name, strlen(pname))) + continue; + } + +The culprit is the strncmp() function. + +Using current s390 PMU naming, the first PMU is 'cpum_cf' +and a long list of events is added, among them 'tx_c_tend' + +When the second PMU named 'cpum_cf_diag' is added, only one event +named 'CF_DIAG' is added by the pmu_aliases() function. + +Now function pmu_add_cpu_aliases() is invoked for PMU 'cpum_cf_diag'. +Since the CPUID string is the same for both PMUs, json file events +for PMU named 'cpum_cf' are added to the PMU 'cpum_cf_diag' + +This happens because the strncmp() actually compares: + + strncmp("cpum_cf", "cpum_cf_diag", 7); + +The first parameter is the pmu name taken from the event in +the json file. The second parameter is the pmu name of the PMU +currently being built. +They are different, but the length of the compare only tests the +common prefix and this returns 0(true) when it should return false. + +Now all events for PMU cpum_cf are added to the alias list for pmu +cpum_cf_diag. + +Later on in function parse_events_add_pmu() the event 'tx_c_tend' is +searched in all available PMUs and found twice, adding it two +times to the evsel_list global variable which is the root +of all events. This results in a counter value of 2 instead +of 1. 
+ +Output with this patch: + + [root@s35lp76 perf]# ./perf stat -e tx_c_tend \ + -- ~/mytests/cf-tx-events 1 + Measuring transactions + TX_C_TABORT_NO_SPECIAL: 0 expected:0 + TX_C_TABORT_SPECIAL: 0 expected:0 + TX_C_TEND: 1 expected:1 + TX_NC_TABORT: 11 expected:11 + TX_NC_TEND: 1 expected:1 + + Performance counter stats for '/root/mytests/cf-tx-events 1': + + 1 tx_c_tend + + 0.001815365 seconds time elapsed + + 0.000123000 seconds user + 0.001756000 seconds sys + + [root@s35lp76 perf]# + +Signed-off-by: Thomas Richter +Reviewed-by: Hendrik Brueckner +Reviewed-by: Sebastien Boisvert +Cc: Heiko Carstens +Cc: Kan Liang +Cc: Martin Schwidefsky +Cc: stable@vger.kernel.org +Fixes: 292c34c10249 ("perf pmu: Fix core PMU alias list for X86 platform") +Link: http://lkml.kernel.org/r/20181023151616.78193-1-tmricht@linux.ibm.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/pmu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/perf/util/pmu.c ++++ b/tools/perf/util/pmu.c +@@ -773,7 +773,7 @@ static void pmu_add_cpu_aliases(struct l + + if (!is_arm_pmu_core(name)) { + pname = pe->pmu ? pe->pmu : "cpu"; +- if (strncmp(pname, name, strlen(pname))) ++ if (strcmp(pname, name)) + continue; + } + diff --git a/queue-4.19/reset-hisilicon-fix-potential-null-pointer-dereference.patch b/queue-4.19/reset-hisilicon-fix-potential-null-pointer-dereference.patch new file mode 100644 index 00000000000..f1d79e6aed8 --- /dev/null +++ b/queue-4.19/reset-hisilicon-fix-potential-null-pointer-dereference.patch @@ -0,0 +1,42 @@ +From e9a2310fb689151166df7fd9971093362d34bd79 Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" +Date: Wed, 25 Jul 2018 19:47:19 -0500 +Subject: reset: hisilicon: fix potential NULL pointer dereference + +From: Gustavo A. R. Silva + +commit e9a2310fb689151166df7fd9971093362d34bd79 upstream. + +There is a potential execution path in which function +platform_get_resource() returns NULL. 
If this happens, +we will end up having a NULL pointer dereference. + +Fix this by replacing devm_ioremap with devm_ioremap_resource, +which has the NULL check and the memory region request. + +This code was detected with the help of Coccinelle. + +Cc: stable@vger.kernel.org +Fixes: 97b7129cd2af ("reset: hisilicon: change the definition of hisi_reset_init") +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Stephen Boyd +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/hisilicon/reset.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/clk/hisilicon/reset.c ++++ b/drivers/clk/hisilicon/reset.c +@@ -109,9 +109,8 @@ struct hisi_reset_controller *hisi_reset + return NULL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +- rstc->membase = devm_ioremap(&pdev->dev, +- res->start, resource_size(res)); +- if (!rstc->membase) ++ rstc->membase = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(rstc->membase)) + return NULL; + + spin_lock_init(&rstc->lock); diff --git a/queue-4.19/revert-ceph-fix-dentry-leak-in-splice_dentry.patch b/queue-4.19/revert-ceph-fix-dentry-leak-in-splice_dentry.patch new file mode 100644 index 00000000000..2278921b956 --- /dev/null +++ b/queue-4.19/revert-ceph-fix-dentry-leak-in-splice_dentry.patch @@ -0,0 +1,41 @@ +From efe328230dc01aa0b1269aad0b5fae73eea4677a Mon Sep 17 00:00:00 2001 +From: "Yan, Zheng" +Date: Thu, 27 Sep 2018 21:16:05 +0800 +Subject: Revert "ceph: fix dentry leak in splice_dentry()" + +From: Yan, Zheng + +commit efe328230dc01aa0b1269aad0b5fae73eea4677a upstream. + +This reverts commit 8b8f53af1ed9df88a4c0fbfdf3db58f62060edf3. + +splice_dentry() is used by three places. For two places, req->r_dentry +is passed to splice_dentry(). In the case of error, req->r_dentry does +not get updated. So splice_dentry() should not drop reference. 
+ +Cc: stable@vger.kernel.org # 4.18+ +Signed-off-by: "Yan, Zheng" +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ceph/inode.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -1132,8 +1132,12 @@ static struct dentry *splice_dentry(stru + if (IS_ERR(realdn)) { + pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", + PTR_ERR(realdn), dn, in, ceph_vinop(in)); +- dput(dn); +- dn = realdn; /* note realdn contains the error */ ++ dn = realdn; ++ /* ++ * Caller should release 'dn' in the case of error. ++ * If 'req->r_dentry' is passed to this function, ++ * caller should leave 'req->r_dentry' untouched. ++ */ + goto out; + } else if (realdn) { + dout("dn %p (%d) spliced with %p (%d) " diff --git a/queue-4.19/revert-powerpc-8xx-use-l1-entry-apg-to-handle-_page_accessed-for-config_swap.patch b/queue-4.19/revert-powerpc-8xx-use-l1-entry-apg-to-handle-_page_accessed-for-config_swap.patch new file mode 100644 index 00000000000..59c00ae4441 --- /dev/null +++ b/queue-4.19/revert-powerpc-8xx-use-l1-entry-apg-to-handle-_page_accessed-for-config_swap.patch @@ -0,0 +1,202 @@ +From cc4ebf5c0a3440ed0a32d25c55ebdb6ce5f3c0bc Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Fri, 19 Oct 2018 06:54:54 +0000 +Subject: Revert "powerpc/8xx: Use L1 entry APG to handle _PAGE_ACCESSED for CONFIG_SWAP" + +From: Christophe Leroy + +commit cc4ebf5c0a3440ed0a32d25c55ebdb6ce5f3c0bc upstream. + +This reverts commit 4f94b2c7462d9720b2afa7e8e8d4c19446bb31ce. + +That commit was buggy, as it used rlwinm instead of rlwimi. 
+Instead of fixing that bug, we revert the previous commit in order to +reduce the dependency between L1 entries and L2 entries + +Fixes: 4f94b2c7462d9 ("powerpc/8xx: Use L1 entry APG to handle _PAGE_ACCESSED for CONFIG_SWAP") +Cc: stable@vger.kernel.org +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/mmu-8xx.h | 34 ++++----------------------- + arch/powerpc/kernel/head_8xx.S | 45 ++++++++++++++++++++++--------------- + arch/powerpc/mm/8xx_mmu.c | 2 - + 3 files changed, 34 insertions(+), 47 deletions(-) + +--- a/arch/powerpc/include/asm/mmu-8xx.h ++++ b/arch/powerpc/include/asm/mmu-8xx.h +@@ -34,20 +34,12 @@ + * respectively NA for All or X for Supervisor and no access for User. + * Then we use the APG to say whether accesses are according to Page rules or + * "all Supervisor" rules (Access to all) +- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: +- * When that bit is not set access is done iaw "all user" +- * which means no access iaw page rules. +- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED +- * 0x => No access => 11 (all accesses performed as user iaw page definition) +- * 10 => No user => 01 (all accesses performed according to page definition) +- * 11 => User => 00 (all accesses performed as supervisor iaw page definition) ++ * Therefore, we define 2 APG groups. lsb is _PMD_USER ++ * 0 => No user => 01 (all accesses performed according to page definition) ++ * 1 => User => 00 (all accesses performed as supervisor iaw page definition) + * We define all 16 groups so that all other bits of APG can take any value + */ +-#ifdef CONFIG_SWAP +-#define MI_APG_INIT 0xf4f4f4f4 +-#else + #define MI_APG_INIT 0x44444444 +-#endif + + /* The effective page number register. When read, contains the information + * about the last instruction TLB miss. 
When MI_RPN is written, bits in +@@ -115,20 +107,12 @@ + * Supervisor and no access for user and NA for ALL. + * Then we use the APG to say whether accesses are according to Page rules or + * "all Supervisor" rules (Access to all) +- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: +- * When that bit is not set access is done iaw "all user" +- * which means no access iaw page rules. +- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED +- * 0x => No access => 11 (all accesses performed as user iaw page definition) +- * 10 => No user => 01 (all accesses performed according to page definition) +- * 11 => User => 00 (all accesses performed as supervisor iaw page definition) ++ * Therefore, we define 2 APG groups. lsb is _PMD_USER ++ * 0 => No user => 01 (all accesses performed according to page definition) ++ * 1 => User => 00 (all accesses performed as supervisor iaw page definition) + * We define all 16 groups so that all other bits of APG can take any value + */ +-#ifdef CONFIG_SWAP +-#define MD_APG_INIT 0xf4f4f4f4 +-#else + #define MD_APG_INIT 0x44444444 +-#endif + + /* The effective page number register. When read, contains the information + * about the last instruction TLB miss. When MD_RPN is written, bits in +@@ -180,12 +164,6 @@ + */ + #define SPRN_M_TW 799 + +-/* APGs */ +-#define M_APG0 0x00000000 +-#define M_APG1 0x00000020 +-#define M_APG2 0x00000040 +-#define M_APG3 0x00000060 +- + #ifdef CONFIG_PPC_MM_SLICES + #include + #define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) +--- a/arch/powerpc/kernel/head_8xx.S ++++ b/arch/powerpc/kernel/head_8xx.S +@@ -353,13 +353,14 @@ _ENTRY(ITLBMiss_cmp) + #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) + mtcr r12 + #endif +- +-#ifdef CONFIG_SWAP +- rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 +-#endif + /* Load the MI_TWC with the attributes for this "segment." 
*/ + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + ++#ifdef CONFIG_SWAP ++ rlwinm r11, r10, 32-5, _PAGE_PRESENT ++ and r11, r11, r10 ++ rlwimi r10, r11, 0, _PAGE_PRESENT ++#endif + li r11, RPN_PATTERN | 0x200 + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 20 and 23 must be clear. +@@ -470,14 +471,22 @@ _ENTRY(DTLBMiss_jmp) + * above. + */ + rlwimi r11, r10, 0, _PAGE_GUARDED +-#ifdef CONFIG_SWAP +- /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0 +- * on that bit will represent a Non Access group +- */ +- rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 +-#endif + mtspr SPRN_MD_TWC, r11 + ++ /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. ++ * We also need to know if the insn is a load/store, so: ++ * Clear _PAGE_PRESENT and load that which will ++ * trap into DTLB Error with store bit set accordinly. ++ */ ++ /* PRESENT=0x1, ACCESSED=0x20 ++ * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); ++ * r10 = (r10 & ~PRESENT) | r11; ++ */ ++#ifdef CONFIG_SWAP ++ rlwinm r11, r10, 32-5, _PAGE_PRESENT ++ and r11, r11, r10 ++ rlwimi r10, r11, 0, _PAGE_PRESENT ++#endif + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 24, 25, 26, and 27 must be + * set. 
All other Linux PTE bits control the behavior +@@ -637,8 +646,8 @@ InstructionBreakpoint: + */ + DTLBMissIMMR: + mtcr r12 +- /* Set 512k byte guarded page and mark it valid and accessed */ +- li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2 ++ /* Set 512k byte guarded page and mark it valid */ ++ li r10, MD_PS512K | MD_GUARDED | MD_SVALID + mtspr SPRN_MD_TWC, r10 + mfspr r10, SPRN_IMMR /* Get current IMMR */ + rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ +@@ -656,8 +665,8 @@ _ENTRY(dtlb_miss_exit_2) + + DTLBMissLinear: + mtcr r12 +- /* Set 8M byte page and mark it valid and accessed */ +- li r11, MD_PS8MEG | MD_SVALID | M_APG2 ++ /* Set 8M byte page and mark it valid */ ++ li r11, MD_PS8MEG | MD_SVALID + mtspr SPRN_MD_TWC, r11 + rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ +@@ -675,8 +684,8 @@ _ENTRY(dtlb_miss_exit_3) + #ifndef CONFIG_PIN_TLB_TEXT + ITLBMissLinear: + mtcr r12 +- /* Set 8M byte page and mark it valid,accessed */ +- li r11, MI_PS8MEG | MI_SVALID | M_APG2 ++ /* Set 8M byte page and mark it valid */ ++ li r11, MI_PS8MEG | MI_SVALID + mtspr SPRN_MI_TWC, r11 + rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ + ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ +@@ -960,7 +969,7 @@ initial_mmu: + ori r8, r8, MI_EVALID /* Mark it valid */ + mtspr SPRN_MI_EPN, r8 + li r8, MI_PS8MEG /* Set 8M byte page */ +- ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */ ++ ori r8, r8, MI_SVALID /* Make it valid */ + mtspr SPRN_MI_TWC, r8 + li r8, MI_BOOTINIT /* Create RPN for address 0 */ + mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ +@@ -987,7 +996,7 @@ initial_mmu: + ori r8, r8, MD_EVALID /* Mark it valid */ + mtspr SPRN_MD_EPN, r8 + li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ +- ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */ ++ ori r8, r8, MD_SVALID /* Make it valid */ + mtspr SPRN_MD_TWC, r8 + 
mr r8, r9 /* Create paddr for TLB */ + ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ +--- a/arch/powerpc/mm/8xx_mmu.c ++++ b/arch/powerpc/mm/8xx_mmu.c +@@ -79,7 +79,7 @@ void __init MMU_init_hw(void) + for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { + mtspr(SPRN_MD_CTR, ctr | (i << 8)); + mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); +- mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2); ++ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); + addr += LARGE_PAGE_SIZE_8M; + mem -= LARGE_PAGE_SIZE_8M; diff --git a/queue-4.19/scsi-fix-queue-cleanup-race-before-queue-initialization-is-done.patch b/queue-4.19/scsi-fix-queue-cleanup-race-before-queue-initialization-is-done.patch new file mode 100644 index 00000000000..9c631259fcc --- /dev/null +++ b/queue-4.19/scsi-fix-queue-cleanup-race-before-queue-initialization-is-done.patch @@ -0,0 +1,87 @@ +From 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Wed, 14 Nov 2018 16:25:51 +0800 +Subject: SCSI: fix queue cleanup race before queue initialization is done + +From: Ming Lei + +commit 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a upstream. + +c2856ae2f315d ("blk-mq: quiesce queue before freeing queue") has +already fixed this race, however the implied synchronize_rcu() +in blk_mq_quiesce_queue() can slow down LUN probe a lot, so caused +performance regression. + +Then 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") +tried to quiesce queue for avoiding unnecessary synchronize_rcu() +only when queue initialization is done, because it is usual to see +lots of inexistent LUNs which need to be probed. + +However, turns out it isn't safe to quiesce queue only when queue +initialization is done. 
Because when one SCSI command is completed, +the user of sending command can be woken up immediately, then the +scsi device may be removed, meantime the run queue in scsi_end_request() +is still in-progress, so kernel panic can be caused. + +In Red Hat QE lab, there are several reports about this kind of kernel +panic triggered during kernel booting. + +This patch tries to address the issue by grabbing one queue usage +counter during freeing one request and the following run queue. + +Fixes: 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") +Cc: Andrew Jones +Cc: Bart Van Assche +Cc: linux-scsi@vger.kernel.org +Cc: Martin K. Petersen +Cc: Christoph Hellwig +Cc: James E.J. Bottomley +Cc: stable +Cc: jianchao.wang +Signed-off-by: Ming Lei +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 5 ++--- + drivers/scsi/scsi_lib.c | 8 ++++++++ + 2 files changed, 10 insertions(+), 3 deletions(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -793,9 +793,8 @@ void blk_cleanup_queue(struct request_qu + * dispatch may still be in-progress since we dispatch requests + * from more than one contexts. + * +- * No need to quiesce queue if it isn't initialized yet since +- * blk_freeze_queue() should be enough for cases of passthrough +- * request. ++ * We rely on driver to deal with the race in case that queue ++ * initialization isn't done. + */ + if (q->mq_ops && blk_queue_init_done(q)) + blk_mq_quiesce_queue(q); +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -697,6 +697,12 @@ static bool scsi_end_request(struct requ + */ + scsi_mq_uninit_cmd(cmd); + ++ /* ++ * queue is still alive, so grab the ref for preventing it ++ * from being cleaned up during running queue. 
++ */ ++ percpu_ref_get(&q->q_usage_counter); ++ + __blk_mq_end_request(req, error); + + if (scsi_target(sdev)->single_lun || +@@ -704,6 +710,8 @@ static bool scsi_end_request(struct requ + kblockd_schedule_work(&sdev->requeue_work); + else + blk_mq_run_hw_queues(q, true); ++ ++ percpu_ref_put(&q->q_usage_counter); + } else { + unsigned long flags; + diff --git a/queue-4.19/scsi-qla2xxx-initialize-port-speed-to-avoid-setting-lower-speed.patch b/queue-4.19/scsi-qla2xxx-initialize-port-speed-to-avoid-setting-lower-speed.patch new file mode 100644 index 00000000000..bd32411ca33 --- /dev/null +++ b/queue-4.19/scsi-qla2xxx-initialize-port-speed-to-avoid-setting-lower-speed.patch @@ -0,0 +1,35 @@ +From f635e48e866ee1a47d2d42ce012fdcc07bf55853 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Tue, 6 Nov 2018 00:51:21 -0800 +Subject: scsi: qla2xxx: Initialize port speed to avoid setting lower speed + +From: Quinn Tran + +commit f635e48e866ee1a47d2d42ce012fdcc07bf55853 upstream. + +This patch initializes port speed so that firmware does not set lower +operating speed. Setting lower speed in firmware impacts WRITE performance. + +Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") +Cc: +Signed-off-by: Quinn Tran +Signed-off-by: Himanshu Madhani +Tested-by: Laurence Oberman +Reviewed-by: Ewan D. Milne +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/qla2xxx/qla_init.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -4714,6 +4714,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vh + fcport->loop_id = FC_NO_LOOP_ID; + qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); + fcport->supported_classes = FC_COS_UNSPECIFIED; ++ fcport->fp_speed = PORT_SPEED_UNKNOWN; + + fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, diff --git a/queue-4.19/series b/queue-4.19/series index b37cb7ae854..a0828353c1b 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -73,3 +73,45 @@ clk-meson-axg-mark-fdiv2-and-fdiv3-as-critical.patch zram-close-udev-startup-race-condition-as-default-gr.patch mips-loongson-3-fix-cpu-uart-irq-delivery-problem.patch mips-loongson-3-fix-bridge-irq-delivery-problem.patch +xtensa-add-notes-section-to-the-linker-script.patch +xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch +xtensa-fix-boot-parameters-address-translation.patch +um-drop-own-definition-of-ptrace_sysemu-_singlestep.patch +clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch +clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch +clk-sunxi-ng-h6-fix-bus-clocks-divider-position.patch +clk-rockchip-fix-wrong-mmc-sample-phase-shift-for-rk3328.patch +clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch +libceph-bump-ceph_msg_max_data_len.patch +revert-ceph-fix-dentry-leak-in-splice_dentry.patch +thermal-core-fix-use-after-free-in-thermal_cooling_device_destroy_sysfs.patch +mach64-fix-display-corruption-on-big-endian-machines.patch +mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch +acpi-nfit-x86-mce-handle-only-uncorrectable-machine-checks.patch +acpi-nfit-x86-mce-validate-a-mce-s-address-before-using-it.patch +acpi-nfit-fix-ars-overflow-continuation.patch 
+reset-hisilicon-fix-potential-null-pointer-dereference.patch +crypto-hisilicon-fix-null-dereference-for-same-dst-and-src.patch +crypto-hisilicon-fix-reference-after-free-of-memories-on-error-path.patch +vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch +scsi-qla2xxx-initialize-port-speed-to-avoid-setting-lower-speed.patch +scsi-fix-queue-cleanup-race-before-queue-initialization-is-done.patch +revert-powerpc-8xx-use-l1-entry-apg-to-handle-_page_accessed-for-config_swap.patch +soc-ti-qmss-fix-usage-of-irq_set_affinity_hint.patch +ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch +ocfs2-free-up-write-context-when-direct-io-failed.patch +mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch +memory_hotplug-cond_resched-in-__remove_pages.patch +netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch +arm-8809-1-proc-v7-fix-thumb-annotation-of-cpu_v7_hvc_switch_mm.patch +bonding-802.3ad-fix-link_failure_count-tracking.patch +mtd-spi-nor-cadence-quadspi-return-error-code-in-cqspi_direct_read_execute.patch +mtd-nand-fix-nanddev_neraseblocks.patch +mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch +hwmon-core-fix-double-free-in-__hwmon_device_register.patch +perf-cs-etm-correct-cpu-mode-for-samples.patch +perf-stat-handle-different-pmu-names-with-common-prefix.patch +perf-callchain-honour-the-ordering-of-perf_context_-user-kernel-etc.patch +perf-intel-pt-bts-calculate-cpumode-for-synthesized-samples.patch +perf-intel-pt-insert-callchain-context-into-synthesized-callchains.patch +of-numa-validate-some-distance-map-rules.patch diff --git a/queue-4.19/soc-ti-qmss-fix-usage-of-irq_set_affinity_hint.patch b/queue-4.19/soc-ti-qmss-fix-usage-of-irq_set_affinity_hint.patch new file mode 100644 index 00000000000..a1a607373b4 --- /dev/null +++ b/queue-4.19/soc-ti-qmss-fix-usage-of-irq_set_affinity_hint.patch @@ -0,0 +1,128 @@ +From 832ad0e3da4510fd17f98804abe512ea9a747035 Mon Sep 17 00:00:00 2001 +From: Marc 
Zyngier +Date: Wed, 31 Oct 2018 08:41:34 +0000 +Subject: soc: ti: QMSS: Fix usage of irq_set_affinity_hint + +From: Marc Zyngier + +commit 832ad0e3da4510fd17f98804abe512ea9a747035 upstream. + +The Keystone QMSS driver is pretty damaged, in the sense that it +does things like this: + + irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); + +where cpu_map is a local variable. As we leave the function, this +will point to nowhere-land, and things will end-up badly. + +Instead, let's use a proper cpumask that gets allocated, giving +the driver a chance to actually work with things like irqbalance +as well as have a hypothetical 64bit future. + +Cc: stable@vger.kernel.org +Acked-by: Santosh Shilimkar +Signed-off-by: Marc Zyngier +Signed-off-by: Olof Johansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/soc/ti/knav_qmss.h | 4 ++-- + drivers/soc/ti/knav_qmss_acc.c | 10 +++++----- + drivers/soc/ti/knav_qmss_queue.c | 22 +++++++++++++++------- + 3 files changed, 22 insertions(+), 14 deletions(-) + +--- a/drivers/soc/ti/knav_qmss.h ++++ b/drivers/soc/ti/knav_qmss.h +@@ -329,8 +329,8 @@ struct knav_range_ops { + }; + + struct knav_irq_info { +- int irq; +- u32 cpu_map; ++ int irq; ++ struct cpumask *cpu_mask; + }; + + struct knav_range_info { +--- a/drivers/soc/ti/knav_qmss_acc.c ++++ b/drivers/soc/ti/knav_qmss_acc.c +@@ -205,18 +205,18 @@ static int knav_range_setup_acc_irq(stru + { + struct knav_device *kdev = range->kdev; + struct knav_acc_channel *acc; +- unsigned long cpu_map; ++ struct cpumask *cpu_mask; + int ret = 0, irq; + u32 old, new; + + if (range->flags & RANGE_MULTI_QUEUE) { + acc = range->acc; + irq = range->irqs[0].irq; +- cpu_map = range->irqs[0].cpu_map; ++ cpu_mask = range->irqs[0].cpu_mask; + } else { + acc = range->acc + queue; + irq = range->irqs[queue].irq; +- cpu_map = range->irqs[queue].cpu_map; ++ cpu_mask = range->irqs[queue].cpu_mask; + } + + old = acc->open_mask; +@@ -239,8 +239,8 @@ static int knav_range_setup_acc_irq(stru + acc->name, 
acc->name); + ret = request_irq(irq, knav_acc_int_handler, 0, acc->name, + range); +- if (!ret && cpu_map) { +- ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); ++ if (!ret && cpu_mask) { ++ ret = irq_set_affinity_hint(irq, cpu_mask); + if (ret) { + dev_warn(range->kdev->dev, + "Failed to set IRQ affinity\n"); +--- a/drivers/soc/ti/knav_qmss_queue.c ++++ b/drivers/soc/ti/knav_qmss_queue.c +@@ -118,19 +118,17 @@ static int knav_queue_setup_irq(struct k + struct knav_queue_inst *inst) + { + unsigned queue = inst->id - range->queue_base; +- unsigned long cpu_map; + int ret = 0, irq; + + if (range->flags & RANGE_HAS_IRQ) { + irq = range->irqs[queue].irq; +- cpu_map = range->irqs[queue].cpu_map; + ret = request_irq(irq, knav_queue_int_handler, 0, + inst->irq_name, inst); + if (ret) + return ret; + disable_irq(irq); +- if (cpu_map) { +- ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); ++ if (range->irqs[queue].cpu_mask) { ++ ret = irq_set_affinity_hint(irq, range->irqs[queue].cpu_mask); + if (ret) { + dev_warn(range->kdev->dev, + "Failed to set IRQ affinity\n"); +@@ -1262,9 +1260,19 @@ static int knav_setup_queue_range(struct + + range->num_irqs++; + +- if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) +- range->irqs[i].cpu_map = +- (oirq.args[2] & 0x0000ff00) >> 8; ++ if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) { ++ unsigned long mask; ++ int bit; ++ ++ range->irqs[i].cpu_mask = devm_kzalloc(dev, ++ cpumask_size(), GFP_KERNEL); ++ if (!range->irqs[i].cpu_mask) ++ return -ENOMEM; ++ ++ mask = (oirq.args[2] & 0x0000ff00) >> 8; ++ for_each_set_bit(bit, &mask, BITS_PER_LONG) ++ cpumask_set_cpu(bit, range->irqs[i].cpu_mask); ++ } + } + + range->num_irqs = min(range->num_irqs, range->num_queues); diff --git a/queue-4.19/thermal-core-fix-use-after-free-in-thermal_cooling_device_destroy_sysfs.patch b/queue-4.19/thermal-core-fix-use-after-free-in-thermal_cooling_device_destroy_sysfs.patch new file mode 100644 index 00000000000..69bd6d5f6b3 --- /dev/null 
+++ b/queue-4.19/thermal-core-fix-use-after-free-in-thermal_cooling_device_destroy_sysfs.patch @@ -0,0 +1,40 @@ +From 3c587768271e9c20276522025729e4ebca51583b Mon Sep 17 00:00:00 2001 +From: Dmitry Osipenko +Date: Mon, 13 Aug 2018 20:14:00 +0300 +Subject: thermal: core: Fix use-after-free in thermal_cooling_device_destroy_sysfs + +From: Dmitry Osipenko + +commit 3c587768271e9c20276522025729e4ebca51583b upstream. + +This patch fixes a use-after-free that was detected by KASAN. The bug is +triggered on a CPUFreq driver module unload by freeing 'cdev' on device +unregister and then using the freed structure during the cdev's sysfs +data destruction. The solution is to unregister the sysfs at first, then +destroy sysfs data and finally release the cooling device. + +Cc: # v4.17+ +Fixes: 8ea229511e06 ("thermal: Add cooling device's statistics in sysfs") +Signed-off-by: Dmitry Osipenko +Acked-by: Viresh Kumar +Acked-by: Eduardo Valentin +Signed-off-by: Zhang Rui +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/thermal/thermal_core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/thermal/thermal_core.c ++++ b/drivers/thermal/thermal_core.c +@@ -1102,8 +1102,9 @@ void thermal_cooling_device_unregister(s + mutex_unlock(&thermal_list_lock); + + ida_simple_remove(&thermal_cdev_ida, cdev->id); +- device_unregister(&cdev->device); ++ device_del(&cdev->device); + thermal_cooling_device_destroy_sysfs(cdev); ++ put_device(&cdev->device); + } + EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister); + diff --git a/queue-4.19/um-drop-own-definition-of-ptrace_sysemu-_singlestep.patch b/queue-4.19/um-drop-own-definition-of-ptrace_sysemu-_singlestep.patch new file mode 100644 index 00000000000..6a824ab811c --- /dev/null +++ b/queue-4.19/um-drop-own-definition-of-ptrace_sysemu-_singlestep.patch @@ -0,0 +1,55 @@ +From 0676b957c24bfb6e495449ba7b7e72c5b5d79233 Mon Sep 17 00:00:00 2001 +From: Richard Weinberger +Date: Fri, 15 Jun 2018 16:42:54 +0200 +Subject: 
um: Drop own definition of PTRACE_SYSEMU/_SINGLESTEP + +From: Richard Weinberger + +commit 0676b957c24bfb6e495449ba7b7e72c5b5d79233 upstream. + +32bit UML used to define PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP +on its own because many years ago not all libcs had these request codes +in their UAPI. +These days PTRACE_SYSEMU/_SINGLESTEP is well known and part of glibc +and our own define becomes problematic. + +With change c48831d0eebf ("linux/x86: sync sys/ptrace.h with Linux 4.14 +[BZ #22433]") glibc turned PTRACE_SYSEMU/_SINGLESTEP into an enum and +UML failed to build. + +Let's drop our define and rely on the fact that every libc has +PTRACE_SYSEMU/_SINGLESTEP. + +Cc: +Cc: Ritesh Raj Sarraf +Reported-and-tested-by: Ritesh Raj Sarraf +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/um/shared/sysdep/ptrace_32.h | 10 ---------- + 1 file changed, 10 deletions(-) + +--- a/arch/x86/um/shared/sysdep/ptrace_32.h ++++ b/arch/x86/um/shared/sysdep/ptrace_32.h +@@ -10,20 +10,10 @@ + + static inline void update_debugregs(int seq) {} + +-/* syscall emulation path in ptrace */ +- +-#ifndef PTRACE_SYSEMU +-#define PTRACE_SYSEMU 31 +-#endif +- + void set_using_sysemu(int value); + int get_using_sysemu(void); + extern int sysemu_supported; + +-#ifndef PTRACE_SYSEMU_SINGLESTEP +-#define PTRACE_SYSEMU_SINGLESTEP 32 +-#endif +- + #define UPT_SYSCALL_ARG1(r) UPT_BX(r) + #define UPT_SYSCALL_ARG2(r) UPT_CX(r) + #define UPT_SYSCALL_ARG3(r) UPT_DX(r) diff --git a/queue-4.19/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch b/queue-4.19/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch new file mode 100644 index 00000000000..906676887c2 --- /dev/null +++ b/queue-4.19/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch @@ -0,0 +1,47 @@ +From 4542d623c7134bc1738f8a68ccb6dd546f1c264f Mon Sep 17 00:00:00 2001 +From: Greg Edwards +Date: Wed, 22 Aug 2018 13:21:53 -0600 +Subject: vhost/scsi: truncate T10 PI iov_iter to prot_bytes + +From: 
Greg Edwards + +commit 4542d623c7134bc1738f8a68ccb6dd546f1c264f upstream. + +Commands with protection information included were not truncating the +protection iov_iter to the number of protection bytes in the command. +This resulted in vhost_scsi mis-calculating the size of the protection +SGL in vhost_scsi_calc_sgls(), and including both the protection and +data SG entries in the protection SGL. + +Fixes: 09b13fa8c1a1 ("vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq") +Signed-off-by: Greg Edwards +Signed-off-by: Michael S. Tsirkin +Fixes: 09b13fa8c1a1093e9458549ac8bb203a7c65c62a +Cc: stable@vger.kernel.org +Reviewed-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/vhost/scsi.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/vhost/scsi.c ++++ b/drivers/vhost/scsi.c +@@ -964,7 +964,8 @@ vhost_scsi_handle_vq(struct vhost_scsi * + prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); + } + /* +- * Set prot_iter to data_iter, and advance past any ++ * Set prot_iter to data_iter and truncate it to ++ * prot_bytes, and advance data_iter past any + * preceeding prot_bytes that may be present. 
+ * + * Also fix up the exp_data_len to reflect only the +@@ -973,6 +974,7 @@ vhost_scsi_handle_vq(struct vhost_scsi * + if (prot_bytes) { + exp_data_len -= prot_bytes; + prot_iter = data_iter; ++ iov_iter_truncate(&prot_iter, prot_bytes); + iov_iter_advance(&data_iter, prot_bytes); + } + tag = vhost64_to_cpu(vq, v_req_pi.tag); diff --git a/queue-4.19/xtensa-add-notes-section-to-the-linker-script.patch b/queue-4.19/xtensa-add-notes-section-to-the-linker-script.patch new file mode 100644 index 00000000000..31a1307514c --- /dev/null +++ b/queue-4.19/xtensa-add-notes-section-to-the-linker-script.patch @@ -0,0 +1,44 @@ +From 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Mon, 29 Oct 2018 18:30:13 -0700 +Subject: xtensa: add NOTES section to the linker script + +From: Max Filippov + +commit 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 upstream. + +This section collects all source .note.* sections together in the +vmlinux image. Without it .note.Linux section may be placed at address +0, while the rest of the kernel is at its normal address, resulting in a +huge vmlinux.bin image that may not be linked into the xtensa Image.elf. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + arch/xtensa/boot/Makefile | 2 +- + arch/xtensa/kernel/vmlinux.lds.S | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/xtensa/boot/Makefile ++++ b/arch/xtensa/boot/Makefile +@@ -33,7 +33,7 @@ uImage: $(obj)/uImage + boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y)) + $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS) + +-OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary ++OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary + + vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) +--- a/arch/xtensa/kernel/vmlinux.lds.S ++++ b/arch/xtensa/kernel/vmlinux.lds.S +@@ -131,6 +131,7 @@ SECTIONS + .fixup : { *(.fixup) } + + EXCEPTION_TABLE(16) ++ NOTES + /* Data section */ + + _sdata = .; diff --git a/queue-4.19/xtensa-fix-boot-parameters-address-translation.patch b/queue-4.19/xtensa-fix-boot-parameters-address-translation.patch new file mode 100644 index 00000000000..906f7206e2a --- /dev/null +++ b/queue-4.19/xtensa-fix-boot-parameters-address-translation.patch @@ -0,0 +1,43 @@ +From 40dc948f234b73497c3278875eb08a01d5854d3f Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Tue, 13 Nov 2018 23:46:42 -0800 +Subject: xtensa: fix boot parameters address translation + +From: Max Filippov + +commit 40dc948f234b73497c3278875eb08a01d5854d3f upstream. + +The bootloader may pass physical address of the boot parameters structure +to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in +the arch/xtensa/kernel/head.S is supposed to map that physical address to +the virtual address in the configured virtual memory layout. + +This code hasn't been updated when additional 256+256 and 512+512 +memory layouts were introduced and it may produce wrong addresses when +used with these layouts. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + arch/xtensa/kernel/head.S | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/xtensa/kernel/head.S ++++ b/arch/xtensa/kernel/head.S +@@ -88,9 +88,12 @@ _SetupMMU: + initialize_mmu + #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY + rsr a2, excsave1 +- movi a3, 0x08000000 ++ movi a3, XCHAL_KSEG_PADDR ++ bltu a2, a3, 1f ++ sub a2, a2, a3 ++ movi a3, XCHAL_KSEG_SIZE + bgeu a2, a3, 1f +- movi a3, 0xd0000000 ++ movi a3, XCHAL_KSEG_CACHED_VADDR + add a2, a2, a3 + wsr a2, excsave1 + 1: diff --git a/queue-4.19/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch b/queue-4.19/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch new file mode 100644 index 00000000000..8b34c220883 --- /dev/null +++ b/queue-4.19/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch @@ -0,0 +1,45 @@ +From 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Sun, 4 Nov 2018 01:46:00 -0700 +Subject: xtensa: make sure bFLT stack is 16 byte aligned + +From: Max Filippov + +commit 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 upstream. + +Xtensa ABI requires stack alignment to be at least 16. In noMMU +configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at +least 16. 
+ +This fixes the following runtime error in noMMU configuration, caused by +interaction between insufficiently aligned stack and alloca function, +that results in corruption of on-stack variable in the libc function +glob: + + Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65) + - should not happen + EXCCAUSE is 15 + +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + arch/xtensa/include/asm/processor.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/xtensa/include/asm/processor.h ++++ b/arch/xtensa/include/asm/processor.h +@@ -23,7 +23,11 @@ + # error Linux requires the Xtensa Windowed Registers Option. + #endif + +-#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH ++/* Xtensa ABI requires stack alignment to be at least 16 */ ++ ++#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) ++ ++#define ARCH_SLAB_MINALIGN STACK_ALIGN + + /* + * User space process size: 1 GB. -- 2.47.2