git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Dec 2020 09:37:04 +0000 (10:37 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Dec 2020 09:37:04 +0000 (10:37 +0100)
added patches:
arm-dts-at91-sama5d2-fix-can-message-ram-offset-and-size.patch
arm-dts-pandaboard-fix-pinmux-for-gpio-user-button-of-pandaboard-es.patch
arm-tegra-populate-opp-table-for-tegra20-ventana.patch
arm64-dts-marvell-keep-smmu-disabled-by-default-for-armada-7040-and-8040.patch
arm64-dts-ti-k3-am65-mark-dss-as-dma-coherent.patch
btrfs-do-not-shorten-unpin-len-for-caching-block-groups.patch
btrfs-fix-race-when-defragmenting-leads-to-unnecessary-io.patch
btrfs-update-last_byte_to_unpin-in-switch_commit_roots.patch
ceph-fix-race-in-concurrent-__ceph_remove_cap-invocations.patch
ext4-don-t-remount-read-only-with-errors-continue-on-reboot.patch
ext4-fix-a-memory-leak-of-ext4_free_data.patch
ext4-fix-an-is_err-vs-null-check.patch
ext4-fix-deadlock-with-fs-freezing-and-ea-inodes.patch
fsnotify-fix-events-reported-to-watching-parent-and-child.patch
fsnotify-generalize-handle_inode_event.patch
ima-don-t-modify-file-descriptor-mode-on-the-fly.patch
inotify-convert-to-handle_inode_event-interface.patch
iommu-arm-smmu-allow-implementation-specific-write_s2cr.patch
iommu-arm-smmu-qcom-implement-s2cr-quirk.patch
iommu-arm-smmu-qcom-read-back-stream-mappings.patch
jffs2-fix-gc-exit-abnormally.patch
jffs2-fix-ignoring-mounting-options-problem-during-remounting.patch
kvm-arm64-introduce-handling-of-aarch32-ttbcr2-traps.patch
kvm-svm-remove-the-call-to-sev_platform_status-during-setup.patch
kvm-x86-reinstate-vendor-agnostic-check-on-spec_ctrl-cpuid-bits.patch
ovl-make-ioctl-safe.patch
powerpc-32-fix-vmap-stack-properly-set-r1-before-activating-mmu-on-syscall-too.patch
powerpc-8xx-fix-early-debug-when-smc1-is-relocated.patch
powerpc-bitops-fix-possible-undefined-behaviour-with-fls-and-fls64.patch
powerpc-feature-add-cpu_ftr_noexecute-to-g2_le.patch
powerpc-fix-incorrect-stw-ux-u-x-instructions-in-__set_pte_at.patch
powerpc-mm-fix-verification-of-mmu_ftr_type_44x.patch
powerpc-powernv-memtrace-don-t-leak-kernel-memory-to-user-space.patch
powerpc-powernv-memtrace-fix-crashing-the-kernel-when-enabling-concurrently.patch
powerpc-powernv-npu-do-not-attempt-npu2-setup-on-power8nvl-npu.patch
powerpc-rtas-fix-typo-of-ibm-open-errinjct-in-rtas-filter.patch
powerpc-xmon-change-printk-to-pr_cont.patch
risc-v-fix-usage-of-memblock_enforce_memory_limit.patch
smb3-avoid-confusing-warning-message-on-mount-to-azure.patch
smb3.1.1-do-not-log-warning-message-if-server-doesn-t-populate-salt.patch
smb3.1.1-remove-confusing-mount-warning-when-no-spnego-info-on-negprot-rsp.patch
ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch
um-fix-time-travel-mode.patch
um-remove-use-of-asprinf-in-umid.c.patch
xprtrdma-fix-xdrbuf_sparse_pages-support.patch

46 files changed:
queue-5.10/arm-dts-at91-sama5d2-fix-can-message-ram-offset-and-size.patch [new file with mode: 0644]
queue-5.10/arm-dts-pandaboard-fix-pinmux-for-gpio-user-button-of-pandaboard-es.patch [new file with mode: 0644]
queue-5.10/arm-tegra-populate-opp-table-for-tegra20-ventana.patch [new file with mode: 0644]
queue-5.10/arm64-dts-marvell-keep-smmu-disabled-by-default-for-armada-7040-and-8040.patch [new file with mode: 0644]
queue-5.10/arm64-dts-ti-k3-am65-mark-dss-as-dma-coherent.patch [new file with mode: 0644]
queue-5.10/btrfs-do-not-shorten-unpin-len-for-caching-block-groups.patch [new file with mode: 0644]
queue-5.10/btrfs-fix-race-when-defragmenting-leads-to-unnecessary-io.patch [new file with mode: 0644]
queue-5.10/btrfs-update-last_byte_to_unpin-in-switch_commit_roots.patch [new file with mode: 0644]
queue-5.10/ceph-fix-race-in-concurrent-__ceph_remove_cap-invocations.patch [new file with mode: 0644]
queue-5.10/ext4-don-t-remount-read-only-with-errors-continue-on-reboot.patch [new file with mode: 0644]
queue-5.10/ext4-fix-a-memory-leak-of-ext4_free_data.patch [new file with mode: 0644]
queue-5.10/ext4-fix-an-is_err-vs-null-check.patch [new file with mode: 0644]
queue-5.10/ext4-fix-deadlock-with-fs-freezing-and-ea-inodes.patch [new file with mode: 0644]
queue-5.10/fsnotify-fix-events-reported-to-watching-parent-and-child.patch [new file with mode: 0644]
queue-5.10/fsnotify-generalize-handle_inode_event.patch [new file with mode: 0644]
queue-5.10/ima-don-t-modify-file-descriptor-mode-on-the-fly.patch [new file with mode: 0644]
queue-5.10/inotify-convert-to-handle_inode_event-interface.patch [new file with mode: 0644]
queue-5.10/iommu-arm-smmu-allow-implementation-specific-write_s2cr.patch [new file with mode: 0644]
queue-5.10/iommu-arm-smmu-qcom-implement-s2cr-quirk.patch [new file with mode: 0644]
queue-5.10/iommu-arm-smmu-qcom-read-back-stream-mappings.patch [new file with mode: 0644]
queue-5.10/jffs2-fix-gc-exit-abnormally.patch [new file with mode: 0644]
queue-5.10/jffs2-fix-ignoring-mounting-options-problem-during-remounting.patch [new file with mode: 0644]
queue-5.10/kvm-arm64-introduce-handling-of-aarch32-ttbcr2-traps.patch [new file with mode: 0644]
queue-5.10/kvm-svm-remove-the-call-to-sev_platform_status-during-setup.patch [new file with mode: 0644]
queue-5.10/kvm-x86-reinstate-vendor-agnostic-check-on-spec_ctrl-cpuid-bits.patch [new file with mode: 0644]
queue-5.10/ovl-make-ioctl-safe.patch [new file with mode: 0644]
queue-5.10/powerpc-32-fix-vmap-stack-properly-set-r1-before-activating-mmu-on-syscall-too.patch [new file with mode: 0644]
queue-5.10/powerpc-8xx-fix-early-debug-when-smc1-is-relocated.patch [new file with mode: 0644]
queue-5.10/powerpc-bitops-fix-possible-undefined-behaviour-with-fls-and-fls64.patch [new file with mode: 0644]
queue-5.10/powerpc-feature-add-cpu_ftr_noexecute-to-g2_le.patch [new file with mode: 0644]
queue-5.10/powerpc-fix-incorrect-stw-ux-u-x-instructions-in-__set_pte_at.patch [new file with mode: 0644]
queue-5.10/powerpc-mm-fix-verification-of-mmu_ftr_type_44x.patch [new file with mode: 0644]
queue-5.10/powerpc-powernv-memtrace-don-t-leak-kernel-memory-to-user-space.patch [new file with mode: 0644]
queue-5.10/powerpc-powernv-memtrace-fix-crashing-the-kernel-when-enabling-concurrently.patch [new file with mode: 0644]
queue-5.10/powerpc-powernv-npu-do-not-attempt-npu2-setup-on-power8nvl-npu.patch [new file with mode: 0644]
queue-5.10/powerpc-rtas-fix-typo-of-ibm-open-errinjct-in-rtas-filter.patch [new file with mode: 0644]
queue-5.10/powerpc-xmon-change-printk-to-pr_cont.patch [new file with mode: 0644]
queue-5.10/risc-v-fix-usage-of-memblock_enforce_memory_limit.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/smb3-avoid-confusing-warning-message-on-mount-to-azure.patch [new file with mode: 0644]
queue-5.10/smb3.1.1-do-not-log-warning-message-if-server-doesn-t-populate-salt.patch [new file with mode: 0644]
queue-5.10/smb3.1.1-remove-confusing-mount-warning-when-no-spnego-info-on-negprot-rsp.patch [new file with mode: 0644]
queue-5.10/ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch [new file with mode: 0644]
queue-5.10/um-fix-time-travel-mode.patch [new file with mode: 0644]
queue-5.10/um-remove-use-of-asprinf-in-umid.c.patch [new file with mode: 0644]
queue-5.10/xprtrdma-fix-xdrbuf_sparse_pages-support.patch [new file with mode: 0644]

diff --git a/queue-5.10/arm-dts-at91-sama5d2-fix-can-message-ram-offset-and-size.patch b/queue-5.10/arm-dts-at91-sama5d2-fix-can-message-ram-offset-and-size.patch
new file mode 100644 (file)
index 0000000..352881a
--- /dev/null
@@ -0,0 +1,64 @@
+From 85b8350ae99d1300eb6dc072459246c2649a8e50 Mon Sep 17 00:00:00 2001
+From: Nicolas Ferre <nicolas.ferre@microchip.com>
+Date: Thu, 3 Dec 2020 10:19:49 +0100
+Subject: ARM: dts: at91: sama5d2: fix CAN message ram offset and size
+
+From: Nicolas Ferre <nicolas.ferre@microchip.com>
+
+commit 85b8350ae99d1300eb6dc072459246c2649a8e50 upstream.
+
+CAN0 and CAN1 instances share the same message ram configured
+at 0x210000 on sama5d2 Linux systems.
+According to current configuration of CAN0, we need 0x1c00 bytes
+so that CAN1 doesn't overlap its message ram:
+64 x RX FIFO0 elements => 64 x 72 bytes
+32 x TXE (TX Event FIFO) elements => 32 x 8 bytes
+32 x TXB (TX Buffer) elements => 32 x 72 bytes
+So a total of 7168 bytes (0x1C00).
+
+Fix offset to match this needed size.
+Make the CAN0 message ram ioremap match exactly this size so that it is
+easily understandable.  Adapt CAN1 size accordingly.
+
+Fixes: bc6d5d7666b7 ("ARM: dts: at91: sama5d2: add m_can nodes")
+Reported-by: Dan Sneddon <dan.sneddon@microchip.com>
+Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Tested-by: Cristian Birsan <cristian.birsan@microchip.com>
+Cc: stable@vger.kernel.org # v4.13+
+Link: https://lore.kernel.org/r/20201203091949.9015-1-nicolas.ferre@microchip.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/sama5d2.dtsi |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/boot/dts/sama5d2.dtsi
++++ b/arch/arm/boot/dts/sama5d2.dtsi
+@@ -725,7 +725,7 @@
+                       can0: can@f8054000 {
+                               compatible = "bosch,m_can";
+-                              reg = <0xf8054000 0x4000>, <0x210000 0x4000>;
++                              reg = <0xf8054000 0x4000>, <0x210000 0x1c00>;
+                               reg-names = "m_can", "message_ram";
+                               interrupts = <56 IRQ_TYPE_LEVEL_HIGH 7>,
+                                            <64 IRQ_TYPE_LEVEL_HIGH 7>;
+@@ -1131,7 +1131,7 @@
+                       can1: can@fc050000 {
+                               compatible = "bosch,m_can";
+-                              reg = <0xfc050000 0x4000>, <0x210000 0x4000>;
++                              reg = <0xfc050000 0x4000>, <0x210000 0x3800>;
+                               reg-names = "m_can", "message_ram";
+                               interrupts = <57 IRQ_TYPE_LEVEL_HIGH 7>,
+                                            <65 IRQ_TYPE_LEVEL_HIGH 7>;
+@@ -1141,7 +1141,7 @@
+                               assigned-clocks = <&pmc PMC_TYPE_GCK 57>;
+                               assigned-clock-parents = <&pmc PMC_TYPE_CORE PMC_UTMI>;
+                               assigned-clock-rates = <40000000>;
+-                              bosch,mram-cfg = <0x1100 0 0 64 0 0 32 32>;
++                              bosch,mram-cfg = <0x1c00 0 0 64 0 0 32 32>;
+                               status = "disabled";
+                       };
diff --git a/queue-5.10/arm-dts-pandaboard-fix-pinmux-for-gpio-user-button-of-pandaboard-es.patch b/queue-5.10/arm-dts-pandaboard-fix-pinmux-for-gpio-user-button-of-pandaboard-es.patch
new file mode 100644 (file)
index 0000000..993579f
--- /dev/null
@@ -0,0 +1,32 @@
+From df9dbaf2c415cd94ad520067a1eccfee62f00a33 Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Sat, 3 Oct 2020 16:10:00 +0200
+Subject: ARM: dts: pandaboard: fix pinmux for gpio user button of Pandaboard ES
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit df9dbaf2c415cd94ad520067a1eccfee62f00a33 upstream.
+
+The pinmux control register offset passed to OMAP4_IOPAD is odd.
+
+Fixes: ab9a13665e7c ("ARM: dts: pandaboard: add gpio user button")
+Cc: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/omap4-panda-es.dts |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/boot/dts/omap4-panda-es.dts
++++ b/arch/arm/boot/dts/omap4-panda-es.dts
+@@ -46,7 +46,7 @@
+       button_pins: pinmux_button_pins {
+               pinctrl-single,pins = <
+-                      OMAP4_IOPAD(0x11b, PIN_INPUT_PULLUP | MUX_MODE3) /* gpio_113 */
++                      OMAP4_IOPAD(0x0fc, PIN_INPUT_PULLUP | MUX_MODE3) /* gpio_113 */
+               >;
+       };
+ };
diff --git a/queue-5.10/arm-tegra-populate-opp-table-for-tegra20-ventana.patch b/queue-5.10/arm-tegra-populate-opp-table-for-tegra20-ventana.patch
new file mode 100644 (file)
index 0000000..994471a
--- /dev/null
@@ -0,0 +1,54 @@
+From bd7cd7e05a42491469ca19861da44abc3168cf5f Mon Sep 17 00:00:00 2001
+From: Jon Hunter <jonathanh@nvidia.com>
+Date: Wed, 11 Nov 2020 10:38:47 +0000
+Subject: ARM: tegra: Populate OPP table for Tegra20 Ventana
+
+From: Jon Hunter <jonathanh@nvidia.com>
+
+commit bd7cd7e05a42491469ca19861da44abc3168cf5f upstream.
+
+Commit 9ce274630495 ("cpufreq: tegra20: Use generic cpufreq-dt driver
+(Tegra30 supported now)") updated the Tegra20 CPUFREQ driver to use the
+generic CPUFREQ device-tree driver. Since this change, CPUFREQ support
+on the Tegra20 Ventana platform has been broken because the necessary
+device-tree nodes with the operating point information are not populated
+for this platform. Fix this by updating the device-tree for Ventana to
+include the operating point information for Tegra20.
+
+Fixes: 9ce274630495 ("cpufreq: tegra20: Use generic cpufreq-dt driver (Tegra30 supported now)")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/tegra20-ventana.dts |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/arch/arm/boot/dts/tegra20-ventana.dts
++++ b/arch/arm/boot/dts/tegra20-ventana.dts
+@@ -3,6 +3,7 @@
+ #include <dt-bindings/input/input.h>
+ #include "tegra20.dtsi"
++#include "tegra20-cpu-opp.dtsi"
+ / {
+       model = "NVIDIA Tegra20 Ventana evaluation board";
+@@ -592,6 +593,16 @@
+               #clock-cells = <0>;
+       };
++      cpus {
++              cpu0: cpu@0 {
++                      operating-points-v2 = <&cpu0_opp_table>;
++              };
++
++              cpu@1 {
++                      operating-points-v2 = <&cpu0_opp_table>;
++              };
++      };
++
+       gpio-keys {
+               compatible = "gpio-keys";
diff --git a/queue-5.10/arm64-dts-marvell-keep-smmu-disabled-by-default-for-armada-7040-and-8040.patch b/queue-5.10/arm64-dts-marvell-keep-smmu-disabled-by-default-for-armada-7040-and-8040.patch
new file mode 100644 (file)
index 0000000..340ec1a
--- /dev/null
@@ -0,0 +1,58 @@
+From f43cadef2df260101497a6aace05e24201f00202 Mon Sep 17 00:00:00 2001
+From: Tomasz Nowicki <tn@semihalf.com>
+Date: Thu, 5 Nov 2020 12:26:02 +0100
+Subject: arm64: dts: marvell: keep SMMU disabled by default for Armada 7040 and 8040
+
+From: Tomasz Nowicki <tn@semihalf.com>
+
+commit f43cadef2df260101497a6aace05e24201f00202 upstream.
+
+FW has to configure devices' StreamIDs so that SMMU is able to lookup
+context and do proper translation later on. For Armada 7040 & 8040 and
+publicly available FW, most of the devices are configured properly,
+but some like ap_sdhci0, PCIe, NIC still remain unassigned which
+results in SMMU faults about unmatched StreamID (assuming
+ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT=y).
+
+Since there is a dependency on custom FW, let SMMU be disabled by default.
+People who are still willing to use SMMU need to enable it manually and
+use ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT=n (or via kernel command line)
+with extra caution.
+
+Fixes: 83a3545d9c37 ("arm64: dts: marvell: add SMMU support")
+Cc: <stable@vger.kernel.org> # 5.9+
+Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/boot/dts/marvell/armada-7040.dtsi |    4 ----
+ arch/arm64/boot/dts/marvell/armada-8040.dtsi |    4 ----
+ 2 files changed, 8 deletions(-)
+
+--- a/arch/arm64/boot/dts/marvell/armada-7040.dtsi
++++ b/arch/arm64/boot/dts/marvell/armada-7040.dtsi
+@@ -15,10 +15,6 @@
+                    "marvell,armada-ap806";
+ };
+-&smmu {
+-      status = "okay";
+-};
+-
+ &cp0_pcie0 {
+       iommu-map =
+               <0x0   &smmu 0x480 0x20>,
+--- a/arch/arm64/boot/dts/marvell/armada-8040.dtsi
++++ b/arch/arm64/boot/dts/marvell/armada-8040.dtsi
+@@ -15,10 +15,6 @@
+                    "marvell,armada-ap806";
+ };
+-&smmu {
+-      status = "okay";
+-};
+-
+ &cp0_pcie0 {
+       iommu-map =
+               <0x0   &smmu 0x480 0x20>,
diff --git a/queue-5.10/arm64-dts-ti-k3-am65-mark-dss-as-dma-coherent.patch b/queue-5.10/arm64-dts-ti-k3-am65-mark-dss-as-dma-coherent.patch
new file mode 100644 (file)
index 0000000..c166683
--- /dev/null
@@ -0,0 +1,35 @@
+From 50301e8815c681bc5de8ca7050c4b426923d4e19 Mon Sep 17 00:00:00 2001
+From: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Date: Mon, 2 Nov 2020 15:46:50 +0200
+Subject: arm64: dts: ti: k3-am65: mark dss as dma-coherent
+
+From: Tomi Valkeinen <tomi.valkeinen@ti.com>
+
+commit 50301e8815c681bc5de8ca7050c4b426923d4e19 upstream.
+
+DSS is IO coherent on AM65, so we should mark it as such with
+'dma-coherent' property in the DT file.
+
+Fixes: fc539b90eda2 ("arm64: dts: ti: am654: Add DSS node")
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Nishanth Menon <nm@ti.com>
+Acked-by: Nikhil Devshatwar <nikhil.nd@ti.com>
+Cc: stable@vger.kernel.org # v5.8+
+Link: https://lore.kernel.org/r/20201102134650.55321-1-tomi.valkeinen@ti.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/boot/dts/ti/k3-am65-main.dtsi |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+@@ -867,6 +867,8 @@
+               status = "disabled";
++              dma-coherent;
++
+               dss_ports: ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
diff --git a/queue-5.10/btrfs-do-not-shorten-unpin-len-for-caching-block-groups.patch b/queue-5.10/btrfs-do-not-shorten-unpin-len-for-caching-block-groups.patch
new file mode 100644 (file)
index 0000000..9f43f90
--- /dev/null
@@ -0,0 +1,47 @@
+From 9076dbd5ee837c3882fc42891c14cecd0354a849 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 23 Oct 2020 09:58:04 -0400
+Subject: btrfs: do not shorten unpin len for caching block groups
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 9076dbd5ee837c3882fc42891c14cecd0354a849 upstream.
+
+While fixing up our ->last_byte_to_unpin locking I noticed that we will
+shorten len based on ->last_byte_to_unpin if we're caching when we're
+adding back the free space.  This is correct for the free space, as we
+cannot unpin more than ->last_byte_to_unpin, however we use len to
+adjust the ->bytes_pinned counters and such, which need to track the
+actual pinned usage.  This could result in
+WARN_ON(space_info->bytes_pinned) triggering at unmount time.
+
+Fix this by using a local variable for the amount to add to free space
+cache, and leave len untouched in this case.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2816,10 +2816,10 @@ static int unpin_extent_range(struct btr
+               len = cache->start + cache->length - start;
+               len = min(len, end + 1 - start);
+-              if (start < cache->last_byte_to_unpin) {
+-                      len = min(len, cache->last_byte_to_unpin - start);
+-                      if (return_free_space)
+-                              btrfs_add_free_space(cache, start, len);
++              if (start < cache->last_byte_to_unpin && return_free_space) {
++                      u64 add_len = min(len, cache->last_byte_to_unpin - start);
++
++                      btrfs_add_free_space(cache, start, add_len);
+               }
+               start += len;
diff --git a/queue-5.10/btrfs-fix-race-when-defragmenting-leads-to-unnecessary-io.patch b/queue-5.10/btrfs-fix-race-when-defragmenting-leads-to-unnecessary-io.patch
new file mode 100644 (file)
index 0000000..4352b97
--- /dev/null
@@ -0,0 +1,105 @@
+From 7f458a3873ae94efe1f37c8b96c97e7298769e98 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 4 Nov 2020 11:07:33 +0000
+Subject: btrfs: fix race when defragmenting leads to unnecessary IO
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 7f458a3873ae94efe1f37c8b96c97e7298769e98 upstream.
+
+When defragmenting we skip ranges that have holes or inline extents, so that
+we don't do unnecessary IO and waste space. We do this check when calling
+should_defrag_range() at btrfs_defrag_file(). However we do it without
+holding the inode's lock. The reason we do it like this is to avoid
+blocking other tasks for too long, that possibly want to operate on other
+file ranges, since after the call to should_defrag_range() and before
+locking the inode, we trigger a synchronous page cache readahead. However
+before we were able to lock the inode, some other task might have punched
+a hole in our range, or we may now have an inline extent there, in which
+case we should not set the range for defrag anymore since that would cause
+unnecessary IO and make us waste space (i.e. allocating extents to contain
+zeros for a hole).
+
+So after we locked the inode and the range in the iotree, check again if
+we have holes or an inline extent, and if we do, just skip the range.
+
+I hit this while testing my next patch that fixes races when updating an
+inode's number of bytes (subject "btrfs: update the number of bytes used
+by an inode atomically"), and it depends on this change in order to work
+correctly. Alternatively I could rework that other patch to detect holes
+and flag their range with the 'new delalloc' bit, but this itself fixes
+an efficiency problem due to a race that from a functional point of view is
+not harmful (it could be triggered with btrfs/062 from fstests).
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c |   39 +++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 39 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1275,6 +1275,7 @@ static int cluster_pages_for_defrag(stru
+       u64 page_end;
+       u64 page_cnt;
+       u64 start = (u64)start_index << PAGE_SHIFT;
++      u64 search_start;
+       int ret;
+       int i;
+       int i_done;
+@@ -1371,6 +1372,40 @@ again:
+       lock_extent_bits(&BTRFS_I(inode)->io_tree,
+                        page_start, page_end - 1, &cached_state);
++
++      /*
++       * When defragmenting we skip ranges that have holes or inline extents,
++       * (check should_defrag_range()), to avoid unnecessary IO and wasting
++       * space. At btrfs_defrag_file(), we check if a range should be defragged
++       * before locking the inode and then, if it should, we trigger a sync
++       * page cache readahead - we lock the inode only after that to avoid
++       * blocking for too long other tasks that possibly want to operate on
++       * other file ranges. But before we were able to get the inode lock,
++       * some other task may have punched a hole in the range, or we may have
++       * now an inline extent, in which case we should not defrag. So check
++       * for that here, where we have the inode and the range locked, and bail
++       * out if that happened.
++       */
++      search_start = page_start;
++      while (search_start < page_end) {
++              struct extent_map *em;
++
++              em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
++                                    page_end - search_start);
++              if (IS_ERR(em)) {
++                      ret = PTR_ERR(em);
++                      goto out_unlock_range;
++              }
++              if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
++                      free_extent_map(em);
++                      /* Ok, 0 means we did not defrag anything */
++                      ret = 0;
++                      goto out_unlock_range;
++              }
++              search_start = extent_map_end(em);
++              free_extent_map(em);
++      }
++
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
+                         page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                         EXTENT_DEFRAG, 0, 0, &cached_state);
+@@ -1401,6 +1436,10 @@ again:
+       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
+       extent_changeset_free(data_reserved);
+       return i_done;
++
++out_unlock_range:
++      unlock_extent_cached(&BTRFS_I(inode)->io_tree,
++                           page_start, page_end - 1, &cached_state);
+ out:
+       for (i = 0; i < i_done; i++) {
+               unlock_page(pages[i]);
diff --git a/queue-5.10/btrfs-update-last_byte_to_unpin-in-switch_commit_roots.patch b/queue-5.10/btrfs-update-last_byte_to_unpin-in-switch_commit_roots.patch
new file mode 100644 (file)
index 0000000..5187e33
--- /dev/null
@@ -0,0 +1,188 @@
+From 27d56e62e4748c2135650c260024e9904b8c1a0a Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 23 Oct 2020 09:58:05 -0400
+Subject: btrfs: update last_byte_to_unpin in switch_commit_roots
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 27d56e62e4748c2135650c260024e9904b8c1a0a upstream.
+
+While writing an explanation for the need of the commit_root_sem for
+btrfs_prepare_extent_commit, I realized we have a slight hole that could
+result in leaked space if we have to do the old style caching.  Consider
+the following scenario
+
+ commit root
+ +----+----+----+----+----+----+----+
+ |\\\\|    |\\\\|\\\\|    |\\\\|\\\\|
+ +----+----+----+----+----+----+----+
+ 0    1    2    3    4    5    6    7
+
+ new commit root
+ +----+----+----+----+----+----+----+
+ |    |    |    |\\\\|    |    |\\\\|
+ +----+----+----+----+----+----+----+
+ 0    1    2    3    4    5    6    7
+
+Prior to this patch, we run btrfs_prepare_extent_commit, which updates
+the last_byte_to_unpin, and then we subsequently run
+switch_commit_roots.  In this example let's assume that
+caching_ctl->progress == 1 at btrfs_prepare_extent_commit() time, which
+means that cache->last_byte_to_unpin == 1.  Then we go and do the
+switch_commit_roots(), but in the meantime the caching thread has made
+some more progress, because we drop the commit_root_sem and re-acquired
+it.  Now caching_ctl->progress == 3.  We swap out the commit root and
+carry on to unpin.
+
+The race can happen like:
+
+  1) The caching thread was running using the old commit root when it
+     found the extent for [2, 3);
+
+  2) Then it released the commit_root_sem because it was in the last
+     item of a leaf and the semaphore was contended, and set ->progress
+     to 3 (value of 'last'), as the last extent item in the current leaf
+     was for the extent for range [2, 3);
+
+  3) Next time it gets the commit_root_sem, will start using the new
+     commit root and search for a key with offset 3, so it never finds
+     the hole for [2, 3).
+
+  So the caching thread never saw [2, 3) as free space in any of the
+  commit roots, and by the time finish_extent_commit() was called for
+  the range [0, 3), ->last_byte_to_unpin was 1, so it only returned the
+  subrange [0, 1) to the free space cache, skipping [2, 3).
+
+In the unpin code we have last_byte_to_unpin == 1, so we unpin [0,1),
+but do not unpin [2,3).  However because caching_ctl->progress == 3 we
+do not see the newly freed section of [2,3), and thus do not add it to
+our free space cache.  This results in us missing a chunk of free space
+in memory (on disk too, unless we have a power failure before writing
+the free space cache to disk).
+
+Fix this by making sure the ->last_byte_to_unpin is set at the same time
+that we swap the commit roots, this ensures that we will always be
+consistent.
+
+CC: stable@vger.kernel.org # 5.8+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+[ update changelog with Filipe's review comments ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.h       |    1 -
+ fs/btrfs/extent-tree.c |   25 -------------------------
+ fs/btrfs/transaction.c |   42 ++++++++++++++++++++++++++++++++++++++++--
+ 3 files changed, 40 insertions(+), 28 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -2593,7 +2593,6 @@ int btrfs_free_reserved_extent(struct bt
+                              u64 start, u64 len, int delalloc);
+ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
+                             u64 len);
+-void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
+ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
+ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+                        struct btrfs_ref *generic_ref);
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2730,31 +2730,6 @@ btrfs_inc_block_group_reservations(struc
+       atomic_inc(&bg->reservations);
+ }
+-void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
+-{
+-      struct btrfs_caching_control *next;
+-      struct btrfs_caching_control *caching_ctl;
+-      struct btrfs_block_group *cache;
+-
+-      down_write(&fs_info->commit_root_sem);
+-
+-      list_for_each_entry_safe(caching_ctl, next,
+-                               &fs_info->caching_block_groups, list) {
+-              cache = caching_ctl->block_group;
+-              if (btrfs_block_group_done(cache)) {
+-                      cache->last_byte_to_unpin = (u64)-1;
+-                      list_del_init(&caching_ctl->list);
+-                      btrfs_put_caching_control(caching_ctl);
+-              } else {
+-                      cache->last_byte_to_unpin = caching_ctl->progress;
+-              }
+-      }
+-
+-      up_write(&fs_info->commit_root_sem);
+-
+-      btrfs_update_global_block_rsv(fs_info);
+-}
+-
+ /*
+  * Returns the free cluster for the given space info and sets empty_cluster to
+  * what it should be based on the mount options.
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -155,6 +155,7 @@ static noinline void switch_commit_roots
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+       struct btrfs_root *root, *tmp;
++      struct btrfs_caching_control *caching_ctl, *next;
+       down_write(&fs_info->commit_root_sem);
+       list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
+@@ -180,6 +181,45 @@ static noinline void switch_commit_roots
+               spin_lock(&cur_trans->dropped_roots_lock);
+       }
+       spin_unlock(&cur_trans->dropped_roots_lock);
++
++      /*
++       * We have to update the last_byte_to_unpin under the commit_root_sem,
++       * at the same time we swap out the commit roots.
++       *
++       * This is because we must have a real view of the last spot the caching
++       * kthreads were while caching.  Consider the following views of the
++       * extent tree for a block group
++       *
++       * commit root
++       * +----+----+----+----+----+----+----+
++       * |\\\\|    |\\\\|\\\\|    |\\\\|\\\\|
++       * +----+----+----+----+----+----+----+
++       * 0    1    2    3    4    5    6    7
++       *
++       * new commit root
++       * +----+----+----+----+----+----+----+
++       * |    |    |    |\\\\|    |    |\\\\|
++       * +----+----+----+----+----+----+----+
++       * 0    1    2    3    4    5    6    7
++       *
+       * If the caching_ctl->progress was at 3, then we are only allowed to
+       * unpin [0,1) and [2,3), because the caching thread has already
+       * processed those extents.  We are not allowed to unpin [5,6), because
+       * the caching thread will re-start its search from 3, and thus find
+       * the hole from [4,6) to add to the free space cache.
++       */
++      list_for_each_entry_safe(caching_ctl, next,
++                               &fs_info->caching_block_groups, list) {
++              struct btrfs_block_group *cache = caching_ctl->block_group;
++
++              if (btrfs_block_group_done(cache)) {
++                      cache->last_byte_to_unpin = (u64)-1;
++                      list_del_init(&caching_ctl->list);
++                      btrfs_put_caching_control(caching_ctl);
++              } else {
++                      cache->last_byte_to_unpin = caching_ctl->progress;
++              }
++      }
+       up_write(&fs_info->commit_root_sem);
+ }
+@@ -2293,8 +2333,6 @@ int btrfs_commit_transaction(struct btrf
+               goto unlock_tree_log;
+       }
+-      btrfs_prepare_extent_commit(fs_info);
+-
+       cur_trans = fs_info->running_transaction;
+       btrfs_set_root_node(&fs_info->tree_root->root_item,
diff --git a/queue-5.10/ceph-fix-race-in-concurrent-__ceph_remove_cap-invocations.patch b/queue-5.10/ceph-fix-race-in-concurrent-__ceph_remove_cap-invocations.patch
new file mode 100644 (file)
index 0000000..9fd0c96
--- /dev/null
@@ -0,0 +1,53 @@
+From e5cafce3ad0f8652d6849314d951459c2bff7233 Mon Sep 17 00:00:00 2001
+From: Luis Henriques <lhenriques@suse.de>
+Date: Thu, 12 Nov 2020 10:45:12 +0000
+Subject: ceph: fix race in concurrent __ceph_remove_cap invocations
+
+From: Luis Henriques <lhenriques@suse.de>
+
+commit e5cafce3ad0f8652d6849314d951459c2bff7233 upstream.
+
+A NULL pointer dereference may occur in __ceph_remove_cap with some of the
+callbacks used in ceph_iterate_session_caps, namely trim_caps_cb and
+remove_session_caps_cb. Those callers hold the session->s_mutex, so they
+are prevented from concurrent execution, but ceph_evict_inode does not.
+
+Since the callers of this function hold the i_ceph_lock, the fix is simply
+a matter of returning immediately if cap->ci is NULL.
+
+Cc: stable@vger.kernel.org
+URL: https://tracker.ceph.com/issues/43272
+Suggested-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Luis Henriques <lhenriques@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/caps.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -1140,12 +1140,19 @@ void __ceph_remove_cap(struct ceph_cap *
+ {
+       struct ceph_mds_session *session = cap->session;
+       struct ceph_inode_info *ci = cap->ci;
+-      struct ceph_mds_client *mdsc =
+-              ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
++      struct ceph_mds_client *mdsc;
+       int removed = 0;
++      /* 'ci' being NULL means the removal has already occurred */
++      if (!ci) {
++              dout("%s: cap inode is NULL\n", __func__);
++              return;
++      }
++
+       dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
++      mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
++
+       /* remove from inode's cap rbtree, and clear auth cap */
+       rb_erase(&cap->ci_node, &ci->i_caps);
+       if (ci->i_auth_cap == cap) {
diff --git a/queue-5.10/ext4-don-t-remount-read-only-with-errors-continue-on-reboot.patch b/queue-5.10/ext4-don-t-remount-read-only-with-errors-continue-on-reboot.patch
new file mode 100644 (file)
index 0000000..fad80bb
--- /dev/null
@@ -0,0 +1,52 @@
+From b08070eca9e247f60ab39d79b2c25d274750441f Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 27 Nov 2020 12:33:54 +0100
+Subject: ext4: don't remount read-only with errors=continue on reboot
+
+From: Jan Kara <jack@suse.cz>
+
+commit b08070eca9e247f60ab39d79b2c25d274750441f upstream.
+
+ext4_handle_error() with errors=continue mount option can accidentally
+remount the filesystem read-only when the system is rebooting. Fix that.
+
+Fixes: 1dc1097ff60e ("ext4: avoid panic during forced reboot")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Cc: stable@kernel.org
+Link: https://lore.kernel.org/r/20201127113405.26867-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c |   14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -666,19 +666,17 @@ static bool system_going_down(void)
+ static void ext4_handle_error(struct super_block *sb)
+ {
++      journal_t *journal = EXT4_SB(sb)->s_journal;
++
+       if (test_opt(sb, WARN_ON_ERROR))
+               WARN_ON_ONCE(1);
+-      if (sb_rdonly(sb))
++      if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT))
+               return;
+-      if (!test_opt(sb, ERRORS_CONT)) {
+-              journal_t *journal = EXT4_SB(sb)->s_journal;
+-
+-              ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+-              if (journal)
+-                      jbd2_journal_abort(journal, -EIO);
+-      }
++      ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
++      if (journal)
++              jbd2_journal_abort(journal, -EIO);
+       /*
+        * We force ERRORS_RO behavior when system is rebooting. Otherwise we
+        * could panic during 'reboot -f' as the underlying device got already
diff --git a/queue-5.10/ext4-fix-a-memory-leak-of-ext4_free_data.patch b/queue-5.10/ext4-fix-a-memory-leak-of-ext4_free_data.patch
new file mode 100644 (file)
index 0000000..8368338
--- /dev/null
@@ -0,0 +1,40 @@
+From cca415537244f6102cbb09b5b90db6ae2c953bdd Mon Sep 17 00:00:00 2001
+From: Chunguang Xu <brookxu@tencent.com>
+Date: Sat, 7 Nov 2020 23:58:18 +0800
+Subject: ext4: fix a memory leak of ext4_free_data
+
+From: Chunguang Xu <brookxu@tencent.com>
+
+commit cca415537244f6102cbb09b5b90db6ae2c953bdd upstream.
+
+When freeing metadata, we will create an ext4_free_data and
+insert it into the pending free list.  After the current
+transaction is committed, the object will be freed.
+
+ext4_mb_free_metadata() will check whether the area to be freed
+overlaps with the pending free list. If true, return directly. At this
+time, ext4_free_data is leaked.  Fortunately, the probability of this
+problem is small, since it only occurs if the file system is corrupted
+such that a block is claimed by more than one inode and those inodes are
+deleted within a single jbd2 transaction.
+
+Signed-off-by: Chunguang Xu <brookxu@tencent.com>
+Link: https://lore.kernel.org/r/1604764698-4269-8-git-send-email-brookxu@tencent.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/mballoc.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -5126,6 +5126,7 @@ ext4_mb_free_metadata(handle_t *handle,
+                               ext4_group_first_block_no(sb, group) +
+                               EXT4_C2B(sbi, cluster),
+                               "Block already on to-be-freed list");
++                      kmem_cache_free(ext4_free_data_cachep, new_entry);
+                       return 0;
+               }
+       }
diff --git a/queue-5.10/ext4-fix-an-is_err-vs-null-check.patch b/queue-5.10/ext4-fix-an-is_err-vs-null-check.patch
new file mode 100644 (file)
index 0000000..87240c8
--- /dev/null
@@ -0,0 +1,37 @@
+From bc18546bf68e47996a359d2533168d5770a22024 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 23 Oct 2020 14:22:32 +0300
+Subject: ext4: fix an IS_ERR() vs NULL check
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit bc18546bf68e47996a359d2533168d5770a22024 upstream.
+
+The ext4_find_extent() function never returns NULL, it returns error
+pointers.
+
+Fixes: 44059e503b03 ("ext4: fast commit recovery path")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20201023112232.GB282278@mwanda
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/extents.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -5815,8 +5815,8 @@ int ext4_ext_replay_update_ex(struct ino
+       int ret;
+       path = ext4_find_extent(inode, start, NULL, 0);
+-      if (!path)
+-              return -EINVAL;
++      if (IS_ERR(path))
++              return PTR_ERR(path);
+       ex = path[path->p_depth].p_ext;
+       if (!ex) {
+               ret = -EFSCORRUPTED;
diff --git a/queue-5.10/ext4-fix-deadlock-with-fs-freezing-and-ea-inodes.patch b/queue-5.10/ext4-fix-deadlock-with-fs-freezing-and-ea-inodes.patch
new file mode 100644 (file)
index 0000000..5d67f1f
--- /dev/null
@@ -0,0 +1,110 @@
+From 46e294efc355c48d1dd4d58501aa56dac461792a Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 27 Nov 2020 12:06:49 +0100
+Subject: ext4: fix deadlock with fs freezing and EA inodes
+
+From: Jan Kara <jack@suse.cz>
+
+commit 46e294efc355c48d1dd4d58501aa56dac461792a upstream.
+
+Xattr code using inodes with large xattr data can end up dropping last
+inode reference (and thus deleting the inode) from places like
+ext4_xattr_set_entry(). That function is called with transaction started
+and so ext4_evict_inode() can deadlock against fs freezing like:
+
+CPU1                                   CPU2
+
+removexattr()                          freeze_super()
+  vfs_removexattr()
+    ext4_xattr_set()
+      handle = ext4_journal_start()
+      ...
+      ext4_xattr_set_entry()
+        iput(old_ea_inode)
+          ext4_evict_inode(old_ea_inode)
+                                         sb->s_writers.frozen = SB_FREEZE_FS;
+                                         sb_wait_write(sb, SB_FREEZE_FS);
+                                         ext4_freeze()
+                                           jbd2_journal_lock_updates()
+                                             -> blocks waiting for all
+                                                handles to stop
+            sb_start_intwrite()
+             -> blocks as sb is already in SB_FREEZE_FS state
+
+Generally it is advisable to delete inodes from a separate transaction,
+as it can consume quite some credits; however, in this case it would be
+quite clumsy, and furthermore the credits for inode deletion are quite
+limited and already accounted for. So just tweak ext4_evict_inode() to
+avoid freeze protection if we have a transaction already started, since
+then it is not really needed anyway.
+
+Cc: stable@vger.kernel.org
+Fixes: dec214d00e0d ("ext4: xattr inode deduplication")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20201127110649.24730-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c |   21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -175,6 +175,7 @@ void ext4_evict_inode(struct inode *inod
+        */
+       int extra_credits = 6;
+       struct ext4_xattr_inode_array *ea_inode_array = NULL;
++      bool freeze_protected = false;
+       trace_ext4_evict_inode(inode);
+@@ -232,9 +233,14 @@ void ext4_evict_inode(struct inode *inod
+       /*
+        * Protect us against freezing - iput() caller didn't have to have any
+-       * protection against it
+-       */
+-      sb_start_intwrite(inode->i_sb);
++       * protection against it. When we are in a running transaction though,
++       * we are already protected against freezing and we cannot grab further
++       * protection due to lock ordering constraints.
++       */
++      if (!ext4_journal_current_handle()) {
++              sb_start_intwrite(inode->i_sb);
++              freeze_protected = true;
++      }
+       if (!IS_NOQUOTA(inode))
+               extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
+@@ -253,7 +259,8 @@ void ext4_evict_inode(struct inode *inod
+                * cleaned up.
+                */
+               ext4_orphan_del(NULL, inode);
+-              sb_end_intwrite(inode->i_sb);
++              if (freeze_protected)
++                      sb_end_intwrite(inode->i_sb);
+               goto no_delete;
+       }
+@@ -294,7 +301,8 @@ void ext4_evict_inode(struct inode *inod
+ stop_handle:
+               ext4_journal_stop(handle);
+               ext4_orphan_del(NULL, inode);
+-              sb_end_intwrite(inode->i_sb);
++              if (freeze_protected)
++                      sb_end_intwrite(inode->i_sb);
+               ext4_xattr_inode_array_free(ea_inode_array);
+               goto no_delete;
+       }
+@@ -323,7 +331,8 @@ stop_handle:
+       else
+               ext4_free_inode(handle, inode);
+       ext4_journal_stop(handle);
+-      sb_end_intwrite(inode->i_sb);
++      if (freeze_protected)
++              sb_end_intwrite(inode->i_sb);
+       ext4_xattr_inode_array_free(ea_inode_array);
+       return;
+ no_delete:
diff --git a/queue-5.10/fsnotify-fix-events-reported-to-watching-parent-and-child.patch b/queue-5.10/fsnotify-fix-events-reported-to-watching-parent-and-child.patch
new file mode 100644 (file)
index 0000000..05003c2
--- /dev/null
@@ -0,0 +1,248 @@
+From fecc4559780d52d174ea05e3bf543669165389c3 Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Wed, 2 Dec 2020 14:07:09 +0200
+Subject: fsnotify: fix events reported to watching parent and child
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit fecc4559780d52d174ea05e3bf543669165389c3 upstream.
+
+fsnotify_parent() used to send two separate events to backends when a
+parent inode is watching children and the child inode is also watching.
+In an attempt to avoid duplicate events in fanotify, we unified the two
+backend callbacks to a single callback and handled the reporting of the
+two separate events for the relevant backends (inotify and dnotify).
+However the handling is buggy and can result in inotify and dnotify
+listeners receiving events of the type they never asked for or spurious
+events.
+
+The problem is the unified event callback with two inode marks (parent and
+child) is called when any of the parent and child inodes are watched and
+interested in the event, but the parent inode's mark that is interested
+in the event on the child is not necessarily the one we are currently
+reporting to (it could belong to a different group).
+
+So before reporting the parent or child event flavor to backend we need
+to check that the mark is really interested in that event flavor.
+
+The semantics of INODE and CHILD marks were hard to follow and made the
+logic more complicated than it should have been.  Replace it with INODE
+and PARENT marks semantics to hopefully make the logic more clear.
+
+Thanks to Hugh Dickins for spotting a bug in the earlier version of this
+patch.
+
+Fixes: 497b0c5a7c06 ("fsnotify: send event to parent and child with single callback")
+CC: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201202120713.702387-4-amir73il@gmail.com
+Reported-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/notify/fanotify/fanotify.c    |    7 +--
+ fs/notify/fsnotify.c             |   84 +++++++++++++++++++++++----------------
+ include/linux/fsnotify_backend.h |    6 +-
+ 3 files changed, 57 insertions(+), 40 deletions(-)
+
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -268,12 +268,11 @@ static u32 fanotify_group_event_mask(str
+                       continue;
+               /*
+-               * If the event is for a child and this mark is on a parent not
++               * If the event is on a child and this mark is on a parent not
+                * watching children, don't send it!
+                */
+-              if (event_mask & FS_EVENT_ON_CHILD &&
+-                  type == FSNOTIFY_OBJ_TYPE_INODE &&
+-                   !(mark->mask & FS_EVENT_ON_CHILD))
++              if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
++                  !(mark->mask & FS_EVENT_ON_CHILD))
+                       continue;
+               marks_mask |= mark->mask;
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -152,6 +152,13 @@ static bool fsnotify_event_needs_parent(
+       if (mask & FS_ISDIR)
+               return false;
++      /*
++       * All events that are possible on child can also be reported with
++       * parent/name info to inode/sb/mount.  Otherwise, a watching parent
++       * could result in events reported with unexpected name info to sb/mount.
++       */
++      BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);
++
+       /* Did either inode/sb/mount subscribe for events with parent/name? */
+       marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
+       marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
+@@ -249,6 +256,10 @@ static int fsnotify_handle_inode_event(s
+           path && d_unlinked(path->dentry))
+               return 0;
++      /* Check interest of this mark in case event was sent with two marks */
++      if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS))
++              return 0;
++
+       return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie);
+ }
+@@ -258,38 +269,46 @@ static int fsnotify_handle_event(struct
+                                u32 cookie, struct fsnotify_iter_info *iter_info)
+ {
+       struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
+-      struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
++      struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info);
+       int ret;
+       if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
+           WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
+               return 0;
+-      /*
+-       * An event can be sent on child mark iterator instead of inode mark
+-       * iterator because of other groups that have interest of this inode
+-       * and have marks on both parent and child.  We can simplify this case.
+-       */
+-      if (!inode_mark) {
+-              inode_mark = child_mark;
+-              child_mark = NULL;
++      if (parent_mark) {
++              /*
++               * parent_mark indicates that the parent inode is watching
++               * children and interested in this event, which is an event
++               * possible on child. But is *this mark* watching children and
++               * interested in this event?
++               */
++              if (parent_mark->mask & FS_EVENT_ON_CHILD) {
++                      ret = fsnotify_handle_inode_event(group, parent_mark, mask,
++                                                        data, data_type, dir, name, 0);
++                      if (ret)
++                              return ret;
++              }
++              if (!inode_mark)
++                      return 0;
++      }
++
++      if (mask & FS_EVENT_ON_CHILD) {
++              /*
++               * Some events can be sent on both parent dir and child marks
++               * (e.g. FS_ATTRIB).  If both parent dir and child are
++               * watching, report the event once to parent dir with name (if
++               * interested) and once to child without name (if interested).
++               * The child watcher is expecting an event without a file name
++               * and without the FS_EVENT_ON_CHILD flag.
++               */
++              mask &= ~FS_EVENT_ON_CHILD;
+               dir = NULL;
+               name = NULL;
+       }
+-      ret = fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
+-                                        dir, name, cookie);
+-      if (ret || !child_mark)
+-              return ret;
+-
+-      /*
+-       * Some events can be sent on both parent dir and child marks
+-       * (e.g. FS_ATTRIB).  If both parent dir and child are watching,
+-       * report the event once to parent dir with name and once to child
+-       * without name.
+-       */
+-      return fsnotify_handle_inode_event(group, child_mark, mask, data, data_type,
+-                                         NULL, NULL, 0);
++      return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
++                                         dir, name, cookie);
+ }
+ static int send_to_group(__u32 mask, const void *data, int data_type,
+@@ -447,7 +466,7 @@ int fsnotify(__u32 mask, const void *dat
+       struct fsnotify_iter_info iter_info = {};
+       struct super_block *sb;
+       struct mount *mnt = NULL;
+-      struct inode *child = NULL;
++      struct inode *parent = NULL;
+       int ret = 0;
+       __u32 test_mask, marks_mask;
+@@ -459,11 +478,10 @@ int fsnotify(__u32 mask, const void *dat
+               inode = dir;
+       } else if (mask & FS_EVENT_ON_CHILD) {
+               /*
+-               * Event on child - report on TYPE_INODE to dir if it is
+-               * watching children and on TYPE_CHILD to child.
++               * Event on child - report on TYPE_PARENT to dir if it is
++               * watching children and on TYPE_INODE to child.
+                */
+-              child = inode;
+-              inode = dir;
++              parent = dir;
+       }
+       sb = inode->i_sb;
+@@ -477,7 +495,7 @@ int fsnotify(__u32 mask, const void *dat
+       if (!sb->s_fsnotify_marks &&
+           (!mnt || !mnt->mnt_fsnotify_marks) &&
+           (!inode || !inode->i_fsnotify_marks) &&
+-          (!child || !child->i_fsnotify_marks))
++          (!parent || !parent->i_fsnotify_marks))
+               return 0;
+       marks_mask = sb->s_fsnotify_mask;
+@@ -485,8 +503,8 @@ int fsnotify(__u32 mask, const void *dat
+               marks_mask |= mnt->mnt_fsnotify_mask;
+       if (inode)
+               marks_mask |= inode->i_fsnotify_mask;
+-      if (child)
+-              marks_mask |= child->i_fsnotify_mask;
++      if (parent)
++              marks_mask |= parent->i_fsnotify_mask;
+       /*
+@@ -509,9 +527,9 @@ int fsnotify(__u32 mask, const void *dat
+               iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
+                       fsnotify_first_mark(&inode->i_fsnotify_marks);
+       }
+-      if (child) {
+-              iter_info.marks[FSNOTIFY_OBJ_TYPE_CHILD] =
+-                      fsnotify_first_mark(&child->i_fsnotify_marks);
++      if (parent) {
++              iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
++                      fsnotify_first_mark(&parent->i_fsnotify_marks);
+       }
+       /*
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -278,7 +278,7 @@ static inline const struct path *fsnotif
+ enum fsnotify_obj_type {
+       FSNOTIFY_OBJ_TYPE_INODE,
+-      FSNOTIFY_OBJ_TYPE_CHILD,
++      FSNOTIFY_OBJ_TYPE_PARENT,
+       FSNOTIFY_OBJ_TYPE_VFSMOUNT,
+       FSNOTIFY_OBJ_TYPE_SB,
+       FSNOTIFY_OBJ_TYPE_COUNT,
+@@ -286,7 +286,7 @@ enum fsnotify_obj_type {
+ };
+ #define FSNOTIFY_OBJ_TYPE_INODE_FL    (1U << FSNOTIFY_OBJ_TYPE_INODE)
+-#define FSNOTIFY_OBJ_TYPE_CHILD_FL    (1U << FSNOTIFY_OBJ_TYPE_CHILD)
++#define FSNOTIFY_OBJ_TYPE_PARENT_FL   (1U << FSNOTIFY_OBJ_TYPE_PARENT)
+ #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+ #define FSNOTIFY_OBJ_TYPE_SB_FL               (1U << FSNOTIFY_OBJ_TYPE_SB)
+ #define FSNOTIFY_OBJ_ALL_TYPES_MASK   ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
+@@ -331,7 +331,7 @@ static inline struct fsnotify_mark *fsno
+ }
+ FSNOTIFY_ITER_FUNCS(inode, INODE)
+-FSNOTIFY_ITER_FUNCS(child, CHILD)
++FSNOTIFY_ITER_FUNCS(parent, PARENT)
+ FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
+ FSNOTIFY_ITER_FUNCS(sb, SB)
diff --git a/queue-5.10/fsnotify-generalize-handle_inode_event.patch b/queue-5.10/fsnotify-generalize-handle_inode_event.patch
new file mode 100644 (file)
index 0000000..975abd5
--- /dev/null
@@ -0,0 +1,180 @@
+From 950cc0d2bef078e1f6459900ca4d4b2a2e0e3c37 Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Wed, 2 Dec 2020 14:07:07 +0200
+Subject: fsnotify: generalize handle_inode_event()
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 950cc0d2bef078e1f6459900ca4d4b2a2e0e3c37 upstream.
+
+The handle_inode_event() interface was added as (quoting comment):
+"a simple variant of handle_event() for groups that only have inode
+marks and don't have ignore mask".
+
+In other words, all backends except fanotify.  The inotify backend
+also falls under this category, but because it required extra arguments
+it was left out of the initial pass of backends conversion to the
+simple interface.
+
+This results in code duplication between the generic helper
+fsnotify_handle_event() and the inotify_handle_event() callback,
+which also happens to be buggy code.
+
+Generalize the handle_inode_event() arguments and add the check for
+FS_EXCL_UNLINK flag to the generic helper, so inotify backend could
+be converted to use the simple interface.
+
+Link: https://lore.kernel.org/r/20201202120713.702387-2-amir73il@gmail.com
+CC: stable@vger.kernel.org
+Fixes: b9a1b9772509 ("fsnotify: create method handle_inode_event() in fsnotify_operations")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/filecache.c              |    2 +-
+ fs/notify/dnotify/dnotify.c      |    2 +-
+ fs/notify/fsnotify.c             |   31 ++++++++++++++++++++++++-------
+ include/linux/fsnotify_backend.h |    3 ++-
+ kernel/audit_fsnotify.c          |    2 +-
+ kernel/audit_tree.c              |    2 +-
+ kernel/audit_watch.c             |    2 +-
+ 7 files changed, 31 insertions(+), 13 deletions(-)
+
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -600,7 +600,7 @@ static struct notifier_block nfsd_file_l
+ static int
+ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
+                               struct inode *inode, struct inode *dir,
+-                              const struct qstr *name)
++                              const struct qstr *name, u32 cookie)
+ {
+       trace_nfsd_file_fsnotify_handle_event(inode, mask);
+--- a/fs/notify/dnotify/dnotify.c
++++ b/fs/notify/dnotify/dnotify.c
+@@ -72,7 +72,7 @@ static void dnotify_recalc_inode_mask(st
+  */
+ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
+                               struct inode *inode, struct inode *dir,
+-                              const struct qstr *name)
++                              const struct qstr *name, u32 cookie)
+ {
+       struct dnotify_mark *dn_mark;
+       struct dnotify_struct *dn;
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -232,6 +232,26 @@ notify:
+ }
+ EXPORT_SYMBOL_GPL(__fsnotify_parent);
++static int fsnotify_handle_inode_event(struct fsnotify_group *group,
++                                     struct fsnotify_mark *inode_mark,
++                                     u32 mask, const void *data, int data_type,
++                                     struct inode *dir, const struct qstr *name,
++                                     u32 cookie)
++{
++      const struct path *path = fsnotify_data_path(data, data_type);
++      struct inode *inode = fsnotify_data_inode(data, data_type);
++      const struct fsnotify_ops *ops = group->ops;
++
++      if (WARN_ON_ONCE(!ops->handle_inode_event))
++              return 0;
++
++      if ((inode_mark->mask & FS_EXCL_UNLINK) &&
++          path && d_unlinked(path->dentry))
++              return 0;
++
++      return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie);
++}
++
+ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
+                                const void *data, int data_type,
+                                struct inode *dir, const struct qstr *name,
+@@ -239,13 +259,8 @@ static int fsnotify_handle_event(struct
+ {
+       struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
+       struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
+-      struct inode *inode = fsnotify_data_inode(data, data_type);
+-      const struct fsnotify_ops *ops = group->ops;
+       int ret;
+-      if (WARN_ON_ONCE(!ops->handle_inode_event))
+-              return 0;
+-
+       if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
+           WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
+               return 0;
+@@ -262,7 +277,8 @@ static int fsnotify_handle_event(struct
+               name = NULL;
+       }
+-      ret = ops->handle_inode_event(inode_mark, mask, inode, dir, name);
++      ret = fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
++                                        dir, name, cookie);
+       if (ret || !child_mark)
+               return ret;
+@@ -272,7 +288,8 @@ static int fsnotify_handle_event(struct
+        * report the event once to parent dir with name and once to child
+        * without name.
+        */
+-      return ops->handle_inode_event(child_mark, mask, inode, NULL, NULL);
++      return fsnotify_handle_inode_event(group, child_mark, mask, data, data_type,
++                                         NULL, NULL, 0);
+ }
+ static int send_to_group(__u32 mask, const void *data, int data_type,
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -137,6 +137,7 @@ struct mem_cgroup;
+  *            if @file_name is not NULL, this is the directory that
+  *            @file_name is relative to.
+  * @file_name:        optional file name associated with event
++ * @cookie:   inotify rename cookie
+  *
+  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
+  * freeing_mark - called when a mark is being destroyed for some reason.  The group
+@@ -151,7 +152,7 @@ struct fsnotify_ops {
+                           struct fsnotify_iter_info *iter_info);
+       int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask,
+                           struct inode *inode, struct inode *dir,
+-                          const struct qstr *file_name);
++                          const struct qstr *file_name, u32 cookie);
+       void (*free_group_priv)(struct fsnotify_group *group);
+       void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
+       void (*free_event)(struct fsnotify_event *event);
+--- a/kernel/audit_fsnotify.c
++++ b/kernel/audit_fsnotify.c
+@@ -154,7 +154,7 @@ static void audit_autoremove_mark_rule(s
+ /* Update mark data in audit rules based on fsnotify events. */
+ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
+                                  struct inode *inode, struct inode *dir,
+-                                 const struct qstr *dname)
++                                 const struct qstr *dname, u32 cookie)
+ {
+       struct audit_fsnotify_mark *audit_mark;
+--- a/kernel/audit_tree.c
++++ b/kernel/audit_tree.c
+@@ -1037,7 +1037,7 @@ static void evict_chunk(struct audit_chu
+ static int audit_tree_handle_event(struct fsnotify_mark *mark, u32 mask,
+                                  struct inode *inode, struct inode *dir,
+-                                 const struct qstr *file_name)
++                                 const struct qstr *file_name, u32 cookie)
+ {
+       return 0;
+ }
+--- a/kernel/audit_watch.c
++++ b/kernel/audit_watch.c
+@@ -466,7 +466,7 @@ void audit_remove_watch_rule(struct audi
+ /* Update watch data in audit rules based on fsnotify events. */
+ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
+                                   struct inode *inode, struct inode *dir,
+-                                  const struct qstr *dname)
++                                  const struct qstr *dname, u32 cookie)
+ {
+       struct audit_parent *parent;
diff --git a/queue-5.10/ima-don-t-modify-file-descriptor-mode-on-the-fly.patch b/queue-5.10/ima-don-t-modify-file-descriptor-mode-on-the-fly.patch
new file mode 100644 (file)
index 0000000..2274324
--- /dev/null
@@ -0,0 +1,75 @@
+From 207cdd565dfc95a0a5185263a567817b7ebf5467 Mon Sep 17 00:00:00 2001
+From: Roberto Sassu <roberto.sassu@huawei.com>
+Date: Thu, 26 Nov 2020 11:34:56 +0100
+Subject: ima: Don't modify file descriptor mode on the fly
+
+From: Roberto Sassu <roberto.sassu@huawei.com>
+
+commit 207cdd565dfc95a0a5185263a567817b7ebf5467 upstream.
+
+Commit a408e4a86b36b ("ima: open a new file instance if no read
+permissions") already introduced a second open to measure a file when the
+original file descriptor does not allow it. However, it didn't remove the
+existing method of changing the mode of the original file descriptor, which
+is still necessary if the current process does not have enough privileges
+to open a new one.
+
+Changing the mode isn't really an option, as the filesystem might need to
+do preliminary steps to make the read possible. Thus, this patch removes
+the code and keeps the second open as the only option to measure a file
+when it is unreadable with the original file descriptor.
+
+Cc: <stable@vger.kernel.org> # 4.20.x: 0014cc04e8ec0 ima: Set file->f_mode
+Fixes: 2fe5d6def1672 ("ima: integrity appraisal extension")
+Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ security/integrity/ima/ima_crypto.c |   20 +++++---------------
+ 1 file changed, 5 insertions(+), 15 deletions(-)
+
+--- a/security/integrity/ima/ima_crypto.c
++++ b/security/integrity/ima/ima_crypto.c
+@@ -537,7 +537,7 @@ int ima_calc_file_hash(struct file *file
+       loff_t i_size;
+       int rc;
+       struct file *f = file;
+-      bool new_file_instance = false, modified_mode = false;
++      bool new_file_instance = false;
+       /*
+        * For consistency, fail file's opened with the O_DIRECT flag on
+@@ -555,18 +555,10 @@ int ima_calc_file_hash(struct file *file
+                               O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL);
+               flags |= O_RDONLY;
+               f = dentry_open(&file->f_path, flags, file->f_cred);
+-              if (IS_ERR(f)) {
+-                      /*
+-                       * Cannot open the file again, lets modify f_mode
+-                       * of original and continue
+-                       */
+-                      pr_info_ratelimited("Unable to reopen file for reading.\n");
+-                      f = file;
+-                      f->f_mode |= FMODE_READ;
+-                      modified_mode = true;
+-              } else {
+-                      new_file_instance = true;
+-              }
++              if (IS_ERR(f))
++                      return PTR_ERR(f);
++
++              new_file_instance = true;
+       }
+       i_size = i_size_read(file_inode(f));
+@@ -581,8 +573,6 @@ int ima_calc_file_hash(struct file *file
+ out:
+       if (new_file_instance)
+               fput(f);
+-      else if (modified_mode)
+-              f->f_mode &= ~FMODE_READ;
+       return rc;
+ }
diff --git a/queue-5.10/inotify-convert-to-handle_inode_event-interface.patch b/queue-5.10/inotify-convert-to-handle_inode_event-interface.patch
new file mode 100644 (file)
index 0000000..5a651fe
--- /dev/null
@@ -0,0 +1,156 @@
+From 1a2620a99803ad660edc5d22fd9c66cce91ceb1c Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Wed, 2 Dec 2020 14:07:08 +0200
+Subject: inotify: convert to handle_inode_event() interface
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 1a2620a99803ad660edc5d22fd9c66cce91ceb1c upstream.
+
+Convert inotify to use the simple handle_inode_event() interface to
+get rid of the code duplication between the generic helper
+fsnotify_handle_event() and the inotify_handle_event() callback, which
+also happens to be buggy code.
+
+The bug will be fixed in the generic helper.
+
+Link: https://lore.kernel.org/r/20201202120713.702387-3-amir73il@gmail.com
+CC: stable@vger.kernel.org
+Fixes: b9a1b9772509 ("fsnotify: create method handle_inode_event() in fsnotify_operations")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/notify/inotify/inotify.h          |    9 ++----
+ fs/notify/inotify/inotify_fsnotify.c |   51 +++++------------------------------
+ fs/notify/inotify/inotify_user.c     |    8 +----
+ 3 files changed, 14 insertions(+), 54 deletions(-)
+
+--- a/fs/notify/inotify/inotify.h
++++ b/fs/notify/inotify/inotify.h
+@@ -24,11 +24,10 @@ static inline struct inotify_event_info
+ extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
+                                          struct fsnotify_group *group);
+-extern int inotify_handle_event(struct fsnotify_group *group, u32 mask,
+-                              const void *data, int data_type,
+-                              struct inode *dir,
+-                              const struct qstr *file_name, u32 cookie,
+-                              struct fsnotify_iter_info *iter_info);
++extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
++                                    u32 mask, struct inode *inode,
++                                    struct inode *dir,
++                                    const struct qstr *name, u32 cookie);
+ extern const struct fsnotify_ops inotify_fsnotify_ops;
+ extern struct kmem_cache *inotify_inode_mark_cachep;
+--- a/fs/notify/inotify/inotify_fsnotify.c
++++ b/fs/notify/inotify/inotify_fsnotify.c
+@@ -55,25 +55,21 @@ static int inotify_merge(struct list_hea
+       return event_compare(last_event, event);
+ }
+-static int inotify_one_event(struct fsnotify_group *group, u32 mask,
+-                           struct fsnotify_mark *inode_mark,
+-                           const struct path *path,
+-                           const struct qstr *file_name, u32 cookie)
++int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
++                             struct inode *inode, struct inode *dir,
++                             const struct qstr *name, u32 cookie)
+ {
+       struct inotify_inode_mark *i_mark;
+       struct inotify_event_info *event;
+       struct fsnotify_event *fsn_event;
++      struct fsnotify_group *group = inode_mark->group;
+       int ret;
+       int len = 0;
+       int alloc_len = sizeof(struct inotify_event_info);
+       struct mem_cgroup *old_memcg;
+-      if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+-          path && d_unlinked(path->dentry))
+-              return 0;
+-
+-      if (file_name) {
+-              len = file_name->len;
++      if (name) {
++              len = name->len;
+               alloc_len += len + 1;
+       }
+@@ -117,7 +113,7 @@ static int inotify_one_event(struct fsno
+       event->sync_cookie = cookie;
+       event->name_len = len;
+       if (len)
+-              strcpy(event->name, file_name->name);
++              strcpy(event->name, name->name);
+       ret = fsnotify_add_event(group, fsn_event, inotify_merge);
+       if (ret) {
+@@ -131,37 +127,6 @@ static int inotify_one_event(struct fsno
+       return 0;
+ }
+-int inotify_handle_event(struct fsnotify_group *group, u32 mask,
+-                       const void *data, int data_type, struct inode *dir,
+-                       const struct qstr *file_name, u32 cookie,
+-                       struct fsnotify_iter_info *iter_info)
+-{
+-      const struct path *path = fsnotify_data_path(data, data_type);
+-      struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
+-      struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
+-      int ret = 0;
+-
+-      if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
+-              return 0;
+-
+-      /*
+-       * Some events cannot be sent on both parent and child marks
+-       * (e.g. IN_CREATE).  Those events are always sent on inode_mark.
+-       * For events that are possible on both parent and child (e.g. IN_OPEN),
+-       * event is sent on inode_mark with name if the parent is watching and
+-       * is sent on child_mark without name if child is watching.
+-       * If both parent and child are watching, report the event with child's
+-       * name here and report another event without child's name below.
+-       */
+-      if (inode_mark)
+-              ret = inotify_one_event(group, mask, inode_mark, path,
+-                                      file_name, cookie);
+-      if (ret || !child_mark)
+-              return ret;
+-
+-      return inotify_one_event(group, mask, child_mark, path, NULL, 0);
+-}
+-
+ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
+ {
+       inotify_ignored_and_remove_idr(fsn_mark, group);
+@@ -227,7 +192,7 @@ static void inotify_free_mark(struct fsn
+ }
+ const struct fsnotify_ops inotify_fsnotify_ops = {
+-      .handle_event = inotify_handle_event,
++      .handle_inode_event = inotify_handle_inode_event,
+       .free_group_priv = inotify_free_group_priv,
+       .free_event = inotify_free_event,
+       .freeing_mark = inotify_freeing_mark,
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -486,14 +486,10 @@ void inotify_ignored_and_remove_idr(stru
+                                   struct fsnotify_group *group)
+ {
+       struct inotify_inode_mark *i_mark;
+-      struct fsnotify_iter_info iter_info = { };
+-
+-      fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE,
+-                                         fsn_mark);
+       /* Queue ignore event for the watch */
+-      inotify_handle_event(group, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE,
+-                           NULL, NULL, 0, &iter_info);
++      inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
++                                 0);
+       i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
+       /* remove this mark from the idr */
diff --git a/queue-5.10/iommu-arm-smmu-allow-implementation-specific-write_s2cr.patch b/queue-5.10/iommu-arm-smmu-allow-implementation-specific-write_s2cr.patch
new file mode 100644 (file)
index 0000000..2d53087
--- /dev/null
@@ -0,0 +1,63 @@
+From 56b75b51ed6d5e7bffda59440404409bca2dff00 Mon Sep 17 00:00:00 2001
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+Date: Mon, 19 Oct 2020 11:23:21 -0700
+Subject: iommu/arm-smmu: Allow implementation specific write_s2cr
+
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+
+commit 56b75b51ed6d5e7bffda59440404409bca2dff00 upstream.
+
+The firmware found in some Qualcomm platforms intercepts writes to the
+S2CR register in order to replace the BYPASS type with FAULT.
+Furthermore, it treats faults at this level as catastrophic and restarts
+the device.
+
+Add support for providing implementation specific versions of the S2CR
+write function, to allow the Qualcomm driver to work around this
+behavior.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Tested-by: Steev Klimaszewski <steev@kali.org>
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Link: https://lore.kernel.org/r/20201019182323.3162386-2-bjorn.andersson@linaro.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/arm/arm-smmu/arm-smmu.c |   13 ++++++++++---
+ drivers/iommu/arm/arm-smmu/arm-smmu.h |    1 +
+ 2 files changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
+@@ -929,9 +929,16 @@ static void arm_smmu_write_smr(struct ar
+ static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
+ {
+       struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
+-      u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
+-                FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
+-                FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
++      u32 reg;
++
++      if (smmu->impl && smmu->impl->write_s2cr) {
++              smmu->impl->write_s2cr(smmu, idx);
++              return;
++      }
++
++      reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
++            FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
++            FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
+       if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
+           smmu->smrs[idx].valid)
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
+@@ -436,6 +436,7 @@ struct arm_smmu_impl {
+       int (*alloc_context_bank)(struct arm_smmu_domain *smmu_domain,
+                                 struct arm_smmu_device *smmu,
+                                 struct device *dev, int start);
++      void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
+ };
+ #define INVALID_SMENDX                        -1
diff --git a/queue-5.10/iommu-arm-smmu-qcom-implement-s2cr-quirk.patch b/queue-5.10/iommu-arm-smmu-qcom-implement-s2cr-quirk.patch
new file mode 100644 (file)
index 0000000..eefe551
--- /dev/null
@@ -0,0 +1,135 @@
+From f9081b8ff5934b8d69c748d0200e844cadd2c667 Mon Sep 17 00:00:00 2001
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+Date: Mon, 19 Oct 2020 11:23:23 -0700
+Subject: iommu/arm-smmu-qcom: Implement S2CR quirk
+
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+
+commit f9081b8ff5934b8d69c748d0200e844cadd2c667 upstream.
+
+The firmware found in some Qualcomm platforms intercepts writes to S2CR
+in order to replace bypass type streams with fault; and ignore S2CR
+updates of type fault.
+
+Detect this behavior and implement a custom write_s2cr function in order
+to trick the firmware into supporting bypass streams by the means of
+configuring the stream for translation using a reserved and disabled
+context bank.
+
+Also circumvent the problem of configuring faulting streams by
+configuring the stream as bypass.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Tested-by: Steev Klimaszewski <steev@kali.org>
+Acked-by: Robin Murphy <robin.murphy@arm.com>
+Link: https://lore.kernel.org/r/20201019182323.3162386-4-bjorn.andersson@linaro.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c |   67 +++++++++++++++++++++++++++++
+ 1 file changed, 67 insertions(+)
+
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+@@ -10,8 +10,15 @@
+ struct qcom_smmu {
+       struct arm_smmu_device smmu;
++      bool bypass_quirk;
++      u8 bypass_cbndx;
+ };
++static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
++{
++      return container_of(smmu, struct qcom_smmu, smmu);
++}
++
+ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
+       { .compatible = "qcom,adreno" },
+       { .compatible = "qcom,mdp4" },
+@@ -25,9 +32,33 @@ static const struct of_device_id qcom_sm
+ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
+ {
++      unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
++      struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
++      u32 reg;
+       u32 smr;
+       int i;
++      /*
++       * With some firmware versions writes to S2CR of type FAULT are
++       * ignored, and writing BYPASS will end up written as FAULT in the
++       * register. Perform a write to S2CR to detect if this is the case and
++       * if so reserve a context bank to emulate bypass streams.
++       */
++      reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, S2CR_TYPE_BYPASS) |
++            FIELD_PREP(ARM_SMMU_S2CR_CBNDX, 0xff) |
++            FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, S2CR_PRIVCFG_DEFAULT);
++      arm_smmu_gr0_write(smmu, last_s2cr, reg);
++      reg = arm_smmu_gr0_read(smmu, last_s2cr);
++      if (FIELD_GET(ARM_SMMU_S2CR_TYPE, reg) != S2CR_TYPE_BYPASS) {
++              qsmmu->bypass_quirk = true;
++              qsmmu->bypass_cbndx = smmu->num_context_banks - 1;
++
++              set_bit(qsmmu->bypass_cbndx, smmu->context_map);
++
++              reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS);
++              arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg);
++      }
++
+       for (i = 0; i < smmu->num_mapping_groups; i++) {
+               smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
+@@ -45,6 +76,41 @@ static int qcom_smmu_cfg_probe(struct ar
+       return 0;
+ }
++static void qcom_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
++{
++      struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
++      struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
++      u32 cbndx = s2cr->cbndx;
++      u32 type = s2cr->type;
++      u32 reg;
++
++      if (qsmmu->bypass_quirk) {
++              if (type == S2CR_TYPE_BYPASS) {
++                      /*
++                       * Firmware with quirky S2CR handling will substitute
++                       * BYPASS writes with FAULT, so point the stream to the
++                       * reserved context bank and ask for translation on the
++                       * stream
++                       */
++                      type = S2CR_TYPE_TRANS;
++                      cbndx = qsmmu->bypass_cbndx;
++              } else if (type == S2CR_TYPE_FAULT) {
++                      /*
++                       * Firmware with quirky S2CR handling will ignore FAULT
++                       * writes, so trick it to write FAULT by asking for a
++                       * BYPASS.
++                       */
++                      type = S2CR_TYPE_BYPASS;
++                      cbndx = 0xff;
++              }
++      }
++
++      reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, type) |
++            FIELD_PREP(ARM_SMMU_S2CR_CBNDX, cbndx) |
++            FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
++      arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
++}
++
+ static int qcom_smmu_def_domain_type(struct device *dev)
+ {
+       const struct of_device_id *match =
+@@ -86,6 +152,7 @@ static const struct arm_smmu_impl qcom_s
+       .cfg_probe = qcom_smmu_cfg_probe,
+       .def_domain_type = qcom_smmu_def_domain_type,
+       .reset = qcom_smmu500_reset,
++      .write_s2cr = qcom_smmu_write_s2cr,
+ };
+ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/queue-5.10/iommu-arm-smmu-qcom-read-back-stream-mappings.patch b/queue-5.10/iommu-arm-smmu-qcom-read-back-stream-mappings.patch
new file mode 100644 (file)
index 0000000..0ee369e
--- /dev/null
@@ -0,0 +1,68 @@
+From 07a7f2caaa5a2619934491bab3c47b261c554fb0 Mon Sep 17 00:00:00 2001
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+Date: Mon, 19 Oct 2020 11:23:22 -0700
+Subject: iommu/arm-smmu-qcom: Read back stream mappings
+
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+
+commit 07a7f2caaa5a2619934491bab3c47b261c554fb0 upstream.
+
+The Qualcomm boot loader configures stream mapping for the peripherals
+that it accesses and in particular it sets up the stream mapping for the
+display controller to be allowed to scan out a splash screen or EFI
+framebuffer.
+
+Read back the stream mappings during initialization and make the
+arm-smmu driver maintain the streams in bypass mode.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Tested-by: Steev Klimaszewski <steev@kali.org>
+Acked-by: Robin Murphy <robin.murphy@arm.com>
+Link: https://lore.kernel.org/r/20201019182323.3162386-3-bjorn.andersson@linaro.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c |   23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+@@ -23,6 +23,28 @@ static const struct of_device_id qcom_sm
+       { }
+ };
++static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
++{
++      u32 smr;
++      int i;
++
++      for (i = 0; i < smmu->num_mapping_groups; i++) {
++              smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
++
++              if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) {
++                      smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr);
++                      smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
++                      smmu->smrs[i].valid = true;
++
++                      smmu->s2crs[i].type = S2CR_TYPE_BYPASS;
++                      smmu->s2crs[i].privcfg = S2CR_PRIVCFG_DEFAULT;
++                      smmu->s2crs[i].cbndx = 0xff;
++              }
++      }
++
++      return 0;
++}
++
+ static int qcom_smmu_def_domain_type(struct device *dev)
+ {
+       const struct of_device_id *match =
+@@ -61,6 +83,7 @@ static int qcom_smmu500_reset(struct arm
+ }
+ static const struct arm_smmu_impl qcom_smmu_impl = {
++      .cfg_probe = qcom_smmu_cfg_probe,
+       .def_domain_type = qcom_smmu_def_domain_type,
+       .reset = qcom_smmu500_reset,
+ };
diff --git a/queue-5.10/jffs2-fix-gc-exit-abnormally.patch b/queue-5.10/jffs2-fix-gc-exit-abnormally.patch
new file mode 100644 (file)
index 0000000..db239da
--- /dev/null
@@ -0,0 +1,76 @@
+From 9afc9a8a4909fece0e911e72b1060614ba2f7969 Mon Sep 17 00:00:00 2001
+From: Zhe Li <lizhe67@huawei.com>
+Date: Fri, 29 May 2020 11:37:11 +0800
+Subject: jffs2: Fix GC exit abnormally
+
+From: Zhe Li <lizhe67@huawei.com>
+
+commit 9afc9a8a4909fece0e911e72b1060614ba2f7969 upstream.
+
+The log of this problem is:
+jffs2: Error garbage collecting node at 0x***!
+jffs2: No space for garbage collection. Aborting GC thread
+
+This is because GC believes that it has done nothing, so it aborts.
+
+After going over the jffs2 image, I found a scenario that
+can trigger this problem reliably.
+The scenario is: a dirent node is valid in the summary area,
+but the corresponding node in the non-summary area has a wrong
+name_crc.
+
+GC exits abnormally because it picks that abnormal dirent
+node to collect, but when it reaches the function
+jffs2_add_fd_to_list, it cannot meet the condition listed
+below:
+
+if ((*prev)->nhash == new->nhash && !strcmp((*prev)->name, new->name))
+
+So no node is marked obsolete and the statistics of the
+erase block do not change, which causes GC to exit abnormally.
+
+The root cause of this problem is: we do not check the
+name_crc of the abnormal dirent node when summary is enabled.
+
+Note that in function jffs2_scan_dirent_node, we use
+function jffs2_scan_dirty_space to deal with a dirent
+node with a wrong name_crc. So this patch adds a check
+in function read_direntry to ensure the correctness
+of the dirent node. If the check fails, the dirent node is
+marked obsolete so GC will skip this node and this
+problem will be fixed.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Zhe Li <lizhe67@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jffs2/readinode.c |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/fs/jffs2/readinode.c
++++ b/fs/jffs2/readinode.c
+@@ -672,6 +672,22 @@ static inline int read_direntry(struct j
+                       jffs2_free_full_dirent(fd);
+                       return -EIO;
+               }
++
++#ifdef CONFIG_JFFS2_SUMMARY
++              /*
++               * we use CONFIG_JFFS2_SUMMARY because without it, we
++               * have checked it while mounting
++               */
++              crc = crc32(0, fd->name, rd->nsize);
++              if (unlikely(crc != je32_to_cpu(rd->name_crc))) {
++                      JFFS2_NOTICE("name CRC failed on dirent node at"
++                         "%#08x: read %#08x,calculated %#08x\n",
++                         ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
++                      jffs2_mark_node_obsolete(c, ref);
++                      jffs2_free_full_dirent(fd);
++                      return 0;
++              }
++#endif
+       }
+       fd->nhash = full_name_hash(NULL, fd->name, rd->nsize);
diff --git a/queue-5.10/jffs2-fix-ignoring-mounting-options-problem-during-remounting.patch b/queue-5.10/jffs2-fix-ignoring-mounting-options-problem-during-remounting.patch
new file mode 100644 (file)
index 0000000..6bf94b3
--- /dev/null
@@ -0,0 +1,65 @@
+From 08cd274f9b8283a1da93e2ccab216a336da83525 Mon Sep 17 00:00:00 2001
+From: lizhe <lizhe67@huawei.com>
+Date: Wed, 14 Oct 2020 14:54:42 +0800
+Subject: jffs2: Fix ignoring mounting options problem during remounting
+
+From: lizhe <lizhe67@huawei.com>
+
+commit 08cd274f9b8283a1da93e2ccab216a336da83525 upstream.
+
+The jffs2 mount options will be ignored when remounting jffs2.
+It can be easily reproduced with the steps listed below.
+1. mount -t jffs2 -o compr=none /dev/mtdblockx /mnt
+2. mount -o remount,compr=zlib /mnt
+
+Since ec10a24f10c8, the option parsing happens before fill_super, and
+fc, which contains the option parsing results, is then passed to
+jffs2_reconfigure during remounting. But jffs2_reconfigure does
+not update c->mount_opts.
+
+This patch adds a function jffs2_update_mount_opts to fix this problem.
+
+By the way, I notice that tmpfs uses the same way to update remounting
+options. Is it necessary to unify them?
+
+Cc: <stable@vger.kernel.org>
+Fixes: ec10a24f10c8 ("vfs: Convert jffs2 to use the new mount API")
+Signed-off-by: lizhe <lizhe67@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jffs2/super.c |   17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/fs/jffs2/super.c
++++ b/fs/jffs2/super.c
+@@ -215,11 +215,28 @@ static int jffs2_parse_param(struct fs_c
+       return 0;
+ }
++static inline void jffs2_update_mount_opts(struct fs_context *fc)
++{
++      struct jffs2_sb_info *new_c = fc->s_fs_info;
++      struct jffs2_sb_info *c = JFFS2_SB_INFO(fc->root->d_sb);
++
++      mutex_lock(&c->alloc_sem);
++      if (new_c->mount_opts.override_compr) {
++              c->mount_opts.override_compr = new_c->mount_opts.override_compr;
++              c->mount_opts.compr = new_c->mount_opts.compr;
++      }
++      if (new_c->mount_opts.rp_size)
++              c->mount_opts.rp_size = new_c->mount_opts.rp_size;
++      mutex_unlock(&c->alloc_sem);
++}
++
+ static int jffs2_reconfigure(struct fs_context *fc)
+ {
+       struct super_block *sb = fc->root->d_sb;
+       sync_filesystem(sb);
++      jffs2_update_mount_opts(fc);
++
+       return jffs2_do_remount_fs(sb, fc);
+ }
diff --git a/queue-5.10/kvm-arm64-introduce-handling-of-aarch32-ttbcr2-traps.patch b/queue-5.10/kvm-arm64-introduce-handling-of-aarch32-ttbcr2-traps.patch
new file mode 100644 (file)
index 0000000..4e3be27
--- /dev/null
@@ -0,0 +1,42 @@
+From ca4e514774930f30b66375a974b5edcbebaf0e7e Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Tue, 10 Nov 2020 11:10:15 +0000
+Subject: KVM: arm64: Introduce handling of AArch32 TTBCR2 traps
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit ca4e514774930f30b66375a974b5edcbebaf0e7e upstream.
+
+ARMv8.2 introduced TTBCR2, which shares TCR_EL1 with TTBCR.
+Gracefully handle traps to this register when HCR_EL2.TVM is set.
+
+Cc: stable@vger.kernel.org
+Reported-by: James Morse <james.morse@arm.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/kvm_host.h |    1 +
+ arch/arm64/kvm/sys_regs.c         |    1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -214,6 +214,7 @@ enum vcpu_sysreg {
+ #define c2_TTBR1      (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
+ #define c2_TTBR1_high (c2_TTBR1 + 1)  /* TTBR1 top 32 bits */
+ #define c2_TTBCR      (TCR_EL1 * 2)   /* Translation Table Base Control R. */
++#define c2_TTBCR2     (c2_TTBCR + 1)  /* Translation Table Base Control R. 2 */
+ #define c3_DACR               (DACR32_EL2 * 2)/* Domain Access Control Register */
+ #define c5_DFSR               (ESR_EL1 * 2)   /* Data Fault Status Register */
+ #define c5_IFSR               (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -1987,6 +1987,7 @@ static const struct sys_reg_desc cp15_re
+       { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+       { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+       { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
++      { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 },
+       { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+       { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+       { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
diff --git a/queue-5.10/kvm-svm-remove-the-call-to-sev_platform_status-during-setup.patch b/queue-5.10/kvm-svm-remove-the-call-to-sev_platform_status-during-setup.patch
new file mode 100644 (file)
index 0000000..c811f82
--- /dev/null
@@ -0,0 +1,67 @@
+From 9d4747d02376aeb8de38afa25430de79129c5799 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Thu, 10 Dec 2020 11:09:37 -0600
+Subject: KVM: SVM: Remove the call to sev_platform_status() during setup
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 9d4747d02376aeb8de38afa25430de79129c5799 upstream.
+
+When both KVM support and the CCP driver are built into the kernel instead
+of as modules, KVM initialization can happen before CCP initialization. As
+a result, sev_platform_status() will return a failure when it is called
+from sev_hardware_setup(), when this isn't really an error condition.
+
+Since sev_platform_status() doesn't need to be called at this time anyway,
+remove the invocation from sev_hardware_setup().
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Message-Id: <618380488358b56af558f2682203786f09a49483.1607620209.git.thomas.lendacky@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/svm/sev.c |   22 +---------------------
+ 1 file changed, 1 insertion(+), 21 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -1127,9 +1127,6 @@ void sev_vm_destroy(struct kvm *kvm)
+ int __init sev_hardware_setup(void)
+ {
+-      struct sev_user_data_status *status;
+-      int rc;
+-
+       /* Maximum number of encrypted guests supported simultaneously */
+       max_sev_asid = cpuid_ecx(0x8000001F);
+@@ -1148,26 +1145,9 @@ int __init sev_hardware_setup(void)
+       if (!sev_reclaim_asid_bitmap)
+               return 1;
+-      status = kmalloc(sizeof(*status), GFP_KERNEL);
+-      if (!status)
+-              return 1;
+-
+-      /*
+-       * Check SEV platform status.
+-       *
+-       * PLATFORM_STATUS can be called in any state, if we failed to query
+-       * the PLATFORM status then either PSP firmware does not support SEV
+-       * feature or SEV firmware is dead.
+-       */
+-      rc = sev_platform_status(status, NULL);
+-      if (rc)
+-              goto err;
+-
+       pr_info("SEV supported\n");
+-err:
+-      kfree(status);
+-      return rc;
++      return 0;
+ }
+ void sev_hardware_teardown(void)
diff --git a/queue-5.10/kvm-x86-reinstate-vendor-agnostic-check-on-spec_ctrl-cpuid-bits.patch b/queue-5.10/kvm-x86-reinstate-vendor-agnostic-check-on-spec_ctrl-cpuid-bits.patch
new file mode 100644 (file)
index 0000000..1ec212e
--- /dev/null
@@ -0,0 +1,131 @@
+From 39485ed95d6b83b62fa75c06c2c4d33992e0d971 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 3 Dec 2020 09:40:15 -0500
+Subject: KVM: x86: reinstate vendor-agnostic check on SPEC_CTRL cpuid bits
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 39485ed95d6b83b62fa75c06c2c4d33992e0d971 upstream.
+
+Until commit e7c587da1252 ("x86/speculation: Use synthetic bits for
+IBRS/IBPB/STIBP"), KVM was testing both Intel and AMD CPUID bits before
+allowing the guest to write MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD.
+Testing only Intel bits on VMX processors, or only AMD bits on SVM
+processors, fails if the guests are created with the "opposite" vendor
+as the host.
+
+While at it, also tweak the host CPU check to use the vendor-agnostic
+feature bit X86_FEATURE_IBPB, since we only care about the availability
+of the MSR on the host here and not about specific CPUID bits.
+
+Fixes: e7c587da1252 ("x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP")
+Cc: stable@vger.kernel.org
+Reported-by: Denis V. Lunev <den@openvz.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/cpuid.h   |   14 ++++++++++++++
+ arch/x86/kvm/svm/svm.c |   14 ++++----------
+ arch/x86/kvm/vmx/vmx.c |    8 ++++----
+ 3 files changed, 22 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.h
++++ b/arch/x86/kvm/cpuid.h
+@@ -264,6 +264,20 @@ static inline int guest_cpuid_stepping(s
+       return x86_stepping(best->eax);
+ }
++static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
++{
++      return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
++              guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) ||
++              guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) ||
++              guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD));
++}
++
++static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
++{
++      return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
++              guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB));
++}
++
+ static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
+ {
+       return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2543,10 +2543,7 @@ static int svm_get_msr(struct kvm_vcpu *
+               break;
+       case MSR_IA32_SPEC_CTRL:
+               if (!msr_info->host_initiated &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
++                  !guest_has_spec_ctrl_msr(vcpu))
+                       return 1;
+               msr_info->data = svm->spec_ctrl;
+@@ -2630,10 +2627,7 @@ static int svm_set_msr(struct kvm_vcpu *
+               break;
+       case MSR_IA32_SPEC_CTRL:
+               if (!msr->host_initiated &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
++                  !guest_has_spec_ctrl_msr(vcpu))
+                       return 1;
+               if (kvm_spec_ctrl_test_value(data))
+@@ -2658,12 +2652,12 @@ static int svm_set_msr(struct kvm_vcpu *
+               break;
+       case MSR_IA32_PRED_CMD:
+               if (!msr->host_initiated &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
++                  !guest_has_pred_cmd_msr(vcpu))
+                       return 1;
+               if (data & ~PRED_CMD_IBPB)
+                       return 1;
+-              if (!boot_cpu_has(X86_FEATURE_AMD_IBPB))
++              if (!boot_cpu_has(X86_FEATURE_IBPB))
+                       return 1;
+               if (!data)
+                       break;
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -1826,7 +1826,7 @@ static int vmx_get_msr(struct kvm_vcpu *
+               break;
+       case MSR_IA32_SPEC_CTRL:
+               if (!msr_info->host_initiated &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
++                  !guest_has_spec_ctrl_msr(vcpu))
+                       return 1;
+               msr_info->data = to_vmx(vcpu)->spec_ctrl;
+@@ -2028,7 +2028,7 @@ static int vmx_set_msr(struct kvm_vcpu *
+               break;
+       case MSR_IA32_SPEC_CTRL:
+               if (!msr_info->host_initiated &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
++                  !guest_has_spec_ctrl_msr(vcpu))
+                       return 1;
+               if (kvm_spec_ctrl_test_value(data))
+@@ -2063,12 +2063,12 @@ static int vmx_set_msr(struct kvm_vcpu *
+               goto find_uret_msr;
+       case MSR_IA32_PRED_CMD:
+               if (!msr_info->host_initiated &&
+-                  !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
++                  !guest_has_pred_cmd_msr(vcpu))
+                       return 1;
+               if (data & ~PRED_CMD_IBPB)
+                       return 1;
+-              if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              if (!boot_cpu_has(X86_FEATURE_IBPB))
+                       return 1;
+               if (!data)
+                       break;
diff --git a/queue-5.10/ovl-make-ioctl-safe.patch b/queue-5.10/ovl-make-ioctl-safe.patch
new file mode 100644 (file)
index 0000000..b178009
--- /dev/null
@@ -0,0 +1,171 @@
+From 89bdfaf93d9157499c3a0d61f489df66f2dead7f Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Mon, 14 Dec 2020 15:26:14 +0100
+Subject: ovl: make ioctl() safe
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 89bdfaf93d9157499c3a0d61f489df66f2dead7f upstream.
+
+ovl_ioctl_set_flags() does a capability check using flags, but then the
+real ioctl double-fetches flags and uses potentially different value.
+
+The "Check the capability before cred override" comment is misleading: a user
+can skip this check by presenting benign flags first and then overwriting
+them to non-benign flags.
+
+Just remove the cred override for now, hoping this doesn't cause a
+regression.
+
+The proper solution is to create a new setxflags i_op (patches are in the
+works).
+
+Xfstests don't show a regression.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Fixes: dab5ca8fd9dd ("ovl: add lsattr/chattr support")
+Cc: <stable@vger.kernel.org> # v4.19
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/file.c |   87 +++++++++-------------------------------------------
+ 1 file changed, 16 insertions(+), 71 deletions(-)
+
+--- a/fs/overlayfs/file.c
++++ b/fs/overlayfs/file.c
+@@ -541,46 +541,31 @@ static long ovl_real_ioctl(struct file *
+                          unsigned long arg)
+ {
+       struct fd real;
+-      const struct cred *old_cred;
+       long ret;
+       ret = ovl_real_fdget(file, &real);
+       if (ret)
+               return ret;
+-      old_cred = ovl_override_creds(file_inode(file)->i_sb);
+       ret = security_file_ioctl(real.file, cmd, arg);
+-      if (!ret)
++      if (!ret) {
++              /*
++               * Don't override creds, since we currently can't safely check
++               * permissions before doing so.
++               */
+               ret = vfs_ioctl(real.file, cmd, arg);
+-      revert_creds(old_cred);
++      }
+       fdput(real);
+       return ret;
+ }
+-static unsigned int ovl_iflags_to_fsflags(unsigned int iflags)
+-{
+-      unsigned int flags = 0;
+-
+-      if (iflags & S_SYNC)
+-              flags |= FS_SYNC_FL;
+-      if (iflags & S_APPEND)
+-              flags |= FS_APPEND_FL;
+-      if (iflags & S_IMMUTABLE)
+-              flags |= FS_IMMUTABLE_FL;
+-      if (iflags & S_NOATIME)
+-              flags |= FS_NOATIME_FL;
+-
+-      return flags;
+-}
+-
+ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
+-                              unsigned long arg, unsigned int flags)
++                              unsigned long arg)
+ {
+       long ret;
+       struct inode *inode = file_inode(file);
+-      unsigned int oldflags;
+       if (!inode_owner_or_capable(inode))
+               return -EACCES;
+@@ -591,10 +576,13 @@ static long ovl_ioctl_set_flags(struct f
+       inode_lock(inode);
+-      /* Check the capability before cred override */
+-      oldflags = ovl_iflags_to_fsflags(READ_ONCE(inode->i_flags));
+-      ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+-      if (ret)
++      /*
++       * Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
++       * capability.
++       */
++      ret = -EPERM;
++      if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
++          !capable(CAP_LINUX_IMMUTABLE))
+               goto unlock;
+       ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
+@@ -613,46 +601,6 @@ unlock:
+ }
+-static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
+-                                unsigned long arg)
+-{
+-      unsigned int flags;
+-
+-      if (get_user(flags, (int __user *) arg))
+-              return -EFAULT;
+-
+-      return ovl_ioctl_set_flags(file, cmd, arg, flags);
+-}
+-
+-static unsigned int ovl_fsxflags_to_fsflags(unsigned int xflags)
+-{
+-      unsigned int flags = 0;
+-
+-      if (xflags & FS_XFLAG_SYNC)
+-              flags |= FS_SYNC_FL;
+-      if (xflags & FS_XFLAG_APPEND)
+-              flags |= FS_APPEND_FL;
+-      if (xflags & FS_XFLAG_IMMUTABLE)
+-              flags |= FS_IMMUTABLE_FL;
+-      if (xflags & FS_XFLAG_NOATIME)
+-              flags |= FS_NOATIME_FL;
+-
+-      return flags;
+-}
+-
+-static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
+-                                 unsigned long arg)
+-{
+-      struct fsxattr fa;
+-
+-      memset(&fa, 0, sizeof(fa));
+-      if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
+-              return -EFAULT;
+-
+-      return ovl_ioctl_set_flags(file, cmd, arg,
+-                                 ovl_fsxflags_to_fsflags(fa.fsx_xflags));
+-}
+-
+ long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+ {
+       long ret;
+@@ -663,12 +611,9 @@ long ovl_ioctl(struct file *file, unsign
+               ret = ovl_real_ioctl(file, cmd, arg);
+               break;
+-      case FS_IOC_SETFLAGS:
+-              ret = ovl_ioctl_set_fsflags(file, cmd, arg);
+-              break;
+-
+       case FS_IOC_FSSETXATTR:
+-              ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
++      case FS_IOC_SETFLAGS:
++              ret = ovl_ioctl_set_flags(file, cmd, arg);
+               break;
+       default:
diff --git a/queue-5.10/powerpc-32-fix-vmap-stack-properly-set-r1-before-activating-mmu-on-syscall-too.patch b/queue-5.10/powerpc-32-fix-vmap-stack-properly-set-r1-before-activating-mmu-on-syscall-too.patch
new file mode 100644 (file)
index 0000000..99ee579
--- /dev/null
@@ -0,0 +1,82 @@
+From d5c243989fb0cb03c74d7340daca3b819f706ee7 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Mon, 21 Dec 2020 06:18:03 +0000
+Subject: powerpc/32: Fix vmap stack - Properly set r1 before activating MMU on syscall too
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit d5c243989fb0cb03c74d7340daca3b819f706ee7 upstream.
+
+We need r1 to be properly set before activating MMU, otherwise any new
+exception taken while saving registers into the stack in syscall
+prologs will use the user stack, which is wrong and will even lockup
+or crash when KUAP is selected.
+
+Do that by switching the meaning of r11 and r1 until we have saved r1
+to the stack: copy r1 into r11 and setup the new stack pointer in r1.
+To avoid complicating and impacting all generic and specific prolog
+code (and more), copy back r1 into r11 once r11 is saved onto
+the stack.
+
+We could get rid of copying r1 back and forth at the cost of rewriting
+everything to use r1 instead of r11 all the way when CONFIG_VMAP_STACK
+is set, but the effort is probably not worth it for now.
+
+Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU")
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/a3d819d5c348cee9783a311d5d3f3ba9b48fd219.1608531452.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/head_32.h |   25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
+--- a/arch/powerpc/kernel/head_32.h
++++ b/arch/powerpc/kernel/head_32.h
+@@ -131,18 +131,28 @@
+ #ifdef CONFIG_VMAP_STACK
+       mfspr   r11, SPRN_SRR0
+       mtctr   r11
+-#endif
+       andi.   r11, r9, MSR_PR
+-      lwz     r11,TASK_STACK-THREAD(r12)
++      mr      r11, r1
++      lwz     r1,TASK_STACK-THREAD(r12)
+       beq-    99f
+-      addi    r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+-#ifdef CONFIG_VMAP_STACK
++      addi    r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+       li      r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+       mtmsr   r10
+       isync
++      tovirt(r12, r12)
++      stw     r11,GPR1(r1)
++      stw     r11,0(r1)
++      mr      r11, r1
++#else
++      andi.   r11, r9, MSR_PR
++      lwz     r11,TASK_STACK-THREAD(r12)
++      beq-    99f
++      addi    r11, r11, THREAD_SIZE - INT_FRAME_SIZE
++      tophys(r11, r11)
++      stw     r1,GPR1(r11)
++      stw     r1,0(r11)
++      tovirt(r1, r11)         /* set new kernel sp */
+ #endif
+-      tovirt_vmstack r12, r12
+-      tophys_novmstack r11, r11
+       mflr    r10
+       stw     r10, _LINK(r11)
+ #ifdef CONFIG_VMAP_STACK
+@@ -150,9 +160,6 @@
+ #else
+       mfspr   r10,SPRN_SRR0
+ #endif
+-      stw     r1,GPR1(r11)
+-      stw     r1,0(r11)
+-      tovirt_novmstack r1, r11        /* set new kernel sp */
+       stw     r10,_NIP(r11)
+       mfcr    r10
+       rlwinm  r10,r10,0,4,2   /* Clear SO bit in CR */
diff --git a/queue-5.10/powerpc-8xx-fix-early-debug-when-smc1-is-relocated.patch b/queue-5.10/powerpc-8xx-fix-early-debug-when-smc1-is-relocated.patch
new file mode 100644 (file)
index 0000000..ef0dd18
--- /dev/null
@@ -0,0 +1,59 @@
+From 1e78f723d6a52966bfe3804209dbf404fdc9d3bb Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Fri, 4 Dec 2020 10:11:34 +0000
+Subject: powerpc/8xx: Fix early debug when SMC1 is relocated
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 1e78f723d6a52966bfe3804209dbf404fdc9d3bb upstream.
+
+When SMC1 is relocated and early debug is selected, the
+board hangs in ppc_md.setup_arch(). This is because once
+the microcode has been loaded and SMC1 relocated, early
+debug writes in the weeds.
+
+To allow smooth continuation, the SMC1 parameter RAM set up
+by the bootloader has to be copied into the new location.
+
+Fixes: 43db76f41824 ("powerpc/8xx: Add microcode patch to move SMC parameter RAM.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/b2f71f39eca543f1e4ec06596f09a8b12235c701.1607076683.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/cpm1.h         |    1 +
+ arch/powerpc/platforms/8xx/micropatch.c |   11 +++++++++++
+ 2 files changed, 12 insertions(+)
+
+--- a/arch/powerpc/include/asm/cpm1.h
++++ b/arch/powerpc/include/asm/cpm1.h
+@@ -68,6 +68,7 @@ extern void cpm_reset(void);
+ #define PROFF_SPI     ((uint)0x0180)
+ #define PROFF_SCC3    ((uint)0x0200)
+ #define PROFF_SMC1    ((uint)0x0280)
++#define PROFF_DSP1    ((uint)0x02c0)
+ #define PROFF_SCC4    ((uint)0x0300)
+ #define PROFF_SMC2    ((uint)0x0380)
+--- a/arch/powerpc/platforms/8xx/micropatch.c
++++ b/arch/powerpc/platforms/8xx/micropatch.c
+@@ -360,6 +360,17 @@ void __init cpm_load_patch(cpm8xx_t *cp)
+       if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) {
+               smc_uart_t *smp;
++              if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) {
++                      int i;
++
++                      for (i = 0; i < sizeof(*smp); i += 4) {
++                              u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i];
++                              u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i];
++
++                              out_be32(dst, in_be32(src));
++                      }
++              }
++
+               smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1];
+               out_be16(&smp->smc_rpbase, 0x1ec0);
+               smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2];
diff --git a/queue-5.10/powerpc-bitops-fix-possible-undefined-behaviour-with-fls-and-fls64.patch b/queue-5.10/powerpc-bitops-fix-possible-undefined-behaviour-with-fls-and-fls64.patch
new file mode 100644 (file)
index 0000000..d7c15da
--- /dev/null
@@ -0,0 +1,117 @@
+From 1891ef21d92c4801ea082ee8ed478e304ddc6749 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Thu, 22 Oct 2020 14:05:46 +0000
+Subject: powerpc/bitops: Fix possible undefined behaviour with fls() and fls64()
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 1891ef21d92c4801ea082ee8ed478e304ddc6749 upstream.
+
+fls() and fls64() are using __builtin_clz() and __builtin_clzll().
+On powerpc, those builtins trivially use the cntlzw and cntlzd power
+instructions.
+
+Although those instructions provide the expected result with
+input argument 0, __builtin_clz() and __builtin_clzll() are
+documented as undefined for value 0.
+
+The easiest fix would be to use fls() and fls64() functions
+defined in include/asm-generic/bitops/builtin-fls.h and
+include/asm-generic/bitops/fls64.h, but GCC output is not optimal:
+
+00000388 <testfls>:
+ 388:   2c 03 00 00     cmpwi   r3,0
+ 38c:   41 82 00 10     beq     39c <testfls+0x14>
+ 390:   7c 63 00 34     cntlzw  r3,r3
+ 394:   20 63 00 20     subfic  r3,r3,32
+ 398:   4e 80 00 20     blr
+ 39c:   38 60 00 00     li      r3,0
+ 3a0:   4e 80 00 20     blr
+
+000003b0 <testfls64>:
+ 3b0:   2c 03 00 00     cmpwi   r3,0
+ 3b4:   40 82 00 1c     bne     3d0 <testfls64+0x20>
+ 3b8:   2f 84 00 00     cmpwi   cr7,r4,0
+ 3bc:   38 60 00 00     li      r3,0
+ 3c0:   4d 9e 00 20     beqlr   cr7
+ 3c4:   7c 83 00 34     cntlzw  r3,r4
+ 3c8:   20 63 00 20     subfic  r3,r3,32
+ 3cc:   4e 80 00 20     blr
+ 3d0:   7c 63 00 34     cntlzw  r3,r3
+ 3d4:   20 63 00 40     subfic  r3,r3,64
+ 3d8:   4e 80 00 20     blr
+
+When the input of fls(x) is a constant, just check x for nullity and
+return either 0 or __builtin_clz(x). Otherwise, use cntlzw instruction
+directly.
+
+For fls64() on PPC64, do the same but with __builtin_clzll() and
+cntlzd instruction. On PPC32, let's take the generic fls64() which
+will use our fls(). The result is as expected:
+
+00000388 <testfls>:
+ 388:   7c 63 00 34     cntlzw  r3,r3
+ 38c:   20 63 00 20     subfic  r3,r3,32
+ 390:   4e 80 00 20     blr
+
+000003a0 <testfls64>:
+ 3a0:   2c 03 00 00     cmpwi   r3,0
+ 3a4:   40 82 00 10     bne     3b4 <testfls64+0x14>
+ 3a8:   7c 83 00 34     cntlzw  r3,r4
+ 3ac:   20 63 00 20     subfic  r3,r3,32
+ 3b0:   4e 80 00 20     blr
+ 3b4:   7c 63 00 34     cntlzw  r3,r3
+ 3b8:   20 63 00 40     subfic  r3,r3,64
+ 3bc:   4e 80 00 20     blr
+
+Fixes: 2fcff790dcb4 ("powerpc: Use builtin functions for fls()/__fls()/fls64()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/348c2d3f19ffcff8abe50d52513f989c4581d000.1603375524.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/bitops.h |   23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/include/asm/bitops.h
++++ b/arch/powerpc/include/asm/bitops.h
+@@ -216,15 +216,34 @@ static inline void arch___clear_bit_unlo
+  */
+ static inline int fls(unsigned int x)
+ {
+-      return 32 - __builtin_clz(x);
++      int lz;
++
++      if (__builtin_constant_p(x))
++              return x ? 32 - __builtin_clz(x) : 0;
++      asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
++      return 32 - lz;
+ }
+ #include <asm-generic/bitops/builtin-__fls.h>
++/*
++ * 64-bit can do this using one cntlzd (count leading zeroes doubleword)
++ * instruction; for 32-bit we use the generic version, which does two
++ * 32-bit fls calls.
++ */
++#ifdef CONFIG_PPC64
+ static inline int fls64(__u64 x)
+ {
+-      return 64 - __builtin_clzll(x);
++      int lz;
++
++      if (__builtin_constant_p(x))
++              return x ? 64 - __builtin_clzll(x) : 0;
++      asm("cntlzd %0,%1" : "=r" (lz) : "r" (x));
++      return 64 - lz;
+ }
++#else
++#include <asm-generic/bitops/fls64.h>
++#endif
+ #ifdef CONFIG_PPC64
+ unsigned int __arch_hweight8(unsigned int w);
diff --git a/queue-5.10/powerpc-feature-add-cpu_ftr_noexecute-to-g2_le.patch b/queue-5.10/powerpc-feature-add-cpu_ftr_noexecute-to-g2_le.patch
new file mode 100644 (file)
index 0000000..93ad13d
--- /dev/null
@@ -0,0 +1,33 @@
+From 197493af414ee22427be3343637ac290a791925a Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Mon, 12 Oct 2020 08:02:13 +0000
+Subject: powerpc/feature: Add CPU_FTR_NOEXECUTE to G2_LE
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 197493af414ee22427be3343637ac290a791925a upstream.
+
+G2_LE has a 603 core, add CPU_FTR_NOEXECUTE.
+
+Fixes: 385e89d5b20f ("powerpc/mm: add exec protection on powerpc 603")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/39a530ee41d83f49747ab3af8e39c056450b9b4d.1602489653.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/cputable.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/include/asm/cputable.h
++++ b/arch/powerpc/include/asm/cputable.h
+@@ -369,7 +369,7 @@ static inline void cpu_feature_keys_init
+           CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
+ #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE)
+ #define CPU_FTRS_G2_LE        (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
+-          CPU_FTR_MAYBE_CAN_NAP)
++          CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE)
+ #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \
+           CPU_FTR_MAYBE_CAN_NAP | \
+           CPU_FTR_COMMON  | CPU_FTR_NOEXECUTE)
diff --git a/queue-5.10/powerpc-fix-incorrect-stw-ux-u-x-instructions-in-__set_pte_at.patch b/queue-5.10/powerpc-fix-incorrect-stw-ux-u-x-instructions-in-__set_pte_at.patch
new file mode 100644 (file)
index 0000000..7f85a2d
--- /dev/null
@@ -0,0 +1,63 @@
+From d85be8a49e733dcd23674aa6202870d54bf5600d Mon Sep 17 00:00:00 2001
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Date: Thu, 22 Oct 2020 09:29:20 +0000
+Subject: powerpc: Fix incorrect stw{, ux, u, x} instructions in __set_pte_at
+
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+commit d85be8a49e733dcd23674aa6202870d54bf5600d upstream.
+
+The placeholder for instruction selection should use the second
+argument's operand, which is %1, not %0. This could generate incorrect
+assembly code if the memory addressing of operand %0 is a different
+form from that of operand %1.
+
+Also remove the %Un placeholder because having %Un placeholders
+for two operands which are based on the same local var (ptep) doesn't
+make much sense. By the way, it doesn't change the current behaviour
+because "<>" constraint is missing for the associated "=m".
+
+[chleroy: revised commit log iaw segher's comments and removed %U0]
+
+Fixes: 9bf2b5cdc5fe ("powerpc: Fixes for CONFIG_PTE_64BIT for SMP support")
+Cc: <stable@vger.kernel.org> # v2.6.28+
+Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/96354bd77977a6a933fe9020da57629007fdb920.1603358942.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/book3s/32/pgtable.h |    4 ++--
+ arch/powerpc/include/asm/nohash/pgtable.h    |    4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
++++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
+@@ -524,9 +524,9 @@ static inline void __set_pte_at(struct m
+       if (pte_val(*ptep) & _PAGE_HASHPTE)
+               flush_hash_entry(mm, ptep, addr);
+       __asm__ __volatile__("\
+-              stw%U0%X0 %2,%0\n\
++              stw%X0 %2,%0\n\
+               eieio\n\
+-              stw%U0%X0 %L2,%1"
++              stw%X1 %L2,%1"
+       : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+       : "r" (pte) : "memory");
+--- a/arch/powerpc/include/asm/nohash/pgtable.h
++++ b/arch/powerpc/include/asm/nohash/pgtable.h
+@@ -192,9 +192,9 @@ static inline void __set_pte_at(struct m
+        */
+       if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) {
+               __asm__ __volatile__("\
+-                      stw%U0%X0 %2,%0\n\
++                      stw%X0 %2,%0\n\
+                       eieio\n\
+-                      stw%U0%X0 %L2,%1"
++                      stw%X1 %L2,%1"
+               : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+               : "r" (pte) : "memory");
+               return;
diff --git a/queue-5.10/powerpc-mm-fix-verification-of-mmu_ftr_type_44x.patch b/queue-5.10/powerpc-mm-fix-verification-of-mmu_ftr_type_44x.patch
new file mode 100644 (file)
index 0000000..5c8cf3e
--- /dev/null
@@ -0,0 +1,35 @@
+From 17179aeb9d34cc81e1a4ae3f85e5b12b13a1f8d0 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Sat, 10 Oct 2020 17:30:59 +0000
+Subject: powerpc/mm: Fix verification of MMU_FTR_TYPE_44x
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 17179aeb9d34cc81e1a4ae3f85e5b12b13a1f8d0 upstream.
+
+MMU_FTR_TYPE_44x cannot be checked by cpu_has_feature()
+
+Use mmu_has_feature() instead
+
+Fixes: 23eb7f560a2a ("powerpc: Convert flush_icache_range & friends to C")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/ceede82fadf37f3b8275e61fcf8cf29a3e2ec7fe.1602351011.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/mem.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -532,7 +532,7 @@ void __flush_dcache_icache(void *p)
+        * space occurs, before returning to user space.
+        */
+-      if (cpu_has_feature(MMU_FTR_TYPE_44x))
++      if (mmu_has_feature(MMU_FTR_TYPE_44x))
+               return;
+       invalidate_icache_range(addr, addr + PAGE_SIZE);
diff --git a/queue-5.10/powerpc-powernv-memtrace-don-t-leak-kernel-memory-to-user-space.patch b/queue-5.10/powerpc-powernv-memtrace-don-t-leak-kernel-memory-to-user-space.patch
new file mode 100644 (file)
index 0000000..baed83c
--- /dev/null
@@ -0,0 +1,103 @@
+From c74cf7a3d59a21b290fe0468f5b470d0b8ee37df Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 11 Nov 2020 15:53:15 +0100
+Subject: powerpc/powernv/memtrace: Don't leak kernel memory to user space
+
+From: David Hildenbrand <david@redhat.com>
+
+commit c74cf7a3d59a21b290fe0468f5b470d0b8ee37df upstream.
+
+We currently leak kernel memory to user space, because memory
+offlining doesn't do any implicit clearing of memory and we are
+missing explicit clearing of memory.
+
+Let's keep it simple and clear pages before removing the linear
+mapping.
+
+Reproduced in QEMU/TCG with 10 GiB of main memory:
+  [root@localhost ~]# dd obs=9G if=/dev/urandom of=/dev/null
+  [... wait until "free -m" used counter no longer changes and cancel]
+  19665802+0 records in
+  1+0 records out
+  9663676416 bytes (9.7 GB, 9.0 GiB) copied, 135.548 s, 71.3 MB/s
+  [root@localhost ~]# cat /sys/devices/system/memory/block_size_bytes
+  40000000
+  [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
+  [  402.978663][ T1086] page:000000001bc4bc74 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x24900
+  [  402.980063][ T1086] flags: 0x7ffff000001000(reserved)
+  [  402.980415][ T1086] raw: 007ffff000001000 c00c000000924008 c00c000000924008 0000000000000000
+  [  402.980627][ T1086] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+  [  402.980845][ T1086] page dumped because: unmovable page
+  [  402.989608][ T1086] Offlined Pages 16384
+  [  403.324155][ T1086] memtrace: Allocated trace memory on node 0 at 0x0000000200000000
+
+Before this patch:
+  [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace  | head
+  00000000  c8 25 72 51 4d 26 36 c5  5c c2 56 15 d5 1a cd 10  |.%rQM&6.\.V.....|
+  00000010  19 b9 50 b2 cb e3 60 b8  ec 0a f3 ec 4b 3c 39 f0  |..P...`.....K<9.|$
+  00000020  4e 5a 4c cf bd 26 19 ff  37 79 13 67 24 b7 b8 57  |NZL..&..7y.g$..W|$
+  00000030  98 3e f5 be 6f 14 6a bd  a4 52 bc 6e e9 e0 c1 5d  |.>..o.j..R.n...]|$
+  00000040  76 b3 ae b5 88 d7 da e3  64 23 85 2c 10 88 07 b6  |v.......d#.,....|$
+  00000050  9a d8 91 de f7 50 27 69  2e 64 9c 6f d3 19 45 79  |.....P'i.d.o..Ey|$
+  00000060  6a 6f 8a 61 71 19 1f c7  f1 df 28 26 ca 0f 84 55  |jo.aq.....(&...U|$
+  00000070  01 3f be e4 e2 e1 da ff  7b 8c 8e 32 37 b4 24 53  |.?......{..27.$S|$
+  00000080  1b 70 30 45 56 e6 8c c4  0e b5 4c fb 9f dd 88 06  |.p0EV.....L.....|$
+  00000090  ef c4 18 79 f1 60 b1 5c  79 59 4d f4 36 d7 4a 5c  |...y.`.\yYM.6.J\|$
+
+After this patch:
+  [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace  | head
+  00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+  *
+  40000000
+
+Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing")
+Cc: stable@vger.kernel.org # v4.14+
+Reported-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20201111145322.15793-2-david@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/memtrace.c |   22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+--- a/arch/powerpc/platforms/powernv/memtrace.c
++++ b/arch/powerpc/platforms/powernv/memtrace.c
+@@ -67,6 +67,23 @@ static int change_memblock_state(struct
+       return 0;
+ }
++static void memtrace_clear_range(unsigned long start_pfn,
++                               unsigned long nr_pages)
++{
++      unsigned long pfn;
++
++      /*
++       * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
++       * does not apply, avoid passing around "struct page" and use
++       * clear_page() instead directly.
++       */
++      for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
++              if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
++                      cond_resched();
++              clear_page(__va(PFN_PHYS(pfn)));
++      }
++}
++
+ /* called with device_hotplug_lock held */
+ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
+ {
+@@ -112,6 +129,11 @@ static u64 memtrace_alloc_node(u32 nid,
+       for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
+               if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+                       /*
++                       * Clear the range while we still have a linear
++                       * mapping.
++                       */
++                      memtrace_clear_range(base_pfn, nr_pages);
++                      /*
+                        * Remove memory in memory block size chunks so that
+                        * iomem resources are always split to the same size and
+                        * we never try to remove memory that spans two iomem
diff --git a/queue-5.10/powerpc-powernv-memtrace-fix-crashing-the-kernel-when-enabling-concurrently.patch b/queue-5.10/powerpc-powernv-memtrace-fix-crashing-the-kernel-when-enabling-concurrently.patch
new file mode 100644 (file)
index 0000000..9d36362
--- /dev/null
@@ -0,0 +1,96 @@
+From d6718941a2767fb383e105d257d2105fe4f15f0e Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 11 Nov 2020 15:53:16 +0100
+Subject: powerpc/powernv/memtrace: Fix crashing the kernel when enabling concurrently
+
+From: David Hildenbrand <david@redhat.com>
+
+commit d6718941a2767fb383e105d257d2105fe4f15f0e upstream.
+
+It's very easy to crash the kernel right now by simply trying to
+enable memtrace concurrently, hammering on the "enable" interface
+
+loop.sh:
+  #!/bin/bash
+
+  dmesg --console-off
+
+  while true; do
+          echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
+  done
+
+[root@localhost ~]# loop.sh &
+[root@localhost ~]# loop.sh &
+
+Resulting quickly in a kernel crash. Let's properly protect using a
+mutex.
+
+Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing")
+Cc: stable@vger.kernel.org # v4.14+
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20201111145322.15793-3-david@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/memtrace.c |   22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+--- a/arch/powerpc/platforms/powernv/memtrace.c
++++ b/arch/powerpc/platforms/powernv/memtrace.c
+@@ -30,6 +30,7 @@ struct memtrace_entry {
+       char name[16];
+ };
++static DEFINE_MUTEX(memtrace_mutex);
+ static u64 memtrace_size;
+ static struct memtrace_entry *memtrace_array;
+@@ -279,6 +280,7 @@ static int memtrace_online(void)
+ static int memtrace_enable_set(void *data, u64 val)
+ {
++      int rc = -EAGAIN;
+       u64 bytes;
+       /*
+@@ -291,25 +293,31 @@ static int memtrace_enable_set(void *dat
+               return -EINVAL;
+       }
++      mutex_lock(&memtrace_mutex);
++
+       /* Re-add/online previously removed/offlined memory */
+       if (memtrace_size) {
+               if (memtrace_online())
+-                      return -EAGAIN;
++                      goto out_unlock;
+       }
+-      if (!val)
+-              return 0;
++      if (!val) {
++              rc = 0;
++              goto out_unlock;
++      }
+       /* Offline and remove memory */
+       if (memtrace_init_regions_runtime(val))
+-              return -EINVAL;
++              goto out_unlock;
+       if (memtrace_init_debugfs())
+-              return -EINVAL;
++              goto out_unlock;
+       memtrace_size = val;
+-
+-      return 0;
++      rc = 0;
++out_unlock:
++      mutex_unlock(&memtrace_mutex);
++      return rc;
+ }
+ static int memtrace_enable_get(void *data, u64 *val)
diff --git a/queue-5.10/powerpc-powernv-npu-do-not-attempt-npu2-setup-on-power8nvl-npu.patch b/queue-5.10/powerpc-powernv-npu-do-not-attempt-npu2-setup-on-power8nvl-npu.patch
new file mode 100644 (file)
index 0000000..7a3d458
--- /dev/null
@@ -0,0 +1,91 @@
+From b1198a88230f2ce50c271e22b82a8b8610b2eea9 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Sun, 22 Nov 2020 18:38:28 +1100
+Subject: powerpc/powernv/npu: Do not attempt NPU2 setup on POWER8NVL NPU
+
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+
+commit b1198a88230f2ce50c271e22b82a8b8610b2eea9 upstream.
+
+We execute certain NPU2 setup code (such as mapping an LPID to a device
+in NPU2) unconditionally if an Nvlink bridge is detected. However this
+cannot succeed on POWER8NVL machines and errors appear in dmesg. This is
+harmless as skiboot returns an error and the only place we check it is
+vfio-pci but that code does not get called on P8+ either.
+
+This adds a check if pnv_npu2_xxx helpers are called on a machine with
+NPU2 which initializes pnv_phb::npu in pnv_npu2_init();
+pnv_phb::npu==NULL on POWER8/NVL (Naples).
+
+While at it, fix NULL dereferencing in pnv_npu_peers_take_ownership/
+pnv_npu_peers_release_ownership which occurs when GPUs on mentioned P8s
+cause EEH which happens if "vfio-pci" disables devices using
+the D3 power state; the vfio-pci's disable_idle_d3 module parameter
+controls this and must be set on Naples. The EEH handling clears
+the entire pnv_ioda_pe struct in pnv_ioda_free_pe() hence
+the NULL dereferencing. We cannot recover from that but at least we stop
+crashing.
+
+Tested on
+- POWER9 pvr=004e1201, Ubuntu 19.04 host, Ubuntu 18.04 vm,
+  NVIDIA GV100 10de:1db1 driver 418.39
+- POWER8 pvr=004c0100, RHEL 7.6 host, Ubuntu 16.10 vm,
+  NVIDIA P100 10de:15f9 driver 396.47
+
+Fixes: 1b785611e119 ("powerpc/powernv/npu: Add release_ownership hook")
+Cc: stable@vger.kernel.org # 5.0
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20201122073828.15446-1-aik@ozlabs.ru
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/npu-dma.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/platforms/powernv/npu-dma.c
++++ b/arch/powerpc/platforms/powernv/npu-dma.c
+@@ -385,7 +385,8 @@ static void pnv_npu_peers_take_ownership
+       for (i = 0; i < npucomp->pe_num; ++i) {
+               struct pnv_ioda_pe *pe = npucomp->pe[i];
+-              if (!pe->table_group.ops->take_ownership)
++              if (!pe->table_group.ops ||
++                  !pe->table_group.ops->take_ownership)
+                       continue;
+               pe->table_group.ops->take_ownership(&pe->table_group);
+       }
+@@ -401,7 +402,8 @@ static void pnv_npu_peers_release_owners
+       for (i = 0; i < npucomp->pe_num; ++i) {
+               struct pnv_ioda_pe *pe = npucomp->pe[i];
+-              if (!pe->table_group.ops->release_ownership)
++              if (!pe->table_group.ops ||
++                  !pe->table_group.ops->release_ownership)
+                       continue;
+               pe->table_group.ops->release_ownership(&pe->table_group);
+       }
+@@ -623,6 +625,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev
+               return -ENODEV;
+       hose = pci_bus_to_host(npdev->bus);
++      if (hose->npu == NULL) {
++              dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
++              return 0;
++      }
++
+       nphb = hose->private_data;
+       dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
+@@ -670,6 +677,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_d
+               return -ENODEV;
+       hose = pci_bus_to_host(npdev->bus);
++      if (hose->npu == NULL) {
++              dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
++              return 0;
++      }
++
+       nphb = hose->private_data;
+       dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
diff --git a/queue-5.10/powerpc-rtas-fix-typo-of-ibm-open-errinjct-in-rtas-filter.patch b/queue-5.10/powerpc-rtas-fix-typo-of-ibm-open-errinjct-in-rtas-filter.patch
new file mode 100644 (file)
index 0000000..f026a0b
--- /dev/null
@@ -0,0 +1,48 @@
+From f10881a46f8914428110d110140a455c66bdf27b Mon Sep 17 00:00:00 2001
+From: Tyrel Datwyler <tyreld@linux.ibm.com>
+Date: Tue, 8 Dec 2020 13:54:34 -0600
+Subject: powerpc/rtas: Fix typo of ibm,open-errinjct in RTAS filter
+
+From: Tyrel Datwyler <tyreld@linux.ibm.com>
+
+commit f10881a46f8914428110d110140a455c66bdf27b upstream.
+
+Commit bd59380c5ba4 ("powerpc/rtas: Restrict RTAS requests from userspace")
+introduced the following error when invoking the errinjct userspace
+tool:
+
+  [root@ltcalpine2-lp5 librtas]# errinjct open
+  [327884.071171] sys_rtas: RTAS call blocked - exploit attempt?
+  [327884.071186] sys_rtas: token=0x26, nargs=0 (called by errinjct)
+  errinjct: Could not open RTAS error injection facility
+  errinjct: librtas: open: Unexpected I/O error
+
+The entry for ibm,open-errinjct in rtas_filter array has a typo where
+the "j" is omitted in the rtas call name. After fixing this typo the
+errinjct tool functions again as expected.
+
+  [root@ltcalpine2-lp5 linux]# errinjct open
+  RTAS error injection facility open, token = 1
+
+Fixes: bd59380c5ba4 ("powerpc/rtas: Restrict RTAS requests from userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20201208195434.8289-1-tyreld@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/rtas.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/kernel/rtas.c
++++ b/arch/powerpc/kernel/rtas.c
+@@ -1030,7 +1030,7 @@ static struct rtas_filter rtas_filters[]
+       { "ibm,display-message", -1, 0, -1, -1, -1 },
+       { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 },
+       { "ibm,close-errinjct", -1, -1, -1, -1, -1 },
+-      { "ibm,open-errinct", -1, -1, -1, -1, -1 },
++      { "ibm,open-errinjct", -1, -1, -1, -1, -1 },
+       { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 },
+       { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 },
+       { "ibm,get-indices", -1, 2, 3, -1, -1 },
diff --git a/queue-5.10/powerpc-xmon-change-printk-to-pr_cont.patch b/queue-5.10/powerpc-xmon-change-printk-to-pr_cont.patch
new file mode 100644 (file)
index 0000000..07433e2
--- /dev/null
@@ -0,0 +1,66 @@
+From 7c6c86b36a36dd4a13d30bba07718e767aa2e7a1 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Fri, 4 Dec 2020 10:35:38 +0000
+Subject: powerpc/xmon: Change printk() to pr_cont()
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 7c6c86b36a36dd4a13d30bba07718e767aa2e7a1 upstream.
+
+For some time now, printk() has added a carriage return, leading to
+unusable xmon output if there is no udbg backend available:
+
+  [   54.288722] sysrq: Entering xmon
+  [   54.292209] Vector: 0  at [cace3d2c]
+  [   54.292274]     pc:
+  [   54.292331] c0023650
+  [   54.292468] : xmon+0x28/0x58
+  [   54.292519]
+  [   54.292574]     lr:
+  [   54.292630] c0023724
+  [   54.292749] : sysrq_handle_xmon+0xa4/0xfc
+  [   54.292801]
+  [   54.292867]     sp: cace3de8
+  [   54.292931]    msr: 9032
+  [   54.292999]   current = 0xc28d0000
+  [   54.293072]     pid   = 377, comm = sh
+  [   54.293157] Linux version 5.10.0-rc6-s3k-dev-01364-gedf13f0ccd76-dirty (root@po17688vm.idsi0.si.c-s.fr) (powerpc64-linux-gcc (GCC) 10.1.0, GNU ld (GNU Binutils) 2.34) #4211 PREEMPT Fri Dec 4 09:32:11 UTC 2020
+  [   54.293287] enter ? for help
+  [   54.293470] [cace3de8]
+  [   54.293532] c0023724
+  [   54.293654]  sysrq_handle_xmon+0xa4/0xfc
+  [   54.293711]  (unreliable)
+  ...
+  [   54.296002]
+  [   54.296159] --- Exception: c01 (System Call) at
+  [   54.296217] 0fd4e784
+  [   54.296303]
+  [   54.296375] SP (7fca6ff0) is in userspace
+  [   54.296431] mon>
+  [   54.296484]  <no input ...>
+
+Use pr_cont() instead.
+
+Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines")
+Cc: stable@vger.kernel.org # v4.9+
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+[mpe: Mention that it only happens when udbg is not available]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/c8a6ec704416ecd5ff2bd26213c9bc026bdd19de.1607077340.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/xmon/nonstdio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/xmon/nonstdio.c
++++ b/arch/powerpc/xmon/nonstdio.c
+@@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...
+       if (n && rc == 0) {
+               /* No udbg hooks, fallback to printk() - dangerous */
+-              printk("%s", xmon_outbuf);
++              pr_cont("%s", xmon_outbuf);
+       }
+ }
diff --git a/queue-5.10/risc-v-fix-usage-of-memblock_enforce_memory_limit.patch b/queue-5.10/risc-v-fix-usage-of-memblock_enforce_memory_limit.patch
new file mode 100644 (file)
index 0000000..4ab077a
--- /dev/null
@@ -0,0 +1,37 @@
+From de043da0b9e71147ca610ed542d34858aadfc61c Mon Sep 17 00:00:00 2001
+From: Atish Patra <atish.patra@wdc.com>
+Date: Fri, 18 Dec 2020 16:13:56 -0800
+Subject: RISC-V: Fix usage of memblock_enforce_memory_limit
+
+From: Atish Patra <atish.patra@wdc.com>
+
+commit de043da0b9e71147ca610ed542d34858aadfc61c upstream.
+
+memblock_enforce_memory_limit accepts the maximum memory size not the
+maximum address that can be handled by kernel. Fix the function invocation
+accordingly.
+
+Fixes: 1bd14a66ee52 ("RISC-V: Remove any memblock representing unusable memory area")
+Cc: stable@vger.kernel.org
+Reported-by: Bin Meng <bin.meng@windriver.com>
+Tested-by: Bin Meng <bin.meng@windriver.com>
+Acked-by: Mike Rapoport <rppt@linux.ibm.com>
+Signed-off-by: Atish Patra <atish.patra@wdc.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/riscv/mm/init.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/mm/init.c
++++ b/arch/riscv/mm/init.c
+@@ -174,7 +174,7 @@ void __init setup_bootmem(void)
+        * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed
+        * as it is unusable by kernel.
+        */
+-      memblock_enforce_memory_limit(mem_start - PAGE_OFFSET);
++      memblock_enforce_memory_limit(-PAGE_OFFSET);
+       /* Reserve from the start of the kernel to the end of the kernel */
+       memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
index 2c3fb8d9e1112a949ba01075fe9867d314a8c567..5b638301595c0d7a2dc6b912d580beced26deda4 100644 (file)
@@ -584,3 +584,48 @@ usb-serial-keyspan_pda-fix-stalled-writes.patch
 usb-serial-keyspan_pda-fix-write-wakeup-use-after-free.patch
 usb-serial-keyspan_pda-fix-tx-unthrottle-use-after-free.patch
 usb-serial-keyspan_pda-fix-write-unthrottling.patch
+btrfs-do-not-shorten-unpin-len-for-caching-block-groups.patch
+btrfs-update-last_byte_to_unpin-in-switch_commit_roots.patch
+btrfs-fix-race-when-defragmenting-leads-to-unnecessary-io.patch
+ext4-fix-an-is_err-vs-null-check.patch
+ext4-fix-a-memory-leak-of-ext4_free_data.patch
+ext4-fix-deadlock-with-fs-freezing-and-ea-inodes.patch
+ext4-don-t-remount-read-only-with-errors-continue-on-reboot.patch
+risc-v-fix-usage-of-memblock_enforce_memory_limit.patch
+arm64-dts-ti-k3-am65-mark-dss-as-dma-coherent.patch
+arm64-dts-marvell-keep-smmu-disabled-by-default-for-armada-7040-and-8040.patch
+kvm-arm64-introduce-handling-of-aarch32-ttbcr2-traps.patch
+kvm-x86-reinstate-vendor-agnostic-check-on-spec_ctrl-cpuid-bits.patch
+kvm-svm-remove-the-call-to-sev_platform_status-during-setup.patch
+iommu-arm-smmu-allow-implementation-specific-write_s2cr.patch
+iommu-arm-smmu-qcom-read-back-stream-mappings.patch
+iommu-arm-smmu-qcom-implement-s2cr-quirk.patch
+arm-dts-pandaboard-fix-pinmux-for-gpio-user-button-of-pandaboard-es.patch
+arm-dts-at91-sama5d2-fix-can-message-ram-offset-and-size.patch
+arm-tegra-populate-opp-table-for-tegra20-ventana.patch
+xprtrdma-fix-xdrbuf_sparse_pages-support.patch
+powerpc-32-fix-vmap-stack-properly-set-r1-before-activating-mmu-on-syscall-too.patch
+powerpc-fix-incorrect-stw-ux-u-x-instructions-in-__set_pte_at.patch
+powerpc-rtas-fix-typo-of-ibm-open-errinjct-in-rtas-filter.patch
+powerpc-bitops-fix-possible-undefined-behaviour-with-fls-and-fls64.patch
+powerpc-feature-add-cpu_ftr_noexecute-to-g2_le.patch
+powerpc-xmon-change-printk-to-pr_cont.patch
+powerpc-8xx-fix-early-debug-when-smc1-is-relocated.patch
+powerpc-mm-fix-verification-of-mmu_ftr_type_44x.patch
+powerpc-powernv-npu-do-not-attempt-npu2-setup-on-power8nvl-npu.patch
+powerpc-powernv-memtrace-don-t-leak-kernel-memory-to-user-space.patch
+powerpc-powernv-memtrace-fix-crashing-the-kernel-when-enabling-concurrently.patch
+ovl-make-ioctl-safe.patch
+ima-don-t-modify-file-descriptor-mode-on-the-fly.patch
+um-remove-use-of-asprinf-in-umid.c.patch
+um-fix-time-travel-mode.patch
+ceph-fix-race-in-concurrent-__ceph_remove_cap-invocations.patch
+smb3-avoid-confusing-warning-message-on-mount-to-azure.patch
+smb3.1.1-remove-confusing-mount-warning-when-no-spnego-info-on-negprot-rsp.patch
+smb3.1.1-do-not-log-warning-message-if-server-doesn-t-populate-salt.patch
+ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch
+jffs2-fix-gc-exit-abnormally.patch
+jffs2-fix-ignoring-mounting-options-problem-during-remounting.patch
+fsnotify-generalize-handle_inode_event.patch
+inotify-convert-to-handle_inode_event-interface.patch
+fsnotify-fix-events-reported-to-watching-parent-and-child.patch
diff --git a/queue-5.10/smb3-avoid-confusing-warning-message-on-mount-to-azure.patch b/queue-5.10/smb3-avoid-confusing-warning-message-on-mount-to-azure.patch
new file mode 100644 (file)
index 0000000..7ef1ce8
--- /dev/null
@@ -0,0 +1,40 @@
+From ebcd6de98754d9b6a5f89d7835864b1c365d432f Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Tue, 8 Dec 2020 21:13:31 -0600
+Subject: SMB3: avoid confusing warning message on mount to Azure
+
+From: Steve French <stfrench@microsoft.com>
+
+commit ebcd6de98754d9b6a5f89d7835864b1c365d432f upstream.
+
+Mounts to Azure cause an unneeded warning message in dmesg
+   "CIFS: VFS: parse_server_interfaces: incomplete interface info"
+
+Azure rounds up the size (by 8 additional bytes, to a
+16 byte boundary) of the structure returned on the query
+of the server interfaces at mount time.  This is permissible
+even though different than other servers so do not log a warning
+if query network interfaces response is only rounded up by 8
+bytes or fewer.
+
+CC: Stable <stable@vger.kernel.org>
+Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2ops.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -477,7 +477,8 @@ parse_server_interfaces(struct network_i
+               goto out;
+       }
+-      if (bytes_left || p->Next)
++      /* Azure rounds the buffer size up 8, to a 16 byte boundary */
++      if ((bytes_left > 8) || p->Next)
+               cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
diff --git a/queue-5.10/smb3.1.1-do-not-log-warning-message-if-server-doesn-t-populate-salt.patch b/queue-5.10/smb3.1.1-do-not-log-warning-message-if-server-doesn-t-populate-salt.patch
new file mode 100644 (file)
index 0000000..c0f282a
--- /dev/null
@@ -0,0 +1,87 @@
+From 7955f105afb6034af344038d663bc98809483cdd Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Wed, 9 Dec 2020 22:19:00 -0600
+Subject: SMB3.1.1: do not log warning message if server doesn't populate salt
+
+From: Steve French <stfrench@microsoft.com>
+
+commit 7955f105afb6034af344038d663bc98809483cdd upstream.
+
+In the negotiate protocol preauth context, the server is not required
+to populate the salt (although it is done by most servers) so do
+not warn on mount.
+
+We retain the checks (warn) that the preauth context is the minimum
+size and that the salt does not exceed DataLength of the SMB response.
+Although we use the defaults in the case that the preauth context
+response is invalid, these checks may be useful in the future
+as servers add support for additional mechanisms.
+
+CC: Stable <stable@vger.kernel.org>
+Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
+Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2pdu.c |    7 +++++--
+ fs/cifs/smb2pdu.h |   14 +++++++++++---
+ 2 files changed, 16 insertions(+), 5 deletions(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -427,8 +427,8 @@ build_preauth_ctxt(struct smb2_preauth_n
+       pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
+       pneg_ctxt->DataLength = cpu_to_le16(38);
+       pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
+-      pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
+-      get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
++      pneg_ctxt->SaltLength = cpu_to_le16(SMB311_LINUX_CLIENT_SALT_SIZE);
++      get_random_bytes(pneg_ctxt->Salt, SMB311_LINUX_CLIENT_SALT_SIZE);
+       pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512;
+ }
+@@ -566,6 +566,9 @@ static void decode_preauth_context(struc
+       if (len < MIN_PREAUTH_CTXT_DATA_LEN) {
+               pr_warn_once("server sent bad preauth context\n");
+               return;
++      } else if (len < MIN_PREAUTH_CTXT_DATA_LEN + le16_to_cpu(ctxt->SaltLength)) {
++              pr_warn_once("server sent invalid SaltLength\n");
++              return;
+       }
+       if (le16_to_cpu(ctxt->HashAlgorithmCount) != 1)
+               pr_warn_once("Invalid SMB3 hash algorithm count\n");
+--- a/fs/cifs/smb2pdu.h
++++ b/fs/cifs/smb2pdu.h
+@@ -333,12 +333,20 @@ struct smb2_neg_context {
+       /* Followed by array of data */
+ } __packed;
+-#define SMB311_SALT_SIZE                      32
++#define SMB311_LINUX_CLIENT_SALT_SIZE                 32
+ /* Hash Algorithm Types */
+ #define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001)
+ #define SMB2_PREAUTH_HASH_SIZE 64
+-#define MIN_PREAUTH_CTXT_DATA_LEN     (SMB311_SALT_SIZE + 6)
++/*
++ * SaltLength that the server send can be zero, so the only three required
++ * fields (all __le16) end up six bytes total, so the minimum context data len
++ * in the response is six bytes which accounts for
++ *
++ *      HashAlgorithmCount, SaltLength, and 1 HashAlgorithm.
++ */
++#define MIN_PREAUTH_CTXT_DATA_LEN 6
++
+ struct smb2_preauth_neg_context {
+       __le16  ContextType; /* 1 */
+       __le16  DataLength;
+@@ -346,7 +354,7 @@ struct smb2_preauth_neg_context {
+       __le16  HashAlgorithmCount; /* 1 */
+       __le16  SaltLength;
+       __le16  HashAlgorithms; /* HashAlgorithms[0] since only one defined */
+-      __u8    Salt[SMB311_SALT_SIZE];
++      __u8    Salt[SMB311_LINUX_CLIENT_SALT_SIZE];
+ } __packed;
+ /* Encryption Algorithms Ciphers */
diff --git a/queue-5.10/smb3.1.1-remove-confusing-mount-warning-when-no-spnego-info-on-negprot-rsp.patch b/queue-5.10/smb3.1.1-remove-confusing-mount-warning-when-no-spnego-info-on-negprot-rsp.patch
new file mode 100644 (file)
index 0000000..8652dca
--- /dev/null
@@ -0,0 +1,58 @@
+From bc7c4129d4cdc56d1b5477c1714246f27df914dd Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Wed, 9 Dec 2020 01:12:35 -0600
+Subject: SMB3.1.1: remove confusing mount warning when no SPNEGO info on negprot rsp
+
+From: Steve French <stfrench@microsoft.com>
+
+commit bc7c4129d4cdc56d1b5477c1714246f27df914dd upstream.
+
+Azure does not send an SPNEGO blob in the negotiate protocol response,
+so we shouldn't assume that it is there when validating the location
+of the first negotiate context.  This avoids the potential confusing
+mount warning:
+
+   CIFS: Invalid negotiate context offset
+
+CC: Stable <stable@vger.kernel.org>
+Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2misc.c |   16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/fs/cifs/smb2misc.c
++++ b/fs/cifs/smb2misc.c
+@@ -94,6 +94,8 @@ static const __le16 smb2_rsp_struct_size
+       /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
+ };
++#define SMB311_NEGPROT_BASE_SIZE (sizeof(struct smb2_sync_hdr) + sizeof(struct smb2_negotiate_rsp))
++
+ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
+                             __u32 non_ctxlen)
+ {
+@@ -109,11 +111,17 @@ static __u32 get_neg_ctxt_len(struct smb
+       /* Make sure that negotiate contexts start after gss security blob */
+       nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset);
+-      if (nc_offset < non_ctxlen) {
+-              pr_warn_once("Invalid negotiate context offset\n");
++      if (nc_offset + 1 < non_ctxlen) {
++              pr_warn_once("Invalid negotiate context offset %d\n", nc_offset);
+               return 0;
+-      }
+-      size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
++      } else if (nc_offset + 1 == non_ctxlen) {
++              cifs_dbg(FYI, "no SPNEGO security blob in negprot rsp\n");
++              size_of_pad_before_neg_ctxts = 0;
++      } else if (non_ctxlen == SMB311_NEGPROT_BASE_SIZE)
++              /* has padding, but no SPNEGO blob */
++              size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen + 1;
++      else
++              size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
+       /* Verify that at least minimal negotiate contexts fit within frame */
+       if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) {
diff --git a/queue-5.10/ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch b/queue-5.10/ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch
new file mode 100644 (file)
index 0000000..18149ce
--- /dev/null
@@ -0,0 +1,69 @@
+From 20f1431160c6b590cdc269a846fc5a448abf5b98 Mon Sep 17 00:00:00 2001
+From: Richard Weinberger <richard@nod.at>
+Date: Mon, 16 Nov 2020 22:05:30 +0100
+Subject: ubifs: wbuf: Don't leak kernel memory to flash
+
+From: Richard Weinberger <richard@nod.at>
+
+commit 20f1431160c6b590cdc269a846fc5a448abf5b98 upstream.
+
+Write buffers use a kmalloc()'ed buffer, they can leak
+up to seven bytes of kernel memory to flash if writes are not
+aligned.
+So use ubifs_pad() to fill these gaps with padding bytes.
+This was never a problem while scanning because the scanner logic
+manually aligns node lengths and skips over these gaps.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 1e51764a3c2ac05a2 ("UBIFS: add new flash file system")
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ubifs/io.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/fs/ubifs/io.c
++++ b/fs/ubifs/io.c
+@@ -319,7 +319,7 @@ void ubifs_pad(const struct ubifs_info *
+ {
+       uint32_t crc;
+-      ubifs_assert(c, pad >= 0 && !(pad & 7));
++      ubifs_assert(c, pad >= 0);
+       if (pad >= UBIFS_PAD_NODE_SZ) {
+               struct ubifs_ch *ch = buf;
+@@ -764,6 +764,10 @@ int ubifs_wbuf_write_nolock(struct ubifs
+                * write-buffer.
+                */
+               memcpy(wbuf->buf + wbuf->used, buf, len);
++              if (aligned_len > len) {
++                      ubifs_assert(c, aligned_len - len < 8);
++                      ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
++              }
+               if (aligned_len == wbuf->avail) {
+                       dbg_io("flush jhead %s wbuf to LEB %d:%d",
+@@ -856,13 +860,18 @@ int ubifs_wbuf_write_nolock(struct ubifs
+       }
+       spin_lock(&wbuf->lock);
+-      if (aligned_len)
++      if (aligned_len) {
+               /*
+                * And now we have what's left and what does not take whole
+                * max. write unit, so write it to the write-buffer and we are
+                * done.
+                */
+               memcpy(wbuf->buf, buf + written, len);
++              if (aligned_len > len) {
++                      ubifs_assert(c, aligned_len - len < 8);
++                      ubifs_pad(c, wbuf->buf + len, aligned_len - len);
++              }
++      }
+       if (c->leb_size - wbuf->offs >= c->max_write_size)
+               wbuf->size = c->max_write_size;
diff --git a/queue-5.10/um-fix-time-travel-mode.patch b/queue-5.10/um-fix-time-travel-mode.patch
new file mode 100644 (file)
index 0000000..0dc01ff
--- /dev/null
@@ -0,0 +1,37 @@
+From ff9632d2a66512436d616ef4c380a0e73f748db1 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Fri, 20 Nov 2020 21:08:51 +0100
+Subject: um: Fix time-travel mode
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit ff9632d2a66512436d616ef4c380a0e73f748db1 upstream.
+
+Since the time-travel rework, basic time-travel mode hasn't worked
+properly, but there's no longer a need for this WARN_ON() so just
+remove it and thereby fix things.
+
+Cc: stable@vger.kernel.org
+Fixes: 4b786e24ca80 ("um: time-travel: Rewrite as an event scheduler")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/um/kernel/time.c |    5 -----
+ 1 file changed, 5 deletions(-)
+
+--- a/arch/um/kernel/time.c
++++ b/arch/um/kernel/time.c
+@@ -260,11 +260,6 @@ static void __time_travel_add_event(stru
+       struct time_travel_event *tmp;
+       bool inserted = false;
+-      if (WARN(time_travel_mode == TT_MODE_BASIC &&
+-               e != &time_travel_timer_event,
+-               "only timer events can be handled in basic mode"))
+-              return;
+-
+       if (e->pending)
+               return;
diff --git a/queue-5.10/um-remove-use-of-asprinf-in-umid.c.patch b/queue-5.10/um-remove-use-of-asprinf-in-umid.c.patch
new file mode 100644 (file)
index 0000000..90750be
--- /dev/null
@@ -0,0 +1,51 @@
+From 97be7ceaf7fea68104824b6aa874cff235333ac1 Mon Sep 17 00:00:00 2001
+From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
+Date: Fri, 13 Nov 2020 10:26:17 +0000
+Subject: um: Remove use of asprinf in umid.c
+
+From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
+
+commit 97be7ceaf7fea68104824b6aa874cff235333ac1 upstream.
+
+asprintf is not compatible with the existing uml memory allocation
+mechanism. Its use on the "user" side of UML results in a corrupt slab
+state.
+
+Fixes: 0d4e5ac7e780 ("um: remove uses of variable length arrays")
+Cc: stable@vger.kernel.org
+Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/um/os-Linux/umid.c |   17 +++++------------
+ 1 file changed, 5 insertions(+), 12 deletions(-)
+
+--- a/arch/um/os-Linux/umid.c
++++ b/arch/um/os-Linux/umid.c
+@@ -137,20 +137,13 @@ static inline int is_umdir_used(char *di
+ {
+       char pid[sizeof("nnnnnnnnn")], *end, *file;
+       int dead, fd, p, n, err;
+-      size_t filelen;
++      size_t filelen = strlen(dir) + sizeof("/pid") + 1;
+-      err = asprintf(&file, "%s/pid", dir);
+-      if (err < 0)
+-              return 0;
++      file = malloc(filelen);
++      if (!file)
++              return -ENOMEM;
+-      filelen = strlen(file);
+-
+-      n = snprintf(file, filelen, "%s/pid", dir);
+-      if (n >= filelen) {
+-              printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n");
+-              err = -E2BIG;
+-              goto out;
+-      }
++      snprintf(file, filelen, "%s/pid", dir);
+       dead = 0;
+       fd = open(file, O_RDONLY);
diff --git a/queue-5.10/xprtrdma-fix-xdrbuf_sparse_pages-support.patch b/queue-5.10/xprtrdma-fix-xdrbuf_sparse_pages-support.patch
new file mode 100644 (file)
index 0000000..616e052
--- /dev/null
@@ -0,0 +1,95 @@
+From 15261b9126cd5bb2ad8521da49d8f5c042d904c7 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Tue, 8 Dec 2020 18:29:02 -0500
+Subject: xprtrdma: Fix XDRBUF_SPARSE_PAGES support
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 15261b9126cd5bb2ad8521da49d8f5c042d904c7 upstream.
+
+Olga K. observed that rpcrdma_marshal_req() allocates sparse pages
+only when it has determined that a Reply chunk is necessary. There
+are plenty of cases where no Reply chunk is needed, but the
+XDRBUF_SPARSE_PAGES flag is set. The result would be a crash in
+rpcrdma_inline_fixup() when it tries to copy parts of the received
+Reply into a missing page.
+
+To avoid crashing, handle sparse page allocation up front.
+
+Until XATTR support was added, this issue did not appear often
+because the only SPARSE_PAGES consumer always expected a reply large
+enough to always require a Reply chunk.
+
+Reported-by: Olga Kornievskaia <kolga@netapp.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtrdma/rpc_rdma.c |   40 +++++++++++++++++++++++++++++++---------
+ 1 file changed, 31 insertions(+), 9 deletions(-)
+
+--- a/net/sunrpc/xprtrdma/rpc_rdma.c
++++ b/net/sunrpc/xprtrdma/rpc_rdma.c
+@@ -179,6 +179,31 @@ rpcrdma_nonpayload_inline(const struct r
+               r_xprt->rx_ep->re_max_inline_recv;
+ }
++/* ACL likes to be lazy in allocating pages. For TCP, these
++ * pages can be allocated during receive processing. Not true
++ * for RDMA, which must always provision receive buffers
++ * up front.
++ */
++static noinline int
++rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
++{
++      struct page **ppages;
++      int len;
++
++      len = buf->page_len;
++      ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
++      while (len > 0) {
++              if (!*ppages)
++                      *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
++              if (!*ppages)
++                      return -ENOBUFS;
++              ppages++;
++              len -= PAGE_SIZE;
++      }
++
++      return 0;
++}
++
+ /* Split @vec on page boundaries into SGEs. FMR registers pages, not
+  * a byte range. Other modes coalesce these SGEs into a single MR
+  * when they can.
+@@ -233,15 +258,6 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt
+       ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
+       page_base = offset_in_page(xdrbuf->page_base);
+       while (len) {
+-              /* ACL likes to be lazy in allocating pages - ACLs
+-               * are small by default but can get huge.
+-               */
+-              if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
+-                      if (!*ppages)
+-                              *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
+-                      if (!*ppages)
+-                              return -ENOBUFS;
+-              }
+               seg->mr_page = *ppages;
+               seg->mr_offset = (char *)page_base;
+               seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
+@@ -867,6 +883,12 @@ rpcrdma_marshal_req(struct rpcrdma_xprt
+       __be32 *p;
+       int ret;
++      if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
++              ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
++              if (ret)
++                      return ret;
++      }
++
+       rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
+       xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
+                       rqst);