git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.7-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jun 2020 11:27:06 +0000 (13:27 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jun 2020 11:27:06 +0000 (13:27 +0200)
added patches:
arm64-dts-imx8mm-evk-correct-ldo1-ldo2-voltage-range.patch
arm64-dts-imx8mn-ddr4-evk-correct-ldo1-ldo2-voltage-range.patch
arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch
mm-compaction-make-capture-control-handling-safe-wrt-interrupts.patch
mm-memcontrol-handle-div0-crash-race-condition-in-memory.low.patch
mm-memcontrol.c-add-missed-css_put.patch
mm-memory_hotplug.c-fix-false-softlockup-during-pfn-range-removal.patch
mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab.patch
mm-slab-use-memzero_explicit-in-kzfree.patch
ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch
ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch
ocfs2-fix-value-of-ocfs2_invalid_slot.patch
ocfs2-load-global_inode_alloc.patch

14 files changed:
queue-5.7/arm64-dts-imx8mm-evk-correct-ldo1-ldo2-voltage-range.patch [new file with mode: 0644]
queue-5.7/arm64-dts-imx8mn-ddr4-evk-correct-ldo1-ldo2-voltage-range.patch [new file with mode: 0644]
queue-5.7/arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch [new file with mode: 0644]
queue-5.7/mm-compaction-make-capture-control-handling-safe-wrt-interrupts.patch [new file with mode: 0644]
queue-5.7/mm-memcontrol-handle-div0-crash-race-condition-in-memory.low.patch [new file with mode: 0644]
queue-5.7/mm-memcontrol.c-add-missed-css_put.patch [new file with mode: 0644]
queue-5.7/mm-memory_hotplug.c-fix-false-softlockup-during-pfn-range-removal.patch [new file with mode: 0644]
queue-5.7/mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab.patch [new file with mode: 0644]
queue-5.7/mm-slab-use-memzero_explicit-in-kzfree.patch [new file with mode: 0644]
queue-5.7/ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch [new file with mode: 0644]
queue-5.7/ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch [new file with mode: 0644]
queue-5.7/ocfs2-fix-value-of-ocfs2_invalid_slot.patch [new file with mode: 0644]
queue-5.7/ocfs2-load-global_inode_alloc.patch [new file with mode: 0644]
queue-5.7/series

diff --git a/queue-5.7/arm64-dts-imx8mm-evk-correct-ldo1-ldo2-voltage-range.patch b/queue-5.7/arm64-dts-imx8mm-evk-correct-ldo1-ldo2-voltage-range.patch
new file mode 100644 (file)
index 0000000..3a8e25c
--- /dev/null
@@ -0,0 +1,51 @@
+From 4fd6b5735c03c0955d93960d31f17d7144f5578f Mon Sep 17 00:00:00 2001
+From: Robin Gong <yibin.gong@nxp.com>
+Date: Fri, 22 May 2020 18:44:50 +0800
+Subject: arm64: dts: imx8mm-evk: correct ldo1/ldo2 voltage range
+
+From: Robin Gong <yibin.gong@nxp.com>
+
+commit 4fd6b5735c03c0955d93960d31f17d7144f5578f upstream.
+
+Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group
+(1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups
+have been supported at bd718x7-regulator driver, hence, just correct the
+voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too.
+Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mm
+datasheet as the below warning log in kernel:
+
+[    0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV
+[    0.999196] LDO2: Bringing 800000uV into 900000-900000uV
+
+Fixes: 78cc25fa265d ("arm64: dts: imx8mm-evk: Add BD71847 PMIC")
+Cc: stable@vger.kernel.org
+Signed-off-by: Robin Gong <yibin.gong@nxp.com>
+Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
+Reviewed-by: Fabio Estevam <festevam@gmail.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/boot/dts/freescale/imx8mm-evk.dts |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
+@@ -196,7 +196,7 @@
+                       ldo1_reg: LDO1 {
+                               regulator-name = "LDO1";
+-                              regulator-min-microvolt = <3000000>;
++                              regulator-min-microvolt = <1600000>;
+                               regulator-max-microvolt = <3300000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+@@ -204,7 +204,7 @@
+                       ldo2_reg: LDO2 {
+                               regulator-name = "LDO2";
+-                              regulator-min-microvolt = <900000>;
++                              regulator-min-microvolt = <800000>;
+                               regulator-max-microvolt = <900000>;
+                               regulator-boot-on;
+                               regulator-always-on;
diff --git a/queue-5.7/arm64-dts-imx8mn-ddr4-evk-correct-ldo1-ldo2-voltage-range.patch b/queue-5.7/arm64-dts-imx8mn-ddr4-evk-correct-ldo1-ldo2-voltage-range.patch
new file mode 100644 (file)
index 0000000..84cad24
--- /dev/null
@@ -0,0 +1,51 @@
+From cfb12c8952f617df58d73d24161e539a035d82b0 Mon Sep 17 00:00:00 2001
+From: Robin Gong <yibin.gong@nxp.com>
+Date: Fri, 22 May 2020 18:44:51 +0800
+Subject: arm64: dts: imx8mn-ddr4-evk: correct ldo1/ldo2 voltage range
+
+From: Robin Gong <yibin.gong@nxp.com>
+
+commit cfb12c8952f617df58d73d24161e539a035d82b0 upstream.
+
+Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group
+(1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups
+have been supported at bd718x7-regulator driver, hence, just correct the
+voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too.
+Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mn
+datasheet as the below warning log in kernel:
+
+[    0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV
+[    0.999196] LDO2: Bringing 800000uV into 900000-900000uV
+
+Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Robin Gong <yibin.gong@nxp.com>
+Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
+Reviewed-by: Fabio Estevam <festevam@gmail.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
+@@ -101,7 +101,7 @@
+                       ldo1_reg: LDO1 {
+                               regulator-name = "LDO1";
+-                              regulator-min-microvolt = <3000000>;
++                              regulator-min-microvolt = <1600000>;
+                               regulator-max-microvolt = <3300000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+@@ -109,7 +109,7 @@
+                       ldo2_reg: LDO2 {
+                               regulator-name = "LDO2";
+-                              regulator-min-microvolt = <900000>;
++                              regulator-min-microvolt = <800000>;
+                               regulator-max-microvolt = <900000>;
+                               regulator-boot-on;
+                               regulator-always-on;
diff --git a/queue-5.7/arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch b/queue-5.7/arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch
new file mode 100644 (file)
index 0000000..4faf00e
--- /dev/null
@@ -0,0 +1,67 @@
+From 8dfe804a4031ca6ba3a3efb2048534249b64f3a5 Mon Sep 17 00:00:00 2001
+From: Jiping Ma <jiping.ma2@windriver.com>
+Date: Mon, 11 May 2020 10:52:07 +0800
+Subject: arm64: perf: Report the PC value in REGS_ABI_32 mode
+
+From: Jiping Ma <jiping.ma2@windriver.com>
+
+commit 8dfe804a4031ca6ba3a3efb2048534249b64f3a5 upstream.
+
+A 32-bit perf querying the registers of a compat task using REGS_ABI_32
+will receive zeroes from w15, when it expects to find the PC.
+
+Return the PC value for dwarf register 15 when returning register
+values for a compat task to perf.
+
+Cc: <stable@vger.kernel.org>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Jiping Ma <jiping.ma2@windriver.com>
+Link: https://lore.kernel.org/r/1589165527-188401-1-git-send-email-jiping.ma2@windriver.com
+[will: Shuffled code and added a comment]
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/perf_regs.c |   25 ++++++++++++++++++++++---
+ 1 file changed, 22 insertions(+), 3 deletions(-)
+
+--- a/arch/arm64/kernel/perf_regs.c
++++ b/arch/arm64/kernel/perf_regs.c
+@@ -15,15 +15,34 @@ u64 perf_reg_value(struct pt_regs *regs,
+               return 0;
+       /*
+-       * Compat (i.e. 32 bit) mode:
+-       * - PC has been set in the pt_regs struct in kernel_entry,
+-       * - Handle SP and LR here.
++       * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but
++       * we're stuck with it for ABI compatibility reasons.
++       *
++       * For a 32-bit consumer inspecting a 32-bit task, then it will look at
++       * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h).
++       * These correspond directly to a prefix of the registers saved in our
++       * 'struct pt_regs', with the exception of the PC, so we copy that down
++       * (x15 corresponds to SP_hyp in the architecture).
++       *
++       * So far, so good.
++       *
++       * The oddity arises when a 64-bit consumer looks at a 32-bit task and
++       * asks for registers beyond PERF_REG_ARM_MAX. In this case, we return
++       * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and
++       * PC registers would normally live. The initial idea was to allow a
++       * 64-bit unwinder to unwind a 32-bit task and, although it's not clear
++       * how well that works in practice, somebody might be relying on it.
++       *
++       * At the time we make a sample, we don't know whether the consumer is
++       * 32-bit or 64-bit, so we have to cater for both possibilities.
+        */
+       if (compat_user_mode(regs)) {
+               if ((u32)idx == PERF_REG_ARM64_SP)
+                       return regs->compat_sp;
+               if ((u32)idx == PERF_REG_ARM64_LR)
+                       return regs->compat_lr;
++              if (idx == 15)
++                      return regs->pc;
+       }
+       if ((u32)idx == PERF_REG_ARM64_SP)
diff --git a/queue-5.7/mm-compaction-make-capture-control-handling-safe-wrt-interrupts.patch b/queue-5.7/mm-compaction-make-capture-control-handling-safe-wrt-interrupts.patch
new file mode 100644 (file)
index 0000000..b28ba1d
--- /dev/null
@@ -0,0 +1,93 @@
+From b9e20f0da1f5c9c68689450a8cb436c9486434c8 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 25 Jun 2020 20:29:24 -0700
+Subject: mm, compaction: make capture control handling safe wrt interrupts
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit b9e20f0da1f5c9c68689450a8cb436c9486434c8 upstream.
+
+Hugh reports:
+
+ "While stressing compaction, one run oopsed on NULL capc->cc in
+  __free_one_page()'s task_capc(zone): compact_zone_order() had been
+  interrupted, and a page was being freed in the return from interrupt.
+
+  Though you would not expect it from the source, both gccs I was using
+  (4.8.1 and 7.5.0) had chosen to compile compact_zone_order() with the
+  ".cc = &cc" implemented by mov %rbx,-0xb0(%rbp) immediately before
+  callq compact_zone - long after the "current->capture_control =
+  &capc". An interrupt in between those finds capc->cc NULL (zeroed by
+  an earlier rep stos).
+
+  This could presumably be fixed by a barrier() before setting
+  current->capture_control in compact_zone_order(); but would also need
+  more care on return from compact_zone(), in order not to risk leaking
+  a page captured by interrupt just before capture_control is reset.
+
+  Maybe that is the preferable fix, but I felt safer for task_capc() to
+  exclude the rather surprising possibility of capture at interrupt
+  time"
+
+I have checked that gcc10 also behaves the same.
+
+The advantage of fix in compact_zone_order() is that we don't add
+another test in the page freeing hot path, and that it might prevent
+future problems if we stop exposing pointers to uninitialized structures
+in current task.
+
+So this patch implements the suggestion for compact_zone_order() with
+barrier() (and WRITE_ONCE() to prevent store tearing) for setting
+current->capture_control, and prevents page leaking with
+WRITE_ONCE/READ_ONCE in the proper order.
+
+Link: http://lkml.kernel.org/r/20200616082649.27173-1-vbabka@suse.cz
+Fixes: 5e1f0f098b46 ("mm, compaction: capture a page under direct compaction")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Hugh Dickins <hughd@google.com>
+Suggested-by: Hugh Dickins <hughd@google.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Cc: Alex Shi <alex.shi@linux.alibaba.com>
+Cc: Li Wang <liwang@redhat.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: <stable@vger.kernel.org>   [5.1+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |   17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -2318,15 +2318,26 @@ static enum compact_result compact_zone_
+               .page = NULL,
+       };
+-      current->capture_control = &capc;
++      /*
++       * Make sure the structs are really initialized before we expose the
++       * capture control, in case we are interrupted and the interrupt handler
++       * frees a page.
++       */
++      barrier();
++      WRITE_ONCE(current->capture_control, &capc);
+       ret = compact_zone(&cc, &capc);
+       VM_BUG_ON(!list_empty(&cc.freepages));
+       VM_BUG_ON(!list_empty(&cc.migratepages));
+-      *capture = capc.page;
+-      current->capture_control = NULL;
++      /*
++       * Make sure we hide capture control first before we read the captured
++       * page pointer, otherwise an interrupt could free and capture a page
++       * and we would leak it.
++       */
++      WRITE_ONCE(current->capture_control, NULL);
++      *capture = READ_ONCE(capc.page);
+       return ret;
+ }
diff --git a/queue-5.7/mm-memcontrol-handle-div0-crash-race-condition-in-memory.low.patch b/queue-5.7/mm-memcontrol-handle-div0-crash-race-condition-in-memory.low.patch
new file mode 100644 (file)
index 0000000..d9506d6
--- /dev/null
@@ -0,0 +1,78 @@
+From cd324edce598ebddde44162a2aa01321c1261b9e Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 25 Jun 2020 20:30:16 -0700
+Subject: mm: memcontrol: handle div0 crash race condition in memory.low
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit cd324edce598ebddde44162a2aa01321c1261b9e upstream.
+
+Tejun reports seeing rare div0 crashes in memory.low stress testing:
+
+  RIP: 0010:mem_cgroup_calculate_protection+0xed/0x150
+  Code: 0f 46 d1 4c 39 d8 72 57 f6 05 16 d6 42 01 40 74 1f 4c 39 d8 76 1a 4c 39 d1 76 15 4c 29 d1 4c 29 d8 4d 29 d9 31 d2 48 0f af c1 <49> f7 f1 49 01 c2 4c 89 96 38 01 00 00 5d c3 48 0f af c7 31 d2 49
+  RSP: 0018:ffffa14e01d6fcd0 EFLAGS: 00010246
+  RAX: 000000000243e384 RBX: 0000000000000000 RCX: 0000000000008f4b
+  RDX: 0000000000000000 RSI: ffff8b89bee84000 RDI: 0000000000000000
+  RBP: ffffa14e01d6fcd0 R08: ffff8b89ca7d40f8 R09: 0000000000000000
+  R10: 0000000000000000 R11: 00000000006422f7 R12: 0000000000000000
+  R13: ffff8b89d9617000 R14: ffff8b89bee84000 R15: ffffa14e01d6fdb8
+  FS:  0000000000000000(0000) GS:ffff8b8a1f1c0000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00007f93b1fc175b CR3: 000000016100a000 CR4: 0000000000340ea0
+  Call Trace:
+    shrink_node+0x1e5/0x6c0
+    balance_pgdat+0x32d/0x5f0
+    kswapd+0x1d7/0x3d0
+    kthread+0x11c/0x160
+    ret_from_fork+0x1f/0x30
+
+This happens when parent_usage == siblings_protected.
+
+We check that usage is bigger than protected, which should imply
+parent_usage being bigger than siblings_protected.  However, we don't
+read (or even update) these values atomically, and they can be out of
+sync as the memory state changes under us.  A bit of fluctuation around
+the target protection isn't a big deal, but we need to handle the div0
+case.
+
+Check the parent state explicitly to make sure we have a reasonable
+positive value for the divisor.
+
+Link: http://lkml.kernel.org/r/20200615140658.601684-1-hannes@cmpxchg.org
+Fixes: 8a931f801340 ("mm: memcontrol: recursive memory.low protection")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Tejun Heo <tj@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Chris Down <chris@chrisdown.name>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -6349,11 +6349,16 @@ static unsigned long effective_protectio
+        * We're using unprotected memory for the weight so that if
+        * some cgroups DO claim explicit protection, we don't protect
+        * the same bytes twice.
++       *
++       * Check both usage and parent_usage against the respective
++       * protected values. One should imply the other, but they
++       * aren't read atomically - make sure the division is sane.
+        */
+       if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
+               return ep;
+-
+-      if (parent_effective > siblings_protected && usage > protected) {
++      if (parent_effective > siblings_protected &&
++          parent_usage > siblings_protected &&
++          usage > protected) {
+               unsigned long unclaimed;
+               unclaimed = parent_effective - siblings_protected;
diff --git a/queue-5.7/mm-memcontrol.c-add-missed-css_put.patch b/queue-5.7/mm-memcontrol.c-add-missed-css_put.patch
new file mode 100644 (file)
index 0000000..90922d5
--- /dev/null
@@ -0,0 +1,42 @@
+From 3a98990ae2150277ed34d3b248c60e68bf2244b2 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 25 Jun 2020 20:30:19 -0700
+Subject: mm/memcontrol.c: add missed css_put()
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 3a98990ae2150277ed34d3b248c60e68bf2244b2 upstream.
+
+We should put the css reference when memory allocation failed.
+
+Link: http://lkml.kernel.org/r/20200614122653.98829-1-songmuchun@bytedance.com
+Fixes: f0a3a24b532d ("mm: memcg/slab: rework non-root kmem_cache lifecycle management")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Qian Cai <cai@lca.pw>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2790,8 +2790,10 @@ static void memcg_schedule_kmem_cache_cr
+               return;
+       cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
+-      if (!cw)
++      if (!cw) {
++              css_put(&memcg->css);
+               return;
++      }
+       cw->memcg = memcg;
+       cw->cachep = cachep;
diff --git a/queue-5.7/mm-memory_hotplug.c-fix-false-softlockup-during-pfn-range-removal.patch b/queue-5.7/mm-memory_hotplug.c-fix-false-softlockup-during-pfn-range-removal.patch
new file mode 100644 (file)
index 0000000..b2fc961
--- /dev/null
@@ -0,0 +1,103 @@
+From b7e3debdd0408c0dca5d4750371afa5003f792dc Mon Sep 17 00:00:00 2001
+From: Ben Widawsky <ben.widawsky@intel.com>
+Date: Thu, 25 Jun 2020 20:30:51 -0700
+Subject: mm/memory_hotplug.c: fix false softlockup during pfn range removal
+
+From: Ben Widawsky <ben.widawsky@intel.com>
+
+commit b7e3debdd0408c0dca5d4750371afa5003f792dc upstream.
+
+When working with very large nodes, poisoning the struct pages (for which
+there will be very many) can take a very long time.  If the system is
+using voluntary preemptions, the software watchdog will not be able to
+detect forward progress.  This patch addresses this issue by offering to
+give up time like __remove_pages() does.  This behavior was introduced in
+v5.6 with: commit d33695b16a9f ("mm/memory_hotplug: poison memmap in
+remove_pfn_range_from_zone()")
+
+Alternately, init_page_poison could do this cond_resched(), but it seems
+to me that the caller of init_page_poison() is what actually knows whether
+or not it should relax its own priority.
+
+Based on Dan's notes, I think this is perfectly safe: commit f931ab479dd2
+("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}")
+
+Aside from fixing the lockup, it is also a friendlier thing to do on lower
+core systems that might wipe out large chunks of hotplug memory (probably
+not a very common case).
+
+Fixes this kind of splat:
+
+  watchdog: BUG: soft lockup - CPU#46 stuck for 22s! [daxctl:9922]
+  irq event stamp: 138450
+  hardirqs last  enabled at (138449): [<ffffffffa1001f26>] trace_hardirqs_on_thunk+0x1a/0x1c
+  hardirqs last disabled at (138450): [<ffffffffa1001f42>] trace_hardirqs_off_thunk+0x1a/0x1c
+  softirqs last  enabled at (138448): [<ffffffffa1e00347>] __do_softirq+0x347/0x456
+  softirqs last disabled at (138443): [<ffffffffa10c416d>] irq_exit+0x7d/0xb0
+  CPU: 46 PID: 9922 Comm: daxctl Not tainted 5.7.0-BEN-14238-g373c6049b336 #30
+  Hardware name: Intel Corporation PURLEY/PURLEY, BIOS PLYXCRB1.86B.0578.D07.1902280810 02/28/2019
+  RIP: 0010:memset_erms+0x9/0x10
+  Code: c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 f3 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 <f3> aa 4c 89 c8 c3 90 49 89 fa 40 0f b6 ce 48 b8 01 01 01 01 01 01
+  Call Trace:
+   remove_pfn_range_from_zone+0x3a/0x380
+   memunmap_pages+0x17f/0x280
+   release_nodes+0x22a/0x260
+   __device_release_driver+0x172/0x220
+   device_driver_detach+0x3e/0xa0
+   unbind_store+0x113/0x130
+   kernfs_fop_write+0xdc/0x1c0
+   vfs_write+0xde/0x1d0
+   ksys_write+0x58/0xd0
+   do_syscall_64+0x5a/0x120
+   entry_SYSCALL_64_after_hwframe+0x49/0xb3
+  Built 2 zonelists, mobility grouping on.  Total pages: 49050381
+  Policy zone: Normal
+  Built 3 zonelists, mobility grouping on.  Total pages: 49312525
+  Policy zone: Normal
+
+David said: "It really only is an issue for devmem.  Ordinary
+hotplugged system memory is not affected (onlined/offlined in memory
+block granularity)."
+
+Link: http://lkml.kernel.org/r/20200619231213.1160351-1-ben.widawsky@intel.com
+Fixes: commit d33695b16a9f ("mm/memory_hotplug: poison memmap in remove_pfn_range_from_zone()")
+Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
+Reported-by: "Scargall, Steve" <steve.scargall@intel.com>
+Reported-by: Ben Widawsky <ben.widawsky@intel.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory_hotplug.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -468,11 +468,20 @@ void __ref remove_pfn_range_from_zone(st
+                                     unsigned long start_pfn,
+                                     unsigned long nr_pages)
+ {
++      const unsigned long end_pfn = start_pfn + nr_pages;
+       struct pglist_data *pgdat = zone->zone_pgdat;
+-      unsigned long flags;
++      unsigned long pfn, cur_nr_pages, flags;
+       /* Poison struct pages because they are now uninitialized again. */
+-      page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
++      for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
++              cond_resched();
++
++              /* Select all remaining pages up to the next section boundary */
++              cur_nr_pages =
++                      min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
++              page_init_poison(pfn_to_page(pfn),
++                               sizeof(struct page) * cur_nr_pages);
++      }
+ #ifdef CONFIG_ZONE_DEVICE
+       /*
diff --git a/queue-5.7/mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab.patch b/queue-5.7/mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab.patch
new file mode 100644 (file)
index 0000000..c508c17
--- /dev/null
@@ -0,0 +1,77 @@
+From d7670879c5c4aa443d518fb234a9e5f30931efa3 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Thu, 25 Jun 2020 20:29:49 -0700
+Subject: mm, slab: fix sign conversion problem in memcg_uncharge_slab()
+
+From: Waiman Long <longman@redhat.com>
+
+commit d7670879c5c4aa443d518fb234a9e5f30931efa3 upstream.
+
+It was found that running the LTP test on a PowerPC system could produce
+erroneous values in /proc/meminfo, like:
+
+  MemTotal:       531915072 kB
+  MemFree:        507962176 kB
+  MemAvailable:   1100020596352 kB
+
+Using bisection, the problem is tracked down to commit 9c315e4d7d8c ("mm:
+memcg/slab: cache page number in memcg_(un)charge_slab()").
+
+In memcg_uncharge_slab() with a "int order" argument:
+
+  unsigned int nr_pages = 1 << order;
+    :
+  mod_lruvec_state(lruvec, cache_vmstat_idx(s), -nr_pages);
+
+The mod_lruvec_state() function will eventually call the
+__mod_zone_page_state() which accepts a long argument.  Depending on the
+compiler and how inlining is done, "-nr_pages" may be treated as a
+negative number or a very large positive number.  Apparently, it was
+treated as a large positive number in that PowerPC system leading to
+incorrect stat counts.  This problem hasn't been seen in x86-64 yet,
+perhaps the gcc compiler there has some slight difference in behavior.
+
+It is fixed by making nr_pages a signed value.  For consistency, a similar
+change is applied to memcg_charge_slab() as well.
+
+Link: http://lkml.kernel.org/r/20200620184719.10994-1-longman@redhat.com
+Fixes: 9c315e4d7d8c ("mm: memcg/slab: cache page number in memcg_(un)charge_slab()").
+Signed-off-by: Waiman Long <longman@redhat.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slab.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -348,7 +348,7 @@ static __always_inline int memcg_charge_
+                                            gfp_t gfp, int order,
+                                            struct kmem_cache *s)
+ {
+-      unsigned int nr_pages = 1 << order;
++      int nr_pages = 1 << order;
+       struct mem_cgroup *memcg;
+       struct lruvec *lruvec;
+       int ret;
+@@ -388,7 +388,7 @@ out:
+ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
+                                               struct kmem_cache *s)
+ {
+-      unsigned int nr_pages = 1 << order;
++      int nr_pages = 1 << order;
+       struct mem_cgroup *memcg;
+       struct lruvec *lruvec;
diff --git a/queue-5.7/mm-slab-use-memzero_explicit-in-kzfree.patch b/queue-5.7/mm-slab-use-memzero_explicit-in-kzfree.patch
new file mode 100644 (file)
index 0000000..312a234
--- /dev/null
@@ -0,0 +1,54 @@
+From 8982ae527fbef170ef298650c15d55a9ccd33973 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Thu, 25 Jun 2020 20:29:52 -0700
+Subject: mm/slab: use memzero_explicit() in kzfree()
+
+From: Waiman Long <longman@redhat.com>
+
+commit 8982ae527fbef170ef298650c15d55a9ccd33973 upstream.
+
+The kzfree() function is normally used to clear some sensitive
+information, like encryption keys, in the buffer before freeing it back to
+the pool.  Memset() is currently used for buffer clearing.  However
+unlikely, there is still a non-zero probability that the compiler may
+choose to optimize away the memory clearing especially if LTO is being
+used in the future.
+
+To make sure that this optimization will never happen,
+memzero_explicit(), which is introduced in v3.18, is now used in
+kzfree() to future-proof it.
+
+Link: http://lkml.kernel.org/r/20200616154311.12314-2-longman@redhat.com
+Fixes: 3ef0e5ba4673 ("slab: introduce kzfree()")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: David Howells <dhowells@redhat.com>
+Cc: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+Cc: James Morris <jmorris@namei.org>
+Cc: "Serge E. Hallyn" <serge@hallyn.com>
+Cc: Joe Perches <joe@perches.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Dan Carpenter <dan.carpenter@oracle.com>
+Cc: "Jason A . Donenfeld" <Jason@zx2c4.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slab_common.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -1726,7 +1726,7 @@ void kzfree(const void *p)
+       if (unlikely(ZERO_OR_NULL_PTR(mem)))
+               return;
+       ks = ksize(mem);
+-      memset(mem, 0, ks);
++      memzero_explicit(mem, ks);
+       kfree(mem);
+ }
+ EXPORT_SYMBOL(kzfree);
diff --git a/queue-5.7/ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch b/queue-5.7/ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch
new file mode 100644 (file)
index 0000000..be11bfb
--- /dev/null
@@ -0,0 +1,98 @@
+From 4cd9973f9ff69e37dd0ba2bd6e6423f8179c329a Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 25 Jun 2020 20:29:30 -0700
+Subject: ocfs2: avoid inode removal while nfsd is accessing it
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 4cd9973f9ff69e37dd0ba2bd6e6423f8179c329a upstream.
+
+Patch series "ocfs2: fix nfsd over ocfs2 issues", v2.
+
+This is a series of patches to fix issues on nfsd over ocfs2.  Patch 1
+avoids an inode being removed while nfsd is accessing it; patches 2 & 3
+fix a panic issue.
+
+This patch (of 4):
+
+When nfsd gets a file's dentry from a file handle, or the parent dentry of
+some dentry, a cluster lock is used to keep the inode from being removed
+by another node.  However, the inode could still be removed on the local
+node, so use a rw lock to prevent this.
+
+Link: http://lkml.kernel.org/r/20200616183829.87211-1-junxiao.bi@oracle.com
+Link: http://lkml.kernel.org/r/20200616183829.87211-2-junxiao.bi@oracle.com
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlmglue.c |   17 ++++++++++++++++-
+ fs/ocfs2/ocfs2.h   |    1 +
+ 2 files changed, 17 insertions(+), 1 deletion(-)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init
+                                  &ocfs2_nfs_sync_lops, osb);
+ }
++static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
++{
++      ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
++      init_rwsem(&osb->nfs_sync_rwlock);
++}
++
+ void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
+ {
+       struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
+@@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_sup
+       if (ocfs2_is_hard_readonly(osb))
+               return -EROFS;
++      if (ex)
++              down_write(&osb->nfs_sync_rwlock);
++      else
++              down_read(&osb->nfs_sync_rwlock);
++
+       if (ocfs2_mount_local(osb))
+               return 0;
+@@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_
+       if (!ocfs2_mount_local(osb))
+               ocfs2_cluster_unlock(osb, lockres,
+                                    ex ? LKM_EXMODE : LKM_PRMODE);
++      if (ex)
++              up_write(&osb->nfs_sync_rwlock);
++      else
++              up_read(&osb->nfs_sync_rwlock);
+ }
+ int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
+@@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *o
+ local:
+       ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
+       ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
+-      ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
++      ocfs2_nfs_sync_lock_init(osb);
+       ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
+       osb->cconn = conn;
+--- a/fs/ocfs2/ocfs2.h
++++ b/fs/ocfs2/ocfs2.h
+@@ -394,6 +394,7 @@ struct ocfs2_super
+       struct ocfs2_lock_res osb_super_lockres;
+       struct ocfs2_lock_res osb_rename_lockres;
+       struct ocfs2_lock_res osb_nfs_sync_lockres;
++      struct rw_semaphore nfs_sync_rwlock;
+       struct ocfs2_lock_res osb_trim_fs_lockres;
+       struct mutex obs_trim_fs_mutex;
+       struct ocfs2_dlm_debug *osb_dlm_debug;
diff --git a/queue-5.7/ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch b/queue-5.7/ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch
new file mode 100644 (file)
index 0000000..90a073b
--- /dev/null
@@ -0,0 +1,90 @@
+From e5a15e17a78d58f933d17cafedfcf7486a29f5b4 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 25 Jun 2020 20:29:37 -0700
+Subject: ocfs2: fix panic on nfs server over ocfs2
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit e5a15e17a78d58f933d17cafedfcf7486a29f5b4 upstream.
+
+The following kernel panic was captured when running an nfs server over
+ocfs2.  At that time ocfs2_test_inode_bit() was checking whether the
+inode located at "blkno" 5 was valid.  That is the ocfs2 root inode; its
+"suballoc_slot" was OCFS2_INVALID_SLOT(65535) and it was allocated from
+//global_inode_alloc, but the code wrongly assumed that it came from a
+per-slot inode allocator, which caused an array overflow and triggered the
+kernel panic.
+
+  BUG: unable to handle kernel paging request at 0000000000001088
+  IP: [<ffffffff816f6898>] _raw_spin_lock+0x18/0xf0
+  PGD 1e06ba067 PUD 1e9e7d067 PMD 0
+  Oops: 0002 [#1] SMP
+  CPU: 6 PID: 24873 Comm: nfsd Not tainted 4.1.12-124.36.1.el6uek.x86_64 #2
+  Hardware name: Huawei CH121 V3/IT11SGCA1, BIOS 3.87 02/02/2018
+  RIP: _raw_spin_lock+0x18/0xf0
+  RSP: e02b:ffff88005ae97908  EFLAGS: 00010206
+  RAX: ffff88005ae98000 RBX: 0000000000001088 RCX: 0000000000000000
+  RDX: 0000000000020000 RSI: 0000000000000009 RDI: 0000000000001088
+  RBP: ffff88005ae97928 R08: 0000000000000000 R09: ffff880212878e00
+  R10: 0000000000007ff0 R11: 0000000000000000 R12: 0000000000001088
+  R13: ffff8800063c0aa8 R14: ffff8800650c27d0 R15: 000000000000ffff
+  FS:  0000000000000000(0000) GS:ffff880218180000(0000) knlGS:ffff880218180000
+  CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000001088 CR3: 00000002033d0000 CR4: 0000000000042660
+  Call Trace:
+    igrab+0x1e/0x60
+    ocfs2_get_system_file_inode+0x63/0x3a0 [ocfs2]
+    ocfs2_test_inode_bit+0x328/0xa00 [ocfs2]
+    ocfs2_get_parent+0xba/0x3e0 [ocfs2]
+    reconnect_path+0xb5/0x300
+    exportfs_decode_fh+0xf6/0x2b0
+    fh_verify+0x350/0x660 [nfsd]
+    nfsd4_putfh+0x4d/0x60 [nfsd]
+    nfsd4_proc_compound+0x3d3/0x6f0 [nfsd]
+    nfsd_dispatch+0xe0/0x290 [nfsd]
+    svc_process_common+0x412/0x6a0 [sunrpc]
+    svc_process+0x123/0x210 [sunrpc]
+    nfsd+0xff/0x170 [nfsd]
+    kthread+0xcb/0xf0
+    ret_from_fork+0x61/0x90
+  Code: 83 c2 02 0f b7 f2 e8 18 dc 91 ff 66 90 eb bf 0f 1f 40 00 55 48 89 e5 41 56 41 55 41 54 53 0f 1f 44 00 00 48 89 fb ba 00 00 02 00 <f0> 0f c1 17 89 d0 45 31 e4 45 31 ed c1 e8 10 66 39 d0 41 89 c6
+  RIP   _raw_spin_lock+0x18/0xf0
+  CR2: 0000000000001088
+  ---[ end trace 7264463cd1aac8f9 ]---
+  Kernel panic - not syncing: Fatal exception
+
+Link: http://lkml.kernel.org/r/20200616183829.87211-4-junxiao.bi@oracle.com
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/suballoc.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/ocfs2/suballoc.c
++++ b/fs/ocfs2/suballoc.c
+@@ -2825,9 +2825,12 @@ int ocfs2_test_inode_bit(struct ocfs2_su
+               goto bail;
+       }
+-      inode_alloc_inode =
+-              ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
+-                                          suballoc_slot);
++      if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
++              inode_alloc_inode = ocfs2_get_system_file_inode(osb,
++                      GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
++      else
++              inode_alloc_inode = ocfs2_get_system_file_inode(osb,
++                      INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
+       if (!inode_alloc_inode) {
+               /* the error code could be inaccurate, but we are not able to
+                * get the correct one. */
diff --git a/queue-5.7/ocfs2-fix-value-of-ocfs2_invalid_slot.patch b/queue-5.7/ocfs2-fix-value-of-ocfs2_invalid_slot.patch
new file mode 100644 (file)
index 0000000..475b56c
--- /dev/null
@@ -0,0 +1,53 @@
+From 9277f8334ffc719fe922d776444d6e4e884dbf30 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 25 Jun 2020 20:29:40 -0700
+Subject: ocfs2: fix value of OCFS2_INVALID_SLOT
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 9277f8334ffc719fe922d776444d6e4e884dbf30 upstream.
+
+In the ocfs2 disk layout, the slot number is 16 bits, but in the ocfs2
+implementation, the slot number is 32 bits.  Usually this does not cause
+any issue, because the slot number is converted from u16 to u32.  However,
+OCFS2_INVALID_SLOT was defined as -1, so when an invalid slot number was
+read from disk, its value was (u16)-1, which was then converted to u32
+(65535, which does not compare equal to -1).  As a result, the following
+check in get_local_system_inode() is always skipped:
+
+ static struct inode **get_local_system_inode(struct ocfs2_super *osb,
+                                               int type,
+                                               u32 slot)
+ {
+       BUG_ON(slot == OCFS2_INVALID_SLOT);
+       ...
+ }
+
+Link: http://lkml.kernel.org/r/20200616183829.87211-5-junxiao.bi@oracle.com
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/ocfs2_fs.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ocfs2/ocfs2_fs.h
++++ b/fs/ocfs2/ocfs2_fs.h
+@@ -290,7 +290,7 @@
+ #define OCFS2_MAX_SLOTS                       255
+ /* Slot map indicator for an empty slot */
+-#define OCFS2_INVALID_SLOT            -1
++#define OCFS2_INVALID_SLOT            ((u16)-1)
+ #define OCFS2_VOL_UUID_LEN            16
+ #define OCFS2_MAX_VOL_LABEL_LEN               64
diff --git a/queue-5.7/ocfs2-load-global_inode_alloc.patch b/queue-5.7/ocfs2-load-global_inode_alloc.patch
new file mode 100644 (file)
index 0000000..c8a4a1b
--- /dev/null
@@ -0,0 +1,43 @@
+From 7569d3c754e452769a5747eeeba488179e38a5da Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 25 Jun 2020 20:29:33 -0700
+Subject: ocfs2: load global_inode_alloc
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 7569d3c754e452769a5747eeeba488179e38a5da upstream.
+
+Set global_inode_alloc as OCFS2_FIRST_ONLINE_SYSTEM_INODE so that it is
+loaded during mount.  It can then be used to test whether some
+global/system inodes are valid.  One use case is nfsd testing whether
+the root inode is valid.
+
+Link: http://lkml.kernel.org/r/20200616183829.87211-3-junxiao.bi@oracle.com
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/ocfs2_fs.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ocfs2/ocfs2_fs.h
++++ b/fs/ocfs2/ocfs2_fs.h
+@@ -326,8 +326,8 @@ struct ocfs2_system_inode_info {
+ enum {
+       BAD_BLOCK_SYSTEM_INODE = 0,
+       GLOBAL_INODE_ALLOC_SYSTEM_INODE,
++#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
+       SLOT_MAP_SYSTEM_INODE,
+-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
+       HEARTBEAT_SYSTEM_INODE,
+       GLOBAL_BITMAP_SYSTEM_INODE,
+       USER_QUOTA_SYSTEM_INODE,
index 987268eeb914d1a68a47462df8d4eb93974ee08f..527ecb0ac3151ef2a5d8fc18d3f1489cb1d7b897 100644 (file)
@@ -225,3 +225,16 @@ btrfs-check-if-a-log-root-exists-before-locking-the-log_mutex-on-unlink.patch
 btrfs-fix-hang-on-snapshot-creation-after-rwf_nowait-write.patch
 btrfs-fix-failure-of-rwf_nowait-write-into-prealloc-extent-beyond-eof.patch
 btrfs-fix-rwf_nowait-write-not-failling-when-we-need-to-cow.patch
+mm-compaction-make-capture-control-handling-safe-wrt-interrupts.patch
+mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab.patch
+mm-slab-use-memzero_explicit-in-kzfree.patch
+ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch
+ocfs2-load-global_inode_alloc.patch
+ocfs2-fix-value-of-ocfs2_invalid_slot.patch
+ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch
+mm-memcontrol-handle-div0-crash-race-condition-in-memory.low.patch
+mm-memcontrol.c-add-missed-css_put.patch
+mm-memory_hotplug.c-fix-false-softlockup-during-pfn-range-removal.patch
+arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch
+arm64-dts-imx8mm-evk-correct-ldo1-ldo2-voltage-range.patch
+arm64-dts-imx8mn-ddr4-evk-correct-ldo1-ldo2-voltage-range.patch