--- /dev/null
+From f931ab479dd24cf7a2c6e2df19778406892591fb Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 10 Jan 2017 16:57:36 -0800
+Subject: mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit f931ab479dd24cf7a2c6e2df19778406892591fb upstream.
+
+Both arch_add_memory() and arch_remove_memory() expect a single threaded
+context.
+
+For example, arch/x86/mm/init_64.c::kernel_physical_mapping_init() does
+not hold any locks over this check and branch:
+
+ if (pgd_val(*pgd)) {
+ pud = (pud_t *)pgd_page_vaddr(*pgd);
+ paddr_last = phys_pud_init(pud, __pa(vaddr),
+ __pa(vaddr_end),
+ page_size_mask);
+ continue;
+ }
+
+ pud = alloc_low_page();
+ paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
+ page_size_mask);
+
+The result is that two threads calling devm_memremap_pages()
+simultaneously can end up colliding on pgd initialization. This leads
+to crash signatures like the following where the loser of the race
+initializes the wrong pgd entry:
+
+ BUG: unable to handle kernel paging request at ffff888ebfff0000
+ IP: memcpy_erms+0x6/0x10
+ PGD 2f8e8fc067 PUD 0 /* <---- Invalid PUD */
+ Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
+ CPU: 54 PID: 3818 Comm: systemd-udevd Not tainted 4.6.7+ #13
+ task: ffff882fac290040 ti: ffff882f887a4000 task.ti: ffff882f887a4000
+ RIP: memcpy_erms+0x6/0x10
+ [..]
+ Call Trace:
+ ? pmem_do_bvec+0x205/0x370 [nd_pmem]
+ ? blk_queue_enter+0x3a/0x280
+ pmem_rw_page+0x38/0x80 [nd_pmem]
+ bdev_read_page+0x84/0xb0
+
+Hold the standard memory hotplug mutex over calls to
+arch_{add,remove}_memory().
+
+Fixes: 41e94a851304 ("add devm_memremap_pages")
+Link: http://lkml.kernel.org/r/148357647831.9498.12606007370121652979.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/memremap.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/memremap.c
++++ b/kernel/memremap.c
+@@ -159,7 +159,9 @@ static void devm_memremap_pages_release(
+ struct page_map *page_map = res;
+
+ /* pages are dead and unused, undo the arch mapping */
++ mem_hotplug_begin();
+ arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
++ mem_hotplug_done();
+ }
+
+ void *devm_memremap_pages(struct device *dev, struct resource *res)
+@@ -189,7 +191,9 @@ void *devm_memremap_pages(struct device
+ if (nid < 0)
+ nid = numa_mem_id();
+
++ mem_hotplug_begin();
+ error = arch_add_memory(nid, res->start, resource_size(res), true);
++ mem_hotplug_done();
+ if (error) {
+ devres_free(page_map);
+ return ERR_PTR(error);
--- /dev/null
+From e7ee2c089e94067d68475990bdeed211c8852917 Mon Sep 17 00:00:00 2001
+From: Eric Ren <zren@suse.com>
+Date: Tue, 10 Jan 2017 16:57:33 -0800
+Subject: ocfs2: fix crash caused by stale lvb with fsdlm plugin
+
+From: Eric Ren <zren@suse.com>
+
+commit e7ee2c089e94067d68475990bdeed211c8852917 upstream.
+
+The crash happens rather often when we reset some cluster nodes while
+nodes contend fiercely to do truncate and append.
+
+The crash backtrace is below:
+
+ dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 locks on 971 resources
+ dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 generation 5 done: 4 ms
+ ocfs2: Begin replay journal (node 318952601, slot 2) on device (253,18)
+ ocfs2: End replay journal (node 318952601, slot 2) on device (253,18)
+ ocfs2: Beginning quota recovery on device (253,18) for slot 2
+ ocfs2: Finishing quota recovery on device (253,18) for slot 2
+ (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug expression: le64_to_cpu(fe->i_size) != i_size_read(inode)
+ (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1
+ ------------[ cut here ]------------
+ kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470!
+ invalid opcode: 0000 [#1] SMP
+ Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse sd_mod iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr parport joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio ehci_hcd usbcore serio_raw usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
+ Supported: No, Unsupported modules are loaded
+ CPU: 1 PID: 30154 Comm: truncate Tainted: G OE N 4.4.21-69-default #1
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
+ task: ffff88004ff6d240 ti: ffff880074e68000 task.ti: ffff880074e68000
+ RIP: 0010:[<ffffffffa05c8c30>] [<ffffffffa05c8c30>] ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
+ RSP: 0018:ffff880074e6bd50 EFLAGS: 00010282
+ RAX: 0000000000000074 RBX: 000000000000029e RCX: 0000000000000000
+ RDX: 0000000000000001 RSI: 0000000000000246 RDI: 0000000000000246
+ RBP: ffff880074e6bda8 R08: 000000003675dc7a R09: ffffffff82013414
+ R10: 0000000000034c50 R11: 0000000000000000 R12: ffff88003aab3448
+ R13: 00000000000002dc R14: 0000000000046e11 R15: 0000000000000020
+ FS: 00007f839f965700(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+ CR2: 00007f839f97e000 CR3: 0000000036723000 CR4: 00000000000006e0
+ Call Trace:
+ ocfs2_setattr+0x698/0xa90 [ocfs2]
+ notify_change+0x1ae/0x380
+ do_truncate+0x5e/0x90
+ do_sys_ftruncate.constprop.11+0x108/0x160
+ entry_SYSCALL_64_fastpath+0x12/0x6d
+ Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff
+ RIP [<ffffffffa05c8c30>] ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
+
+It's because ocfs2_inode_lock() gets us a stale LVB in which the i_size is
+not equal to the disk i_size. We mistakenly trust the LVB because the
+underlying fsdlm dlm_lock() doesn't set lkb_sbflags with
+DLM_SBF_VALNOTVALID properly for us. But, why?
+
+The current code tries to downconvert the lock without the DLM_LKF_VALBLK
+flag, to tell o2cb not to update the RSB's LVB if it's a PR->NULL
+conversion, even if the lock resource type needs LVB. This is not the
+right way for fsdlm.
+
+The fsdlm plugin behaves differently on DLM_LKF_VALBLK: it depends on
+DLM_LKF_VALBLK to decide if we care about the LVB in the LKB. If
+DLM_LKF_VALBLK is not set, fsdlm will skip recovering the RSB's LVB from
+this lkb, and so will not set DLM_SBF_VALNOTVALID appropriately when a
+node failure happens.
+
+The following diagram briefly illustrates how this crash happens:
+
+RSB1 is inode metadata lock resource with LOCK_TYPE_USES_LVB;
+
+The 1st round:
+
+ Node1 Node2
+RSB1: PR
+ RSB1(master): NULL->EX
+ocfs2_downconvert_lock(PR->NULL, set_lvb==0)
+ ocfs2_dlm_lock(no DLM_LKF_VALBLK)
+
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+dlm_lock(no DLM_LKF_VALBLK)
+ convert_lock(overwrite lkb->lkb_exflags
+ with no DLM_LKF_VALBLK)
+
+RSB1: NULL RSB1: EX
+ reset Node2
+dlm_recover_rsbs()
+ recover_lvb()
+
+/* The LVB is not trustable if the node with EX fails and
+ * no lock >= PR is left. We should set RSB_VALNOTVALID for RSB1.
+ */
+
+ if(!(lkb_exflags & DLM_LKF_VALBLK)) /* This means we miss the chance to
+ return; * invalidate the LVB here.
+ */
+
+The 2nd round:
+
+ Node 1 Node2
+RSB1(become master from recovery)
+
+ocfs2_setattr()
+ ocfs2_inode_lock(NULL->EX)
+ /* dlm_lock() return the stale lvb without setting DLM_SBF_VALNOTVALID */
+ ocfs2_meta_lvb_is_trustable() return 1 /* so we don't refresh inode from disk */
+ ocfs2_truncate_file()
+ mlog_bug_on_msg(disk isize != i_size_read(inode)) /* crash! */
+
+The fix is quite straightforward. We keep setting the DLM_LKF_VALBLK flag
+for dlm_lock() if the lock resource type needs LVB and the fsdlm plugin
+is used.
+
+Link: http://lkml.kernel.org/r/1481275846-6604-1-git-send-email-zren@suse.com
+Signed-off-by: Eric Ren <zren@suse.com>
+Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
+Cc: Mark Fasheh <mfasheh@versity.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlmglue.c | 10 ++++++++++
+ fs/ocfs2/stackglue.c | 6 ++++++
+ fs/ocfs2/stackglue.h | 3 +++
+ 3 files changed, 19 insertions(+)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3321,6 +3321,16 @@ static int ocfs2_downconvert_lock(struct
+ mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
+ lockres->l_level, new_level);
+
++ /*
++ * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
++ * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
++ * we can recover correctly from node failure. Otherwise, we may get
++ * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
++ */
++ if (!ocfs2_is_o2cb_active() &&
++ lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
++ lvb = 1;
++
+ if (lvb)
+ dlm_flags |= DLM_LKF_VALBLK;
+
+--- a/fs/ocfs2/stackglue.c
++++ b/fs/ocfs2/stackglue.c
+@@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_
+ */
+ static struct ocfs2_stack_plugin *active_stack;
+
++inline int ocfs2_is_o2cb_active(void)
++{
++ return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
++}
++EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
++
+ static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
+ {
+ struct ocfs2_stack_plugin *p;
+--- a/fs/ocfs2/stackglue.h
++++ b/fs/ocfs2/stackglue.h
+@@ -298,4 +298,7 @@ void ocfs2_stack_glue_set_max_proto_vers
+ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
+ void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
+
++/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
++int ocfs2_is_o2cb_active(void);
++
+ #endif /* STACKGLUE_H */