git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 17 Jan 2017 09:42:45 +0000 (10:42 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 17 Jan 2017 09:42:45 +0000 (10:42 +0100)
added patches:
dax-fix-deadlock-with-dax-4k-holes.patch
mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch
mm-pmd-dirty-emulation-in-page-fault-handler.patch
ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch
selftests-do-not-require-bash-for-the-generated-test.patch
selftests-do-not-require-bash-to-run-netsocktests-testcase.patch
zram-revalidate-disk-under-init_lock.patch
zram-support-bdi_cap_stable_writes.patch

queue-4.9/dax-fix-deadlock-with-dax-4k-holes.patch [new file with mode: 0644]
queue-4.9/mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch [new file with mode: 0644]
queue-4.9/mm-pmd-dirty-emulation-in-page-fault-handler.patch [new file with mode: 0644]
queue-4.9/ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch [new file with mode: 0644]
queue-4.9/selftests-do-not-require-bash-for-the-generated-test.patch [new file with mode: 0644]
queue-4.9/selftests-do-not-require-bash-to-run-netsocktests-testcase.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/zram-revalidate-disk-under-init_lock.patch [new file with mode: 0644]
queue-4.9/zram-support-bdi_cap_stable_writes.patch [new file with mode: 0644]

diff --git a/queue-4.9/dax-fix-deadlock-with-dax-4k-holes.patch b/queue-4.9/dax-fix-deadlock-with-dax-4k-holes.patch
new file mode 100644 (file)
index 0000000..e8f4add
--- /dev/null
@@ -0,0 +1,86 @@
+From 965d004af54088d138f806d04d803fb60d441986 Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Tue, 10 Jan 2017 16:57:15 -0800
+Subject: dax: fix deadlock with DAX 4k holes
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit 965d004af54088d138f806d04d803fb60d441986 upstream.
+
+Currently in DAX if we have three read faults on the same hole address we
+can end up with the following:
+
+Thread 0               Thread 1                Thread 2
+--------               --------                --------
+dax_iomap_fault
+ grab_mapping_entry
+  lock_slot
+   <locks empty DAX entry>
+
+                       dax_iomap_fault
+                        grab_mapping_entry
+                         get_unlocked_mapping_entry
+                          <sleeps on empty DAX entry>
+
+                                               dax_iomap_fault
+                                                grab_mapping_entry
+                                                 get_unlocked_mapping_entry
+                                                  <sleeps on empty DAX entry>
+  dax_load_hole
+   find_or_create_page
+   ...
+    page_cache_tree_insert
+     dax_wake_mapping_entry_waiter
+      <wakes one sleeper>
+     __radix_tree_replace
+      <swaps empty DAX entry with 4k zero page>
+
+                       <wakes>
+                       get_page
+                       lock_page
+                       ...
+                       put_locked_mapping_entry
+                       unlock_page
+                       put_page
+
+                                               <sleeps forever on the DAX
+                                                wait queue>
+
+The crux of the problem is that once we insert a 4k zero page, all
+locking from then on is done in terms of that 4k zero page and any
+additional threads sleeping on the empty DAX entry will never be woken.
+
+Fix this by waking all sleepers when we replace the DAX radix tree entry
+with a 4k zero page.  This will allow all sleeping threads to
+successfully transition from locking based on the DAX empty entry to
+locking on the 4k zero page.
+
+With the test case reported by Xiong this happens very regularly in my
+test setup, with some runs resulting in 9+ threads in this deadlocked
+state.  With this fix I've been able to run that same test dozens of
+times in a loop without issue.
+
+Fixes: ac401cc78242 ("dax: New fault locking")
+Link: http://lkml.kernel.org/r/1483479365-13607-1-git-send-email-ross.zwisler@linux.intel.com
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Reported-by: Xiong Zhou <xzhou@redhat.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/filemap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -144,7 +144,7 @@ static int page_cache_tree_insert(struct
+                               workingset_node_pages_dec(node);
+                       /* Wakeup waiters for exceptional entry lock */
+                       dax_wake_mapping_entry_waiter(mapping, page->index,
+-                                                    false);
++                                                    true);
+               }
+       }
+       radix_tree_replace_slot(slot, page);
diff --git a/queue-4.9/mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch b/queue-4.9/mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch
new file mode 100644 (file)
index 0000000..666cf2b
--- /dev/null
@@ -0,0 +1,83 @@
+From f931ab479dd24cf7a2c6e2df19778406892591fb Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 10 Jan 2017 16:57:36 -0800
+Subject: mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit f931ab479dd24cf7a2c6e2df19778406892591fb upstream.
+
+Both arch_add_memory() and arch_remove_memory() expect a single threaded
+context.
+
+For example, arch/x86/mm/init_64.c::kernel_physical_mapping_init() does
+not hold any locks over this check and branch:
+
+    if (pgd_val(*pgd)) {
+       pud = (pud_t *)pgd_page_vaddr(*pgd);
+       paddr_last = phys_pud_init(pud, __pa(vaddr),
+                                  __pa(vaddr_end),
+                                  page_size_mask);
+       continue;
+    }
+
+    pud = alloc_low_page();
+    paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
+                          page_size_mask);
+
+The result is that two threads calling devm_memremap_pages()
+simultaneously can end up colliding on pgd initialization.  This leads
+to crash signatures like the following where the loser of the race
+initializes the wrong pgd entry:
+
+    BUG: unable to handle kernel paging request at ffff888ebfff0000
+    IP: memcpy_erms+0x6/0x10
+    PGD 2f8e8fc067 PUD 0 /* <---- Invalid PUD */
+    Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
+    CPU: 54 PID: 3818 Comm: systemd-udevd Not tainted 4.6.7+ #13
+    task: ffff882fac290040 ti: ffff882f887a4000 task.ti: ffff882f887a4000
+    RIP: memcpy_erms+0x6/0x10
+    [..]
+    Call Trace:
+      ? pmem_do_bvec+0x205/0x370 [nd_pmem]
+      ? blk_queue_enter+0x3a/0x280
+      pmem_rw_page+0x38/0x80 [nd_pmem]
+      bdev_read_page+0x84/0xb0
+
+Hold the standard memory hotplug mutex over calls to
+arch_{add,remove}_memory().
+
+Fixes: 41e94a851304 ("add devm_memremap_pages")
+Link: http://lkml.kernel.org/r/148357647831.9498.12606007370121652979.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/memremap.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/memremap.c
++++ b/kernel/memremap.c
+@@ -246,7 +246,9 @@ static void devm_memremap_pages_release(
+       /* pages are dead and unused, undo the arch mapping */
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
++      mem_hotplug_begin();
+       arch_remove_memory(align_start, align_size);
++      mem_hotplug_done();
+       untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+       pgmap_radix_release(res);
+       dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
+@@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device
+       if (error)
+               goto err_pfn_remap;
++      mem_hotplug_begin();
+       error = arch_add_memory(nid, align_start, align_size, true);
++      mem_hotplug_done();
+       if (error)
+               goto err_add_memory;
diff --git a/queue-4.9/mm-pmd-dirty-emulation-in-page-fault-handler.patch b/queue-4.9/mm-pmd-dirty-emulation-in-page-fault-handler.patch
new file mode 100644 (file)
index 0000000..2ad7ad5
--- /dev/null
@@ -0,0 +1,70 @@
+From 20f664aabeb88d582b623a625f83b0454fa34f07 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Tue, 10 Jan 2017 16:57:51 -0800
+Subject: mm: pmd dirty emulation in page fault handler
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit 20f664aabeb88d582b623a625f83b0454fa34f07 upstream.
+
+Andreas reported that [1] made a test in jemalloc hang in THP mode on arm64:
+
+  http://lkml.kernel.org/r/mvmmvfy37g1.fsf@hawking.suse.de
+
+The problem is that the page fault handler currently doesn't support
+dirty-bit emulation of pmd for non-HW dirty-bit architectures, so the
+application is stuck until the VM marks the pmd dirty.
+
+How the emulation work depends on the architecture.  In case of arm64,
+when it set up pte firstly, it sets pte PTE_RDONLY to get a chance to
+mark the pte dirty via triggering page fault when store access happens.
+Once the page fault occurs, VM marks the pmd dirty and arch code for
+setting pmd will clear PTE_RDONLY for application to proceed.
+
+IOW, if VM doesn't mark the pmd dirty, application hangs forever by
+repeated fault(i.e., store op but the pmd is PTE_RDONLY).
+
+This patch enables pmd dirty-bit emulation for those architectures.
+
+[1] b8d3c4c3009d, mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called
+
+Fixes: b8d3c4c3009d ("mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called")
+Link: http://lkml.kernel.org/r/1482506098-6149-1-git-send-email-minchan@kernel.org
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Reported-by: Andreas Schwab <schwab@suse.de>
+Tested-by: Andreas Schwab <schwab@suse.de>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Jason Evans <je@fb.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/huge_memory.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -872,15 +872,17 @@ void huge_pmd_set_accessed(struct fault_
+ {
+       pmd_t entry;
+       unsigned long haddr;
++      bool write = fe->flags & FAULT_FLAG_WRITE;
+       fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd);
+       if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
+               goto unlock;
+       entry = pmd_mkyoung(orig_pmd);
++      if (write)
++              entry = pmd_mkdirty(entry);
+       haddr = fe->address & HPAGE_PMD_MASK;
+-      if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry,
+-                              fe->flags & FAULT_FLAG_WRITE))
++      if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, write))
+               update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd);
+ unlock:
diff --git a/queue-4.9/ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch b/queue-4.9/ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch
new file mode 100644 (file)
index 0000000..aa9aac0
--- /dev/null
@@ -0,0 +1,174 @@
+From e7ee2c089e94067d68475990bdeed211c8852917 Mon Sep 17 00:00:00 2001
+From: Eric Ren <zren@suse.com>
+Date: Tue, 10 Jan 2017 16:57:33 -0800
+Subject: ocfs2: fix crash caused by stale lvb with fsdlm plugin
+
+From: Eric Ren <zren@suse.com>
+
+commit e7ee2c089e94067d68475990bdeed211c8852917 upstream.
+
+The crash happens rather often when we reset some cluster nodes while
+nodes contend fiercely to do truncate and append.
+
+The crash backtrace is below:
+
+   dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 locks on 971 resources
+   dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 generation 5 done: 4 ms
+   ocfs2: Begin replay journal (node 318952601, slot 2) on device (253,18)
+   ocfs2: End replay journal (node 318952601, slot 2) on device (253,18)
+   ocfs2: Beginning quota recovery on device (253,18) for slot 2
+   ocfs2: Finishing quota recovery on device (253,18) for slot 2
+   (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug expression: le64_to_cpu(fe->i_size) != i_size_read(inode)
+   (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1
+   ------------[ cut here ]------------
+   kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470!
+   invalid opcode: 0000 [#1] SMP
+   Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse sd_mod    iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr parport      joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix               drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio ehci_hcd       usbcore serio_raw usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
+   Supported: No, Unsupported modules are loaded
+   CPU: 1 PID: 30154 Comm: truncate Tainted: G           OE   N  4.4.21-69-default #1
+   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
+   task: ffff88004ff6d240 ti: ffff880074e68000 task.ti: ffff880074e68000
+   RIP: 0010:[<ffffffffa05c8c30>]  [<ffffffffa05c8c30>] ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
+   RSP: 0018:ffff880074e6bd50  EFLAGS: 00010282
+   RAX: 0000000000000074 RBX: 000000000000029e RCX: 0000000000000000
+   RDX: 0000000000000001 RSI: 0000000000000246 RDI: 0000000000000246
+   RBP: ffff880074e6bda8 R08: 000000003675dc7a R09: ffffffff82013414
+   R10: 0000000000034c50 R11: 0000000000000000 R12: ffff88003aab3448
+   R13: 00000000000002dc R14: 0000000000046e11 R15: 0000000000000020
+   FS:  00007f839f965700(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000
+   CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+   CR2: 00007f839f97e000 CR3: 0000000036723000 CR4: 00000000000006e0
+   Call Trace:
+     ocfs2_setattr+0x698/0xa90 [ocfs2]
+     notify_change+0x1ae/0x380
+     do_truncate+0x5e/0x90
+     do_sys_ftruncate.constprop.11+0x108/0x160
+     entry_SYSCALL_64_fastpath+0x12/0x6d
+   Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff
+   RIP  [<ffffffffa05c8c30>] ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
+
+It's because ocfs2_inode_lock() gets us a stale LVB in which the i_size is
+not equal to the disk i_size.  We mistakenly trust the LVB because the
+underlying fsdlm dlm_lock() doesn't set lkb_sbflags with
+DLM_SBF_VALNOTVALID properly for us.  But, why?
+
+The current code tries to downconvert the lock without the DLM_LKF_VALBLK
+flag to tell o2cb not to update the RSB's LVB if it's a PR->NULL conversion,
+even if the lock resource type needs an LVB.  This is not the right way for
+fsdlm.
+
+The fsdlm plugin behaves differently on DLM_LKF_VALBLK: it depends on
+DLM_LKF_VALBLK to decide if we care about the LVB in the LKB.  If
+DLM_LKF_VALBLK is not set, fsdlm will skip recovering RSB's LVB from
+this lkb and set the right DLM_SBF_VALNOTVALID appropriately when node
+failure happens.
+
+The following diagram briefly illustrates how this crash happens:
+
+RSB1 is inode metadata lock resource with LOCK_TYPE_USES_LVB;
+
+The 1st round:
+
+             Node1                                    Node2
+RSB1: PR
+                                                  RSB1(master): NULL->EX
+ocfs2_downconvert_lock(PR->NULL, set_lvb==0)
+  ocfs2_dlm_lock(no DLM_LKF_VALBLK)
+
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+dlm_lock(no DLM_LKF_VALBLK)
+  convert_lock(overwrite lkb->lkb_exflags
+               with no DLM_LKF_VALBLK)
+
+RSB1: NULL                                        RSB1: EX
+                                                  reset Node2
+dlm_recover_rsbs()
+  recover_lvb()
+
+/* The LVB is not trustable if the node with EX fails and
+ * no lock >= PR is left. We should set RSB_VALNOTVALID for RSB1.
+ */
+
+ if(!(lkb_exflags & DLM_LKF_VALBLK)) /* This means we miss the chance
+           return;                    * to invalidate the LVB here.
+                                      */
+
+The 2nd round:
+
+         Node 1                                Node2
+RSB1(become master from recovery)
+
+ocfs2_setattr()
+  ocfs2_inode_lock(NULL->EX)
+    /* dlm_lock() return the stale lvb without setting DLM_SBF_VALNOTVALID */
+    ocfs2_meta_lvb_is_trustable() return 1 /* so we don't refresh inode from disk */
+  ocfs2_truncate_file()
+      mlog_bug_on_msg(disk isize != i_size_read(inode))  /* crash! */
+
+The fix is quite straightforward.  We keep setting the DLM_LKF_VALBLK flag
+for dlm_lock() if the lock resource type needs LVB and the fsdlm plugin
+is used.
+
+Link: http://lkml.kernel.org/r/1481275846-6604-1-git-send-email-zren@suse.com
+Signed-off-by: Eric Ren <zren@suse.com>
+Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
+Cc: Mark Fasheh <mfasheh@versity.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlmglue.c   |   10 ++++++++++
+ fs/ocfs2/stackglue.c |    6 ++++++
+ fs/ocfs2/stackglue.h |    3 +++
+ 3 files changed, 19 insertions(+)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct
+       mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
+            lockres->l_level, new_level);
++      /*
++       * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
++       * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
++       * we can recover correctly from node failure. Otherwise, we may get
++       * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
++       */
++      if (!ocfs2_is_o2cb_active() &&
++          lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
++              lvb = 1;
++
+       if (lvb)
+               dlm_flags |= DLM_LKF_VALBLK;
+--- a/fs/ocfs2/stackglue.c
++++ b/fs/ocfs2/stackglue.c
+@@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_
+  */
+ static struct ocfs2_stack_plugin *active_stack;
++inline int ocfs2_is_o2cb_active(void)
++{
++      return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
++}
++EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
++
+ static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
+ {
+       struct ocfs2_stack_plugin *p;
+--- a/fs/ocfs2/stackglue.h
++++ b/fs/ocfs2/stackglue.h
+@@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_vers
+ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
+ void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
++/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
++int ocfs2_is_o2cb_active(void);
++
+ extern struct kset *ocfs2_kset;
+ #endif  /* STACKGLUE_H */
diff --git a/queue-4.9/selftests-do-not-require-bash-for-the-generated-test.patch b/queue-4.9/selftests-do-not-require-bash-for-the-generated-test.patch
new file mode 100644 (file)
index 0000000..801c537
--- /dev/null
@@ -0,0 +1,32 @@
+From a2b1e8a20c992b01eeb76de00d4f534cbe9f3822 Mon Sep 17 00:00:00 2001
+From: Rolf Eike Beer <eb@emlix.com>
+Date: Wed, 14 Dec 2016 11:59:34 +0100
+Subject: selftests: do not require bash for the generated test
+
+From: Rolf Eike Beer <eb@emlix.com>
+
+commit a2b1e8a20c992b01eeb76de00d4f534cbe9f3822 upstream.
+
+Nothing in this minimal script seems to require bash. We often run these
+tests on embedded devices where the only shell available is the busybox
+ash. Use sh instead.
+
+Signed-off-by: Rolf Eike Beer <eb@emlix.com>
+Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/Makefile |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/Makefile
++++ b/tools/testing/selftests/Makefile
+@@ -87,7 +87,7 @@ ifdef INSTALL_PATH
+       done;
+       @# Ask all targets to emit their test scripts
+-      echo "#!/bin/bash" > $(ALL_SCRIPT)
++      echo "#!/bin/sh" > $(ALL_SCRIPT)
+       echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT)
+       echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
diff --git a/queue-4.9/selftests-do-not-require-bash-to-run-netsocktests-testcase.patch b/queue-4.9/selftests-do-not-require-bash-to-run-netsocktests-testcase.patch
new file mode 100644 (file)
index 0000000..724b069
--- /dev/null
@@ -0,0 +1,29 @@
+From 3659f98b5375d195f1870c3e508fe51e52206839 Mon Sep 17 00:00:00 2001
+From: Rolf Eike Beer <eb@emlix.com>
+Date: Wed, 14 Dec 2016 11:59:57 +0100
+Subject: selftests: do not require bash to run netsocktests testcase
+
+From: Rolf Eike Beer <eb@emlix.com>
+
+commit 3659f98b5375d195f1870c3e508fe51e52206839 upstream.
+
+Nothing in this minimal script seems to require bash. We often run these
+tests on embedded devices where the only shell available is the busybox
+ash. Use sh instead.
+
+Signed-off-by: Rolf Eike Beer <eb@emlix.com>
+Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/net/run_netsocktests |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/run_netsocktests
++++ b/tools/testing/selftests/net/run_netsocktests
+@@ -1,4 +1,4 @@
+-#!/bin/bash
++#!/bin/sh
+ echo "--------------------"
+ echo "running socket test"
index 550e92f738d9091f30942880f36498b1faa93c4f..a4f80977726036f2171f479cdbb463dd680c33a3 100644 (file)
@@ -7,3 +7,11 @@ regulator-axp20x-fix-axp809-ldo_io-registration-error-on-cold-boot.patch
 drm-tegra-dpaux-fix-error-handling.patch
 drm-vc4-fix-a-couple-error-codes-in-vc4_cl_lookup_bos.patch
 drm-savage-dereferencing-an-error-pointer.patch
+selftests-do-not-require-bash-to-run-netsocktests-testcase.patch
+selftests-do-not-require-bash-for-the-generated-test.patch
+zram-revalidate-disk-under-init_lock.patch
+zram-support-bdi_cap_stable_writes.patch
+dax-fix-deadlock-with-dax-4k-holes.patch
+mm-pmd-dirty-emulation-in-page-fault-handler.patch
+mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch
+ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch
diff --git a/queue-4.9/zram-revalidate-disk-under-init_lock.patch b/queue-4.9/zram-revalidate-disk-under-init_lock.patch
new file mode 100644 (file)
index 0000000..0d9267a
--- /dev/null
@@ -0,0 +1,54 @@
+From e7ccfc4ccb703e0f033bd4617580039898e912dd Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Tue, 10 Jan 2017 16:58:18 -0800
+Subject: zram: revalidate disk under init_lock
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit e7ccfc4ccb703e0f033bd4617580039898e912dd upstream.
+
+Commit b4c5c60920e3 ("zram: avoid lockdep splat by revalidate_disk")
+moved revalidate_disk call out of init_lock to avoid lockdep
+false-positive splat.  However, commit 08eee69fcf6b ("zram: remove
+init_lock in zram_make_request") removed init_lock in IO path so there
+is no worry about lockdep splat.  So, let's restore it.
+
+This patch is needed to set BDI_CAP_STABLE_WRITES atomically in next
+patch.
+
+Fixes: da9556a2367c ("zram: user per-cpu compression streams")
+Link: http://lkml.kernel.org/r/1482366980-3782-3-git-send-email-minchan@kernel.org
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Hyeoncheol Lee <cheol.lee@lge.com>
+Cc: <yjay.kim@lge.com>
+Cc: Sangseok Lee <sangseok.lee@lge.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c |    8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -1094,14 +1094,8 @@ static ssize_t disksize_store(struct dev
+       zram->comp = comp;
+       zram->disksize = disksize;
+       set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+-      up_write(&zram->init_lock);
+-
+-      /*
+-       * Revalidate disk out of the init_lock to avoid lockdep splat.
+-       * It's okay because disk's capacity is protected by init_lock
+-       * so that revalidate_disk always sees up-to-date capacity.
+-       */
+       revalidate_disk(zram->disk);
++      up_write(&zram->init_lock);
+       return len;
diff --git a/queue-4.9/zram-support-bdi_cap_stable_writes.patch b/queue-4.9/zram-support-bdi_cap_stable_writes.patch
new file mode 100644 (file)
index 0000000..9da0761
--- /dev/null
@@ -0,0 +1,88 @@
+From b09ab054b69b07077bd3292f67e777861ac796e5 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Tue, 10 Jan 2017 16:58:21 -0800
+Subject: zram: support BDI_CAP_STABLE_WRITES
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit b09ab054b69b07077bd3292f67e777861ac796e5 upstream.
+
+zram has used the per-cpu stream feature since v4.7.  It aims to increase
+the cache hit ratio of the scratch buffer used for compression.  The
+downside of that approach is that zram must request memory for the
+compressed page in per-cpu context, which requires stricter gfp flags
+that can fail.  If so, it retries the allocation outside of per-cpu
+context so that it can get memory this time, compresses the data again,
+and copies it to the memory space.
+
+In this scenario, zram assumes the data never changes, but that is
+not true without stable page support.  So, if the data is changed under
+us, zram can overrun the buffer, which breaks the zsmalloc free object
+chain and crashes the system as reported below:
+
+   https://bugzilla.suse.com/show_bug.cgi?id=997574
+
+This patch adds BDI_CAP_STABLE_WRITES to zram for declaring "I am block
+device needing *stable write*".
+
+Fixes: da9556a2367c ("zram: user per-cpu compression streams")
+Link: http://lkml.kernel.org/r/1482366980-3782-4-git-send-email-minchan@kernel.org
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Hyeoncheol Lee <cheol.lee@lge.com>
+Cc: <yjay.kim@lge.com>
+Cc: Sangseok Lee <sangseok.lee@lge.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -25,6 +25,7 @@
+ #include <linux/genhd.h>
+ #include <linux/highmem.h>
+ #include <linux/slab.h>
++#include <linux/backing-dev.h>
+ #include <linux/string.h>
+ #include <linux/vmalloc.h>
+ #include <linux/err.h>
+@@ -111,6 +112,14 @@ static inline bool is_partial_io(struct
+       return bvec->bv_len != PAGE_SIZE;
+ }
++static void zram_revalidate_disk(struct zram *zram)
++{
++      revalidate_disk(zram->disk);
++      /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */
++      zram->disk->queue->backing_dev_info.capabilities |=
++              BDI_CAP_STABLE_WRITES;
++}
++
+ /*
+  * Check if request is within bounds and aligned on zram logical blocks.
+  */
+@@ -1094,7 +1103,7 @@ static ssize_t disksize_store(struct dev
+       zram->comp = comp;
+       zram->disksize = disksize;
+       set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+-      revalidate_disk(zram->disk);
++      zram_revalidate_disk(zram);
+       up_write(&zram->init_lock);
+       return len;
+@@ -1142,7 +1151,7 @@ static ssize_t reset_store(struct device
+       /* Make sure all the pending I/O are finished */
+       fsync_bdev(bdev);
+       zram_reset_device(zram);
+-      revalidate_disk(zram->disk);
++      zram_revalidate_disk(zram);
+       bdput(bdev);
+       mutex_lock(&bdev->bd_mutex);