From: Greg Kroah-Hartman Date: Tue, 17 Jan 2017 09:42:45 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.9.5~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c8f0632e73f9b9291c52dd8a7c523b18bab2b1f5;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: dax-fix-deadlock-with-dax-4k-holes.patch mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch mm-pmd-dirty-emulation-in-page-fault-handler.patch ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch selftests-do-not-require-bash-for-the-generated-test.patch selftests-do-not-require-bash-to-run-netsocktests-testcase.patch zram-revalidate-disk-under-init_lock.patch zram-support-bdi_cap_stable_writes.patch --- diff --git a/queue-4.9/dax-fix-deadlock-with-dax-4k-holes.patch b/queue-4.9/dax-fix-deadlock-with-dax-4k-holes.patch new file mode 100644 index 00000000000..e8f4add60ea --- /dev/null +++ b/queue-4.9/dax-fix-deadlock-with-dax-4k-holes.patch @@ -0,0 +1,86 @@ +From 965d004af54088d138f806d04d803fb60d441986 Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Tue, 10 Jan 2017 16:57:15 -0800 +Subject: dax: fix deadlock with DAX 4k holes + +From: Ross Zwisler + +commit 965d004af54088d138f806d04d803fb60d441986 upstream. + +Currently in DAX if we have three read faults on the same hole address we +can end up with the following: + +Thread 0 Thread 1 Thread 2 +-------- -------- -------- +dax_iomap_fault + grab_mapping_entry + lock_slot + + + dax_iomap_fault + grab_mapping_entry + get_unlocked_mapping_entry + + + dax_iomap_fault + grab_mapping_entry + get_unlocked_mapping_entry + + dax_load_hole + find_or_create_page + ... + page_cache_tree_insert + dax_wake_mapping_entry_waiter + + __radix_tree_replace + + + + get_page + lock_page + ... 
+ put_locked_mapping_entry + unlock_page + put_page + + + +The crux of the problem is that once we insert a 4k zero page, all +locking from then on is done in terms of that 4k zero page and any +additional threads sleeping on the empty DAX entry will never be woken. + +Fix this by waking all sleepers when we replace the DAX radix tree entry +with a 4k zero page. This will allow all sleeping threads to +successfully transition from locking based on the DAX empty entry to +locking on the 4k zero page. + +With the test case reported by Xiong this happens very regularly in my +test setup, with some runs resulting in 9+ threads in this deadlocked +state. With this fix I've been able to run that same test dozens of +times in a loop without issue. + +Fixes: ac401cc78242 ("dax: New fault locking") +Link: http://lkml.kernel.org/r/1483479365-13607-1-git-send-email-ross.zwisler@linux.intel.com +Signed-off-by: Ross Zwisler +Reported-by: Xiong Zhou +Reviewed-by: Jan Kara +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/filemap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -144,7 +144,7 @@ static int page_cache_tree_insert(struct + workingset_node_pages_dec(node); + /* Wakeup waiters for exceptional entry lock */ + dax_wake_mapping_entry_waiter(mapping, page->index, +- false); ++ true); + } + } + radix_tree_replace_slot(slot, page); diff --git a/queue-4.9/mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch b/queue-4.9/mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch new file mode 100644 index 00000000000..666cf2b6581 --- /dev/null +++ b/queue-4.9/mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch @@ -0,0 +1,83 @@ +From f931ab479dd24cf7a2c6e2df19778406892591fb Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Tue, 10 Jan 2017 16:57:36 -0800 +Subject: mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done} + 
+From: Dan Williams + +commit f931ab479dd24cf7a2c6e2df19778406892591fb upstream. + +Both arch_add_memory() and arch_remove_memory() expect a single threaded +context. + +For example, arch/x86/mm/init_64.c::kernel_physical_mapping_init() does +not hold any locks over this check and branch: + + if (pgd_val(*pgd)) { + pud = (pud_t *)pgd_page_vaddr(*pgd); + paddr_last = phys_pud_init(pud, __pa(vaddr), + __pa(vaddr_end), + page_size_mask); + continue; + } + + pud = alloc_low_page(); + paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), + page_size_mask); + +The result is that two threads calling devm_memremap_pages() +simultaneously can end up colliding on pgd initialization. This leads +to crash signatures like the following where the loser of the race +initializes the wrong pgd entry: + + BUG: unable to handle kernel paging request at ffff888ebfff0000 + IP: memcpy_erms+0x6/0x10 + PGD 2f8e8fc067 PUD 0 /* <---- Invalid PUD */ + Oops: 0000 [#1] SMP DEBUG_PAGEALLOC + CPU: 54 PID: 3818 Comm: systemd-udevd Not tainted 4.6.7+ #13 + task: ffff882fac290040 ti: ffff882f887a4000 task.ti: ffff882f887a4000 + RIP: memcpy_erms+0x6/0x10 + [..] + Call Trace: + ? pmem_do_bvec+0x205/0x370 [nd_pmem] + ? blk_queue_enter+0x3a/0x280 + pmem_rw_page+0x38/0x80 [nd_pmem] + bdev_read_page+0x84/0xb0 + +Hold the standard memory hotplug mutex over calls to +arch_{add,remove}_memory(). 
+ +Fixes: 41e94a851304 ("add devm_memremap_pages") +Link: http://lkml.kernel.org/r/148357647831.9498.12606007370121652979.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Dan Williams +Cc: Christoph Hellwig +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/memremap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/memremap.c ++++ b/kernel/memremap.c +@@ -246,7 +246,9 @@ static void devm_memremap_pages_release( + /* pages are dead and unused, undo the arch mapping */ + align_start = res->start & ~(SECTION_SIZE - 1); + align_size = ALIGN(resource_size(res), SECTION_SIZE); ++ mem_hotplug_begin(); + arch_remove_memory(align_start, align_size); ++ mem_hotplug_done(); + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + pgmap_radix_release(res); + dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, +@@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device + if (error) + goto err_pfn_remap; + ++ mem_hotplug_begin(); + error = arch_add_memory(nid, align_start, align_size, true); ++ mem_hotplug_done(); + if (error) + goto err_add_memory; + diff --git a/queue-4.9/mm-pmd-dirty-emulation-in-page-fault-handler.patch b/queue-4.9/mm-pmd-dirty-emulation-in-page-fault-handler.patch new file mode 100644 index 00000000000..2ad7ad54354 --- /dev/null +++ b/queue-4.9/mm-pmd-dirty-emulation-in-page-fault-handler.patch @@ -0,0 +1,70 @@ +From 20f664aabeb88d582b623a625f83b0454fa34f07 Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Tue, 10 Jan 2017 16:57:51 -0800 +Subject: mm: pmd dirty emulation in page fault handler + +From: Minchan Kim + +commit 20f664aabeb88d582b623a625f83b0454fa34f07 upstream. 
+ +Andreas reported [1] made a test in jemalloc hang in THP mode in arm64: + + http://lkml.kernel.org/r/mvmmvfy37g1.fsf@hawking.suse.de + +The problem is that currently the page fault handler doesn't support dirty bit +emulation of pmd for non-HW dirty-bit architectures, so the application +is stuck until the VM marks the pmd dirty. + +How the emulation works depends on the architecture. In case of arm64, +when it first sets up the pte, it sets pte PTE_RDONLY to get a chance to +mark the pte dirty via triggering page fault when store access happens. +Once the page fault occurs, VM marks the pmd dirty and arch code for +setting pmd will clear PTE_RDONLY for application to proceed. + +IOW, if VM doesn't mark the pmd dirty, application hangs forever by +repeated fault (i.e., store op but the pmd is PTE_RDONLY). + +This patch enables pmd dirty-bit emulation for those architectures. + +[1] b8d3c4c3009d, mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called + +Fixes: b8d3c4c3009d ("mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called") +Link: http://lkml.kernel.org/r/1482506098-6149-1-git-send-email-minchan@kernel.org +Signed-off-by: Minchan Kim +Reported-by: Andreas Schwab +Tested-by: Andreas Schwab +Acked-by: Kirill A. 
Shutemov +Acked-by: Michal Hocko +Cc: Jason Evans +Cc: Will Deacon +Cc: Catalin Marinas +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -872,15 +872,17 @@ void huge_pmd_set_accessed(struct fault_ + { + pmd_t entry; + unsigned long haddr; ++ bool write = fe->flags & FAULT_FLAG_WRITE; + + fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd); + if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) + goto unlock; + + entry = pmd_mkyoung(orig_pmd); ++ if (write) ++ entry = pmd_mkdirty(entry); + haddr = fe->address & HPAGE_PMD_MASK; +- if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, +- fe->flags & FAULT_FLAG_WRITE)) ++ if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, write)) + update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd); + + unlock: diff --git a/queue-4.9/ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch b/queue-4.9/ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch new file mode 100644 index 00000000000..aa9aac08f10 --- /dev/null +++ b/queue-4.9/ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch @@ -0,0 +1,174 @@ +From e7ee2c089e94067d68475990bdeed211c8852917 Mon Sep 17 00:00:00 2001 +From: Eric Ren +Date: Tue, 10 Jan 2017 16:57:33 -0800 +Subject: ocfs2: fix crash caused by stale lvb with fsdlm plugin + +From: Eric Ren + +commit e7ee2c089e94067d68475990bdeed211c8852917 upstream. + +The crash happens rather often when we reset some cluster nodes while +nodes contend fiercely to do truncate and append. 
+ +The crash backtrace is below: + + dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 locks on 971 resources + dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 generation 5 done: 4 ms + ocfs2: Begin replay journal (node 318952601, slot 2) on device (253,18) + ocfs2: End replay journal (node 318952601, slot 2) on device (253,18) + ocfs2: Beginning quota recovery on device (253,18) for slot 2 + ocfs2: Finishing quota recovery on device (253,18) for slot 2 + (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug expression: le64_to_cpu(fe->i_size) != i_size_read(inode) + (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1 + ------------[ cut here ]------------ + kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470! + invalid opcode: 0000 [#1] SMP + Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse sd_mod iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr parport joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio ehci_hcd usbcore serio_raw usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4 + Supported: No, Unsupported modules are loaded + CPU: 1 PID: 30154 Comm: truncate Tainted: G OE N 4.4.21-69-default #1 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014 + task: ffff88004ff6d240 ti: ffff880074e68000 task.ti: ffff880074e68000 + RIP: 0010:[] [] ocfs2_truncate_file+0x640/0x6c0 [ocfs2] + RSP: 0018:ffff880074e6bd50 EFLAGS: 00010282 + RAX: 0000000000000074 RBX: 000000000000029e RCX: 0000000000000000 + RDX: 0000000000000001 
RSI: 0000000000000246 RDI: 0000000000000246 + RBP: ffff880074e6bda8 R08: 000000003675dc7a R09: ffffffff82013414 + R10: 0000000000034c50 R11: 0000000000000000 R12: ffff88003aab3448 + R13: 00000000000002dc R14: 0000000000046e11 R15: 0000000000000020 + FS: 00007f839f965700(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b + CR2: 00007f839f97e000 CR3: 0000000036723000 CR4: 00000000000006e0 + Call Trace: + ocfs2_setattr+0x698/0xa90 [ocfs2] + notify_change+0x1ae/0x380 + do_truncate+0x5e/0x90 + do_sys_ftruncate.constprop.11+0x108/0x160 + entry_SYSCALL_64_fastpath+0x12/0x6d + Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff + RIP [] ocfs2_truncate_file+0x640/0x6c0 [ocfs2] + +It's because ocfs2_inode_lock() get us stale LVB in which the i_size is +not equal to the disk i_size. We mistakenly trust the LVB because the +underlaying fsdlm dlm_lock() doesn't set lkb_sbflags with +DLM_SBF_VALNOTVALID properly for us. But, why? + +The current code tries to downconvert lock without DLM_LKF_VALBLK flag +to tell o2cb don't update RSB's LVB if it's a PR->NULL conversion, even +if the lock resource type needs LVB. This is not the right way for +fsdlm. + +The fsdlm plugin behaves different on DLM_LKF_VALBLK, it depends on +DLM_LKF_VALBLK to decide if we care about the LVB in the LKB. If +DLM_LKF_VALBLK is not set, fsdlm will skip recovering RSB's LVB from +this lkb and set the right DLM_SBF_VALNOTVALID appropriately when node +failure happens. 
+ +The following diagram briefly illustrates how this crash happens: + +RSB1 is inode metadata lock resource with LOCK_TYPE_USES_LVB; + +The 1st round: + + Node1 Node2 +RSB1: PR + RSB1(master): NULL->EX +ocfs2_downconvert_lock(PR->NULL, set_lvb==0) + ocfs2_dlm_lock(no DLM_LKF_VALBLK) + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +dlm_lock(no DLM_LKF_VALBLK) + convert_lock(overwrite lkb->lkb_exflags + with no DLM_LKF_VALBLK) + +RSB1: NULL RSB1: EX + reset Node2 +dlm_recover_rsbs() + recover_lvb() + +/* The LVB is not trustable if the node with EX fails and + * no lock >= PR is left. We should set RSB_VALNOTVALID for RSB1. + */ + + if(!(lkb_exflags & DLM_LKF_VALBLK)) /* This means we miss the chance to + return; * to invalidate the LVB here. + */ + +The 2nd round: + + Node 1 Node2 +RSB1(become master from recovery) + +ocfs2_setattr() + ocfs2_inode_lock(NULL->EX) + /* dlm_lock() return the stale lvb without setting DLM_SBF_VALNOTVALID */ + ocfs2_meta_lvb_is_trustable() return 1 /* so we don't refresh inode from disk */ + ocfs2_truncate_file() + mlog_bug_on_msg(disk isize != i_size_read(inode)) /* crash! */ + +The fix is quite straightforward. We keep to set DLM_LKF_VALBLK flag +for dlm_lock() if the lock resource type needs LVB and the fsdlm plugin +is used. 
+ +Link: http://lkml.kernel.org/r/1481275846-6604-1-git-send-email-zren@suse.com +Signed-off-by: Eric Ren +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/dlmglue.c | 10 ++++++++++ + fs/ocfs2/stackglue.c | 6 ++++++ + fs/ocfs2/stackglue.h | 3 +++ + 3 files changed, 19 insertions(+) + +--- a/fs/ocfs2/dlmglue.c ++++ b/fs/ocfs2/dlmglue.c +@@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct + mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, + lockres->l_level, new_level); + ++ /* ++ * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always ++ * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that ++ * we can recover correctly from node failure. Otherwise, we may get ++ * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. ++ */ ++ if (!ocfs2_is_o2cb_active() && ++ lockres->l_ops->flags & LOCK_TYPE_USES_LVB) ++ lvb = 1; ++ + if (lvb) + dlm_flags |= DLM_LKF_VALBLK; + +--- a/fs/ocfs2/stackglue.c ++++ b/fs/ocfs2/stackglue.c +@@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_ + */ + static struct ocfs2_stack_plugin *active_stack; + ++inline int ocfs2_is_o2cb_active(void) ++{ ++ return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); ++} ++EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); ++ + static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) + { + struct ocfs2_stack_plugin *p; +--- a/fs/ocfs2/stackglue.h ++++ b/fs/ocfs2/stackglue.h +@@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_vers + int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); + void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); + ++/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ ++int ocfs2_is_o2cb_active(void); ++ + extern struct kset *ocfs2_kset; + + #endif /* STACKGLUE_H */ diff --git 
a/queue-4.9/selftests-do-not-require-bash-for-the-generated-test.patch b/queue-4.9/selftests-do-not-require-bash-for-the-generated-test.patch new file mode 100644 index 00000000000..801c5374232 --- /dev/null +++ b/queue-4.9/selftests-do-not-require-bash-for-the-generated-test.patch @@ -0,0 +1,32 @@ +From a2b1e8a20c992b01eeb76de00d4f534cbe9f3822 Mon Sep 17 00:00:00 2001 +From: Rolf Eike Beer +Date: Wed, 14 Dec 2016 11:59:34 +0100 +Subject: selftests: do not require bash for the generated test + +From: Rolf Eike Beer + +commit a2b1e8a20c992b01eeb76de00d4f534cbe9f3822 upstream. + +Nothing in this minimal script seems to require bash. We often run these +tests on embedded devices where the only shell available is the busybox +ash. Use sh instead. + +Signed-off-by: Rolf Eike Beer +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/Makefile ++++ b/tools/testing/selftests/Makefile +@@ -87,7 +87,7 @@ ifdef INSTALL_PATH + done; + + @# Ask all targets to emit their test scripts +- echo "#!/bin/bash" > $(ALL_SCRIPT) ++ echo "#!/bin/sh" > $(ALL_SCRIPT) + echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + diff --git a/queue-4.9/selftests-do-not-require-bash-to-run-netsocktests-testcase.patch b/queue-4.9/selftests-do-not-require-bash-to-run-netsocktests-testcase.patch new file mode 100644 index 00000000000..724b0694999 --- /dev/null +++ b/queue-4.9/selftests-do-not-require-bash-to-run-netsocktests-testcase.patch @@ -0,0 +1,29 @@ +From 3659f98b5375d195f1870c3e508fe51e52206839 Mon Sep 17 00:00:00 2001 +From: Rolf Eike Beer +Date: Wed, 14 Dec 2016 11:59:57 +0100 +Subject: selftests: do not require bash to run netsocktests testcase + +From: Rolf Eike Beer + +commit 3659f98b5375d195f1870c3e508fe51e52206839 upstream. + +Nothing in this minimal script seems to require bash. 
We often run these +tests on embedded devices where the only shell available is the busybox +ash. Use sh instead. + +Signed-off-by: Rolf Eike Beer +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/net/run_netsocktests | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/net/run_netsocktests ++++ b/tools/testing/selftests/net/run_netsocktests +@@ -1,4 +1,4 @@ +-#!/bin/bash ++#!/bin/sh + + echo "--------------------" + echo "running socket test" diff --git a/queue-4.9/series b/queue-4.9/series index 550e92f738d..a4f80977726 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -7,3 +7,11 @@ regulator-axp20x-fix-axp809-ldo_io-registration-error-on-cold-boot.patch drm-tegra-dpaux-fix-error-handling.patch drm-vc4-fix-a-couple-error-codes-in-vc4_cl_lookup_bos.patch drm-savage-dereferencing-an-error-pointer.patch +selftests-do-not-require-bash-to-run-netsocktests-testcase.patch +selftests-do-not-require-bash-for-the-generated-test.patch +zram-revalidate-disk-under-init_lock.patch +zram-support-bdi_cap_stable_writes.patch +dax-fix-deadlock-with-dax-4k-holes.patch +mm-pmd-dirty-emulation-in-page-fault-handler.patch +mm-fix-devm_memremap_pages-crash-use-mem_hotplug_-begin-done.patch +ocfs2-fix-crash-caused-by-stale-lvb-with-fsdlm-plugin.patch diff --git a/queue-4.9/zram-revalidate-disk-under-init_lock.patch b/queue-4.9/zram-revalidate-disk-under-init_lock.patch new file mode 100644 index 00000000000..0d9267a1a65 --- /dev/null +++ b/queue-4.9/zram-revalidate-disk-under-init_lock.patch @@ -0,0 +1,54 @@ +From e7ccfc4ccb703e0f033bd4617580039898e912dd Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Tue, 10 Jan 2017 16:58:18 -0800 +Subject: zram: revalidate disk under init_lock + +From: Minchan Kim + +commit e7ccfc4ccb703e0f033bd4617580039898e912dd upstream. 
+ +Commit b4c5c60920e3 ("zram: avoid lockdep splat by revalidate_disk") +moved revalidate_disk call out of init_lock to avoid lockdep +false-positive splat. However, commit 08eee69fcf6b ("zram: remove +init_lock in zram_make_request") removed init_lock in IO path so there +is no worry about lockdep splat. So, let's restore it. + +This patch is needed to set BDI_CAP_STABLE_WRITES atomically in next +patch. + +Fixes: da9556a2367c ("zram: user per-cpu compression streams") +Link: http://lkml.kernel.org/r/1482366980-3782-3-git-send-email-minchan@kernel.org +Signed-off-by: Minchan Kim +Reviewed-by: Sergey Senozhatsky +Cc: Takashi Iwai +Cc: Hyeoncheol Lee +Cc: +Cc: Sangseok Lee +Cc: Hugh Dickins +Cc: Darrick J. Wong +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/zram/zram_drv.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -1094,14 +1094,8 @@ static ssize_t disksize_store(struct dev + zram->comp = comp; + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); +- up_write(&zram->init_lock); +- +- /* +- * Revalidate disk out of the init_lock to avoid lockdep splat. +- * It's okay because disk's capacity is protected by init_lock +- * so that revalidate_disk always sees up-to-date capacity. 
+- */ + revalidate_disk(zram->disk); ++ up_write(&zram->init_lock); + + return len; + diff --git a/queue-4.9/zram-support-bdi_cap_stable_writes.patch b/queue-4.9/zram-support-bdi_cap_stable_writes.patch new file mode 100644 index 00000000000..9da0761388f --- /dev/null +++ b/queue-4.9/zram-support-bdi_cap_stable_writes.patch @@ -0,0 +1,88 @@ +From b09ab054b69b07077bd3292f67e777861ac796e5 Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Tue, 10 Jan 2017 16:58:21 -0800 +Subject: zram: support BDI_CAP_STABLE_WRITES + +From: Minchan Kim + +commit b09ab054b69b07077bd3292f67e777861ac796e5 upstream. + +zram has used per-cpu stream feature from v4.7. It aims for increasing +cache hit ratio of scratch buffer for compressing. Downside of that +approach is that zram should ask memory space for compressed page in +per-cpu context which requires a stricter gfp flag, so the allocation can fail. +If so, it retries to allocate memory space out of per-cpu context so it +could get memory this time and compress the data again, copies it to the +memory space. + +In this scenario, zram assumes the data should never be changed but it is +not true without stable page support. So, if the data is changed under +us, zram can make buffer overrun so that zsmalloc free object chain is +broken and the system crashes as in the report below + + https://bugzilla.suse.com/show_bug.cgi?id=997574 + +This patch adds BDI_CAP_STABLE_WRITES to zram for declaring "I am block +device needing *stable write*". + +Fixes: da9556a2367c ("zram: user per-cpu compression streams") +Link: http://lkml.kernel.org/r/1482366980-3782-4-git-send-email-minchan@kernel.org +Signed-off-by: Minchan Kim +Reviewed-by: Sergey Senozhatsky +Cc: Takashi Iwai +Cc: Hyeoncheol Lee +Cc: +Cc: Sangseok Lee +Cc: Hugh Dickins +Cc: Darrick J. 
Wong +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/zram/zram_drv.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -25,6 +25,7 @@ + #include <linux/genhd.h> + #include <linux/highmem.h> + #include <linux/slab.h> ++#include <linux/backing-dev.h> + #include <linux/string.h> + #include <linux/vmalloc.h> + #include <linux/err.h> +@@ -111,6 +112,14 @@ static inline bool is_partial_io(struct + return bvec->bv_len != PAGE_SIZE; + } + ++static void zram_revalidate_disk(struct zram *zram) ++{ ++ revalidate_disk(zram->disk); ++ /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ ++ zram->disk->queue->backing_dev_info.capabilities |= ++ BDI_CAP_STABLE_WRITES; ++} ++ + /* + * Check if request is within bounds and aligned on zram logical blocks. + */ +@@ -1094,7 +1103,7 @@ static ssize_t disksize_store(struct dev + zram->comp = comp; + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); +- revalidate_disk(zram->disk); ++ zram_revalidate_disk(zram); + up_write(&zram->init_lock); + + return len; +@@ -1142,7 +1151,7 @@ static ssize_t reset_store(struct device + /* Make sure all the pending I/O are finished */ + fsync_bdev(bdev); + zram_reset_device(zram); +- revalidate_disk(zram->disk); ++ zram_revalidate_disk(zram); + bdput(bdev); + + mutex_lock(&bdev->bd_mutex);