From: Greg Kroah-Hartman Date: Fri, 14 Apr 2017 09:07:57 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.10.11~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5374c539afb7f2d49d9295729468ec1ee65739ad;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: blk-mq-avoid-memory-reclaim-when-remapping-queues.patch net-packet-fix-overflow-in-check-for-priv-area-size.patch --- diff --git a/queue-4.9/blk-mq-avoid-memory-reclaim-when-remapping-queues.patch b/queue-4.9/blk-mq-avoid-memory-reclaim-when-remapping-queues.patch new file mode 100644 index 00000000000..c9d5420560f --- /dev/null +++ b/queue-4.9/blk-mq-avoid-memory-reclaim-when-remapping-queues.patch @@ -0,0 +1,109 @@ +From 36e1f3d107867b25c616c2fd294f5a1c9d4e5d09 Mon Sep 17 00:00:00 2001 +From: Gabriel Krisman Bertazi +Date: Tue, 6 Dec 2016 13:31:44 -0200 +Subject: blk-mq: Avoid memory reclaim when remapping queues + +From: Gabriel Krisman Bertazi + +commit 36e1f3d107867b25c616c2fd294f5a1c9d4e5d09 upstream. + +While stressing memory and IO at the same time we changed SMT settings, +we were able to consistently trigger deadlocks in the mm system, which +froze the entire machine. + +I think that under memory stress conditions, the large allocations +performed by blk_mq_init_rq_map may trigger a reclaim, which stalls +waiting on the block layer remapping completion, thus deadlocking the +system. The trace below was collected after the machine stalled, +waiting for the hotplug event completion. + +The simplest fix for this is to make allocations in this path +non-reclaimable, with GFP_NOIO. With this patch, we couldn't hit the +issue anymore. + +This should apply on top of Jens's for-next branch cleanly. + +Changes since v1: + - Use GFP_NOIO instead of GFP_NOWAIT. 
+ + Call Trace: +[c000000f0160aaf0] [c000000f0160ab50] 0xc000000f0160ab50 (unreliable) +[c000000f0160acc0] [c000000000016624] __switch_to+0x2e4/0x430 +[c000000f0160ad20] [c000000000b1a880] __schedule+0x310/0x9b0 +[c000000f0160ae00] [c000000000b1af68] schedule+0x48/0xc0 +[c000000f0160ae30] [c000000000b1b4b0] schedule_preempt_disabled+0x20/0x30 +[c000000f0160ae50] [c000000000b1d4fc] __mutex_lock_slowpath+0xec/0x1f0 +[c000000f0160aed0] [c000000000b1d678] mutex_lock+0x78/0xa0 +[c000000f0160af00] [d000000019413cac] xfs_reclaim_inodes_ag+0x33c/0x380 [xfs] +[c000000f0160b0b0] [d000000019415164] xfs_reclaim_inodes_nr+0x54/0x70 [xfs] +[c000000f0160b0f0] [d0000000194297f8] xfs_fs_free_cached_objects+0x38/0x60 [xfs] +[c000000f0160b120] [c0000000003172c8] super_cache_scan+0x1f8/0x210 +[c000000f0160b190] [c00000000026301c] shrink_slab.part.13+0x21c/0x4c0 +[c000000f0160b2d0] [c000000000268088] shrink_zone+0x2d8/0x3c0 +[c000000f0160b380] [c00000000026834c] do_try_to_free_pages+0x1dc/0x520 +[c000000f0160b450] [c00000000026876c] try_to_free_pages+0xdc/0x250 +[c000000f0160b4e0] [c000000000251978] __alloc_pages_nodemask+0x868/0x10d0 +[c000000f0160b6f0] [c000000000567030] blk_mq_init_rq_map+0x160/0x380 +[c000000f0160b7a0] [c00000000056758c] blk_mq_map_swqueue+0x33c/0x360 +[c000000f0160b820] [c000000000567904] blk_mq_queue_reinit+0x64/0xb0 +[c000000f0160b850] [c00000000056a16c] blk_mq_queue_reinit_notify+0x19c/0x250 +[c000000f0160b8a0] [c0000000000f5d38] notifier_call_chain+0x98/0x100 +[c000000f0160b8f0] [c0000000000c5fb0] __cpu_notify+0x70/0xe0 +[c000000f0160b930] [c0000000000c63c4] notify_prepare+0x44/0xb0 +[c000000f0160b9b0] [c0000000000c52f4] cpuhp_invoke_callback+0x84/0x250 +[c000000f0160ba10] [c0000000000c570c] cpuhp_up_callbacks+0x5c/0x120 +[c000000f0160ba60] [c0000000000c7cb8] _cpu_up+0xf8/0x1d0 +[c000000f0160bac0] [c0000000000c7eb0] do_cpu_up+0x120/0x150 +[c000000f0160bb40] [c0000000006fe024] cpu_subsys_online+0x64/0xe0 +[c000000f0160bb90] [c0000000006f5124] 
device_online+0xb4/0x120 +[c000000f0160bbd0] [c0000000006f5244] online_store+0xb4/0xc0 +[c000000f0160bc20] [c0000000006f0a68] dev_attr_store+0x68/0xa0 +[c000000f0160bc60] [c0000000003ccc30] sysfs_kf_write+0x80/0xb0 +[c000000f0160bca0] [c0000000003cbabc] kernfs_fop_write+0x17c/0x250 +[c000000f0160bcf0] [c00000000030fe6c] __vfs_write+0x6c/0x1e0 +[c000000f0160bd90] [c000000000311490] vfs_write+0xd0/0x270 +[c000000f0160bde0] [c0000000003131fc] SyS_write+0x6c/0x110 +[c000000f0160be30] [c000000000009204] system_call+0x38/0xec + +Signed-off-by: Gabriel Krisman Bertazi +Cc: Brian King +Cc: Douglas Miller +Cc: linux-block@vger.kernel.org +Cc: linux-scsi@vger.kernel.org +Signed-off-by: Jens Axboe +Signed-off-by: Sumit Semwal +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-mq.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -1474,7 +1474,7 @@ static struct blk_mq_tags *blk_mq_init_r + INIT_LIST_HEAD(&tags->page_list); + + tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), +- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, ++ GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + set->numa_node); + if (!tags->rqs) { + blk_mq_free_tags(tags); +@@ -1500,7 +1500,7 @@ static struct blk_mq_tags *blk_mq_init_r + + do { + page = alloc_pages_node(set->numa_node, +- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, ++ GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, + this_order); + if (page) + break; +@@ -1521,7 +1521,7 @@ static struct blk_mq_tags *blk_mq_init_r + * Allow kmemleak to scan these pages as they contain pointers + * to additional allocations like via ops->init_request(). 
+ */ +- kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL); ++ kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); + entries_per_page = order_to_size(this_order) / rq_size; + to_do = min(entries_per_page, set->queue_depth - i); + left -= to_do * rq_size; diff --git a/queue-4.9/net-packet-fix-overflow-in-check-for-priv-area-size.patch b/queue-4.9/net-packet-fix-overflow-in-check-for-priv-area-size.patch new file mode 100644 index 00000000000..c0a6d41e30b --- /dev/null +++ b/queue-4.9/net-packet-fix-overflow-in-check-for-priv-area-size.patch @@ -0,0 +1,40 @@ +From 2b6867c2ce76c596676bec7d2d525af525fdc6e2 Mon Sep 17 00:00:00 2001 +From: Andrey Konovalov +Date: Wed, 29 Mar 2017 16:11:20 +0200 +Subject: net/packet: fix overflow in check for priv area size + +From: Andrey Konovalov + +commit 2b6867c2ce76c596676bec7d2d525af525fdc6e2 upstream. + +Subtracting tp_sizeof_priv from tp_block_size and casting to int +to check whether one is less than the other doesn't always work +(both of them are unsigned ints). + +Compare them as is instead. + +Also cast tp_sizeof_priv to u64 before using BLK_PLUS_PRIV, as +it can overflow inside BLK_PLUS_PRIV otherwise. + +Signed-off-by: Andrey Konovalov +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/packet/af_packet.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -4235,8 +4235,8 @@ static int packet_set_ring(struct sock * + if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) + goto out; + if (po->tp_version >= TPACKET_V3 && +- (int)(req->tp_block_size - +- BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) ++ req->tp_block_size <= ++ BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) + goto out; + if (unlikely(req->tp_frame_size < po->tp_hdrlen + + po->tp_reserve)) diff --git a/queue-4.9/series b/queue-4.9/series index e8bab30fb30..d44c658f888 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -22,3 +22,5 @@ mips-irq-stack-fix-erroneous-jal-to-plat_irq_dispatch.patch crypto-caam-fix-rng-deinstantiation-error-checking.patch crypto-caam-fix-invalid-dereference-in-caam_rsa_init_tfm.patch revert-drm-i915-execlists-reset-ring-registers-upon-resume.patch +net-packet-fix-overflow-in-check-for-priv-area-size.patch +blk-mq-avoid-memory-reclaim-when-remapping-queues.patch