From: Greg Kroah-Hartman
Date: Fri, 15 May 2015 02:07:10 +0000 (-0700)
Subject: 3.14-stable patches
X-Git-Tag: v3.10.79~22
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=508eeda9b263ed9ce9ea53c7708ae8f454365277;p=thirdparty%2Fkernel%2Fstable-queue.git

3.14-stable patches

added patches:
	mm-memory-failure-call-shake_page-when-error-hits-thp-tail-page.patch
	mm-soft-offline-fix-num_poisoned_pages-counting-on-concurrent-events.patch
	revert-dm-crypt-fix-deadlock-when-async-crypto-algorithm-returns-ebusy.patch
	writeback-use-1-instead-of-1-to-protect-against-div-by-zero.patch
	xen-console-update-console-event-channel-on-resume.patch
	xen-events-clear-cpu_evtchn_mask-before-resuming.patch
	xen-events-set-irq_info-evtchn-before-binding-the-channel-to-cpu-in-__startup_pirq.patch
---
diff --git a/queue-3.14/mm-memory-failure-call-shake_page-when-error-hits-thp-tail-page.patch b/queue-3.14/mm-memory-failure-call-shake_page-when-error-hits-thp-tail-page.patch
new file mode 100644
index 00000000000..1504d7335ed
--- /dev/null
+++ b/queue-3.14/mm-memory-failure-call-shake_page-when-error-hits-thp-tail-page.patch
@@ -0,0 +1,59 @@
+From 09789e5de18e4e442870b2d700831f5cb802eb05 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi
+Date: Tue, 5 May 2015 16:23:35 -0700
+Subject: mm/memory-failure: call shake_page() when error hits thp tail page
+
+From: Naoya Horiguchi
+
+commit 09789e5de18e4e442870b2d700831f5cb802eb05 upstream.
+
+Currently memory_failure() calls shake_page() to sweep pages out from
+pcplists only when the victim page is 4kB LRU page or thp head page.
+But we should do this for a thp tail page too.
+
+Consider that a memory error hits a thp tail page whose head page is on
+a pcplist when memory_failure() runs. Then, the current kernel skips
+shake_pages() part, so hwpoison_user_mappings() returns without calling
+split_huge_page() nor try_to_unmap() because PageLRU of the thp head is
+still cleared due to the skip of shake_page().
+
+As a result, me_huge_page() runs for the thp, which is broken behavior.
+
+One effect is a leak of the thp. And another is to fail to isolate the
+memory error, so later access to the error address causes another MCE,
+which kills the processes which used the thp.
+
+This patch fixes this problem by calling shake_page() for thp tail case.
+
+Fixes: 385de35722c9 ("thp: allow a hwpoisoned head page to be put back to LRU")
+Signed-off-by: Naoya Horiguchi
+Reviewed-by: Andi Kleen
+Acked-by: Dean Nelson
+Cc: Andrea Arcangeli
+Cc: Hidetoshi Seto
+Cc: Jin Dongming
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/memory-failure.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1149,10 +1149,10 @@ int memory_failure(unsigned long pfn, in
+ 	 * The check (unnecessarily) ignores LRU pages being isolated and
+ 	 * walked by the page reclaim code, however that's not a big loss.
+ 	 */
+-	if (!PageHuge(p) && !PageTransTail(p)) {
+-		if (!PageLRU(p))
+-			shake_page(p, 0);
+-		if (!PageLRU(p)) {
++	if (!PageHuge(p)) {
++		if (!PageLRU(hpage))
++			shake_page(hpage, 0);
++		if (!PageLRU(hpage)) {
+ 			/*
+ 			 * shake_page could have turned it free.
+ 			 */
diff --git a/queue-3.14/mm-soft-offline-fix-num_poisoned_pages-counting-on-concurrent-events.patch b/queue-3.14/mm-soft-offline-fix-num_poisoned_pages-counting-on-concurrent-events.patch
new file mode 100644
index 00000000000..a7eb72105af
--- /dev/null
+++ b/queue-3.14/mm-soft-offline-fix-num_poisoned_pages-counting-on-concurrent-events.patch
@@ -0,0 +1,46 @@
+From 602498f9aa43d4951eece3fd6ad95a6d0a78d537 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi
+Date: Tue, 5 May 2015 16:23:46 -0700
+Subject: mm: soft-offline: fix num_poisoned_pages counting on concurrent events
+
+From: Naoya Horiguchi
+
+commit 602498f9aa43d4951eece3fd6ad95a6d0a78d537 upstream.
+
+If multiple soft offline events hit one free page/hugepage concurrently,
+soft_offline_page() can handle the free page/hugepage multiple times,
+which makes num_poisoned_pages counter increased more than once. This
+patch fixes this wrong counting by checking TestSetPageHWPoison for normal
+papes and by checking the return value of dequeue_hwpoisoned_huge_page()
+for hugepages.
+
+Signed-off-by: Naoya Horiguchi
+Acked-by: Dean Nelson
+Cc: Andi Kleen
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/memory-failure.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1723,12 +1723,12 @@ int soft_offline_page(struct page *page,
+ 	} else if (ret == 0) { /* for free pages */
+ 		if (PageHuge(page)) {
+ 			set_page_hwpoison_huge_page(hpage);
+-			dequeue_hwpoisoned_huge_page(hpage);
+-			atomic_long_add(1 << compound_order(hpage),
++			if (!dequeue_hwpoisoned_huge_page(hpage))
++				atomic_long_add(1 << compound_order(hpage),
+ 					&num_poisoned_pages);
+ 		} else {
+-			SetPageHWPoison(page);
+-			atomic_long_inc(&num_poisoned_pages);
++			if (!TestSetPageHWPoison(page))
++				atomic_long_inc(&num_poisoned_pages);
+ 		}
+ 	}
+ 	unset_migratetype_isolate(page, MIGRATE_MOVABLE);
diff --git a/queue-3.14/revert-dm-crypt-fix-deadlock-when-async-crypto-algorithm-returns-ebusy.patch b/queue-3.14/revert-dm-crypt-fix-deadlock-when-async-crypto-algorithm-returns-ebusy.patch
new file mode 100644
index 00000000000..d55199f6d5f
--- /dev/null
+++ b/queue-3.14/revert-dm-crypt-fix-deadlock-when-async-crypto-algorithm-returns-ebusy.patch
@@ -0,0 +1,117 @@
+From c0403ec0bb5a8c5b267fb7e16021bec0b17e4964 Mon Sep 17 00:00:00 2001
+From: Rabin Vincent
+Date: Tue, 5 May 2015 15:15:56 +0200
+Subject: Revert "dm crypt: fix deadlock when async crypto algorithm returns -EBUSY"
+
+From: Rabin Vincent
+
+commit c0403ec0bb5a8c5b267fb7e16021bec0b17e4964 upstream.
+
+This reverts Linux 4.1-rc1 commit 0618764cb25f6fa9fb31152995de42a8a0496475.
+
+The problem which that commit attempts to fix actually lies in the
+Freescale CAAM crypto driver not dm-crypt.
+
+dm-crypt uses CRYPTO_TFM_REQ_MAY_BACKLOG. This means the the crypto
+driver should internally backlog requests which arrive when the queue is
+full and process them later. Until the crypto hw's queue becomes full,
+the driver returns -EINPROGRESS. When the crypto hw's queue if full,
+the driver returns -EBUSY, and if CRYPTO_TFM_REQ_MAY_BACKLOG is set, is
+expected to backlog the request and process it when the hardware has
+queue space. At the point when the driver takes the request from the
+backlog and starts processing it, it calls the completion function with
+a status of -EINPROGRESS. The completion function is called (for a
+second time, in the case of backlogged requests) with a status/err of 0
+when a request is done.
+
+Crypto drivers for hardware without hardware queueing use the helpers,
+crypto_init_queue(), crypto_enqueue_request(), crypto_dequeue_request()
+and crypto_get_backlog() helpers to implement this behaviour correctly,
+while others implement this behaviour without these helpers (ccp, for
+example).
+
+dm-crypt (before the patch that needs reverting) uses this API
+correctly. It queues up as many requests as the hw queues will allow
+(i.e. as long as it gets back -EINPROGRESS from the request function).
+Then, when it sees at least one backlogged request (gets -EBUSY), it
+waits till that backlogged request is handled (completion gets called
+with -EINPROGRESS), and then continues. The references to
+af_alg_wait_for_completion() and af_alg_complete() in that commit's
+commit message are irrelevant because those functions only handle one
+request at a time, unlink dm-crypt.
+
+The problem is that the Freescale CAAM driver, which that commit
+describes as having being tested with, fails to implement the
+backlogging behaviour correctly. In cam_jr_enqueue(), if the hardware
+queue is full, it simply returns -EBUSY without backlogging the request.
+What the observed deadlock was is not described in the commit message
+but it is obviously the wait_for_completion() in crypto_convert() where
+dm-crypto would wait for the completion being called with -EINPROGRESS
+in the case of backlogged requests. This completion will never be
+completed due to the bug in the CAAM driver.
+
+Commit 0618764cb25 incorrectly made dm-crypt wait for every request,
+even when the driver/hardware queues are not full, which means that
+dm-crypt will never see -EBUSY. This means that that commit will cause
+a performance regression on all crypto drivers which implement the API
+correctly.
+
+Revert it. Correct backlog handling should be implemented in the CAAM
+driver instead.
+
+Cc'ing stable purely because commit 0618764cb25 did. If for some reason
+a stable@ kernel did pick up commit 0618764cb25 it should get reverted.
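
As background for the backlog semantics described above, here is a minimal
illustrative C sketch of how a CRYPTO_TFM_REQ_MAY_BACKLOG submitter is
expected to behave. It is not part of the patch below, and the demo_* names,
have_more_work() and submit_one_request() are hypothetical stand-ins, not
real dm-crypt or crypto-API symbols.

/*
 * Illustrative sketch only, not dm-crypt code: a submitter that sets
 * CRYPTO_TFM_REQ_MAY_BACKLOG keeps queueing requests until it sees
 * -EBUSY, and only then waits for the backlogged request's
 * -EINPROGRESS notification.  demo_*, have_more_work() and
 * submit_one_request() are hypothetical helpers (definitions omitted).
 */
#include <linux/completion.h>
#include <linux/crypto.h>
#include <linux/errno.h>
#include <linux/types.h>

struct demo_ctx {
	struct completion restart;	/* init_completion() before use */
	/* ... per-conversion state ... */
};

static bool have_more_work(struct demo_ctx *ctx);	/* hypothetical */
static int submit_one_request(struct demo_ctx *ctx);	/* hypothetical */

/* Async callback registered for every request. */
static void demo_async_done(struct crypto_async_request *async_req, int error)
{
	struct demo_ctx *ctx = async_req->data;

	if (error == -EINPROGRESS) {
		/* A backlogged request just entered the hardware queue:
		 * wake the submitter; the request itself is still running. */
		complete(&ctx->restart);
		return;
	}

	/* error == 0 (request finished) or a real failure: handle it here. */
}

static void demo_submit_loop(struct demo_ctx *ctx)
{
	while (have_more_work(ctx)) {
		int r = submit_one_request(ctx);

		switch (r) {
		case -EBUSY:
			/* Hardware queue full, request was backlogged: wait
			 * until demo_async_done() reports -EINPROGRESS. */
			wait_for_completion(&ctx->restart);
			reinit_completion(&ctx->restart);
			/* fall through */
		case -EINPROGRESS:	/* accepted asynchronously */
		case 0:			/* completed synchronously */
			continue;
		default:
			return;		/* real error */
		}
	}
}

The point mirrored by the revert below is that wait_for_completion() is
reached only on -EBUSY, i.e. only when a request really was backlogged, so
drivers that honour the backlog API never stall the submitter unnecessarily.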
+
+Signed-off-by: Rabin Vincent
+Reviewed-by: Horia Geanta
+Signed-off-by: Mike Snitzer
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/md/dm-crypt.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -915,10 +915,11 @@ static int crypt_convert(struct crypt_co
+
+ 		switch (r) {
+ 		/* async */
+-		case -EINPROGRESS:
+ 		case -EBUSY:
+ 			wait_for_completion(&ctx->restart);
+ 			reinit_completion(&ctx->restart);
++			/* fall through*/
++		case -EINPROGRESS:
+ 			ctx->req = NULL;
+ 			ctx->cc_sector++;
+ 			continue;
+@@ -1313,8 +1314,10 @@ static void kcryptd_async_done(struct cr
+ 	struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx);
+ 	struct crypt_config *cc = io->cc;
+
+-	if (error == -EINPROGRESS)
++	if (error == -EINPROGRESS) {
++		complete(&ctx->restart);
+ 		return;
++	}
+
+ 	if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
+ 		error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
+@@ -1325,15 +1328,12 @@ static void kcryptd_async_done(struct cr
+ 	mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
+
+ 	if (!atomic_dec_and_test(&ctx->cc_pending))
+-		goto done;
++		return;
+
+ 	if (bio_data_dir(io->base_bio) == READ)
+ 		kcryptd_crypt_read_done(io);
+ 	else
+ 		kcryptd_crypt_write_io_submit(io, 1);
+-done:
+-	if (!completion_done(&ctx->restart))
+-		complete(&ctx->restart);
+ }
+
+ static void kcryptd_crypt(struct work_struct *work)
diff --git a/queue-3.14/series b/queue-3.14/series
index f87cabadca4..997c8bcc9b9 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -3,3 +3,10 @@ nilfs2-fix-sanity-check-of-btree-level-in-nilfs_btree_root_broken.patch
 rdma-cma-canonize-ipv4-on-ipv6-sockets-properly.patch
 gpio-sysfs-fix-memory-leaks-and-device-hotplug.patch
 mnt-fix-fs_fully_visible-to-verify-the-root-directory-is-visible.patch
+mm-memory-failure-call-shake_page-when-error-hits-thp-tail-page.patch
+writeback-use-1-instead-of-1-to-protect-against-div-by-zero.patch
+mm-soft-offline-fix-num_poisoned_pages-counting-on-concurrent-events.patch
+xen-events-clear-cpu_evtchn_mask-before-resuming.patch
+xen-console-update-console-event-channel-on-resume.patch
+xen-events-set-irq_info-evtchn-before-binding-the-channel-to-cpu-in-__startup_pirq.patch
+revert-dm-crypt-fix-deadlock-when-async-crypto-algorithm-returns-ebusy.patch
diff --git a/queue-3.14/writeback-use-1-instead-of-1-to-protect-against-div-by-zero.patch b/queue-3.14/writeback-use-1-instead-of-1-to-protect-against-div-by-zero.patch
new file mode 100644
index 00000000000..bdcbcccf5c7
--- /dev/null
+++ b/queue-3.14/writeback-use-1-instead-of-1-to-protect-against-div-by-zero.patch
@@ -0,0 +1,67 @@
+From 464d1387acb94dc43ba772b35242345e3d2ead1b Mon Sep 17 00:00:00 2001
+From: Tejun Heo
+Date: Tue, 21 Apr 2015 16:49:13 -0400
+Subject: writeback: use |1 instead of +1 to protect against div by zero
+
+From: Tejun Heo
+
+commit 464d1387acb94dc43ba772b35242345e3d2ead1b upstream.
+
+mm/page-writeback.c has several places where 1 is added to the divisor
+to prevent division by zero exceptions; however, if the original
+divisor is equivalent to -1, adding 1 leads to division by zero.
+
+There are three places where +1 is used for this purpose - one in
+pos_ratio_polynom() and two in bdi_position_ratio(). The second one
+in bdi_position_ratio() actually triggered div-by-zero oops on a
+machine running a 3.10 kernel. The divisor is
+
+ x_intercept - bdi_setpoint + 1 == span + 1
+
+span is confirmed to be (u32)-1. It isn't clear how it ended up that
+but it could be from write bandwidth calculation underflow fixed by
+c72efb658f7c ("writeback: fix possible underflow in write bandwidth
+calculation").
+
+At any rate, +1 isn't a proper protection against div-by-zero. This
+patch converts all +1 protections to |1. Note that
+bdi_update_dirty_ratelimit() was already using |1 before this patch.
+
+Signed-off-by: Tejun Heo
+Reviewed-by: Jan Kara
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/page-writeback.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -601,7 +601,7 @@ static long long pos_ratio_polynom(unsig
+ 	long x;
+
+ 	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+-		      limit - setpoint + 1);
++		      (limit - setpoint) | 1);
+ 	pos_ratio = x;
+ 	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
+ 	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
+@@ -828,7 +828,7 @@ static unsigned long bdi_position_ratio(
+ 	 * scale global setpoint to bdi's:
+ 	 * bdi_setpoint = setpoint * bdi_thresh / thresh
+ 	 */
+-	x = div_u64((u64)bdi_thresh << 16, thresh + 1);
++	x = div_u64((u64)bdi_thresh << 16, thresh | 1);
+ 	bdi_setpoint = setpoint * (u64)x >> 16;
+ 	/*
+ 	 * Use span=(8*write_bw) in single bdi case as indicated by
+@@ -843,7 +843,7 @@ static unsigned long bdi_position_ratio(
+
+ 	if (bdi_dirty < x_intercept - span / 4) {
+ 		pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
+-				      x_intercept - bdi_setpoint + 1);
++				      (x_intercept - bdi_setpoint) | 1);
+ 	} else
+ 		pos_ratio /= 4;
+
diff --git a/queue-3.14/xen-console-update-console-event-channel-on-resume.patch b/queue-3.14/xen-console-update-console-event-channel-on-resume.patch
new file mode 100644
index 00000000000..6c8d82b019f
--- /dev/null
+++ b/queue-3.14/xen-console-update-console-event-channel-on-resume.patch
@@ -0,0 +1,51 @@
+From b9d934f27c91b878c4b2e64299d6e419a4022f8d Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky
+Date: Wed, 29 Apr 2015 17:10:14 -0400
+Subject: xen/console: Update console event channel on resume
+
+From: Boris Ostrovsky
+
+commit b9d934f27c91b878c4b2e64299d6e419a4022f8d upstream.
+
+After a resume the hypervisor/tools may change console event
+channel number. We should re-query it.
+
+Signed-off-by: Boris Ostrovsky
+Signed-off-by: David Vrabel
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/tty/hvc/hvc_xen.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/drivers/tty/hvc/hvc_xen.c
++++ b/drivers/tty/hvc/hvc_xen.c
+@@ -299,11 +299,27 @@ static int xen_initial_domain_console_in
+ 	return 0;
+ }
+
++static void xen_console_update_evtchn(struct xencons_info *info)
++{
++	if (xen_hvm_domain()) {
++		uint64_t v;
++		int err;
++
++		err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
++		if (!err && v)
++			info->evtchn = v;
++	} else
++		info->evtchn = xen_start_info->console.domU.evtchn;
++}
++
+ void xen_console_resume(void)
+ {
+ 	struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE);
+-	if (info != NULL && info->irq)
++	if (info != NULL && info->irq) {
++		if (!xen_initial_domain())
++			xen_console_update_evtchn(info);
+ 		rebind_evtchn_irq(info->evtchn, info->irq);
++	}
+ }
+
+ static void xencons_disconnect_backend(struct xencons_info *info)
diff --git a/queue-3.14/xen-events-clear-cpu_evtchn_mask-before-resuming.patch b/queue-3.14/xen-events-clear-cpu_evtchn_mask-before-resuming.patch
new file mode 100644
index 00000000000..6769ca322b5
--- /dev/null
+++ b/queue-3.14/xen-events-clear-cpu_evtchn_mask-before-resuming.patch
@@ -0,0 +1,86 @@
+From 5cec98834989a014a9560b1841649eaca95cf00e Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky
+Date: Wed, 29 Apr 2015 17:10:12 -0400
+Subject: xen/events: Clear cpu_evtchn_mask before resuming
+
+From: Boris Ostrovsky
+
+commit 5cec98834989a014a9560b1841649eaca95cf00e upstream.
+
+When a guest is resumed, the hypervisor may change event channel
+assignments. If this happens and the guest uses 2-level events it
+is possible for the interrupt to be claimed by wrong VCPU since
+cpu_evtchn_mask bits may be stale. This can happen even though
+evtchn_2l_bind_to_cpu() attempts to clear old bits: irq_info that
+is passed in is not necessarily the original one (from pre-migration
+times) but instead is freshly allocated during resume and so any
+information about which CPU the channel was bound to is lost.
+
+Thus we should clear the mask during resume.
+
+We also need to make sure that bits for xenstore and console channels
+are set when these two subsystems are resumed. While rebind_evtchn_irq()
+(which is invoked for both of them on a resume) calls irq_set_affinity(),
+the latter will in fact postpone setting affinity until handling the
+interrupt. But because cpu_evtchn_mask will have bits for these two
+cleared we won't be able to take the interrupt.
+
+With that in mind, we need to bind those two channels explicitly in
+rebind_evtchn_irq(). We will keep irq_set_affinity() so that we have a
+pass through generic irq affinity code later, in case something needs
+to be updated there as well.
+
+(Also replace cpumask_of(0) with cpumask_of(info->cpu) in
+rebind_evtchn_irq(): it should be set to zero in preceding
+xen_irq_info_evtchn_setup().)
+
+Signed-off-by: Boris Ostrovsky
+Reported-by: Annie Li
+Signed-off-by: David Vrabel
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/xen/events/events_2l.c | 10 ++++++++++
+ drivers/xen/events/events_base.c | 5 +++--
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/xen/events/events_2l.c
++++ b/drivers/xen/events/events_2l.c
+@@ -352,6 +352,15 @@ irqreturn_t xen_debug_interrupt(int irq,
+ 	return IRQ_HANDLED;
+ }
+
++static void evtchn_2l_resume(void)
++{
++	int i;
++
++	for_each_online_cpu(i)
++		memset(per_cpu(cpu_evtchn_mask, i), 0, sizeof(xen_ulong_t) *
++				EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
++}
++
+ static const struct evtchn_ops evtchn_ops_2l = {
+ 	.max_channels = evtchn_2l_max_channels,
+ 	.nr_channels = evtchn_2l_max_channels,
+@@ -363,6 +372,7 @@ static const struct evtchn_op
+ 	.mask = evtchn_2l_mask,
+ 	.unmask = evtchn_2l_unmask,
+ 	.handle_events = evtchn_2l_handle_events,
++	.resume = evtchn_2l_resume,
+ };
+
+ void __init xen_evtchn_2l_init(void)
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -1294,8 +1294,9 @@ void rebind_evtchn_irq(int evtchn, int i
+
+ 	mutex_unlock(&irq_mapping_update_lock);
+
+-	/* new event channels are always bound to cpu 0 */
+-	irq_set_affinity(irq, cpumask_of(0));
++	bind_evtchn_to_cpu(evtchn, info->cpu);
++	/* This will be deferred until interrupt is processed */
++	irq_set_affinity(irq, cpumask_of(info->cpu));
+
+ 	/* Unmask the event channel. */
+ 	enable_irq(irq);
diff --git a/queue-3.14/xen-events-set-irq_info-evtchn-before-binding-the-channel-to-cpu-in-__startup_pirq.patch b/queue-3.14/xen-events-set-irq_info-evtchn-before-binding-the-channel-to-cpu-in-__startup_pirq.patch
new file mode 100644
index 00000000000..2871a9dcb5f
--- /dev/null
+++ b/queue-3.14/xen-events-set-irq_info-evtchn-before-binding-the-channel-to-cpu-in-__startup_pirq.patch
@@ -0,0 +1,33 @@
+From 16e6bd5970c88a2ac018b84a5f1dd5c2ff1fdf2c Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky
+Date: Wed, 29 Apr 2015 17:10:15 -0400
+Subject: xen/events: Set irq_info->evtchn before binding the channel to CPU in __startup_pirq()
+
+From: Boris Ostrovsky
+
+commit 16e6bd5970c88a2ac018b84a5f1dd5c2ff1fdf2c upstream.
+
+.. because bind_evtchn_to_cpu(evtchn, cpu) will map evtchn to
+'info' and pass 'info' down to xen_evtchn_port_bind_to_cpu().
+
+Signed-off-by: Boris Ostrovsky
+Tested-by: Annie Li
+Signed-off-by: David Vrabel
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/xen/events/events_base.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -550,8 +550,8 @@ static unsigned int __startup_pirq(unsig
+ 	if (rc)
+ 		goto err;
+
+-	bind_evtchn_to_cpu(evtchn, 0);
+ 	info->evtchn = evtchn;
++	bind_evtchn_to_cpu(evtchn, 0);
+
+ 	rc = xen_evtchn_port_setup(info);
+ 	if (rc)