--- /dev/null
+From 09789e5de18e4e442870b2d700831f5cb802eb05 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Tue, 5 May 2015 16:23:35 -0700
+Subject: mm/memory-failure: call shake_page() when error hits thp tail page
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 09789e5de18e4e442870b2d700831f5cb802eb05 upstream.
+
+Currently memory_failure() calls shake_page() to sweep pages out from
+pcplists only when the victim page is a 4kB LRU page or a thp head page.
+But we should do this for a thp tail page too.
+
+Consider that a memory error hits a thp tail page whose head page is on
+a pcplist when memory_failure() runs. Then the current kernel skips the
+shake_page() part, so hwpoison_user_mappings() returns without calling
+split_huge_page() or try_to_unmap(), because PageLRU of the thp head is
+still clear due to the skipped shake_page().
+
+As a result, me_huge_page() runs for the thp, which is broken behavior.
+
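+Condensed, the broken sequence looks like this (an illustration of the
+description above, not code from this patch):
+
+	/*
+	 * p     = thp tail page hit by the error
+	 * hpage = compound_head(p), sitting on a pcplist
+	 *
+	 * PageTransTail(p) true -> shake_page() is skipped
+	 * PageLRU(hpage) clear  -> hwpoison_user_mappings() returns
+	 *                          without split_huge_page() or
+	 *                          try_to_unmap()
+	 * me_huge_page() then handles the still-mapped thp
+	 */
+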
+One effect is a leak of the thp. Another is a failure to isolate the
+memory error, so a later access to the error address causes another MCE,
+which kills the processes that used the thp.
+
+This patch fixes this problem by calling shake_page() in the thp tail
+case too.
+
+Fixes: 385de35722c9 ("thp: allow a hwpoisoned head page to be put back to LRU")
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reviewed-by: Andi Kleen <ak@linux.intel.com>
+Acked-by: Dean Nelson <dnelson@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+Cc: Jin Dongming <jin.dongming@np.css.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1149,10 +1149,10 @@ int memory_failure(unsigned long pfn, in
+ * The check (unnecessarily) ignores LRU pages being isolated and
+ * walked by the page reclaim code, however that's not a big loss.
+ */
+- if (!PageHuge(p) && !PageTransTail(p)) {
+- if (!PageLRU(p))
+- shake_page(p, 0);
+- if (!PageLRU(p)) {
++ if (!PageHuge(p)) {
++ if (!PageLRU(hpage))
++ shake_page(hpage, 0);
++ if (!PageLRU(hpage)) {
+ /*
+ * shake_page could have turned it free.
+ */
--- /dev/null
+From 602498f9aa43d4951eece3fd6ad95a6d0a78d537 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Tue, 5 May 2015 16:23:46 -0700
+Subject: mm: soft-offline: fix num_poisoned_pages counting on concurrent events
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 602498f9aa43d4951eece3fd6ad95a6d0a78d537 upstream.
+
+If multiple soft offline events hit one free page/hugepage concurrently,
+soft_offline_page() can handle the free page/hugepage multiple times,
+which increments the num_poisoned_pages counter more than once. This
+patch fixes the wrong counting by checking TestSetPageHWPoison() for
+normal pages and by checking the return value of
+dequeue_hwpoisoned_huge_page() for hugepages.
+
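+For illustration only, a minimal userspace sketch of the same
+test-and-set pattern (not code from this patch; the kernel side uses
+the PageHWPoison page flag rather than a global flag):
+
+	#include <assert.h>
+	#include <stdatomic.h>
+
+	static atomic_flag poisoned = ATOMIC_FLAG_INIT;
+	static atomic_long num_poisoned;
+
+	static void mark_poisoned(void)
+	{
+		/* only the caller that flips the flag 0->1 may count */
+		if (!atomic_flag_test_and_set(&poisoned))
+			atomic_fetch_add(&num_poisoned, 1);
+	}
+
+	int main(void)
+	{
+		mark_poisoned();
+		mark_poisoned();	/* a concurrent second event */
+		assert(atomic_load(&num_poisoned) == 1);
+		return 0;
+	}
+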
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Dean Nelson <dnelson@redhat.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1723,12 +1723,12 @@ int soft_offline_page(struct page *page,
+ } else if (ret == 0) { /* for free pages */
+ if (PageHuge(page)) {
+ set_page_hwpoison_huge_page(hpage);
+- dequeue_hwpoisoned_huge_page(hpage);
+- atomic_long_add(1 << compound_order(hpage),
++ if (!dequeue_hwpoisoned_huge_page(hpage))
++ atomic_long_add(1 << compound_order(hpage),
+ &num_poisoned_pages);
+ } else {
+- SetPageHWPoison(page);
+- atomic_long_inc(&num_poisoned_pages);
++ if (!TestSetPageHWPoison(page))
++ atomic_long_inc(&num_poisoned_pages);
+ }
+ }
+ unset_migratetype_isolate(page, MIGRATE_MOVABLE);
--- /dev/null
+From c0403ec0bb5a8c5b267fb7e16021bec0b17e4964 Mon Sep 17 00:00:00 2001
+From: Rabin Vincent <rabin.vincent@axis.com>
+Date: Tue, 5 May 2015 15:15:56 +0200
+Subject: Revert "dm crypt: fix deadlock when async crypto algorithm returns -EBUSY"
+
+From: Rabin Vincent <rabin.vincent@axis.com>
+
+commit c0403ec0bb5a8c5b267fb7e16021bec0b17e4964 upstream.
+
+This reverts Linux 4.1-rc1 commit 0618764cb25f6fa9fb31152995de42a8a0496475.
+
+The problem which that commit attempts to fix actually lies in the
+Freescale CAAM crypto driver, not in dm-crypt.
+
+dm-crypt uses CRYPTO_TFM_REQ_MAY_BACKLOG. This means that the crypto
+driver should internally backlog requests which arrive when the queue is
+full and process them later. Until the crypto hw's queue becomes full,
+the driver returns -EINPROGRESS. When the crypto hw's queue is full,
+the driver returns -EBUSY, and if CRYPTO_TFM_REQ_MAY_BACKLOG is set, it
+is expected to backlog the request and process it when the hardware has
+queue space. At the point when the driver takes the request from the
+backlog and starts processing it, it calls the completion function with
+a status of -EINPROGRESS. The completion function is called (for a
+second time, in the case of backlogged requests) with a status/err of 0
+when the request is done.
+
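+The contract, summarized (illustration only, not code to be applied):
+
+	/*
+	 * driver returns -EINPROGRESS -> request accepted; the completion
+	 *                                callback runs once, with err == 0,
+	 *                                when the request is done
+	 * driver returns -EBUSY       -> (with CRYPTO_TFM_REQ_MAY_BACKLOG)
+	 *                                request backlogged; the completion
+	 *                                callback runs twice: first with
+	 *                                err == -EINPROGRESS when processing
+	 *                                starts, then with err == 0
+	 */
+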
+Crypto drivers for hardware without hardware queueing use the
+crypto_init_queue(), crypto_enqueue_request(), crypto_dequeue_request()
+and crypto_get_backlog() helpers to implement this behaviour correctly,
+while others implement it without these helpers (ccp, for example).
+
+dm-crypt (before the patch that needs reverting) uses this API
+correctly. It queues up as many requests as the hw queues will allow
+(i.e. as long as it gets back -EINPROGRESS from the request function).
+Then, when it sees at least one backlogged request (gets -EBUSY), it
+waits until that backlogged request is handled (the completion gets
+called with -EINPROGRESS), and then continues. The references to
+af_alg_wait_for_completion() and af_alg_complete() in that commit's
+commit message are irrelevant because those functions only handle one
+request at a time, unlike dm-crypt.
+
+The problem is that the Freescale CAAM driver, which that commit
+describes as having been tested with, fails to implement the
+backlogging behaviour correctly. In caam_jr_enqueue(), if the hardware
+queue is full, it simply returns -EBUSY without backlogging the request.
+The observed deadlock is not described in the commit message, but it is
+obviously the wait_for_completion() in crypt_convert() where dm-crypt
+would wait for the completion to be called with -EINPROGRESS in the
+case of backlogged requests. This completion will never be signalled
+due to the bug in the CAAM driver.
+
+Commit 0618764cb25 incorrectly made dm-crypt wait for every request,
+even when the driver/hardware queues are not full, which means that
+dm-crypt will never see -EBUSY. As a result, that commit causes a
+performance regression on all crypto drivers which implement the API
+correctly.
+
+Revert it. Correct backlog handling should be implemented in the CAAM
+driver instead.
+
+Cc'ing stable purely because commit 0618764cb25 did. If for some reason
+a stable@ kernel did pick up commit 0618764cb25 it should get reverted.
+
+Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
+Reviewed-by: Horia Geanta <horia.geanta@freescale.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-crypt.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -915,10 +915,11 @@ static int crypt_convert(struct crypt_co
+
+ switch (r) {
+ /* async */
+- case -EINPROGRESS:
+ case -EBUSY:
+ wait_for_completion(&ctx->restart);
+ reinit_completion(&ctx->restart);
++ /* fall through*/
++ case -EINPROGRESS:
+ ctx->req = NULL;
+ ctx->cc_sector++;
+ continue;
+@@ -1313,8 +1314,10 @@ static void kcryptd_async_done(struct cr
+ struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx);
+ struct crypt_config *cc = io->cc;
+
+- if (error == -EINPROGRESS)
++ if (error == -EINPROGRESS) {
++ complete(&ctx->restart);
+ return;
++ }
+
+ if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
+ error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
+@@ -1325,15 +1328,12 @@ static void kcryptd_async_done(struct cr
+ mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
+
+ if (!atomic_dec_and_test(&ctx->cc_pending))
+- goto done;
++ return;
+
+ if (bio_data_dir(io->base_bio) == READ)
+ kcryptd_crypt_read_done(io);
+ else
+ kcryptd_crypt_write_io_submit(io, 1);
+-done:
+- if (!completion_done(&ctx->restart))
+- complete(&ctx->restart);
+ }
+
+ static void kcryptd_crypt(struct work_struct *work)
rdma-cma-canonize-ipv4-on-ipv6-sockets-properly.patch
gpio-sysfs-fix-memory-leaks-and-device-hotplug.patch
mnt-fix-fs_fully_visible-to-verify-the-root-directory-is-visible.patch
+mm-memory-failure-call-shake_page-when-error-hits-thp-tail-page.patch
+writeback-use-1-instead-of-1-to-protect-against-div-by-zero.patch
+mm-soft-offline-fix-num_poisoned_pages-counting-on-concurrent-events.patch
+xen-events-clear-cpu_evtchn_mask-before-resuming.patch
+xen-console-update-console-event-channel-on-resume.patch
+xen-events-set-irq_info-evtchn-before-binding-the-channel-to-cpu-in-__startup_pirq.patch
+revert-dm-crypt-fix-deadlock-when-async-crypto-algorithm-returns-ebusy.patch
--- /dev/null
+From 464d1387acb94dc43ba772b35242345e3d2ead1b Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 21 Apr 2015 16:49:13 -0400
+Subject: writeback: use |1 instead of +1 to protect against div by zero
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 464d1387acb94dc43ba772b35242345e3d2ead1b upstream.
+
+mm/page-writeback.c has several places where 1 is added to the divisor
+to prevent division by zero exceptions; however, if the original
+divisor is equivalent to -1, adding 1 leads to division by zero.
+
+There are three places where +1 is used for this purpose - one in
+pos_ratio_polynom() and two in bdi_position_ratio(). The second one
+in bdi_position_ratio() actually triggered a div-by-zero oops on a
+machine running a 3.10 kernel. The divisor is
+
+ x_intercept - bdi_setpoint + 1 == span + 1
+
+span is confirmed to be (u32)-1. It isn't clear how it ended up that
+way, but it could be from the write bandwidth calculation underflow
+fixed by c72efb658f7c ("writeback: fix possible underflow in write
+bandwidth calculation").
+
+At any rate, +1 isn't a proper protection against div-by-zero. This
+patch converts all +1 protections to |1. Note that
+bdi_update_dirty_ratelimit() was already using |1 before this patch.
+
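+For illustration, a standalone userspace check of the arithmetic (not
+part of this patch):
+
+	#include <assert.h>
+	#include <stdint.h>
+
+	int main(void)
+	{
+		uint32_t d = (uint32_t)-1;	/* the observed "span" value */
+
+		assert((uint32_t)(d + 1) == 0);	/* +1 wraps to 0: div-by-zero */
+		assert((d | 1) == d);		/* |1 keeps it nonzero: safe */
+		assert((0u + 1) == 1 && (0u | 1) == 1);	/* both handle zero */
+		return 0;
+	}
+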
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page-writeback.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -601,7 +601,7 @@ static long long pos_ratio_polynom(unsig
+ long x;
+
+ x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+- limit - setpoint + 1);
++ (limit - setpoint) | 1);
+ pos_ratio = x;
+ pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
+ pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
+@@ -828,7 +828,7 @@ static unsigned long bdi_position_ratio(
+ * scale global setpoint to bdi's:
+ * bdi_setpoint = setpoint * bdi_thresh / thresh
+ */
+- x = div_u64((u64)bdi_thresh << 16, thresh + 1);
++ x = div_u64((u64)bdi_thresh << 16, thresh | 1);
+ bdi_setpoint = setpoint * (u64)x >> 16;
+ /*
+ * Use span=(8*write_bw) in single bdi case as indicated by
+@@ -843,7 +843,7 @@ static unsigned long bdi_position_ratio(
+
+ if (bdi_dirty < x_intercept - span / 4) {
+ pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
+- x_intercept - bdi_setpoint + 1);
++ (x_intercept - bdi_setpoint) | 1);
+ } else
+ pos_ratio /= 4;
+
--- /dev/null
+From b9d934f27c91b878c4b2e64299d6e419a4022f8d Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Date: Wed, 29 Apr 2015 17:10:14 -0400
+Subject: xen/console: Update console event channel on resume
+
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+
+commit b9d934f27c91b878c4b2e64299d6e419a4022f8d upstream.
+
+After a resume the hypervisor/tools may change the console event
+channel number. We should re-query it.
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/hvc/hvc_xen.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/drivers/tty/hvc/hvc_xen.c
++++ b/drivers/tty/hvc/hvc_xen.c
+@@ -299,11 +299,27 @@ static int xen_initial_domain_console_in
+ return 0;
+ }
+
++static void xen_console_update_evtchn(struct xencons_info *info)
++{
++ if (xen_hvm_domain()) {
++ uint64_t v;
++ int err;
++
++ err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
++ if (!err && v)
++ info->evtchn = v;
++ } else
++ info->evtchn = xen_start_info->console.domU.evtchn;
++}
++
+ void xen_console_resume(void)
+ {
+ struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE);
+- if (info != NULL && info->irq)
++ if (info != NULL && info->irq) {
++ if (!xen_initial_domain())
++ xen_console_update_evtchn(info);
+ rebind_evtchn_irq(info->evtchn, info->irq);
++ }
+ }
+
+ static void xencons_disconnect_backend(struct xencons_info *info)
--- /dev/null
+From 5cec98834989a014a9560b1841649eaca95cf00e Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Date: Wed, 29 Apr 2015 17:10:12 -0400
+Subject: xen/events: Clear cpu_evtchn_mask before resuming
+
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+
+commit 5cec98834989a014a9560b1841649eaca95cf00e upstream.
+
+When a guest is resumed, the hypervisor may change event channel
+assignments. If this happens and the guest uses 2-level events, it
+is possible for an interrupt to be claimed by the wrong VCPU, since
+cpu_evtchn_mask bits may be stale. This can happen even though
+evtchn_2l_bind_to_cpu() attempts to clear old bits: the irq_info that
+is passed in is not necessarily the original one (from pre-migration
+times) but instead is freshly allocated during resume, and so any
+information about which CPU the channel was bound to is lost.
+
+Thus we should clear the mask during resume.
+
+We also need to make sure that bits for the xenstore and console
+channels are set when these two subsystems are resumed. While
+rebind_evtchn_irq() (which is invoked for both of them on a resume)
+calls irq_set_affinity(), the latter will in fact postpone setting the
+affinity until the interrupt is handled. But because cpu_evtchn_mask
+will have the bits for these two channels cleared, we won't be able to
+take the interrupt.
+
+With that in mind, we need to bind those two channels explicitly in
+rebind_evtchn_irq(). We keep the irq_set_affinity() call so that we
+still pass through the generic irq affinity code later, in case
+something needs to be updated there as well.
+
+(Also replace cpumask_of(0) with cpumask_of(info->cpu) in
+rebind_evtchn_irq(): info->cpu should be zero at that point, having
+been set in the preceding xen_irq_info_evtchn_setup().)
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Reported-by: Annie Li <annie.li@oracle.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/events/events_2l.c | 10 ++++++++++
+ drivers/xen/events/events_base.c | 5 +++--
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/xen/events/events_2l.c
++++ b/drivers/xen/events/events_2l.c
+@@ -352,6 +352,15 @@ irqreturn_t xen_debug_interrupt(int irq,
+ return IRQ_HANDLED;
+ }
+
++static void evtchn_2l_resume(void)
++{
++ int i;
++
++ for_each_online_cpu(i)
++ memset(per_cpu(cpu_evtchn_mask, i), 0, sizeof(xen_ulong_t) *
++ EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
++}
++
+ static const struct evtchn_ops evtchn_ops_2l = {
+ .max_channels = evtchn_2l_max_channels,
+ .nr_channels = evtchn_2l_max_channels,
+@@ -363,6 +372,7 @@ static const struct evtchn_ops evtchn_op
+ .mask = evtchn_2l_mask,
+ .unmask = evtchn_2l_unmask,
+ .handle_events = evtchn_2l_handle_events,
++ .resume = evtchn_2l_resume,
+ };
+
+ void __init xen_evtchn_2l_init(void)
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -1294,8 +1294,9 @@ void rebind_evtchn_irq(int evtchn, int i
+
+ mutex_unlock(&irq_mapping_update_lock);
+
+- /* new event channels are always bound to cpu 0 */
+- irq_set_affinity(irq, cpumask_of(0));
++ bind_evtchn_to_cpu(evtchn, info->cpu);
++ /* This will be deferred until interrupt is processed */
++ irq_set_affinity(irq, cpumask_of(info->cpu));
+
+ /* Unmask the event channel. */
+ enable_irq(irq);
--- /dev/null
+From 16e6bd5970c88a2ac018b84a5f1dd5c2ff1fdf2c Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Date: Wed, 29 Apr 2015 17:10:15 -0400
+Subject: xen/events: Set irq_info->evtchn before binding the channel to CPU in __startup_pirq()
+
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+
+commit 16e6bd5970c88a2ac018b84a5f1dd5c2ff1fdf2c upstream.
+
+.. because bind_evtchn_to_cpu(evtchn, cpu) will map evtchn to
+'info' and pass 'info' down to xen_evtchn_port_bind_to_cpu().
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Tested-by: Annie Li <annie.li@oracle.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/events/events_base.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -550,8 +550,8 @@ static unsigned int __startup_pirq(unsig
+ if (rc)
+ goto err;
+
+- bind_evtchn_to_cpu(evtchn, 0);
+ info->evtchn = evtchn;
++ bind_evtchn_to_cpu(evtchn, 0);
+
+ rc = xen_evtchn_port_setup(info);
+ if (rc)