--- /dev/null
+From da87ca4d4ca101f177fffd84f1f0a5e4c0343557 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Wed, 19 Feb 2014 16:19:35 -0800
+Subject: ioat: fix tasklet tear down
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit da87ca4d4ca101f177fffd84f1f0a5e4c0343557 upstream.
+
+Since commit 77873803363c "net_dma: mark broken" we no longer pin dma
+engines active for the network-receive-offload use case. As a result
+the ->free_chan_resources() that occurs after the driver self test no
+longer has a NET_DMA induced ->alloc_chan_resources() to back it up. A
+late firing irq can lead to ksoftirqd spinning indefinitely due to the
+tasklet_disable() performed by ->free_chan_resources(). Only
+->alloc_chan_resources() can clear this condition in affected kernels.
+
+This problem has been present since commit 3e037454bcfa "I/OAT: Add
+support for MSI and MSI-X" in 2.6.24, but is now exposed. Given the
+NET_DMA use case is deprecated we can revisit moving the driver to use
+threaded irqs. For now, just tear down the irq and tasklet properly by:
+
+1/ Disable the irq from triggering the tasklet
+
+2/ Disable the tasklet from re-arming the irq
+
+3/ Flush inflight interrupts
+
+4/ Flush the timer
+
+5/ Flush inflight tasklets
+
+References:
+https://lkml.org/lkml/2014/1/27/282
+https://lkml.org/lkml/2014/2/19/672
+
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Reported-by: Mike Galbraith <bitbucket@online.de>
+Reported-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
+Tested-by: Mike Galbraith <bitbucket@online.de>
+Tested-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/ioat/dma.c | 52 +++++++++++++++++++++++++++++++++++++++-------
+ drivers/dma/ioat/dma.h | 1
+ drivers/dma/ioat/dma_v2.c | 11 ++++-----
+ drivers/dma/ioat/dma_v3.c | 3 ++
+ 4 files changed, 54 insertions(+), 13 deletions(-)
+
+--- a/drivers/dma/ioat/dma.c
++++ b/drivers/dma/ioat/dma.c
+@@ -77,7 +77,8 @@ static irqreturn_t ioat_dma_do_interrupt
+ attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+ for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) {
+ chan = ioat_chan_by_index(instance, bit);
+- tasklet_schedule(&chan->cleanup_task);
++ if (test_bit(IOAT_RUN, &chan->state))
++ tasklet_schedule(&chan->cleanup_task);
+ }
+
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+@@ -93,7 +94,8 @@ static irqreturn_t ioat_dma_do_interrupt
+ {
+ struct ioat_chan_common *chan = data;
+
+- tasklet_schedule(&chan->cleanup_task);
++ if (test_bit(IOAT_RUN, &chan->state))
++ tasklet_schedule(&chan->cleanup_task);
+
+ return IRQ_HANDLED;
+ }
+@@ -116,7 +118,6 @@ void ioat_init_channel(struct ioatdma_de
+ chan->timer.function = device->timer_fn;
+ chan->timer.data = data;
+ tasklet_init(&chan->cleanup_task, device->cleanup_fn, data);
+- tasklet_disable(&chan->cleanup_task);
+ }
+
+ /**
+@@ -354,13 +355,49 @@ static int ioat1_dma_alloc_chan_resource
+ writel(((u64) chan->completion_dma) >> 32,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+- tasklet_enable(&chan->cleanup_task);
++ set_bit(IOAT_RUN, &chan->state);
+ ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
+ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+ __func__, ioat->desccount);
+ return ioat->desccount;
+ }
+
++void ioat_stop(struct ioat_chan_common *chan)
++{
++ struct ioatdma_device *device = chan->device;
++ struct pci_dev *pdev = device->pdev;
++ int chan_id = chan_num(chan);
++ struct msix_entry *msix;
++
++ /* 1/ stop irq from firing tasklets
++ * 2/ stop the tasklet from re-arming irqs
++ */
++ clear_bit(IOAT_RUN, &chan->state);
++
++ /* flush inflight interrupts */
++ switch (device->irq_mode) {
++ case IOAT_MSIX:
++ msix = &device->msix_entries[chan_id];
++ synchronize_irq(msix->vector);
++ break;
++ case IOAT_MSI:
++ case IOAT_INTX:
++ synchronize_irq(pdev->irq);
++ break;
++ default:
++ break;
++ }
++
++ /* flush inflight timers */
++ del_timer_sync(&chan->timer);
++
++ /* flush inflight tasklet runs */
++ tasklet_kill(&chan->cleanup_task);
++
++ /* final cleanup now that everything is quiesced and can't re-arm */
++ device->cleanup_fn((unsigned long) &chan->common);
++}
++
+ /**
+ * ioat1_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+@@ -379,9 +416,7 @@ static void ioat1_dma_free_chan_resource
+ if (ioat->desccount == 0)
+ return;
+
+- tasklet_disable(&chan->cleanup_task);
+- del_timer_sync(&chan->timer);
+- ioat1_cleanup(ioat);
++ ioat_stop(chan);
+
+ /* Delay 100ms after reset to allow internal DMA logic to quiesce
+ * before removing DMA descriptor resources.
+@@ -526,8 +561,11 @@ ioat1_dma_prep_memcpy(struct dma_chan *c
+ static void ioat1_cleanup_event(unsigned long data)
+ {
+ struct ioat_dma_chan *ioat = to_ioat_chan((void *) data);
++ struct ioat_chan_common *chan = &ioat->base;
+
+ ioat1_cleanup(ioat);
++ if (!test_bit(IOAT_RUN, &chan->state))
++ return;
+ writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+ }
+
+--- a/drivers/dma/ioat/dma.h
++++ b/drivers/dma/ioat/dma.h
+@@ -356,6 +356,7 @@ bool ioat_cleanup_preamble(struct ioat_c
+ void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+ void ioat_kobject_del(struct ioatdma_device *device);
+ int ioat_dma_setup_interrupts(struct ioatdma_device *device);
++void ioat_stop(struct ioat_chan_common *chan);
+ extern const struct sysfs_ops ioat_sysfs_ops;
+ extern struct ioat_sysfs_entry ioat_version_attr;
+ extern struct ioat_sysfs_entry ioat_cap_attr;
+--- a/drivers/dma/ioat/dma_v2.c
++++ b/drivers/dma/ioat/dma_v2.c
+@@ -190,8 +190,11 @@ static void ioat2_cleanup(struct ioat2_d
+ void ioat2_cleanup_event(unsigned long data)
+ {
+ struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
++ struct ioat_chan_common *chan = &ioat->base;
+
+ ioat2_cleanup(ioat);
++ if (!test_bit(IOAT_RUN, &chan->state))
++ return;
+ writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+ }
+
+@@ -553,10 +556,10 @@ int ioat2_alloc_chan_resources(struct dm
+ ioat->issued = 0;
+ ioat->tail = 0;
+ ioat->alloc_order = order;
++ set_bit(IOAT_RUN, &chan->state);
+ spin_unlock_bh(&ioat->prep_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+
+- tasklet_enable(&chan->cleanup_task);
+ ioat2_start_null_desc(ioat);
+
+ /* check that we got off the ground */
+@@ -566,7 +569,6 @@ int ioat2_alloc_chan_resources(struct dm
+ } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status));
+
+ if (is_ioat_active(status) || is_ioat_idle(status)) {
+- set_bit(IOAT_RUN, &chan->state);
+ return 1 << ioat->alloc_order;
+ } else {
+ u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+@@ -809,11 +811,8 @@ void ioat2_free_chan_resources(struct dm
+ if (!ioat->ring)
+ return;
+
+- tasklet_disable(&chan->cleanup_task);
+- del_timer_sync(&chan->timer);
+- device->cleanup_fn((unsigned long) c);
++ ioat_stop(chan);
+ device->reset_hw(chan);
+- clear_bit(IOAT_RUN, &chan->state);
+
+ spin_lock_bh(&chan->cleanup_lock);
+ spin_lock_bh(&ioat->prep_lock);
+--- a/drivers/dma/ioat/dma_v3.c
++++ b/drivers/dma/ioat/dma_v3.c
+@@ -464,8 +464,11 @@ static void ioat3_cleanup(struct ioat2_d
+ static void ioat3_cleanup_event(unsigned long data)
+ {
+ struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
++ struct ioat_chan_common *chan = &ioat->base;
+
+ ioat3_cleanup(ioat);
++ if (!test_bit(IOAT_RUN, &chan->state))
++ return;
+ writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+ }
+
--- /dev/null
+From f3713fd9cff733d9df83116422d8e4af6e86b2bb Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso <davidlohr@hp.com>
+Date: Tue, 25 Feb 2014 15:01:45 -0800
+Subject: ipc,mqueue: remove limits for the amount of system-wide
+ queues
+
+From: Davidlohr Bueso <davidlohr@hp.com>
+
+commit f3713fd9cff733d9df83116422d8e4af6e86b2bb upstream.
+
+Commit 93e6f119c0ce ("ipc/mqueue: cleanup definition names and
+locations") added global hardcoded limits to the amount of message
+queues that can be created. While these limits are per-namespace, the
+reality is that they end up breaking userspace applications.
+Historically users have, at least in theory, been able to create up to
+INT_MAX queues, and limiting it to just 1024 is way too low and dramatic
+for some workloads and use cases. For instance, Madars reports:
+
+ "This update imposes bad limits on our multi-process application. As
+ our app uses approaches that each process opens its own set of queues
+ (usually something about 3-5 queues per process). In some scenarios
+ we might run up to 3000 processes or more (which of-course for linux
+ is not a problem). Thus we might need up to 9000 queues or more. All
+ processes run under one user."
+
+Other affected users can be found in launchpad bug #1155695:
+ https://bugs.launchpad.net/ubuntu/+source/manpages/+bug/1155695
+
+Instead of increasing this limit, revert it entirely and fall back to the
+original way of dealing with queue limits -- where once a user's resource
+limit is reached, and all memory is used, new queues cannot be created.
+
+Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
+Reported-by: Madars Vitolins <m@silodev.com>
+Acked-by: Doug Ledford <dledford@redhat.com>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ipc_namespace.h | 2 --
+ ipc/mq_sysctl.c | 18 ++++++++++++------
+ ipc/mqueue.c | 6 +++---
+ 3 files changed, 15 insertions(+), 11 deletions(-)
+
+--- a/include/linux/ipc_namespace.h
++++ b/include/linux/ipc_namespace.h
+@@ -119,9 +119,7 @@ extern int mq_init_ns(struct ipc_namespa
+ * the new maximum will handle anyone else. I may have to revisit this
+ * in the future.
+ */
+-#define MIN_QUEUESMAX 1
+ #define DFLT_QUEUESMAX 256
+-#define HARD_QUEUESMAX 1024
+ #define MIN_MSGMAX 1
+ #define DFLT_MSG 10U
+ #define DFLT_MSGMAX 10
+--- a/ipc/mq_sysctl.c
++++ b/ipc/mq_sysctl.c
+@@ -22,6 +22,16 @@ static void *get_mq(ctl_table *table)
+ return which;
+ }
+
++static int proc_mq_dointvec(ctl_table *table, int write,
++ void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++ struct ctl_table mq_table;
++ memcpy(&mq_table, table, sizeof(mq_table));
++ mq_table.data = get_mq(table);
++
++ return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
++}
++
+ static int proc_mq_dointvec_minmax(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+@@ -33,12 +43,10 @@ static int proc_mq_dointvec_minmax(ctl_t
+ lenp, ppos);
+ }
+ #else
++#define proc_mq_dointvec NULL
+ #define proc_mq_dointvec_minmax NULL
+ #endif
+
+-static int msg_queues_limit_min = MIN_QUEUESMAX;
+-static int msg_queues_limit_max = HARD_QUEUESMAX;
+-
+ static int msg_max_limit_min = MIN_MSGMAX;
+ static int msg_max_limit_max = HARD_MSGMAX;
+
+@@ -51,9 +59,7 @@ static ctl_table mq_sysctls[] = {
+ .data = &init_ipc_ns.mq_queues_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_mq_dointvec_minmax,
+- .extra1 = &msg_queues_limit_min,
+- .extra2 = &msg_queues_limit_max,
++ .proc_handler = proc_mq_dointvec,
+ },
+ {
+ .procname = "msg_max",
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -433,9 +433,9 @@ static int mqueue_create(struct inode *d
+ error = -EACCES;
+ goto out_unlock;
+ }
+- if (ipc_ns->mq_queues_count >= HARD_QUEUESMAX ||
+- (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
+- !capable(CAP_SYS_RESOURCE))) {
++
++ if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
++ !capable(CAP_SYS_RESOURCE)) {
+ error = -ENOSPC;
+ goto out_unlock;
+ }
--- /dev/null
+From 1362f4ea20fa63688ba6026e586d9746ff13a846 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 20 Feb 2014 17:02:27 +0100
+Subject: quota: Fix race between dqput() and dquot_scan_active()
+
+From: Jan Kara <jack@suse.cz>
+
+commit 1362f4ea20fa63688ba6026e586d9746ff13a846 upstream.
+
+Currently the last dqput() can race with dquot_scan_active(), causing it to
+call the callback for an already deactivated dquot. The race is as follows:
+
+CPU1 CPU2
+ dqput()
+ spin_lock(&dq_list_lock);
+ if (atomic_read(&dquot->dq_count) > 1) {
+ - not taken
+ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ spin_unlock(&dq_list_lock);
+ ->release_dquot(dquot);
+ if (atomic_read(&dquot->dq_count) > 1)
+ - not taken
+ dquot_scan_active()
+ spin_lock(&dq_list_lock);
+ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ - not taken
+ atomic_inc(&dquot->dq_count);
+ spin_unlock(&dq_list_lock);
+ - proceeds to release dquot
+ ret = fn(dquot, priv);
+ - called for inactive dquot
+
+Fix the problem by making sure any possible ->release_dquot() has finished
+by the time we call the callback, and that new calls to it will notice the
+reference dquot_scan_active() has taken and bail out.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/quota/dquot.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/fs/quota/dquot.c
++++ b/fs/quota/dquot.c
+@@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block
+ dqstats_inc(DQST_LOOKUPS);
+ dqput(old_dquot);
+ old_dquot = dquot;
+- ret = fn(dquot, priv);
+- if (ret < 0)
+- goto out;
++ /*
++ * ->release_dquot() can be racing with us. Our reference
++ * protects us from new calls to it so just wait for any
++ * outstanding call and recheck the DQ_ACTIVE_B after that.
++ */
++ wait_on_dquot(dquot);
++ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
++ ret = fn(dquot, priv);
++ if (ret < 0)
++ goto out;
++ }
+ spin_lock(&dq_list_lock);
+ /* We are safe to continue now because our dquot could not
+ * be moved out of the inuse list while we hold the reference */