--- /dev/null
+From 603e7729920e42b3c2f4dbfab9eef4878cb6e8fa Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 4 Oct 2013 09:29:12 -0400
+Subject: IB/qib: Convert qib_user_sdma_pin_pages() to use get_user_pages_fast()
+
+From: Jan Kara <jack@suse.cz>
+
+commit 603e7729920e42b3c2f4dbfab9eef4878cb6e8fa upstream.
+
+qib_user_sdma_queue_pkts() gets called with mmap_sem held for
+writing. Except for get_user_pages() deep down in
+qib_user_sdma_pin_pages() we don't seem to need mmap_sem at all. Even
+more interestingly, qib_user_sdma_queue_pkts() (and also
+qib_user_sdma_coalesce(), called somewhat later) calls copy_from_user(),
+which can hit a page fault, and we then deadlock trying to take
+mmap_sem while handling that fault.
+
+So just make qib_user_sdma_pin_pages() use get_user_pages_fast() and
+leave mmap_sem locking for mm.
+
+This deadlock has actually been observed in the wild when the node
+is under memory pressure.
+
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+[Backported to 3.4: (Thanks to Ben Hutchings)
+ - Adjust context
+ - Adjust indentation and nr_pages argument in qib_user_sdma_pin_pages()]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/qib/qib_user_sdma.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
+@@ -284,8 +284,7 @@ static int qib_user_sdma_pin_pages(const
+ int j;
+ int ret;
+
+- ret = get_user_pages(current, current->mm, addr,
+- npages, 0, 1, pages, NULL);
++ ret = get_user_pages_fast(addr, npages, 0, pages);
+
+ if (ret != npages) {
+ int i;
+@@ -830,10 +829,7 @@ int qib_user_sdma_writev(struct qib_ctxt
+ while (dim) {
+ const int mxp = 8;
+
+-	down_write(&current->mm->mmap_sem);
+ ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+-	up_write(&current->mm->mmap_sem);
+-
+ if (ret <= 0)
+ goto done_unlock;
+ else {
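
A minimal sketch of the conversion pattern used in the patch above, assuming the
3.x-era get_user_pages() and get_user_pages_fast() signatures that appear in the
hunk. The helper name pin_user_buf() is invented for illustration and merely
stands in for qib_user_sdma_pin_pages():

/*
 * Illustrative sketch only: pin_user_buf() is a made-up name standing in
 * for qib_user_sdma_pin_pages(); 3.x GUP signatures assumed.
 */
#include <linux/mm.h>
#include <linux/sched.h>

static int pin_user_buf(unsigned long addr, int npages, struct page **pages)
{
	int ret;

	/*
	 * Before: the caller wrapped the whole packet build in
	 * down_write(&current->mm->mmap_sem) so that this call was legal:
	 *
	 *	ret = get_user_pages(current, current->mm, addr, npages,
	 *			     0, 1, pages, NULL);
	 *
	 * Any copy_from_user() that faulted while mmap_sem was still held
	 * would then deadlock, because the fault handler also needs mmap_sem.
	 */

	/*
	 * After: get_user_pages_fast() walks the page tables locklessly and
	 * only takes mmap_sem internally when it has to fall back to the
	 * slow path, so the caller holds no mm locks while it runs.
	 */
	ret = get_user_pages_fast(addr, npages, 0 /* write */, pages);

	return ret;	/* number of pages pinned, or -errno */
}

With the callee no longer requiring mmap_sem, the down_write()/up_write() pair
around qib_user_sdma_queue_pkts() in qib_user_sdma_writev() can simply be
dropped, which is what the second hunk does.
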
--- /dev/null
+From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu, 17 May 2012 17:15:29 +0200
+Subject: sched/nohz: Fix rq->cpu_load calculations some more
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 upstream.
+
+Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[]
+calculations") since while that fixed the busy case it regressed the
+mostly idle case.
+
+Add a callback from the nohz exit to also age the rq->cpu_load[]
+array. This closes the hole where either there was no nohz load
+balance pass during the nohz period, or there was a 'significant' amount
+idle time between the last nohz balance and the nohz exit.
+
+So we'll update unconditionally from the tick, to avoid inserting any
+accidental 0-load periods while busy, and we try to catch up from the
+nohz idle balance and the nohz exit. Both of these are still prone to
+missing a jiffy, but that has always been the case.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: pjt@google.com
+Cc: Venkatesh Pallipadi <venki@google.com>
+Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/sched.h | 1
+ kernel/sched/core.c | 53 ++++++++++++++++++++++++++++++++++++++---------
+ kernel/time/tick-sched.c | 1
+ 3 files changed, 45 insertions(+), 10 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -144,6 +144,7 @@ extern unsigned long this_cpu_load(void)
+
+
+ extern void calc_global_load(unsigned long ticks);
++extern void update_cpu_load_nohz(void);
+
+ extern unsigned long get_parent_ip(unsigned long addr);
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2649,25 +2649,32 @@ static void __update_cpu_load(struct rq
+ sched_avg_update(this_rq);
+ }
+
++#ifdef CONFIG_NO_HZ
++/*
++ * There is no sane way to deal with nohz on smp when using jiffies because the
++ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
++ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
++ *
++ * Therefore we cannot use the delta approach from the regular tick since that
++ * would seriously skew the load calculation. However we'll make do for those
++ * updates happening while idle (nohz_idle_balance) or coming out of idle
++ * (tick_nohz_idle_exit).
++ *
++ * This means we might still be one tick off for nohz periods.
++ */
++
+ /*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+ void update_idle_cpu_load(struct rq *this_rq)
+ {
+- unsigned long curr_jiffies = jiffies;
++ unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+ unsigned long load = this_rq->load.weight;
+ unsigned long pending_updates;
+
+ /*
+- * Bloody broken means of dealing with nohz, but better than nothing..
+- * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
+- * update and see 0 difference the one time and 2 the next, even though
+- * we ticked at roughtly the same rate.
+- *
+- * Hence we only use this from nohz_idle_balance() and skip this
+- * nonsense when called from the scheduler_tick() since that's
+- * guaranteed a stable rate.
++ * bail if there's load or we're actually up-to-date.
+ */
+ if (load || curr_jiffies == this_rq->last_load_update_tick)
+ return;
+@@ -2679,12 +2686,38 @@ void update_idle_cpu_load(struct rq *thi
+ }
+
+ /*
++ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
++ */
++void update_cpu_load_nohz(void)
++{
++ struct rq *this_rq = this_rq();
++ unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
++ unsigned long pending_updates;
++
++ if (curr_jiffies == this_rq->last_load_update_tick)
++ return;
++
++ raw_spin_lock(&this_rq->lock);
++ pending_updates = curr_jiffies - this_rq->last_load_update_tick;
++ if (pending_updates) {
++ this_rq->last_load_update_tick = curr_jiffies;
++ /*
++ * We were idle, this means load 0, the current load might be
++ * !0 due to remote wakeups and the sort.
++ */
++ __update_cpu_load(this_rq, 0, pending_updates);
++ }
++ raw_spin_unlock(&this_rq->lock);
++}
++#endif /* CONFIG_NO_HZ */
++
++/*
+ * Called from scheduler_tick()
+ */
+ static void update_cpu_load_active(struct rq *this_rq)
+ {
+ /*
+- * See the mess in update_idle_cpu_load().
++ * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+ */
+ this_rq->last_load_update_tick = jiffies;
+ __update_cpu_load(this_rq, this_rq->load.weight, 1);
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -582,6 +582,7 @@ void tick_nohz_idle_exit(void)
+ /* Update jiffies first */
+ select_nohz_load_balancer(0);
+ tick_do_update_jiffies64(now);
++ update_cpu_load_nohz();
+
+ #ifndef CONFIG_VIRT_CPU_ACCOUNTING
+ /*
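
A rough userspace model of what the new update_cpu_load_nohz() catch-up
accomplishes. Each rq->cpu_load[i] is an exponential average whose history
weight grows with i, so after an idle stretch the entries would otherwise sit
frozen at their pre-idle values. This is only a sketch under simplifying
assumptions: the plain zero-load loop stands in for the kernel's table-driven
decay_load_missed(), the new_load rounding tweak of __update_cpu_load() is
omitted, and the function name load_tick() is invented:

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

static unsigned long cpu_load[CPU_LOAD_IDX_MAX];

/* One tick's worth of update, same shape as __update_cpu_load(). */
static void load_tick(unsigned long this_load)
{
	int i;

	cpu_load[0] = this_load;		/* fast path for index 0 */
	for (i = 1; i < CPU_LOAD_IDX_MAX; i++) {
		unsigned long scale = 1UL << i;

		cpu_load[i] = (cpu_load[i] * (scale - 1) + this_load) >> i;
	}
}

int main(void)
{
	int t;

	for (t = 0; t < 50; t++)	/* busy: steady load of 1024 */
		load_tick(1024);
	printf("before idle:    cpu_load[4] = %lu\n", cpu_load[4]);

	/*
	 * The CPU now sleeps through 20 ticks in nohz.  Without a catch-up
	 * on exit, cpu_load[] would still advertise these stale values to
	 * the load balancer.  update_cpu_load_nohz() instead ages them with
	 * pending_updates zero-load ticks:
	 */
	for (t = 0; t < 20; t++)	/* pending_updates = 20 */
		load_tick(0);
	printf("after catch-up: cpu_load[4] = %lu\n", cpu_load[4]);

	return 0;
}

The real update_cpu_load_nohz() additionally takes this_rq->lock and advances
last_load_update_tick, as the hunk above shows.
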
--- /dev/null
+From 556061b00c9f2fd6a5524b6bde823ef12f299ecf Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 11 May 2012 17:31:26 +0200
+Subject: sched/nohz: Fix rq->cpu_load[] calculations
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 556061b00c9f2fd6a5524b6bde823ef12f299ecf upstream.
+
+While investigating why the load-balancer was misbehaving I found that
+the rq->cpu_load[] tables were completely screwy. A bit more digging
+revealed that the updates that got through were missing ticks, followed
+by a catch-up of 2 ticks.
+
+The catch-up assumes the cpu was idle during that time (since only nohz
+can cause missed ticks and the machine is idle, etc.). This means that
+especially the higher indices were significantly lower than they ought
+to be.
+
+The reason for this is that it's not correct to compare against jiffies
+on every jiffy on any cpu other than the cpu that updates jiffies.
+
+This patch kludges around it by only doing the catch-up stuff from
+nohz_idle_balance() and doing the regular stuff unconditionally from
+the tick.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: pjt@google.com
+Cc: Venkatesh Pallipadi <venki@google.com>
+Link: http://lkml.kernel.org/n/tip-tp4kj18xdd5aj4vvj0qg55s2@git.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/core.c | 53 +++++++++++++++++++++++++++++++++++++--------------
+ kernel/sched/fair.c | 2 -
+ kernel/sched/sched.h | 2 -
+ 3 files changed, 41 insertions(+), 16 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -692,8 +692,6 @@ int tg_nop(struct task_group *tg, void *
+ }
+ #endif
+
+-void update_cpu_load(struct rq *this_rq);
+-
+ static void set_load_weight(struct task_struct *p)
+ {
+ int prio = p->static_prio - MAX_RT_PRIO;
+@@ -2620,22 +2618,13 @@ decay_load_missed(unsigned long load, un
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
+ */
+-void update_cpu_load(struct rq *this_rq)
++static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
++ unsigned long pending_updates)
+ {
+- unsigned long this_load = this_rq->load.weight;
+- unsigned long curr_jiffies = jiffies;
+- unsigned long pending_updates;
+ int i, scale;
+
+ this_rq->nr_load_updates++;
+
+- /* Avoid repeated calls on same jiffy, when moving in and out of idle */
+- if (curr_jiffies == this_rq->last_load_update_tick)
+- return;
+-
+- pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+- this_rq->last_load_update_tick = curr_jiffies;
+-
+ /* Update our load: */
+ this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+ for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+@@ -2660,9 +2649,45 @@ void update_cpu_load(struct rq *this_rq)
+ sched_avg_update(this_rq);
+ }
+
++/*
++ * Called from nohz_idle_balance() to update the load ratings before doing the
++ * idle balance.
++ */
++void update_idle_cpu_load(struct rq *this_rq)
++{
++ unsigned long curr_jiffies = jiffies;
++ unsigned long load = this_rq->load.weight;
++ unsigned long pending_updates;
++
++ /*
++ * Bloody broken means of dealing with nohz, but better than nothing..
++ * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
++ * update and see 0 difference the one time and 2 the next, even though
++ * we ticked at roughtly the same rate.
++ *
++ * Hence we only use this from nohz_idle_balance() and skip this
++ * nonsense when called from the scheduler_tick() since that's
++ * guaranteed a stable rate.
++ */
++ if (load || curr_jiffies == this_rq->last_load_update_tick)
++ return;
++
++ pending_updates = curr_jiffies - this_rq->last_load_update_tick;
++ this_rq->last_load_update_tick = curr_jiffies;
++
++ __update_cpu_load(this_rq, load, pending_updates);
++}
++
++/*
++ * Called from scheduler_tick()
++ */
+ static void update_cpu_load_active(struct rq *this_rq)
+ {
+- update_cpu_load(this_rq);
++ /*
++ * See the mess in update_idle_cpu_load().
++ */
++ this_rq->last_load_update_tick = jiffies;
++ __update_cpu_load(this_rq, this_rq->load.weight, 1);
+
+ calc_load_account_active(this_rq);
+ }
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5042,7 +5042,7 @@ static void nohz_idle_balance(int this_c
+
+ raw_spin_lock_irq(&this_rq->lock);
+ update_rq_clock(this_rq);
+- update_cpu_load(this_rq);
++ update_idle_cpu_load(this_rq);
+ raw_spin_unlock_irq(&this_rq->lock);
+
+ rebalance_domains(balance_cpu, CPU_IDLE);
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -873,7 +873,7 @@ extern void resched_cpu(int cpu);
+ extern struct rt_bandwidth def_rt_bandwidth;
+ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+
+-extern void update_cpu_load(struct rq *this_rq);
++extern void update_idle_cpu_load(struct rq *this_rq);
+
+ #ifdef CONFIG_CGROUP_CPUACCT
+ #include <linux/cgroup.h>
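
To make the skew described in the changelog above concrete, here is a small
userspace sketch, an illustration rather than kernel code, of what happens at
one of the higher cpu_load[] indices when a busy CPU observes its jiffies
deltas as {0, 2} instead of {1, 1}. The names IDX and load_step() are
invented; the averaging step mirrors the shape of __update_cpu_load(), while
the single phantom idle decay is a simplified stand-in for the table-driven
decay_load_missed() and the new_load rounding tweak is omitted:

#include <stdio.h>

#define IDX 4			/* look at cpu_load[4], the slowest index */

/* One averaging step at index IDX: history keeps weight (scale-1)/scale. */
static unsigned long load_step(unsigned long old, unsigned long cur)
{
	unsigned long scale = 1UL << IDX;

	return (old * (scale - 1) + cur) >> IDX;
}

int main(void)
{
	unsigned long good = 0, bad = 0;
	const unsigned long load = 1024;	/* CPU is busy the whole time */
	int t;

	for (t = 0; t < 200; t++) {
		/* Fixed behaviour: every tick updates with pending_updates == 1. */
		good = load_step(good, load);

		/*
		 * Old behaviour seen against remotely-updated jiffies: the
		 * delta-0 tick returns early ("avoid repeated calls on same
		 * jiffy"), and the delta-2 tick first decays one "missed"
		 * tick as if the CPU had been idle, then applies the load.
		 */
		if (t & 1) {
			bad = load_step(bad, 0);	/* phantom idle tick */
			bad = load_step(bad, load);
		}
	}

	printf("cpu_load[%d]: per-tick %lu vs. {0,2}-skewed %lu (true load %lu)\n",
	       IDX, good, bad, load);

	return 0;
}

That gap is the "significantly lower than they ought to be" effect; the patch
avoids it by having the tick path pass pending_updates = 1 unconditionally and
by confining the idle-style catch-up to update_idle_cpu_load().
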
ftrace-synchronize-setting-function_trace_op-with-ftrace_trace_function.patch
ftrace-fix-synchronization-location-disabling-and-freeing-ftrace_ops.patch
ftrace-have-function-graph-only-trace-based-on-global_ops-filters.patch
+sched-nohz-fix-rq-cpu_load-calculations.patch
+sched-nohz-fix-rq-cpu_load-calculations-some-more.patch
+ib-qib-convert-qib_user_sdma_pin_pages-to-use-get_user_pages_fast.patch