From: Greg Kroah-Hartman
Date: Tue, 18 Feb 2014 22:37:04 +0000 (-0800)
Subject: 3.4-stable patches
X-Git-Tag: v3.4.81~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ff08ad4408bf17367ded4d225d463518f8ae2d42;p=thirdparty%2Fkernel%2Fstable-queue.git

3.4-stable patches

added patches:
    ib-qib-convert-qib_user_sdma_pin_pages-to-use-get_user_pages_fast.patch
    sched-nohz-fix-rq-cpu_load-calculations-some-more.patch
    sched-nohz-fix-rq-cpu_load-calculations.patch
---
diff --git a/queue-3.4/ib-qib-convert-qib_user_sdma_pin_pages-to-use-get_user_pages_fast.patch b/queue-3.4/ib-qib-convert-qib_user_sdma_pin_pages-to-use-get_user_pages_fast.patch
new file mode 100644
index 00000000000..15cf9410524
--- /dev/null
+++ b/queue-3.4/ib-qib-convert-qib_user_sdma_pin_pages-to-use-get_user_pages_fast.patch
@@ -0,0 +1,60 @@
+From 603e7729920e42b3c2f4dbfab9eef4878cb6e8fa Mon Sep 17 00:00:00 2001
+From: Jan Kara
+Date: Fri, 4 Oct 2013 09:29:12 -0400
+Subject: IB/qib: Convert qib_user_sdma_pin_pages() to use get_user_pages_fast()
+
+From: Jan Kara
+
+commit 603e7729920e42b3c2f4dbfab9eef4878cb6e8fa upstream.
+
+qib_user_sdma_queue_pkts() gets called with mmap_sem held for
+writing. Except for get_user_pages() deep down in
+qib_user_sdma_pin_pages() we don't seem to need mmap_sem at all. Even
+more interestingly the function qib_user_sdma_queue_pkts() (and also
+qib_user_sdma_coalesce() called somewhat later) call copy_from_user()
+which can hit a page fault and we deadlock on trying to get mmap_sem
+when handling that fault.
+
+So just make qib_user_sdma_pin_pages() use get_user_pages_fast() and
+leave mmap_sem locking for mm.
+
+This deadlock has actually been observed in the wild when the node
+is under memory pressure.
+
+Reviewed-by: Mike Marciniszyn
+Signed-off-by: Jan Kara
+Signed-off-by: Roland Dreier
+[Backported to 3.4: (Thank to Ben Hutchings)
+ - Adjust context
+ - Adjust indentation and nr_pages argument in qib_user_sdma_pin_pages()]
+Signed-off-by: Ben Hutchings
+Signed-off-by: Mike Marciniszyn
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/infiniband/hw/qib/qib_user_sdma.c |    6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
+@@ -284,8 +284,7 @@ static int qib_user_sdma_pin_pages(const
+         int j;
+         int ret;
+ 
+-        ret = get_user_pages(current, current->mm, addr,
+-                             npages, 0, 1, pages, NULL);
++        ret = get_user_pages_fast(addr, npages, 0, pages);
+ 
+         if (ret != npages) {
+                 int i;
+@@ -830,10 +829,7 @@ int qib_user_sdma_writev(struct qib_ctxt
+         while (dim) {
+                 const int mxp = 8;
+ 
+-                down_write(&current->mm->mmap_sem);
+                 ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+-                up_write(&current->mm->mmap_sem);
+-
+                 if (ret <= 0)
+                         goto done_unlock;
+                 else {
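
The deadlock closed by the patch above is worth spelling out:
qib_user_sdma_writev() held mmap_sem for writing across
qib_user_sdma_queue_pkts(), which calls copy_from_user(); if that copy
faults, the fault handler tries to take mmap_sem again and the task
deadlocks against itself. The sketch below condenses the before/after
calling conventions using the 3.4-era signatures; it is illustrative
only, not code from the driver.

/* Before: the caller serializes with mmap_sem itself. */
down_write(&current->mm->mmap_sem);
ret = get_user_pages(current, current->mm, addr, npages,
                     0 /* write */, 1 /* force */, pages, NULL);
/*
 * Any copy_from_user() fault taken while the lock is held re-enters
 * the mm fault path, which blocks on mmap_sem: self-deadlock.
 */
up_write(&current->mm->mmap_sem);

/* After: no caller-side locking. */
ret = get_user_pages_fast(addr, npages, 0 /* write */, pages);

get_user_pages_fast() first walks the page tables locklessly and only
falls back to taking mmap_sem itself on the slow path, so the caller
no longer holds the lock across code that may fault.
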
diff --git a/queue-3.4/sched-nohz-fix-rq-cpu_load-calculations-some-more.patch b/queue-3.4/sched-nohz-fix-rq-cpu_load-calculations-some-more.patch
new file mode 100644
index 00000000000..5cb51f101ae
--- /dev/null
+++ b/queue-3.4/sched-nohz-fix-rq-cpu_load-calculations-some-more.patch
@@ -0,0 +1,141 @@
+From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Thu, 17 May 2012 17:15:29 +0200
+Subject: sched/nohz: Fix rq->cpu_load calculations some more
+
+From: Peter Zijlstra
+
+commit 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 upstream.
+
+Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[]
+calculations") since while that fixed the busy case it regressed the
+mostly idle case.
+
+Add a callback from the nohz exit to also age the rq->cpu_load[]
+array. This closes the hole where either there was no nohz load
+balance pass during the nohz, or there was a 'significant' amount of
+idle time between the last nohz balance and the nohz exit.
+
+So we'll update unconditionally from the tick to not insert any
+accidental 0 load periods while busy, and we try and catch up from
+nohz idle balance and nohz exit. Both these are still prone to missing
+a jiffy, but that has always been the case.
+
+Signed-off-by: Peter Zijlstra
+Cc: pjt@google.com
+Cc: Venkatesh Pallipadi
+Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
+Signed-off-by: Ingo Molnar
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/sched.h    |    1 
+ kernel/sched/core.c      |   53 ++++++++++++++++++++++++++++++++++++++---------
+ kernel/time/tick-sched.c |    1 
+ 3 files changed, 45 insertions(+), 10 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -144,6 +144,7 @@ extern unsigned long this_cpu_load(void)
+ 
+ 
+ extern void calc_global_load(unsigned long ticks);
++extern void update_cpu_load_nohz(void);
+ 
+ extern unsigned long get_parent_ip(unsigned long addr);
+ 
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2649,25 +2649,32 @@ static void __update_cpu_load(struct rq
+         sched_avg_update(this_rq);
+ }
+ 
++#ifdef CONFIG_NO_HZ
++/*
++ * There is no sane way to deal with nohz on smp when using jiffies because the
++ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
++ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
++ *
++ * Therefore we cannot use the delta approach from the regular tick since that
++ * would seriously skew the load calculation. However we'll make do for those
++ * updates happening while idle (nohz_idle_balance) or coming out of idle
++ * (tick_nohz_idle_exit).
++ *
++ * This means we might still be one tick off for nohz periods.
++ */
++
+ /*
+  * Called from nohz_idle_balance() to update the load ratings before doing the
+  * idle balance.
+  */
+ void update_idle_cpu_load(struct rq *this_rq)
+ {
+-        unsigned long curr_jiffies = jiffies;
++        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+         unsigned long load = this_rq->load.weight;
+         unsigned long pending_updates;
+ 
+         /*
+-         * Bloody broken means of dealing with nohz, but better than nothing..
+-         * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
+-         * update and see 0 difference the one time and 2 the next, even though
+-         * we ticked at roughtly the same rate.
+-         *
+-         * Hence we only use this from nohz_idle_balance() and skip this
+-         * nonsense when called from the scheduler_tick() since that's
+-         * guaranteed a stable rate.
++         * bail if there's load or we're actually up-to-date.
+          */
+         if (load || curr_jiffies == this_rq->last_load_update_tick)
+                 return;
+@@ -2679,12 +2686,38 @@ void update_idle_cpu_load(struct rq *thi
+ }
+ 
+ /*
++ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
++ */
++void update_cpu_load_nohz(void)
++{
++        struct rq *this_rq = this_rq();
++        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
++        unsigned long pending_updates;
++
++        if (curr_jiffies == this_rq->last_load_update_tick)
++                return;
++
++        raw_spin_lock(&this_rq->lock);
++        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
++        if (pending_updates) {
++                this_rq->last_load_update_tick = curr_jiffies;
++                /*
++                 * We were idle, this means load 0, the current load might be
++                 * !0 due to remote wakeups and the sort.
++                 */
++                __update_cpu_load(this_rq, 0, pending_updates);
++        }
++        raw_spin_unlock(&this_rq->lock);
++}
++#endif /* CONFIG_NO_HZ */
++
++/*
+  * Called from scheduler_tick()
+  */
+ static void update_cpu_load_active(struct rq *this_rq)
+ {
+         /*
+-         * See the mess in update_idle_cpu_load().
++         * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+          */
+         this_rq->last_load_update_tick = jiffies;
+         __update_cpu_load(this_rq, this_rq->load.weight, 1);
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -582,6 +582,7 @@ void tick_nohz_idle_exit(void)
+         /* Update jiffies first */
+         select_nohz_load_balancer(0);
+         tick_do_update_jiffies64(now);
++        update_cpu_load_nohz();
+ 
+ #ifndef CONFIG_VIRT_CPU_ACCOUNTING
+         /*
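
For context on what "age the rq->cpu_load[] array" means: index i of
that array tracks an exponential moving average which decays by a
factor of (2^i - 1)/2^i per tick, and ticks missed while idle are
replayed as load 0. The stand-alone model below shows the catch-up
arithmetic; it is a deliberate simplification (the kernel's
__update_cpu_load() goes through decay_load_missed() with precomputed
fixed-point tables and an extra rounding term, not this loop).

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

static unsigned long cpu_load[CPU_LOAD_IDX_MAX];

/*
 * Replay 'pending_updates' ticks, the last of which observed
 * 'this_load'; the missed ticks in between count as idle (load 0).
 */
static void update_cpu_load_model(unsigned long this_load,
                                  unsigned long pending_updates)
{
        unsigned long scale = 1;
        int i;

        cpu_load[0] = this_load;        /* index 0 is the raw load */
        for (i = 1; i < CPU_LOAD_IDX_MAX; i++) {
                unsigned long old_load = cpu_load[i], j;

                scale += scale;         /* scale = 2^i */
                for (j = 1; j < pending_updates; j++)
                        old_load = old_load * (scale - 1) / scale;
                cpu_load[i] = (old_load * (scale - 1) + this_load) / scale;
        }
}

int main(void)
{
        int i;

        for (i = 1; i < CPU_LOAD_IDX_MAX; i++)
                cpu_load[i] = 1024;     /* busy before going idle */
        update_cpu_load_model(0, 3);    /* nohz exit, 3 ticks pending */
        for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
                printf("cpu_load[%d] = %lu\n", i, cpu_load[i]);
        return 0;
}

Without the update_cpu_load_nohz() hook added above, a CPU that woke
without a nohz balance pass never replayed those pending ticks, so the
higher indices came out of idle still carrying their pre-idle load.
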
diff --git a/queue-3.4/sched-nohz-fix-rq-cpu_load-calculations.patch b/queue-3.4/sched-nohz-fix-rq-cpu_load-calculations.patch
new file mode 100644
index 00000000000..9259e800bc2
--- /dev/null
+++ b/queue-3.4/sched-nohz-fix-rq-cpu_load-calculations.patch
@@ -0,0 +1,145 @@
+From 556061b00c9f2fd6a5524b6bde823ef12f299ecf Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Fri, 11 May 2012 17:31:26 +0200
+Subject: sched/nohz: Fix rq->cpu_load[] calculations
+
+From: Peter Zijlstra
+
+commit 556061b00c9f2fd6a5524b6bde823ef12f299ecf upstream.
+
+While investigating why the load-balancer did funny I found that the
+rq->cpu_load[] tables were completely screwy.. a bit more digging
+revealed that the updates that got through were missing ticks followed
+by a catchup of 2 ticks.
+
+The catchup assumes the cpu was idle during that time (since only nohz
+can cause missed ticks and the machine is idle etc..) this means that
+esp. the higher indices were significantly lower than they ought to
+be.
+
+The reason for this is that its not correct to compare against jiffies
+on every jiffy on any other cpu than the cpu that updates jiffies.
+
+This patch cludges around it by only doing the catch-up stuff from
+nohz_idle_balance() and doing the regular stuff unconditionally from
+the tick.
+
+Signed-off-by: Peter Zijlstra
+Cc: pjt@google.com
+Cc: Venkatesh Pallipadi
+Link: http://lkml.kernel.org/n/tip-tp4kj18xdd5aj4vvj0qg55s2@git.kernel.org
+Signed-off-by: Ingo Molnar
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/sched/core.c  |   53 +++++++++++++++++++++++++++++++++++++--------------
+ kernel/sched/fair.c  |    2 -
+ kernel/sched/sched.h |    2 -
+ 3 files changed, 41 insertions(+), 16 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -692,8 +692,6 @@ int tg_nop(struct task_group *tg, void *
+ }
+ #endif
+ 
+-void update_cpu_load(struct rq *this_rq);
+-
+ static void set_load_weight(struct task_struct *p)
+ {
+         int prio = p->static_prio - MAX_RT_PRIO;
+@@ -2620,22 +2618,13 @@ decay_load_missed(unsigned long load, un
+  * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+  * every tick. We fix it up based on jiffies.
+  */
+-void update_cpu_load(struct rq *this_rq)
++static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
++                              unsigned long pending_updates)
+ {
+-        unsigned long this_load = this_rq->load.weight;
+-        unsigned long curr_jiffies = jiffies;
+-        unsigned long pending_updates;
+         int i, scale;
+ 
+         this_rq->nr_load_updates++;
+ 
+-        /* Avoid repeated calls on same jiffy, when moving in and out of idle */
+-        if (curr_jiffies == this_rq->last_load_update_tick)
+-                return;
+-
+-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+-        this_rq->last_load_update_tick = curr_jiffies;
+-
+         /* Update our load: */
+         this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+         for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+@@ -2660,9 +2649,45 @@ void update_cpu_load(struct rq *this_rq)
+         sched_avg_update(this_rq);
+ }
+ 
++/*
++ * Called from nohz_idle_balance() to update the load ratings before doing the
++ * idle balance.
++ */
++void update_idle_cpu_load(struct rq *this_rq)
++{
++        unsigned long curr_jiffies = jiffies;
++        unsigned long load = this_rq->load.weight;
++        unsigned long pending_updates;
++
++        /*
++         * Bloody broken means of dealing with nohz, but better than nothing..
++         * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
++         * update and see 0 difference the one time and 2 the next, even though
++         * we ticked at roughtly the same rate.
++         *
++         * Hence we only use this from nohz_idle_balance() and skip this
++         * nonsense when called from the scheduler_tick() since that's
++         * guaranteed a stable rate.
++         */
++        if (load || curr_jiffies == this_rq->last_load_update_tick)
++                return;
++
++        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
++        this_rq->last_load_update_tick = curr_jiffies;
++
++        __update_cpu_load(this_rq, load, pending_updates);
++}
++
++/*
++ * Called from scheduler_tick()
++ */
+ static void update_cpu_load_active(struct rq *this_rq)
+ {
+-        update_cpu_load(this_rq);
++        /*
++         * See the mess in update_idle_cpu_load().
++         */
++        this_rq->last_load_update_tick = jiffies;
++        __update_cpu_load(this_rq, this_rq->load.weight, 1);
+ 
+         calc_load_account_active(this_rq);
+ }
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5042,7 +5042,7 @@ static void nohz_idle_balance(int this_c
+ 
+                 raw_spin_lock_irq(&this_rq->lock);
+                 update_rq_clock(this_rq);
+-                update_cpu_load(this_rq);
++                update_idle_cpu_load(this_rq);
+                 raw_spin_unlock_irq(&this_rq->lock);
+ 
+                 rebalance_domains(balance_cpu, CPU_IDLE);
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -873,7 +873,7 @@ extern void resched_cpu(int cpu);
+ extern struct rt_bandwidth def_rt_bandwidth;
+ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+ 
+-extern void update_cpu_load(struct rq *this_rq);
++extern void update_idle_cpu_load(struct rq *this_rq);
+ 
+ #ifdef CONFIG_CGROUP_CPUACCT
+ #include <linux/cgroup.h>
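
The "{0,2} instead of {1,1}" drift described in both changelogs above
is purely a sampling artifact: jiffies is advanced by one CPU, and a
remote CPU whose tick drifts across an update boundary reads the same
jiffies value twice and then sees it jump by two. A toy illustration
with made-up numbers (1000 ms per jiffy):

#include <stdio.h>

int main(void)
{
        /*
         * The updater CPU increments jiffies every 1000 ms. The remote
         * CPU ticks at the same rate, but its samples drift across an
         * update boundary: 5, 1005, 1995, 3005 ms.
         */
        const long sample_ms[] = { 5, 1005, 1995, 3005 };
        long prev = -1;
        int i;

        for (i = 0; i < 4; i++) {
                long j = sample_ms[i] / 1000;   /* jiffies value read */

                if (prev >= 0)
                        printf("tick %d: delta = %ld\n", i, j - prev);
                prev = j;
        }
        return 0;       /* prints deltas 1, 0, 2 rather than 1, 1, 1 */
}

Feeding a spurious delta of 2 into the catch-up path decays the
averages for an idle tick that never happened, which is why this patch
confines the jiffies-delta catch-up to nohz_idle_balance() and has the
regular tick always advance the load window by exactly one.
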
diff --git a/queue-3.4/series b/queue-3.4/series
index a3671465e00..b49f32ea2b6 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -11,3 +11,6 @@ dm-sysfs-fix-a-module-unload-race.patch
 ftrace-synchronize-setting-function_trace_op-with-ftrace_trace_function.patch
 ftrace-fix-synchronization-location-disabling-and-freeing-ftrace_ops.patch
 ftrace-have-function-graph-only-trace-based-on-global_ops-filters.patch
+sched-nohz-fix-rq-cpu_load-calculations.patch
+sched-nohz-fix-rq-cpu_load-calculations-some-more.patch
+ib-qib-convert-qib_user_sdma_pin_pages-to-use-get_user_pages_fast.patch