--- /dev/null
+From b104a35d32025ca740539db2808aa3385d0f30eb Mon Sep 17 00:00:00 2001
+From: David Rientjes <rientjes@google.com>
+Date: Wed, 30 Jul 2014 16:08:24 -0700
+Subject: mm, thp: do not allow thp faults to avoid cpuset restrictions
+
+From: David Rientjes <rientjes@google.com>
+
+commit b104a35d32025ca740539db2808aa3385d0f30eb upstream.
+
+The page allocator relies on __GFP_WAIT to determine if ALLOC_CPUSET
+should be set in allocflags. ALLOC_CPUSET controls if a page allocation
+should be restricted only to the set of allowed cpuset mems.
+
+Transparent hugepages clears __GFP_WAIT when defrag is disabled to prevent
+the fault path from using memory compaction or direct reclaim. Thus, it
+is unfairly able to allocate outside of its cpuset mems restriction as a
+side-effect.
+
+This patch ensures that ALLOC_CPUSET is only cleared when the gfp mask is
+truly GFP_ATOMIC by verifying it is also not a thp allocation.
+
+Signed-off-by: David Rientjes <rientjes@google.com>
+Reported-by: Alex Thorlton <athorlton@sgi.com>
+Tested-by: Alex Thorlton <athorlton@sgi.com>
+Cc: Bob Liu <lliubbo@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Hedi Berriche <hedi@sgi.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2187,7 +2187,7 @@ static inline int
+ gfp_to_alloc_flags(gfp_t gfp_mask)
+ {
+ int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+- const gfp_t wait = gfp_mask & __GFP_WAIT;
++ const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
+
+ /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
+ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
+@@ -2196,20 +2196,20 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+ * The caller may dip into page reserves a bit more if the caller
+ * cannot run direct reclaim, or if the caller has realtime scheduling
+ * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
+- * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
++ * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
+ */
+ alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
+
+- if (!wait) {
++ if (atomic) {
+ /*
+- * Not worth trying to allocate harder for
+- * __GFP_NOMEMALLOC even if it can't schedule.
++ * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
++ * if it can't schedule.
+ */
+- if (!(gfp_mask & __GFP_NOMEMALLOC))
++ if (!(gfp_mask & __GFP_NOMEMALLOC))
+ alloc_flags |= ALLOC_HARDER;
+ /*
+- * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
+- * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
++ * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
++ * comment for __cpuset_node_allowed_softwall().
+ */
+ alloc_flags &= ~ALLOC_CPUSET;
+ } else if (unlikely(rt_task(current)) && !in_interrupt())
--- /dev/null
+From aac74dc495456412c4130a1167ce4beb6c1f0b38 Mon Sep 17 00:00:00 2001
+From: John Stultz <john.stultz@linaro.org>
+Date: Wed, 4 Jun 2014 16:11:40 -0700
+Subject: printk: rename printk_sched to printk_deferred
+
+From: John Stultz <john.stultz@linaro.org>
+
+commit aac74dc495456412c4130a1167ce4beb6c1f0b38 upstream.
+
+After learning we'll need some sort of deferred printk functionality in
+the timekeeping core, Peter suggested we rename the printk_sched function
+so it can be reused by needed subsystems.
+
+This only changes the function name. No logic changes.
+
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Bohac <jbohac@suse.cz>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/printk.h | 6 +++---
+ kernel/printk.c | 2 +-
+ kernel/sched/core.c | 2 +-
+ kernel/sched/rt.c | 2 +-
+ 4 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/include/linux/printk.h
++++ b/include/linux/printk.h
+@@ -101,9 +101,9 @@ asmlinkage __printf(1, 2) __cold
+ int printk(const char *fmt, ...);
+
+ /*
+- * Special printk facility for scheduler use only, _DO_NOT_USE_ !
++ * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
+ */
+-__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
++__printf(1, 2) __cold int printk_deferred(const char *fmt, ...);
+
+ /*
+ * Please don't use printk_ratelimit(), because it shares ratelimiting state
+@@ -133,7 +133,7 @@ int printk(const char *s, ...)
+ return 0;
+ }
+ static inline __printf(1, 2) __cold
+-int printk_sched(const char *s, ...)
++int printk_deferred(const char *s, ...)
+ {
+ return 0;
+ }
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -1653,7 +1653,7 @@ late_initcall(printk_late_init);
+
+ #if defined CONFIG_PRINTK
+
+-int printk_sched(const char *fmt, ...)
++int printk_deferred(const char *fmt, ...)
+ {
+ unsigned long flags;
+ va_list args;
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1313,7 +1313,7 @@ out:
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+- printk_sched("process %d (%s) no longer affine to cpu%d\n",
++ printk_deferred("process %d (%s) no longer affine to cpu%d\n",
+ task_pid_nr(p), p->comm, cpu);
+ }
+ }
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -884,7 +884,7 @@ static int sched_rt_runtime_exceeded(str
+
+ if (!once) {
+ once = true;
+- printk_sched("sched: RT throttling activated\n");
++ printk_deferred("sched: RT throttling activated\n");
+ }
+ } else {
+ /*
--- /dev/null
+From 504d58745c9ca28d33572e2d8a9990b43e06075d Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 1 Aug 2014 12:20:02 +0200
+Subject: timer: Fix lock inversion between hrtimer_bases.lock and scheduler locks
+
+From: Jan Kara <jack@suse.cz>
+
+commit 504d58745c9ca28d33572e2d8a9990b43e06075d upstream.
+
+clockevents_increase_min_delta() calls printk() from under
+hrtimer_bases.lock. That causes lock inversion on scheduler locks because
+printk() can call into the scheduler. Lockdep puts it as:
+
+======================================================
+[ INFO: possible circular locking dependency detected ]
+3.15.0-rc8-06195-g939f04b #2 Not tainted
+-------------------------------------------------------
+trinity-main/74 is trying to acquire lock:
+ (&port_lock_key){-.....}, at: [<811c60be>] serial8250_console_write+0x8c/0x10c
+
+but task is already holding lock:
+ (hrtimer_bases.lock){-.-...}, at: [<8103caeb>] hrtimer_try_to_cancel+0x13/0x66
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #5 (hrtimer_bases.lock){-.-...}:
+ [<8104a942>] lock_acquire+0x92/0x101
+ [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
+ [<8103c918>] __hrtimer_start_range_ns+0x1c/0x197
+ [<8107ec20>] perf_swevent_start_hrtimer.part.41+0x7a/0x85
+ [<81080792>] task_clock_event_start+0x3a/0x3f
+ [<810807a4>] task_clock_event_add+0xd/0x14
+ [<8108259a>] event_sched_in+0xb6/0x17a
+ [<810826a2>] group_sched_in+0x44/0x122
+ [<81082885>] ctx_sched_in.isra.67+0x105/0x11f
+ [<810828e6>] perf_event_sched_in.isra.70+0x47/0x4b
+ [<81082bf6>] __perf_install_in_context+0x8b/0xa3
+ [<8107eb8e>] remote_function+0x12/0x2a
+ [<8105f5af>] smp_call_function_single+0x2d/0x53
+ [<8107e17d>] task_function_call+0x30/0x36
+ [<8107fb82>] perf_install_in_context+0x87/0xbb
+ [<810852c9>] SYSC_perf_event_open+0x5c6/0x701
+ [<810856f9>] SyS_perf_event_open+0x17/0x19
+ [<8142f8ee>] syscall_call+0x7/0xb
+
+-> #4 (&ctx->lock){......}:
+ [<8104a942>] lock_acquire+0x92/0x101
+ [<8142f04c>] _raw_spin_lock+0x21/0x30
+ [<81081df3>] __perf_event_task_sched_out+0x1dc/0x34f
+ [<8142cacc>] __schedule+0x4c6/0x4cb
+ [<8142cae0>] schedule+0xf/0x11
+ [<8142f9a6>] work_resched+0x5/0x30
+
+-> #3 (&rq->lock){-.-.-.}:
+ [<8104a942>] lock_acquire+0x92/0x101
+ [<8142f04c>] _raw_spin_lock+0x21/0x30
+ [<81040873>] __task_rq_lock+0x33/0x3a
+ [<8104184c>] wake_up_new_task+0x25/0xc2
+ [<8102474b>] do_fork+0x15c/0x2a0
+ [<810248a9>] kernel_thread+0x1a/0x1f
+ [<814232a2>] rest_init+0x1a/0x10e
+ [<817af949>] start_kernel+0x303/0x308
+ [<817af2ab>] i386_start_kernel+0x79/0x7d
+
+-> #2 (&p->pi_lock){-.-...}:
+ [<8104a942>] lock_acquire+0x92/0x101
+ [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
+ [<810413dd>] try_to_wake_up+0x1d/0xd6
+ [<810414cd>] default_wake_function+0xb/0xd
+ [<810461f3>] __wake_up_common+0x39/0x59
+ [<81046346>] __wake_up+0x29/0x3b
+ [<811b8733>] tty_wakeup+0x49/0x51
+ [<811c3568>] uart_write_wakeup+0x17/0x19
+ [<811c5dc1>] serial8250_tx_chars+0xbc/0xfb
+ [<811c5f28>] serial8250_handle_irq+0x54/0x6a
+ [<811c5f57>] serial8250_default_handle_irq+0x19/0x1c
+ [<811c56d8>] serial8250_interrupt+0x38/0x9e
+ [<810510e7>] handle_irq_event_percpu+0x5f/0x1e2
+ [<81051296>] handle_irq_event+0x2c/0x43
+ [<81052cee>] handle_level_irq+0x57/0x80
+ [<81002a72>] handle_irq+0x46/0x5c
+ [<810027df>] do_IRQ+0x32/0x89
+ [<8143036e>] common_interrupt+0x2e/0x33
+ [<8142f23c>] _raw_spin_unlock_irqrestore+0x3f/0x49
+ [<811c25a4>] uart_start+0x2d/0x32
+ [<811c2c04>] uart_write+0xc7/0xd6
+ [<811bc6f6>] n_tty_write+0xb8/0x35e
+ [<811b9beb>] tty_write+0x163/0x1e4
+ [<811b9cd9>] redirected_tty_write+0x6d/0x75
+ [<810b6ed6>] vfs_write+0x75/0xb0
+ [<810b7265>] SyS_write+0x44/0x77
+ [<8142f8ee>] syscall_call+0x7/0xb
+
+-> #1 (&tty->write_wait){-.....}:
+ [<8104a942>] lock_acquire+0x92/0x101
+ [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
+ [<81046332>] __wake_up+0x15/0x3b
+ [<811b8733>] tty_wakeup+0x49/0x51
+ [<811c3568>] uart_write_wakeup+0x17/0x19
+ [<811c5dc1>] serial8250_tx_chars+0xbc/0xfb
+ [<811c5f28>] serial8250_handle_irq+0x54/0x6a
+ [<811c5f57>] serial8250_default_handle_irq+0x19/0x1c
+ [<811c56d8>] serial8250_interrupt+0x38/0x9e
+ [<810510e7>] handle_irq_event_percpu+0x5f/0x1e2
+ [<81051296>] handle_irq_event+0x2c/0x43
+ [<81052cee>] handle_level_irq+0x57/0x80
+ [<81002a72>] handle_irq+0x46/0x5c
+ [<810027df>] do_IRQ+0x32/0x89
+ [<8143036e>] common_interrupt+0x2e/0x33
+ [<8142f23c>] _raw_spin_unlock_irqrestore+0x3f/0x49
+ [<811c25a4>] uart_start+0x2d/0x32
+ [<811c2c04>] uart_write+0xc7/0xd6
+ [<811bc6f6>] n_tty_write+0xb8/0x35e
+ [<811b9beb>] tty_write+0x163/0x1e4
+ [<811b9cd9>] redirected_tty_write+0x6d/0x75
+ [<810b6ed6>] vfs_write+0x75/0xb0
+ [<810b7265>] SyS_write+0x44/0x77
+ [<8142f8ee>] syscall_call+0x7/0xb
+
+-> #0 (&port_lock_key){-.....}:
+ [<8104a62d>] __lock_acquire+0x9ea/0xc6d
+ [<8104a942>] lock_acquire+0x92/0x101
+ [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
+ [<811c60be>] serial8250_console_write+0x8c/0x10c
+ [<8104e402>] call_console_drivers.constprop.31+0x87/0x118
+ [<8104f5d5>] console_unlock+0x1d7/0x398
+ [<8104fb70>] vprintk_emit+0x3da/0x3e4
+ [<81425f76>] printk+0x17/0x19
+ [<8105bfa0>] clockevents_program_min_delta+0x104/0x116
+ [<8105c548>] clockevents_program_event+0xe7/0xf3
+ [<8105cc1c>] tick_program_event+0x1e/0x23
+ [<8103c43c>] hrtimer_force_reprogram+0x88/0x8f
+ [<8103c49e>] __remove_hrtimer+0x5b/0x79
+ [<8103cb21>] hrtimer_try_to_cancel+0x49/0x66
+ [<8103cb4b>] hrtimer_cancel+0xd/0x18
+ [<8107f102>] perf_swevent_cancel_hrtimer.part.60+0x2b/0x30
+ [<81080705>] task_clock_event_stop+0x20/0x64
+ [<81080756>] task_clock_event_del+0xd/0xf
+ [<81081350>] event_sched_out+0xab/0x11e
+ [<810813e0>] group_sched_out+0x1d/0x66
+ [<81081682>] ctx_sched_out+0xaf/0xbf
+ [<81081e04>] __perf_event_task_sched_out+0x1ed/0x34f
+ [<8142cacc>] __schedule+0x4c6/0x4cb
+ [<8142cae0>] schedule+0xf/0x11
+ [<8142f9a6>] work_resched+0x5/0x30
+
+other info that might help us debug this:
+
+Chain exists of:
+ &port_lock_key --> &ctx->lock --> hrtimer_bases.lock
+
+ Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(hrtimer_bases.lock);
+ lock(&ctx->lock);
+ lock(hrtimer_bases.lock);
+ lock(&port_lock_key);
+
+ *** DEADLOCK ***
+
+4 locks held by trinity-main/74:
+ #0: (&rq->lock){-.-.-.}, at: [<8142c6f3>] __schedule+0xed/0x4cb
+ #1: (&ctx->lock){......}, at: [<81081df3>] __perf_event_task_sched_out+0x1dc/0x34f
+ #2: (hrtimer_bases.lock){-.-...}, at: [<8103caeb>] hrtimer_try_to_cancel+0x13/0x66
+ #3: (console_lock){+.+...}, at: [<8104fb5d>] vprintk_emit+0x3c7/0x3e4
+
+stack backtrace:
+CPU: 0 PID: 74 Comm: trinity-main Not tainted 3.15.0-rc8-06195-g939f04b #2
+ 00000000 81c3a310 8b995c14 81426f69 8b995c44 81425a99 8161f671 8161f570
+ 8161f538 8161f559 8161f538 8b995c78 8b142bb0 00000004 8b142fdc 8b142bb0
+ 8b995ca8 8104a62d 8b142fac 000016f2 81c3a310 00000001 00000001 00000003
+Call Trace:
+ [<81426f69>] dump_stack+0x16/0x18
+ [<81425a99>] print_circular_bug+0x18f/0x19c
+ [<8104a62d>] __lock_acquire+0x9ea/0xc6d
+ [<8104a942>] lock_acquire+0x92/0x101
+ [<811c60be>] ? serial8250_console_write+0x8c/0x10c
+ [<811c6032>] ? wait_for_xmitr+0x76/0x76
+ [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
+ [<811c60be>] ? serial8250_console_write+0x8c/0x10c
+ [<811c60be>] serial8250_console_write+0x8c/0x10c
+ [<8104af87>] ? lock_release+0x191/0x223
+ [<811c6032>] ? wait_for_xmitr+0x76/0x76
+ [<8104e402>] call_console_drivers.constprop.31+0x87/0x118
+ [<8104f5d5>] console_unlock+0x1d7/0x398
+ [<8104fb70>] vprintk_emit+0x3da/0x3e4
+ [<81425f76>] printk+0x17/0x19
+ [<8105bfa0>] clockevents_program_min_delta+0x104/0x116
+ [<8105cc1c>] tick_program_event+0x1e/0x23
+ [<8103c43c>] hrtimer_force_reprogram+0x88/0x8f
+ [<8103c49e>] __remove_hrtimer+0x5b/0x79
+ [<8103cb21>] hrtimer_try_to_cancel+0x49/0x66
+ [<8103cb4b>] hrtimer_cancel+0xd/0x18
+ [<8107f102>] perf_swevent_cancel_hrtimer.part.60+0x2b/0x30
+ [<81080705>] task_clock_event_stop+0x20/0x64
+ [<81080756>] task_clock_event_del+0xd/0xf
+ [<81081350>] event_sched_out+0xab/0x11e
+ [<810813e0>] group_sched_out+0x1d/0x66
+ [<81081682>] ctx_sched_out+0xaf/0xbf
+ [<81081e04>] __perf_event_task_sched_out+0x1ed/0x34f
+ [<8104416d>] ? __dequeue_entity+0x23/0x27
+ [<81044505>] ? pick_next_task_fair+0xb1/0x120
+ [<8142cacc>] __schedule+0x4c6/0x4cb
+ [<81047574>] ? trace_hardirqs_off_caller+0xd7/0x108
+ [<810475b0>] ? trace_hardirqs_off+0xb/0xd
+ [<81056346>] ? rcu_irq_exit+0x64/0x77
+
+Fix the problem by using printk_deferred() which does not call into the
+scheduler.
+
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/clockevents.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/kernel/time/clockevents.c
++++ b/kernel/time/clockevents.c
+@@ -143,7 +143,8 @@ static int clockevents_increase_min_delt
+ {
+ /* Nothing to do if we already reached the limit */
+ if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
+- printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n");
++ printk_deferred(KERN_WARNING
++ "CE: Reprogramming failure. Giving up\n");
+ dev->next_event.tv64 = KTIME_MAX;
+ return -ETIME;
+ }
+@@ -156,9 +157,10 @@ static int clockevents_increase_min_delt
+ if (dev->min_delta_ns > MIN_DELTA_LIMIT)
+ dev->min_delta_ns = MIN_DELTA_LIMIT;
+
+- printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
+- dev->name ? dev->name : "?",
+- (unsigned long long) dev->min_delta_ns);
++ printk_deferred(KERN_WARNING
++ "CE: %s increased min_delta_ns to %llu nsec\n",
++ dev->name ? dev->name : "?",
++ (unsigned long long) dev->min_delta_ns);
+ return 0;
+ }
+