--- /dev/null
+From 90cae1fe1c3540f791d5b8e025985fa5e699b2bb Mon Sep 17 00:00:00 2001
+From: Oliver O'Halloran <oohall@gmail.com>
+Date: Tue, 26 Jul 2016 15:22:17 -0700
+Subject: mm/init: fix zone boundary creation
+
+From: Oliver O'Halloran <oohall@gmail.com>
+
+commit 90cae1fe1c3540f791d5b8e025985fa5e699b2bb upstream.
+
+As part of memory initialisation the architecture passes an array to
+free_area_init_nodes() which specifies the max PFN of each memory zone.
+This array is not necessarily monotonic (due to unused zones), so it is
+parsed to build monotonic lists of the min and max PFN for each zone.
+zone. ZONE_MOVABLE is special cased here as its limits are managed by
+the mm subsystem rather than the architecture. Unfortunately, this
+special casing is broken when ZONE_MOVABLE is not the last zone in
+the zone list. The core of the issue is:
+
+ if (i == ZONE_MOVABLE)
+ continue;
+ arch_zone_lowest_possible_pfn[i] =
+ arch_zone_highest_possible_pfn[i-1];
+
+As ZONE_MOVABLE is skipped, the lowest_possible_pfn of the next zone
+will be set to zero. This patch fixes the bug by explicitly tracking
+where the next zone should start rather than relying on the contents
+of arch_zone_highest_possible_pfn[].
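+
+For illustration only (not part of the change), a minimal userspace
+sketch of the fixed loop; the zone count, PFN values and the position
+of the movable zone below are made-up, and the real code lives in
+free_area_init_nodes():
+
+    /* standalone sketch, not kernel code */
+    #include <stdio.h>
+
+    #define NR_ZONES     4
+    #define ZONE_MOVABLE 2  /* not the last zone, as with ZONE_DEVICE */
+
+    int main(void)
+    {
+        /* per-zone max PFN as an arch might pass it; MOVABLE unused */
+        unsigned long max_zone_pfn[NR_ZONES] = { 4096, 262144, 0, 524288 };
+        unsigned long lowest[NR_ZONES] = { 0 }, highest[NR_ZONES] = { 0 };
+        /* stands in for find_min_pfn_with_active_regions() */
+        unsigned long start_pfn = 1024;
+        unsigned long end_pfn;
+        int i;
+
+        for (i = 0; i < NR_ZONES; i++) {
+            if (i == ZONE_MOVABLE)
+                continue;
+            /* clamp so an unused zone cannot move the start backwards */
+            end_pfn = max_zone_pfn[i] > start_pfn ? max_zone_pfn[i]
+                                                  : start_pfn;
+            lowest[i] = start_pfn;
+            highest[i] = end_pfn;
+            /* the next zone starts where this one ends */
+            start_pfn = end_pfn;
+        }
+
+        for (i = 0; i < NR_ZONES; i++)
+            printf("zone %d: [%lu, %lu)\n", i, lowest[i], highest[i]);
+        return 0;
+    }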
+
+This is low priority. To get bitten by this you need to enable a zone
+that appears after ZONE_MOVABLE in the zone_type enum. As far as I can
+tell this means running a kernel with ZONE_DEVICE or ZONE_CMA enabled,
+so I can't see this affecting too many people.
+
+I only noticed this because I've been fiddling with ZONE_DEVICE on
+powerpc and 4.6 broke my test kernel. This bug, in conjunction with the
+changes in Taku Izumi's kernelcore=mirror patch (d91749c1dda71) and
+powerpc being the odd architecture which initialises max_zone_pfn[] to
+~0ul instead of 0, caused all of system memory to be placed into
+ZONE_DEVICE at boot, followed by a panic since device memory cannot be used
+for kernel allocations. I've already submitted a patch to fix the
+powerpc specific bits, but I figured this should be fixed too.
+
+Link: http://lkml.kernel.org/r/1462435033-15601-1-git-send-email-oohall@gmail.com
+Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
+Cc: Anton Blanchard <anton@samba.org>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5337,15 +5337,18 @@ void __init free_area_init_nodes(unsigne
+ sizeof(arch_zone_lowest_possible_pfn));
+ memset(arch_zone_highest_possible_pfn, 0,
+ sizeof(arch_zone_highest_possible_pfn));
+- arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
+- arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
+- for (i = 1; i < MAX_NR_ZONES; i++) {
++
++ start_pfn = find_min_pfn_with_active_regions();
++
++ for (i = 0; i < MAX_NR_ZONES; i++) {
+ if (i == ZONE_MOVABLE)
+ continue;
+- arch_zone_lowest_possible_pfn[i] =
+- arch_zone_highest_possible_pfn[i-1];
+- arch_zone_highest_possible_pfn[i] =
+- max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
++
++ end_pfn = max(max_zone_pfn[i], start_pfn);
++ arch_zone_lowest_possible_pfn[i] = start_pfn;
++ arch_zone_highest_possible_pfn[i] = end_pfn;
++
++ start_pfn = end_pfn;
+ }
+ arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
+ arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
--- /dev/null
+From 86038c5ea81b519a8a1fcfcd5e4599aab0cdd119 Mon Sep 17 00:00:00 2001
+From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Date: Tue, 16 Dec 2014 12:47:34 +0100
+Subject: perf: Avoid horrible stack usage
+
+From: Peter Zijlstra (Intel) <peterz@infradead.org>
+
+commit 86038c5ea81b519a8a1fcfcd5e4599aab0cdd119 upstream.
+
+Both Linus (most recently) and Steve (a while ago) reported that
+perf-related callbacks have massive stack bloat.
+
+The problem is that software events need a pt_regs in order to
+properly report the event location and unwind the stack. And because
+we could not assume one was present, we allocated one on the stack and
+filled it with the minimal bits required for operation.
+
+Now, pt_regs is quite large, so this is undesirable. Furthermore, it
+turns out that most sites actually have a pt_regs pointer available,
+making this even more onerous, as the stack space is a pointless waste.
+
+This patch addresses the problem by observing that software events
+have well-defined nesting semantics, therefore we can use static
+per-cpu storage instead of an on-stack allocation.
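+
+For illustration only (not part of the change), a rough userspace
+sketch of that idea; fake_regs, MAX_NESTING and the nesting counter
+are made-up stand-ins, while the real code keeps four per-cpu
+struct pt_regs slots indexed by the recursion context:
+
+    /* standalone sketch, not kernel code */
+    #include <stdio.h>
+    #include <string.h>
+
+    #define MAX_NESTING 4  /* task, softirq, hardirq, NMI in the kernel */
+
+    struct fake_regs { unsigned long ip, sp; };  /* stand-in for pt_regs */
+
+    /* static slots instead of an on-stack pt_regs; per-CPU for real */
+    static struct fake_regs regs_slots[MAX_NESTING];
+    static int nesting = -1;
+
+    static struct fake_regs *get_regs_slot(void)
+    {
+        if (nesting + 1 >= MAX_NESTING)
+            return NULL;  /* nested too deep: drop the event */
+        return &regs_slots[++nesting];
+    }
+
+    static void put_regs_slot(void)
+    {
+        nesting--;
+    }
+
+    static void report_sw_event(const char *name, unsigned long ip)
+    {
+        struct fake_regs *regs = get_regs_slot();
+
+        if (!regs)
+            return;
+        memset(regs, 0, sizeof(*regs));
+        regs->ip = ip;  /* what perf_fetch_caller_regs() does for real */
+        printf("%s: ip=%#lx at nesting level %d\n", name, regs->ip, nesting);
+        put_regs_slot();
+    }
+
+    int main(void)
+    {
+        report_sw_event("context-switch", 0x401234UL);
+        return 0;
+    }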
+
+Linus made the further observation that all but the scheduler callers
+of perf_sw_event() have a pt_regs available, so we change the regular
+perf_sw_event() to require a valid pt_regs (where it used to be
+optional) and add perf_sw_event_sched() for the scheduler.
+
+We have a scheduler-specific call instead of a more generic
+_noregs()-like construct because we can assume non-recursion from the
+scheduler and thereby simplify the code further (_noregs would have to
+put the recursion context call inline in order to ascertain which
+__perf_regs element to use).
+
+One last note on the implementation of perf_trace_buf_prepare(): we
+allow .regs = NULL for those cases where we already have a pt_regs
+pointer available and do not need another.
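+
+For illustration only (not part of the change), a rough userspace
+sketch of that calling convention; buf_prepare(), fake_regs and the
+fixed-size buffers are made-up stand-ins, not the kernel API:
+
+    /* standalone sketch, not kernel code */
+    #include <stddef.h>
+    #include <stdio.h>
+
+    struct fake_regs { unsigned long ip; };  /* stand-in for pt_regs */
+
+    static struct fake_regs scratch_regs[4]; /* __perf_regs[4] for real */
+    static char trace_buf[4][256];
+
+    /* hand back a trace buffer; optionally also a scratch regs slot */
+    static void *buf_prepare(int size, struct fake_regs **regs, int *rctxp)
+    {
+        int rctx = 0;  /* recursion context lookup elided */
+
+        if (size > (int)sizeof(trace_buf[0]))
+            return NULL;
+        *rctxp = rctx;
+        if (regs)
+            *regs = &scratch_regs[rctx];
+        return trace_buf[rctx];
+    }
+
+    int main(void)
+    {
+        struct fake_regs *regs;
+        void *entry;
+        int rctx;
+
+        /* tracepoint-style caller: needs a regs slot handed back */
+        entry = buf_prepare(64, &regs, &rctx);
+        printf("with regs: %p %p %d\n", entry, (void *)regs, rctx);
+
+        /* kprobe/syscall-style caller: already has regs, passes NULL */
+        entry = buf_prepare(64, NULL, &rctx);
+        printf("no regs:   %p %d\n", entry, rctx);
+        return 0;
+    }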
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Reported-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Cc: Javi Merino <javi.merino@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Petr Mladek <pmladek@suse.cz>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
+Cc: Vaibhav Nagarnaik <vnagarnaik@google.com>
+Link: http://lkml.kernel.org/r/20141216115041.GW3337@twins.programming.kicks-ass.net
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ftrace_event.h | 2 +-
+ include/linux/perf_event.h | 28 +++++++++++++++++++++-------
+ include/trace/ftrace.h | 7 ++++---
+ kernel/events/core.c | 23 +++++++++++++++++------
+ kernel/sched/core.c | 2 +-
+ kernel/trace/trace_event_perf.c | 4 +++-
+ kernel/trace/trace_kprobe.c | 4 ++--
+ kernel/trace/trace_syscalls.c | 4 ++--
+ kernel/trace/trace_uprobe.c | 2 +-
+ 9 files changed, 52 insertions(+), 24 deletions(-)
+
+--- a/include/linux/ftrace_event.h
++++ b/include/linux/ftrace_event.h
+@@ -584,7 +584,7 @@ extern int ftrace_profile_set_filter(st
+ char *filter_str);
+ extern void ftrace_profile_free_filter(struct perf_event *event);
+ extern void *perf_trace_buf_prepare(int size, unsigned short type,
+- struct pt_regs *regs, int *rctxp);
++ struct pt_regs **regs, int *rctxp);
+
+ static inline void
+ perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -660,6 +660,7 @@ static inline int is_software_event(stru
+
+ extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
+
++extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
+ extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
+
+ #ifndef perf_arch_fetch_caller_regs
+@@ -684,14 +685,25 @@ static inline void perf_fetch_caller_reg
+ static __always_inline void
+ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
+ {
+- struct pt_regs hot_regs;
++ if (static_key_false(&perf_swevent_enabled[event_id]))
++ __perf_sw_event(event_id, nr, regs, addr);
++}
++
++DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
+
++/*
++ * 'Special' version for the scheduler, it hard assumes no recursion,
++ * which is guaranteed by us not actually scheduling inside other swevents
++ * because those disable preemption.
++ */
++static __always_inline void
++perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
++{
+ if (static_key_false(&perf_swevent_enabled[event_id])) {
+- if (!regs) {
+- perf_fetch_caller_regs(&hot_regs);
+- regs = &hot_regs;
+- }
+- __perf_sw_event(event_id, nr, regs, addr);
++ struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
++
++ perf_fetch_caller_regs(regs);
++ ___perf_sw_event(event_id, nr, regs, addr);
+ }
+ }
+
+@@ -707,7 +719,7 @@ static inline void perf_event_task_sched
+ static inline void perf_event_task_sched_out(struct task_struct *prev,
+ struct task_struct *next)
+ {
+- perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
++ perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+
+ if (static_key_false(&perf_sched_events.key))
+ __perf_event_task_sched_out(prev, next);
+@@ -818,6 +830,8 @@ static inline int perf_event_refresh(str
+ static inline void
+ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
+ static inline void
++perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { }
++static inline void
+ perf_bp_event(struct perf_event *event, void *data) { }
+
+ static inline int perf_register_guest_info_callbacks
+--- a/include/trace/ftrace.h
++++ b/include/trace/ftrace.h
+@@ -765,7 +765,7 @@ perf_trace_##call(void *__data, proto)
+ struct ftrace_event_call *event_call = __data; \
+ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+ struct ftrace_raw_##call *entry; \
+- struct pt_regs __regs; \
++ struct pt_regs *__regs; \
+ u64 __addr = 0, __count = 1; \
+ struct task_struct *__task = NULL; \
+ struct hlist_head *head; \
+@@ -784,18 +784,19 @@ perf_trace_##call(void *__data, proto)
+ sizeof(u64)); \
+ __entry_size -= sizeof(u32); \
+ \
+- perf_fetch_caller_regs(&__regs); \
+ entry = perf_trace_buf_prepare(__entry_size, \
+ event_call->event.type, &__regs, &rctx); \
+ if (!entry) \
+ return; \
+ \
++ perf_fetch_caller_regs(__regs); \
++ \
+ tstruct \
+ \
+ { assign; } \
+ \
+ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
+- __count, &__regs, head, __task); \
++ __count, __regs, head, __task); \
+ }
+
+ /*
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -5905,6 +5905,8 @@ end:
+ rcu_read_unlock();
+ }
+
++DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
++
+ int perf_swevent_get_recursion_context(void)
+ {
+ struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
+@@ -5920,21 +5922,30 @@ inline void perf_swevent_put_recursion_c
+ put_recursion_context(swhash->recursion, rctx);
+ }
+
+-void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
++void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
+ {
+ struct perf_sample_data data;
+- int rctx;
+
+- preempt_disable_notrace();
+- rctx = perf_swevent_get_recursion_context();
+- if (rctx < 0)
++ if (WARN_ON_ONCE(!regs))
+ return;
+
+ perf_sample_data_init(&data, addr, 0);
+-
+ do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
++}
++
++void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
++{
++ int rctx;
++
++ preempt_disable_notrace();
++ rctx = perf_swevent_get_recursion_context();
++ if (unlikely(rctx < 0))
++ goto fail;
++
++ ___perf_sw_event(event_id, nr, regs, addr);
+
+ perf_swevent_put_recursion_context(rctx);
++fail:
+ preempt_enable_notrace();
+ }
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1083,7 +1083,7 @@ void set_task_cpu(struct task_struct *p,
+ if (p->sched_class->migrate_task_rq)
+ p->sched_class->migrate_task_rq(p, new_cpu);
+ p->se.nr_migrations++;
+- perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
++ perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+ }
+
+ __set_task_cpu(p, new_cpu);
+--- a/kernel/trace/trace_event_perf.c
++++ b/kernel/trace/trace_event_perf.c
+@@ -261,7 +261,7 @@ void perf_trace_del(struct perf_event *p
+ }
+
+ void *perf_trace_buf_prepare(int size, unsigned short type,
+- struct pt_regs *regs, int *rctxp)
++ struct pt_regs **regs, int *rctxp)
+ {
+ struct trace_entry *entry;
+ unsigned long flags;
+@@ -280,6 +280,8 @@ void *perf_trace_buf_prepare(int size, u
+ if (*rctxp < 0)
+ return NULL;
+
++ if (regs)
++ *regs = this_cpu_ptr(&__perf_regs[*rctxp]);
+ raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
+
+ /* zero the dead bytes from align to not leak stack to user */
+--- a/kernel/trace/trace_kprobe.c
++++ b/kernel/trace/trace_kprobe.c
+@@ -1158,7 +1158,7 @@ kprobe_perf_func(struct trace_kprobe *tk
+ size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ size -= sizeof(u32);
+
+- entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
++ entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
+ if (!entry)
+ return;
+
+@@ -1189,7 +1189,7 @@ kretprobe_perf_func(struct trace_kprobe
+ size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ size -= sizeof(u32);
+
+- entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
++ entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
+ if (!entry)
+ return;
+
+--- a/kernel/trace/trace_syscalls.c
++++ b/kernel/trace/trace_syscalls.c
+@@ -586,7 +586,7 @@ static void perf_syscall_enter(void *ign
+ size -= sizeof(u32);
+
+ rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
+- sys_data->enter_event->event.type, regs, &rctx);
++ sys_data->enter_event->event.type, NULL, &rctx);
+ if (!rec)
+ return;
+
+@@ -659,7 +659,7 @@ static void perf_syscall_exit(void *igno
+ size -= sizeof(u32);
+
+ rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
+- sys_data->exit_event->event.type, regs, &rctx);
++ sys_data->exit_event->event.type, NULL, &rctx);
+ if (!rec)
+ return;
+
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -1115,7 +1115,7 @@ static void __uprobe_perf_func(struct tr
+ if (hlist_empty(head))
+ goto out;
+
+- entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
++ entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
+ if (!entry)
+ goto out;
+