--- /dev/null
+From 548acf19234dbda5a52d5a8e7e205af46e9da840 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Wed, 17 Feb 2016 10:20:12 -0800
+Subject: x86/mm: Expand the exception table logic to allow new handling options
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit 548acf19234dbda5a52d5a8e7e205af46e9da840 upstream.
+
+Huge amounts of help from Andy Lutomirski and Borislav Petkov to
+produce this. Andy provided the inspiration to add classes to the
+exception table with a clever bit-squeezing trick, Boris pointed
+out how much cleaner it would all be if we just had a new field.
+
+Linus Torvalds blessed the expansion with:
+
+ ' I'd rather not be clever in order to save just a tiny amount of space
+ in the exception table, which isn't really criticial for anybody. '
+
+The third field is another relative function pointer, this one to a
+handler that executes the actions.
+
+We start out with three handlers:
+
+ 1: Legacy - just jumps the to fixup IP
+ 2: Fault - provide the trap number in %ax to the fixup code
+ 3: Cleaned up legacy for the uaccess error hack
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/f6af78fcbd348cf4939875cfda9c19689b5e50b8.1455732970.git.tony.luck@intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/exception-tables.txt | 35 +++++++++++
+ arch/x86/include/asm/asm.h | 40 +++++++-----
+ arch/x86/include/asm/uaccess.h | 16 ++---
+ arch/x86/kernel/kprobes/core.c | 2
+ arch/x86/kernel/traps.c | 6 -
+ arch/x86/mm/extable.c | 102 +++++++++++++++++++++++----------
+ arch/x86/mm/fault.c | 2
+ scripts/sortextable.c | 32 ++++++++++
+ 8 files changed, 177 insertions(+), 58 deletions(-)
+
+--- a/Documentation/x86/exception-tables.txt
++++ b/Documentation/x86/exception-tables.txt
+@@ -290,3 +290,38 @@ Due to the way that the exception table
+ only use exceptions for code in the .text section. Any other section
+ will cause the exception table to not be sorted correctly, and the
+ exceptions will fail.
++
++Things changed when 64-bit support was added to x86 Linux. Rather than
++double the size of the exception table by expanding the two entries
++from 32-bits to 64 bits, a clever trick was used to store addresses
++as relative offsets from the table itself. The assembly code changed
++from:
++ .long 1b,3b
++to:
++ .long (from) - .
++ .long (to) - .
++
++and the C-code that uses these values converts back to absolute addresses
++like this:
++
++ ex_insn_addr(const struct exception_table_entry *x)
++ {
++ return (unsigned long)&x->insn + x->insn;
++ }
++
++In v4.6 the exception table entry was expanded with a new field "handler".
++This is also 32-bits wide and contains a third relative function
++pointer which points to one of:
++
++1) int ex_handler_default(const struct exception_table_entry *fixup)
++ This is legacy case that just jumps to the fixup code
++2) int ex_handler_fault(const struct exception_table_entry *fixup)
++ This case provides the fault number of the trap that occurred at
++ entry->insn. It is used to distinguish page faults from machine
++ check.
++3) int ex_handler_ext(const struct exception_table_entry *fixup)
++ This case is used for uaccess_err ... we need to set a flag
++ in the task structure. Before the handler functions existed this
++ case was handled by adding a large offset to the fixup to tag
++ it as special.
++More functions can easily be added.
+--- a/arch/x86/include/asm/asm.h
++++ b/arch/x86/include/asm/asm.h
+@@ -44,19 +44,22 @@
+
+ /* Exception table entry */
+ #ifdef __ASSEMBLY__
+-# define _ASM_EXTABLE(from,to) \
++# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ .pushsection "__ex_table","a" ; \
+- .balign 8 ; \
++ .balign 4 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
++ .long (handler) - . ; \
+ .popsection
+
+-# define _ASM_EXTABLE_EX(from,to) \
+- .pushsection "__ex_table","a" ; \
+- .balign 8 ; \
+- .long (from) - . ; \
+- .long (to) - . + 0x7ffffff0 ; \
+- .popsection
++# define _ASM_EXTABLE(from, to) \
++ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
++
++# define _ASM_EXTABLE_FAULT(from, to) \
++ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
++
++# define _ASM_EXTABLE_EX(from, to) \
++ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
+ # define _ASM_NOKPROBE(entry) \
+ .pushsection "_kprobe_blacklist","aw" ; \
+@@ -89,19 +92,24 @@
+ .endm
+
+ #else
+-# define _ASM_EXTABLE(from,to) \
++# define _EXPAND_EXTABLE_HANDLE(x) #x
++# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+- " .balign 8\n" \
++ " .balign 4\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
++ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
+ " .popsection\n"
+
+-# define _ASM_EXTABLE_EX(from,to) \
+- " .pushsection \"__ex_table\",\"a\"\n" \
+- " .balign 8\n" \
+- " .long (" #from ") - .\n" \
+- " .long (" #to ") - . + 0x7ffffff0\n" \
+- " .popsection\n"
++# define _ASM_EXTABLE(from, to) \
++ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
++
++# define _ASM_EXTABLE_FAULT(from, to) \
++ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
++
++# define _ASM_EXTABLE_EX(from, to) \
++ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
++
+ /* For C file, we already have NOKPROBE_SYMBOL macro */
+ #endif
+
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -90,12 +90,11 @@ static inline bool __chk_range_not_ok(un
+ likely(!__range_not_ok(addr, size, user_addr_max()))
+
+ /*
+- * The exception table consists of pairs of addresses relative to the
+- * exception table enty itself: the first is the address of an
+- * instruction that is allowed to fault, and the second is the address
+- * at which the program should continue. No registers are modified,
+- * so it is entirely up to the continuation code to figure out what to
+- * do.
++ * The exception table consists of triples of addresses relative to the
++ * exception table entry itself. The first address is of an instruction
++ * that is allowed to fault, the second is the target at which the program
++ * should continue. The third is a handler function to deal with the fault
++ * caused by the instruction in the first field.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+@@ -104,13 +103,14 @@ static inline bool __chk_range_not_ok(un
+ */
+
+ struct exception_table_entry {
+- int insn, fixup;
++ int insn, fixup, handler;
+ };
+ /* This is not the generic standard exception_table_entry format */
+ #define ARCH_HAS_SORT_EXTABLE
+ #define ARCH_HAS_SEARCH_EXTABLE
+
+-extern int fixup_exception(struct pt_regs *regs);
++extern int fixup_exception(struct pt_regs *regs, int trapnr);
++extern bool ex_has_fault_handler(unsigned long ip);
+ extern int early_fixup_exception(unsigned long *ip);
+
+ /*
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -1000,7 +1000,7 @@ int kprobe_fault_handler(struct pt_regs
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+- if (fixup_exception(regs))
++ if (fixup_exception(regs, trapnr))
+ return 1;
+
+ /*
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -199,7 +199,7 @@ do_trap_no_signal(struct task_struct *ts
+ }
+
+ if (!user_mode(regs)) {
+- if (!fixup_exception(regs)) {
++ if (!fixup_exception(regs, trapnr)) {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_nr = trapnr;
+ die(str, regs, error_code);
+@@ -453,7 +453,7 @@ do_general_protection(struct pt_regs *re
+
+ tsk = current;
+ if (!user_mode(regs)) {
+- if (fixup_exception(regs))
++ if (fixup_exception(regs, X86_TRAP_GP))
+ return;
+
+ tsk->thread.error_code = error_code;
+@@ -699,7 +699,7 @@ static void math_error(struct pt_regs *r
+ conditional_sti(regs);
+
+ if (!user_mode(regs)) {
+- if (!fixup_exception(regs)) {
++ if (!fixup_exception(regs, trapnr)) {
+ task->thread.error_code = error_code;
+ task->thread.trap_nr = trapnr;
+ die(str, regs, error_code);
+--- a/arch/x86/mm/extable.c
++++ b/arch/x86/mm/extable.c
+@@ -3,6 +3,9 @@
+ #include <linux/sort.h>
+ #include <asm/uaccess.h>
+
++typedef bool (*ex_handler_t)(const struct exception_table_entry *,
++ struct pt_regs *, int);
++
+ static inline unsigned long
+ ex_insn_addr(const struct exception_table_entry *x)
+ {
+@@ -13,11 +16,56 @@ ex_fixup_addr(const struct exception_tab
+ {
+ return (unsigned long)&x->fixup + x->fixup;
+ }
++static inline ex_handler_t
++ex_fixup_handler(const struct exception_table_entry *x)
++{
++ return (ex_handler_t)((unsigned long)&x->handler + x->handler);
++}
+
+-int fixup_exception(struct pt_regs *regs)
++bool ex_handler_default(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, int trapnr)
+ {
+- const struct exception_table_entry *fixup;
+- unsigned long new_ip;
++ regs->ip = ex_fixup_addr(fixup);
++ return true;
++}
++EXPORT_SYMBOL(ex_handler_default);
++
++bool ex_handler_fault(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, int trapnr)
++{
++ regs->ip = ex_fixup_addr(fixup);
++ regs->ax = trapnr;
++ return true;
++}
++EXPORT_SYMBOL_GPL(ex_handler_fault);
++
++bool ex_handler_ext(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, int trapnr)
++{
++ /* Special hack for uaccess_err */
++ current_thread_info()->uaccess_err = 1;
++ regs->ip = ex_fixup_addr(fixup);
++ return true;
++}
++EXPORT_SYMBOL(ex_handler_ext);
++
++bool ex_has_fault_handler(unsigned long ip)
++{
++ const struct exception_table_entry *e;
++ ex_handler_t handler;
++
++ e = search_exception_tables(ip);
++ if (!e)
++ return false;
++ handler = ex_fixup_handler(e);
++
++ return handler == ex_handler_fault;
++}
++
++int fixup_exception(struct pt_regs *regs, int trapnr)
++{
++ const struct exception_table_entry *e;
++ ex_handler_t handler;
+
+ #ifdef CONFIG_PNPBIOS
+ if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
+@@ -33,42 +81,34 @@ int fixup_exception(struct pt_regs *regs
+ }
+ #endif
+
+- fixup = search_exception_tables(regs->ip);
+- if (fixup) {
+- new_ip = ex_fixup_addr(fixup);
+-
+- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
+- /* Special hack for uaccess_err */
+- current_thread_info()->uaccess_err = 1;
+- new_ip -= 0x7ffffff0;
+- }
+- regs->ip = new_ip;
+- return 1;
+- }
++ e = search_exception_tables(regs->ip);
++ if (!e)
++ return 0;
+
+- return 0;
++ handler = ex_fixup_handler(e);
++ return handler(e, regs, trapnr);
+ }
+
+ /* Restricted version used during very early boot */
+ int __init early_fixup_exception(unsigned long *ip)
+ {
+- const struct exception_table_entry *fixup;
++ const struct exception_table_entry *e;
+ unsigned long new_ip;
++ ex_handler_t handler;
+
+- fixup = search_exception_tables(*ip);
+- if (fixup) {
+- new_ip = ex_fixup_addr(fixup);
+-
+- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
+- /* uaccess handling not supported during early boot */
+- return 0;
+- }
++ e = search_exception_tables(*ip);
++ if (!e)
++ return 0;
+
+- *ip = new_ip;
+- return 1;
+- }
++ new_ip = ex_fixup_addr(e);
++ handler = ex_fixup_handler(e);
++
++ /* special handling not supported during early boot */
++ if (handler != ex_handler_default)
++ return 0;
+
+- return 0;
++ *ip = new_ip;
++ return 1;
+ }
+
+ /*
+@@ -133,6 +173,8 @@ void sort_extable(struct exception_table
+ i += 4;
+ p->fixup += i;
+ i += 4;
++ p->handler += i;
++ i += 4;
+ }
+
+ sort(start, finish - start, sizeof(struct exception_table_entry),
+@@ -145,6 +187,8 @@ void sort_extable(struct exception_table
+ i += 4;
+ p->fixup -= i;
+ i += 4;
++ p->handler -= i;
++ i += 4;
+ }
+ }
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -663,7 +663,7 @@ no_context(struct pt_regs *regs, unsigne
+ int sig;
+
+ /* Are we prepared to handle this kernel fault? */
+- if (fixup_exception(regs)) {
++ if (fixup_exception(regs, X86_TRAP_PF)) {
+ /*
+ * Any interrupt that takes a fault gets the fixup. This makes
+ * the below recursive fault logic only apply to a faults from
+--- a/scripts/sortextable.c
++++ b/scripts/sortextable.c
+@@ -209,6 +209,35 @@ static int compare_relative_table(const
+ return 0;
+ }
+
++static void x86_sort_relative_table(char *extab_image, int image_size)
++{
++ int i;
++
++ i = 0;
++ while (i < image_size) {
++ uint32_t *loc = (uint32_t *)(extab_image + i);
++
++ w(r(loc) + i, loc);
++ w(r(loc + 1) + i + 4, loc + 1);
++ w(r(loc + 2) + i + 8, loc + 2);
++
++ i += sizeof(uint32_t) * 3;
++ }
++
++ qsort(extab_image, image_size / 12, 12, compare_relative_table);
++
++ i = 0;
++ while (i < image_size) {
++ uint32_t *loc = (uint32_t *)(extab_image + i);
++
++ w(r(loc) - i, loc);
++ w(r(loc + 1) - (i + 4), loc + 1);
++ w(r(loc + 2) - (i + 8), loc + 2);
++
++ i += sizeof(uint32_t) * 3;
++ }
++}
++
+ static void sort_relative_table(char *extab_image, int image_size)
+ {
+ int i;
+@@ -281,6 +310,9 @@ do_file(char const *const fname)
+ break;
+ case EM_386:
+ case EM_X86_64:
++ custom_sort = x86_sort_relative_table;
++ break;
++
+ case EM_S390:
+ custom_sort = sort_relative_table;
+ break;