From: Greg Kroah-Hartman Date: Fri, 3 Feb 2023 07:57:35 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.14.305~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=45a039cc6de2785a8d224175d11bf5932626b5f5;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: docs-fix-path-paste-o-for-sys-kernel-warn_count.patch exit-add-and-use-make_task_dead.patch exit-allow-oops_limit-to-be-disabled.patch exit-expose-oops_count-to-sysfs.patch exit-put-an-upper-limit-on-how-often-we-can-oops.patch exit-use-read_once-for-all-oops-warn-limit-reads.patch h8300-fix-build-errors-from-do_exit-to-make_task_dead-transition.patch hexagon-fix-function-name-in-die.patch ia64-make-ia64_mca_recovery-bool-instead-of-tristate.patch objtool-add-a-missing-comma-to-avoid-string-concatenation.patch panic-consolidate-open-coded-panic_on_warn-checks.patch panic-expose-warn_count-to-sysfs.patch panic-introduce-warn_limit.patch panic-unset-panic_on_warn-inside-panic.patch sysctl-add-a-new-register_sysctl_init-interface.patch --- diff --git a/queue-4.14/docs-fix-path-paste-o-for-sys-kernel-warn_count.patch b/queue-4.14/docs-fix-path-paste-o-for-sys-kernel-warn_count.patch new file mode 100644 index 00000000000..a34d6df8b7b --- /dev/null +++ b/queue-4.14/docs-fix-path-paste-o-for-sys-kernel-warn_count.patch @@ -0,0 +1,40 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:45 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:53 -0800 +Subject: docs: Fix path paste-o for /sys/kernel/warn_count +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, kernel test robot +Message-ID: <20230203003354.85691-15-ebiggers@kernel.org> + +From: Kees Cook + +commit 00dd027f721e0458418f7750d8a5a664ed3e5994 upstream. + +Running "make htmldocs" shows that "/sys/kernel/oops_count" was +duplicated. 
This should have been "warn_count": + + Warning: /sys/kernel/oops_count is defined 2 times: + ./Documentation/ABI/testing/sysfs-kernel-warn_count:0 + ./Documentation/ABI/testing/sysfs-kernel-oops_count:0 + +Fix the typo. + +Reported-by: kernel test robot +Link: https://lore.kernel.org/linux-doc/202212110529.A3Qav8aR-lkp@intel.com +Fixes: 8b05aa263361 ("panic: Expose "warn_count" to sysfs") +Cc: linux-hardening@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/ABI/testing/sysfs-kernel-warn_count | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/Documentation/ABI/testing/sysfs-kernel-warn_count ++++ b/Documentation/ABI/testing/sysfs-kernel-warn_count +@@ -1,4 +1,4 @@ +-What: /sys/kernel/oops_count ++What: /sys/kernel/warn_count + Date: November 2022 + KernelVersion: 6.2.0 + Contact: Linux Kernel Hardening List diff --git a/queue-4.14/exit-add-and-use-make_task_dead.patch b/queue-4.14/exit-add-and-use-make_task_dead.patch new file mode 100644 index 00000000000..527e1a97692 --- /dev/null +++ b/queue-4.14/exit-add-and-use-make_task_dead.patch @@ -0,0 +1,486 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:45 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:42 -0800 +Subject: exit: Add and use make_task_dead. +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org +Message-ID: <20230203003354.85691-4-ebiggers@kernel.org> + +From: "Eric W. Biederman" + +commit 0e25498f8cd43c1b5aa327f373dd094e9a006da7 upstream. + +There are two big uses of do_exit. The first is its designed use: to be +the guts of the exit(2) system call. The second use is to terminate +a task after something catastrophic has happened like a NULL pointer +dereference in kernel code. 
+ +Add a function make_task_dead that is initially exactly the same as +do_exit to cover the cases where do_exit is called to handle +catastrophic failure. In time this can probably be reduced to just a +light wrapper around do_task_dead. For now keep it exactly the same so +that there will be no behavioral differences introducing this new +concept. + +Replace all of the uses of do_exit that use it for catastrophic +task cleanup with make_task_dead to make it clear what the code +is doing. + +As part of this rename rewind_stack_do_exit to +rewind_stack_and_make_dead. + +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + arch/alpha/kernel/traps.c | 6 +++--- + arch/alpha/mm/fault.c | 2 +- + arch/arm/kernel/traps.c | 2 +- + arch/arm/mm/fault.c | 2 +- + arch/arm64/kernel/traps.c | 2 +- + arch/arm64/mm/fault.c | 2 +- + arch/h8300/kernel/traps.c | 2 +- + arch/h8300/mm/fault.c | 2 +- + arch/hexagon/kernel/traps.c | 2 +- + arch/ia64/kernel/mca_drv.c | 2 +- + arch/ia64/kernel/traps.c | 2 +- + arch/ia64/mm/fault.c | 2 +- + arch/m68k/kernel/traps.c | 2 +- + arch/m68k/mm/fault.c | 2 +- + arch/microblaze/kernel/exceptions.c | 4 ++-- + arch/mips/kernel/traps.c | 2 +- + arch/nios2/kernel/traps.c | 4 ++-- + arch/openrisc/kernel/traps.c | 2 +- + arch/parisc/kernel/traps.c | 2 +- + arch/powerpc/kernel/traps.c | 2 +- + arch/s390/kernel/dumpstack.c | 2 +- + arch/s390/kernel/nmi.c | 2 +- + arch/sh/kernel/traps.c | 2 +- + arch/sparc/kernel/traps_32.c | 4 +--- + arch/sparc/kernel/traps_64.c | 4 +--- + arch/x86/entry/entry_32.S | 6 +++--- + arch/x86/entry/entry_64.S | 6 +++--- + arch/x86/kernel/dumpstack.c | 4 ++-- + arch/xtensa/kernel/traps.c | 2 +- + include/linux/sched/task.h | 1 + + kernel/exit.c | 9 +++++++++ + tools/objtool/check.c | 3 ++- + 32 files changed, 50 insertions(+), 43 deletions(-) + +--- a/arch/alpha/kernel/traps.c ++++ b/arch/alpha/kernel/traps.c +@@ -192,7 +192,7 @@ die_if_kernel(char * str, struct pt_regs + 
local_irq_enable(); + while (1); + } +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + #ifndef CONFIG_MATHEMU +@@ -609,7 +609,7 @@ do_entUna(void * va, unsigned long opcod + + printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", + pc, va, opcode, reg); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + + got_exception: + /* Ok, we caught the exception, but we don't want it. Is there +@@ -664,7 +664,7 @@ got_exception: + local_irq_enable(); + while (1); + } +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + /* +--- a/arch/alpha/mm/fault.c ++++ b/arch/alpha/mm/fault.c +@@ -206,7 +206,7 @@ retry: + printk(KERN_ALERT "Unable to handle kernel paging request at " + "virtual address %016lx\n", address); + die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + + /* We ran out of memory, or some other thing happened to us that + made us unable to handle the page fault gracefully. */ +--- a/arch/arm/kernel/traps.c ++++ b/arch/arm/kernel/traps.c +@@ -342,7 +342,7 @@ static void oops_end(unsigned long flags + if (panic_on_oops) + panic("Fatal exception"); + if (signr) +- do_exit(signr); ++ make_task_dead(signr); + } + + /* +--- a/arch/arm/mm/fault.c ++++ b/arch/arm/mm/fault.c +@@ -150,7 +150,7 @@ __do_kernel_fault(struct mm_struct *mm, + show_pte(mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + /* +--- a/arch/arm64/kernel/traps.c ++++ b/arch/arm64/kernel/traps.c +@@ -223,7 +223,7 @@ void die(const char *str, struct pt_regs + raw_spin_unlock_irqrestore(&die_lock, flags); + + if (ret != NOTIFY_STOP) +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + void arm64_notify_die(const char *str, struct pt_regs *regs, +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -288,7 +288,7 @@ static void __do_kernel_fault(unsigned l + show_pte(addr); + die("Oops", regs, esr); + bust_spinlocks(0); +- do_exit(SIGKILL); ++ 
make_task_dead(SIGKILL); + } + + /* +--- a/arch/h8300/kernel/traps.c ++++ b/arch/h8300/kernel/traps.c +@@ -110,7 +110,7 @@ void die(const char *str, struct pt_regs + dump(fp); + + spin_unlock_irq(&die_lock); +- do_exit(SIGSEGV); ++ make_dead_task(SIGSEGV); + } + + static int kstack_depth_to_print = 24; +--- a/arch/h8300/mm/fault.c ++++ b/arch/h8300/mm/fault.c +@@ -52,7 +52,7 @@ asmlinkage int do_page_fault(struct pt_r + printk(" at virtual address %08lx\n", address); + if (!user_mode(regs)) + die("Oops", regs, error_code); +- do_exit(SIGKILL); ++ make_dead_task(SIGKILL); + + return 1; + } +--- a/arch/hexagon/kernel/traps.c ++++ b/arch/hexagon/kernel/traps.c +@@ -234,7 +234,7 @@ int die(const char *str, struct pt_regs + panic("Fatal exception"); + + oops_exit(); +- do_exit(err); ++ make_dead_task(err); + return 0; + } + +--- a/arch/ia64/kernel/mca_drv.c ++++ b/arch/ia64/kernel/mca_drv.c +@@ -176,7 +176,7 @@ mca_handler_bh(unsigned long paddr, void + spin_unlock(&mca_bh_lock); + + /* This process is about to be killed itself */ +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + /** +--- a/arch/ia64/kernel/traps.c ++++ b/arch/ia64/kernel/traps.c +@@ -85,7 +85,7 @@ die (const char *str, struct pt_regs *re + if (panic_on_oops) + panic("Fatal exception"); + +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + return 0; + } + +--- a/arch/ia64/mm/fault.c ++++ b/arch/ia64/mm/fault.c +@@ -300,7 +300,7 @@ retry: + regs = NULL; + bust_spinlocks(0); + if (regs) +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + return; + + out_of_memory: +--- a/arch/m68k/kernel/traps.c ++++ b/arch/m68k/kernel/traps.c +@@ -1141,7 +1141,7 @@ void die_if_kernel (char *str, struct pt + pr_crit("%s: %08x\n", str, nr); + show_registers(fp); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + asmlinkage void set_esp0(unsigned long ssp) +--- a/arch/m68k/mm/fault.c ++++ b/arch/m68k/mm/fault.c +@@ -50,7 +50,7 @@ int send_fault_sig(struct pt_regs 
*regs) + pr_alert("Unable to handle kernel access"); + pr_cont(" at virtual address %p\n", siginfo.si_addr); + die_if_kernel("Oops", regs, 0 /*error_code*/); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + return 1; +--- a/arch/microblaze/kernel/exceptions.c ++++ b/arch/microblaze/kernel/exceptions.c +@@ -44,10 +44,10 @@ void die(const char *str, struct pt_regs + pr_warn("Oops: %s, sig: %ld\n", str, err); + show_regs(fp); + spin_unlock_irq(&die_lock); +- /* do_exit() should take care of panic'ing from an interrupt ++ /* make_task_dead() should take care of panic'ing from an interrupt + * context so we don't handle it here + */ +- do_exit(err); ++ make_task_dead(err); + } + + /* for user application debugging */ +--- a/arch/mips/kernel/traps.c ++++ b/arch/mips/kernel/traps.c +@@ -414,7 +414,7 @@ void __noreturn die(const char *str, str + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + +- do_exit(sig); ++ make_task_dead(sig); + } + + extern struct exception_table_entry __start___dbe_table[]; +--- a/arch/nios2/kernel/traps.c ++++ b/arch/nios2/kernel/traps.c +@@ -43,10 +43,10 @@ void die(const char *str, struct pt_regs + show_regs(regs); + spin_unlock_irq(&die_lock); + /* +- * do_exit() should take care of panic'ing from an interrupt ++ * make_task_dead() should take care of panic'ing from an interrupt + * context so we don't handle it here + */ +- do_exit(err); ++ make_task_dead(err); + } + + void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr) +--- a/arch/openrisc/kernel/traps.c ++++ b/arch/openrisc/kernel/traps.c +@@ -265,7 +265,7 @@ void die(const char *str, struct pt_regs + __asm__ __volatile__("l.nop 1"); + do {} while (1); + #endif +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + /* This is normally the 'Oops' routine */ +--- a/arch/parisc/kernel/traps.c ++++ b/arch/parisc/kernel/traps.c +@@ -290,7 +290,7 @@ void die_if_kernel(char *str, struct pt_ + panic("Fatal exception"); + + oops_exit(); +- 
do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + /* gdb uses break 4,8 */ +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -211,7 +211,7 @@ static void oops_end(unsigned long flags + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); +- do_exit(signr); ++ make_task_dead(signr); + } + NOKPROBE_SYMBOL(oops_end); + +--- a/arch/s390/kernel/dumpstack.c ++++ b/arch/s390/kernel/dumpstack.c +@@ -186,5 +186,5 @@ void die(struct pt_regs *regs, const cha + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } +--- a/arch/s390/kernel/nmi.c ++++ b/arch/s390/kernel/nmi.c +@@ -94,7 +94,7 @@ void s390_handle_mcck(void) + "malfunction (code 0x%016lx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + } + EXPORT_SYMBOL_GPL(s390_handle_mcck); +--- a/arch/sh/kernel/traps.c ++++ b/arch/sh/kernel/traps.c +@@ -57,7 +57,7 @@ void die(const char *str, struct pt_regs + if (panic_on_oops) + panic("Fatal exception"); + +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + void die_if_kernel(const char *str, struct pt_regs *regs, long err) +--- a/arch/sparc/kernel/traps_32.c ++++ b/arch/sparc/kernel/traps_32.c +@@ -86,9 +86,7 @@ void __noreturn die_if_kernel(char *str, + } + printk("Instruction DUMP:"); + instruction_dump ((unsigned long *) regs->pc); +- if(regs->psr & PSR_PS) +- do_exit(SIGKILL); +- do_exit(SIGSEGV); ++ make_task_dead((regs->psr & PSR_PS) ? 
SIGKILL : SIGSEGV); + } + + void do_hw_interrupt(struct pt_regs *regs, unsigned long type) +--- a/arch/sparc/kernel/traps_64.c ++++ b/arch/sparc/kernel/traps_64.c +@@ -2547,9 +2547,7 @@ void __noreturn die_if_kernel(char *str, + } + if (panic_on_oops) + panic("Fatal exception"); +- if (regs->tstate & TSTATE_PRIV) +- do_exit(SIGKILL); +- do_exit(SIGSEGV); ++ make_task_dead((regs->tstate & TSTATE_PRIV)? SIGKILL : SIGSEGV); + } + EXPORT_SYMBOL(die_if_kernel); + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -1068,13 +1068,13 @@ ENTRY(async_page_fault) + END(async_page_fault) + #endif + +-ENTRY(rewind_stack_do_exit) ++ENTRY(rewind_stack_and_make_dead) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp + +- call do_exit ++ call make_task_dead + 1: jmp 1b +-END(rewind_stack_do_exit) ++END(rewind_stack_and_make_dead) +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1672,7 +1672,7 @@ ENTRY(ignore_sysret) + sysretl + END(ignore_sysret) + +-ENTRY(rewind_stack_do_exit) ++ENTRY(rewind_stack_and_make_dead) + UNWIND_HINT_FUNC + /* Prevent any naive code from trying to unwind to our caller. 
*/ + xorl %ebp, %ebp +@@ -1681,5 +1681,5 @@ ENTRY(rewind_stack_do_exit) + leaq -PTREGS_SIZE(%rax), %rsp + UNWIND_HINT_REGS + +- call do_exit +-END(rewind_stack_do_exit) ++ call make_task_dead ++END(rewind_stack_and_make_dead) +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -271,7 +271,7 @@ unsigned long oops_begin(void) + EXPORT_SYMBOL_GPL(oops_begin); + NOKPROBE_SYMBOL(oops_begin); + +-void __noreturn rewind_stack_do_exit(int signr); ++void __noreturn rewind_stack_and_make_dead(int signr); + + void oops_end(unsigned long flags, struct pt_regs *regs, int signr) + { +@@ -303,7 +303,7 @@ void oops_end(unsigned long flags, struc + * reuse the task stack and that existing poisons are invalid. + */ + kasan_unpoison_task_stack(current); +- rewind_stack_do_exit(signr); ++ rewind_stack_and_make_dead(signr); + } + NOKPROBE_SYMBOL(oops_end); + +--- a/arch/xtensa/kernel/traps.c ++++ b/arch/xtensa/kernel/traps.c +@@ -547,5 +547,5 @@ void die(const char * str, struct pt_reg + if (panic_on_oops) + panic("Fatal exception"); + +- do_exit(err); ++ make_task_dead(err); + } +--- a/include/linux/sched/task.h ++++ b/include/linux/sched/task.h +@@ -36,6 +36,7 @@ extern int sched_fork(unsigned long clon + extern void sched_dead(struct task_struct *p); + + void __noreturn do_task_dead(void); ++void __noreturn make_task_dead(int signr); + + extern void proc_caches_init(void); + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -920,6 +920,15 @@ void __noreturn do_exit(long code) + } + EXPORT_SYMBOL_GPL(do_exit); + ++void __noreturn make_task_dead(int signr) ++{ ++ /* ++ * Take the task off the cpu after something catastrophic has ++ * happened. 
++ */ ++ do_exit(signr); ++} ++ + void complete_and_exit(struct completion *comp, long code) + { + if (comp) +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -159,6 +159,7 @@ static int __dead_end_function(struct ob + "panic", + "do_exit", + "do_task_dead", ++ "make_task_dead", + "__module_put_and_exit", + "complete_and_exit", + "kvm_spurious_fault", +@@ -166,7 +167,7 @@ static int __dead_end_function(struct ob + "lbug_with_loc", + "fortify_panic", + "machine_real_restart", +- "rewind_stack_do_exit", ++ "rewind_stack_and_make_dead" + }; + + if (func->bind == STB_WEAK) diff --git a/queue-4.14/exit-allow-oops_limit-to-be-disabled.patch b/queue-4.14/exit-allow-oops_limit-to-be-disabled.patch new file mode 100644 index 00000000000..9a3f9bc4ad0 --- /dev/null +++ b/queue-4.14/exit-allow-oops_limit-to-be-disabled.patch @@ -0,0 +1,58 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:54 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:49 -0800 +Subject: exit: Allow oops_limit to be disabled +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Jonathan Corbet , Andrew Morton , Baolin Wang , "Jason A. Donenfeld" , Eric Biggers , Huang Ying , Arnd Bergmann , linux-doc@vger.kernel.org +Message-ID: <20230203003354.85691-11-ebiggers@kernel.org> + +From: Kees Cook + +commit de92f65719cd672f4b48397540b9f9eff67eca40 upstream. + +In preparation for keeping oops_limit logic in sync with warn_limit, +have oops_limit == 0 disable checking the Oops counter. + +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Andrew Morton +Cc: Baolin Wang +Cc: "Jason A. Donenfeld" +Cc: Eric Biggers +Cc: Huang Ying +Cc: "Eric W. 
Biederman" +Cc: Arnd Bergmann +Cc: linux-doc@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/sysctl/kernel.txt | 5 +++-- + kernel/exit.c | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +--- a/Documentation/sysctl/kernel.txt ++++ b/Documentation/sysctl/kernel.txt +@@ -519,8 +519,9 @@ scanned for a given scan. + oops_limit: + + Number of kernel oopses after which the kernel should panic when +-``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect +-as setting ``panic_on_oops=1``. ++``panic_on_oops`` is not set. Setting this to 0 disables checking ++the count. Setting this to 1 has the same effect as setting ++``panic_on_oops=1``. The default value is 10000. + + ============================================================== + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -984,7 +984,7 @@ void __noreturn make_task_dead(int signr + * To make sure this can't happen, place an upper bound on how often the + * kernel may oops without panic(). + */ +- if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) ++ if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) + panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + + do_exit(signr); diff --git a/queue-4.14/exit-expose-oops_count-to-sysfs.patch b/queue-4.14/exit-expose-oops_count-to-sysfs.patch new file mode 100644 index 00000000000..cb9135e76da --- /dev/null +++ b/queue-4.14/exit-expose-oops_count-to-sysfs.patch @@ -0,0 +1,83 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:55 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:48 -0800 +Subject: exit: Expose "oops_count" to sysfs +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . 
Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Arnd Bergmann , Luis Chamberlain +Message-ID: <20230203003354.85691-10-ebiggers@kernel.org> + +From: Kees Cook + +commit 9db89b41117024f80b38b15954017fb293133364 upstream. + +Since Oops count is now tracked and is a fairly interesting signal, add +the entry /sys/kernel/oops_count to expose it to userspace. + +Cc: "Eric W. Biederman" +Cc: Jann Horn +Cc: Arnd Bergmann +Reviewed-by: Luis Chamberlain +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221117234328.594699-3-keescook@chromium.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/ABI/testing/sysfs-kernel-oops_count | 6 ++++++ + kernel/exit.c | 22 ++++++++++++++++++++-- + 2 files changed, 26 insertions(+), 2 deletions(-) + create mode 100644 Documentation/ABI/testing/sysfs-kernel-oops_count + +--- /dev/null ++++ b/Documentation/ABI/testing/sysfs-kernel-oops_count +@@ -0,0 +1,6 @@ ++What: /sys/kernel/oops_count ++Date: November 2022 ++KernelVersion: 6.2.0 ++Contact: Linux Kernel Hardening List ++Description: ++ Shows how many times the system has Oopsed since last boot. 
+--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -62,6 +62,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -95,6 +96,25 @@ static __init int kernel_exit_sysctls_in + late_initcall(kernel_exit_sysctls_init); + #endif + ++static atomic_t oops_count = ATOMIC_INIT(0); ++ ++#ifdef CONFIG_SYSFS ++static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *page) ++{ ++ return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); ++} ++ ++static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); ++ ++static __init int kernel_exit_sysfs_init(void) ++{ ++ sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL); ++ return 0; ++} ++late_initcall(kernel_exit_sysfs_init); ++#endif ++ + static void __unhash_process(struct task_struct *p, bool group_dead) + { + nr_threads--; +@@ -949,8 +969,6 @@ EXPORT_SYMBOL_GPL(do_exit); + + void __noreturn make_task_dead(int signr) + { +- static atomic_t oops_count = ATOMIC_INIT(0); +- + /* + * Take the task off the cpu after something catastrophic has + * happened. diff --git a/queue-4.14/exit-put-an-upper-limit-on-how-often-we-can-oops.patch b/queue-4.14/exit-put-an-upper-limit-on-how-often-we-can-oops.patch new file mode 100644 index 00000000000..95fa855dd33 --- /dev/null +++ b/queue-4.14/exit-put-an-upper-limit-on-how-often-we-can-oops.patch @@ -0,0 +1,151 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:50 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:47 -0800 +Subject: exit: Put an upper limit on how often we can oops +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Luis Chamberlain +Message-ID: <20230203003354.85691-9-ebiggers@kernel.org> + +From: Jann Horn + +commit d4ccd54d28d3c8598e2354acc13e28c060961dbb upstream. 
+ +Many Linux systems are configured to not panic on oops; but allowing an +attacker to oops the system **really** often can make even bugs that look +completely unexploitable exploitable (like NULL dereferences and such) if +each crash elevates a refcount by one or a lock is taken in read mode, and +this causes a counter to eventually overflow. + +The most interesting counters for this are 32 bits wide (like open-coded +refcounts that don't use refcount_t). (The ldsem reader count on 32-bit +platforms is just 16 bits, but probably nobody cares about 32-bit platforms +that much nowadays.) + +So let's panic the system if the kernel is constantly oopsing. + +The speed of oopsing 2^32 times probably depends on several factors, like +how long the stack trace is and which unwinder you're using; an empirically +important one is whether your console is showing a graphical environment or +a text console that oopses will be printed to. +In a quick single-threaded benchmark, it looks like oopsing in a vfork() +child with a very short stack trace only takes ~510 microseconds per run +when a graphical console is active; but switching to a text console that +oopses are printed to slows it down around 87x, to ~45 milliseconds per +run. +(Adding more threads makes this faster, but the actual oops printing +happens under &die_lock on x86, so you can maybe speed this up by a factor +of around 2 and then any further improvement gets eaten up by lock +contention.) + +It looks like it would take around 8-12 days to overflow a 32-bit counter +with repeated oopsing on a multi-core X86 system running a graphical +environment; both me (in an X86 VM) and Seth (with a distro kernel on +normal hardware in a standard configuration) got numbers in that ballpark. 
+ +12 days aren't *that* short on a desktop system, and you'd likely need much +longer on a typical server system (assuming that people don't run graphical +desktop environments on their servers), and this is a *very* noisy and +violent approach to exploiting the kernel; and it also seems to take orders +of magnitude longer on some machines, probably because stuff like EFI +pstore will slow it down a ton if that's active. + +Signed-off-by: Jann Horn +Link: https://lore.kernel.org/r/20221107201317.324457-1-jannh@google.com +Reviewed-by: Luis Chamberlain +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221117234328.594699-2-keescook@chromium.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/sysctl/kernel.txt | 9 ++++++++ + kernel/exit.c | 43 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 52 insertions(+) + +--- a/Documentation/sysctl/kernel.txt ++++ b/Documentation/sysctl/kernel.txt +@@ -48,6 +48,7 @@ show up in /proc/sys/kernel: + - msgmnb + - msgmni + - nmi_watchdog ++- oops_limit + - osrelease + - ostype + - overflowgid +@@ -515,6 +516,14 @@ scanned for a given scan. + + ============================================================== + ++oops_limit: ++ ++Number of kernel oopses after which the kernel should panic when ++``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect ++as setting ``panic_on_oops=1``. ++ ++============================================================== ++ + osrelease, ostype & version: + + # cat osrelease +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -68,6 +68,33 @@ + #include + #include + ++/* ++ * The default value should be high enough to not crash a system that randomly ++ * crashes its kernel from time to time, but low enough to at least not permit ++ * overflowing 32-bit refcounts or the ldsem writer count. 
++ */ ++static unsigned int oops_limit = 10000; ++ ++#ifdef CONFIG_SYSCTL ++static struct ctl_table kern_exit_table[] = { ++ { ++ .procname = "oops_limit", ++ .data = &oops_limit, ++ .maxlen = sizeof(oops_limit), ++ .mode = 0644, ++ .proc_handler = proc_douintvec, ++ }, ++ { } ++}; ++ ++static __init int kernel_exit_sysctls_init(void) ++{ ++ register_sysctl_init("kernel", kern_exit_table); ++ return 0; ++} ++late_initcall(kernel_exit_sysctls_init); ++#endif ++ + static void __unhash_process(struct task_struct *p, bool group_dead) + { + nr_threads--; +@@ -922,10 +949,26 @@ EXPORT_SYMBOL_GPL(do_exit); + + void __noreturn make_task_dead(int signr) + { ++ static atomic_t oops_count = ATOMIC_INIT(0); ++ + /* + * Take the task off the cpu after something catastrophic has + * happened. + */ ++ ++ /* ++ * Every time the system oopses, if the oops happens while a reference ++ * to an object was held, the reference leaks. ++ * If the oops doesn't also leak memory, repeated oopsing can cause ++ * reference counters to wrap around (if they're not using refcount_t). ++ * This means that repeated oopsing can make unexploitable-looking bugs ++ * exploitable through repeated oopsing. ++ * To make sure this can't happen, place an upper bound on how often the ++ * kernel may oops without panic(). 
++ */ ++ if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) ++ panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); ++ + do_exit(signr); + } + diff --git a/queue-4.14/exit-use-read_once-for-all-oops-warn-limit-reads.patch b/queue-4.14/exit-use-read_once-for-all-oops-warn-limit-reads.patch new file mode 100644 index 00000000000..4cf309a9301 --- /dev/null +++ b/queue-4.14/exit-use-read_once-for-all-oops-warn-limit-reads.patch @@ -0,0 +1,80 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:46 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:54 -0800 +Subject: exit: Use READ_ONCE() for all oops/warn limit reads +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Arnd Bergmann , Petr Mladek , Andrew Morton , Luis Chamberlain , Marco Elver , tangmeng , Sebastian Andrzej Siewior , Tiezhu Yang , Peter Zijlstra +Message-ID: <20230203003354.85691-16-ebiggers@kernel.org> + +From: Kees Cook + +commit 7535b832c6399b5ebfc5b53af5c51dd915ee2538 upstream. + +Use a temporary variable to take full advantage of READ_ONCE() behavior. +Without this, the report (and even the test) might be out of sync with +the initial test. + +Reported-by: Peter Zijlstra +Link: https://lore.kernel.org/lkml/Y5x7GXeluFmZ8E0E@hirez.programming.kicks-ass.net +Fixes: 9fc9e278a5c0 ("panic: Introduce warn_limit") +Fixes: d4ccd54d28d3 ("exit: Put an upper limit on how often we can oops") +Cc: "Eric W. 
Biederman" +Cc: Jann Horn +Cc: Arnd Bergmann +Cc: Petr Mladek +Cc: Andrew Morton +Cc: Luis Chamberlain +Cc: Marco Elver +Cc: tangmeng +Cc: Sebastian Andrzej Siewior +Cc: Tiezhu Yang +Signed-off-by: Kees Cook +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + kernel/exit.c | 6 ++++-- + kernel/panic.c | 7 +++++-- + 2 files changed, 9 insertions(+), 4 deletions(-) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -973,6 +973,7 @@ void __noreturn make_task_dead(int signr + * Take the task off the cpu after something catastrophic has + * happened. + */ ++ unsigned int limit; + + /* + * Every time the system oopses, if the oops happens while a reference +@@ -984,8 +985,9 @@ void __noreturn make_task_dead(int signr + * To make sure this can't happen, place an upper bound on how often the + * kernel may oops without panic(). + */ +- if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) +- panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); ++ limit = READ_ONCE(oops_limit); ++ if (atomic_inc_return(&oops_count) >= limit && limit) ++ panic("Oopsed too often (kernel.oops_limit is %d)", limit); + + do_exit(signr); + } +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -165,12 +165,15 @@ EXPORT_SYMBOL(nmi_panic); + + void check_panic_on_warn(const char *origin) + { ++ unsigned int limit; ++ + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); + +- if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) ++ limit = READ_ONCE(warn_limit); ++ if (atomic_inc_return(&warn_count) >= limit && limit) + panic("%s: system warned too often (kernel.warn_limit is %d)", +- origin, warn_limit); ++ origin, limit); + } + + /** diff --git a/queue-4.14/h8300-fix-build-errors-from-do_exit-to-make_task_dead-transition.patch b/queue-4.14/h8300-fix-build-errors-from-do_exit-to-make_task_dead-transition.patch new file mode 100644 index 00000000000..0454db9943f --- /dev/null +++ 
b/queue-4.14/h8300-fix-build-errors-from-do_exit-to-make_task_dead-transition.patch @@ -0,0 +1,74 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:40 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:45 -0800 +Subject: h8300: Fix build errors from do_exit() to make_task_dead() transition +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Nathan Chancellor +Message-ID: <20230203003354.85691-7-ebiggers@kernel.org> + +From: Nathan Chancellor + +commit ab4ababdf77ccc56c7301c751dff49c79709c51c upstream. + +When building ARCH=h8300 defconfig: + +arch/h8300/kernel/traps.c: In function 'die': +arch/h8300/kernel/traps.c:109:2: error: implicit declaration of function +'make_dead_task' [-Werror=implicit-function-declaration] + 109 | make_dead_task(SIGSEGV); + | ^~~~~~~~~~~~~~ + +arch/h8300/mm/fault.c: In function 'do_page_fault': +arch/h8300/mm/fault.c:54:2: error: implicit declaration of function +'make_dead_task' [-Werror=implicit-function-declaration] + 54 | make_dead_task(SIGKILL); + | ^~~~~~~~~~~~~~ + +The function's name is make_task_dead(), change it so there is no more +build error. + +Additionally, include linux/sched/task.h in arch/h8300/kernel/traps.c +to avoid the same error because do_exit()'s declaration is in kernel.h +but make_task_dead()'s is in task.h, which is not included in traps.c. + +Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") +Signed-off-by: Nathan Chancellor +Link: https://lkml.kernel.org/r/20211227184851.2297759-3-nathan@kernel.org +Signed-off-by: Eric W. 
Biederman +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + arch/h8300/kernel/traps.c | 3 ++- + arch/h8300/mm/fault.c | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/h8300/kernel/traps.c ++++ b/arch/h8300/kernel/traps.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -110,7 +111,7 @@ void die(const char *str, struct pt_regs + dump(fp); + + spin_unlock_irq(&die_lock); +- make_dead_task(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + static int kstack_depth_to_print = 24; +--- a/arch/h8300/mm/fault.c ++++ b/arch/h8300/mm/fault.c +@@ -52,7 +52,7 @@ asmlinkage int do_page_fault(struct pt_r + printk(" at virtual address %08lx\n", address); + if (!user_mode(regs)) + die("Oops", regs, error_code); +- make_dead_task(SIGKILL); ++ make_task_dead(SIGKILL); + + return 1; + } diff --git a/queue-4.14/hexagon-fix-function-name-in-die.patch b/queue-4.14/hexagon-fix-function-name-in-die.patch new file mode 100644 index 00000000000..6c777b0ba4f --- /dev/null +++ b/queue-4.14/hexagon-fix-function-name-in-die.patch @@ -0,0 +1,43 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:40 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:44 -0800 +Subject: hexagon: Fix function name in die() +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Nathan Chancellor +Message-ID: <20230203003354.85691-6-ebiggers@kernel.org> + +From: Nathan Chancellor + +commit 4f0712ccec09c071e221242a2db9a6779a55a949 upstream. + +When building ARCH=hexagon defconfig: + +arch/hexagon/kernel/traps.c:217:2: error: implicit declaration of +function 'make_dead_task' [-Werror,-Wimplicit-function-declaration] + make_dead_task(err); + ^ + +The function's name is make_task_dead(), change it so there is no more +build error. 
+ +Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") +Signed-off-by: Nathan Chancellor +Link: https://lkml.kernel.org/r/20211227184851.2297759-2-nathan@kernel.org +Signed-off-by: Eric W. Biederman +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + arch/hexagon/kernel/traps.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/hexagon/kernel/traps.c ++++ b/arch/hexagon/kernel/traps.c +@@ -234,7 +234,7 @@ int die(const char *str, struct pt_regs + panic("Fatal exception"); + + oops_exit(); +- make_dead_task(err); ++ make_task_dead(err); + return 0; + } + diff --git a/queue-4.14/ia64-make-ia64_mca_recovery-bool-instead-of-tristate.patch b/queue-4.14/ia64-make-ia64_mca_recovery-bool-instead-of-tristate.patch new file mode 100644 index 00000000000..a6193036d54 --- /dev/null +++ b/queue-4.14/ia64-make-ia64_mca_recovery-bool-instead-of-tristate.patch @@ -0,0 +1,51 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:45 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:46 -0800 +Subject: ia64: make IA64_MCA_RECOVERY bool instead of tristate +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Tony Luck , Randy Dunlap , Christoph Hellwig , Christoph Hellwig , Andrew Morton , Linus Torvalds +Message-ID: <20230203003354.85691-8-ebiggers@kernel.org> + +From: Randy Dunlap + +commit dbecf9b8b8ce580f4e11afed9d61e8aa294cddd2 upstream. + +In linux-next, IA64_MCA_RECOVERY uses the (new) function +make_task_dead(), which is not exported for use by modules. Instead of +exporting it for one user, convert IA64_MCA_RECOVERY to be a bool +Kconfig symbol. + +In a config file from "kernel test robot " for a +different problem, this linker error was exposed when +CONFIG_IA64_MCA_RECOVERY=m. + +Fixes this build error: + + ERROR: modpost: "make_task_dead" [arch/ia64/kernel/mca_recovery.ko] undefined! 
+ +Link: https://lkml.kernel.org/r/20220124213129.29306-1-rdunlap@infradead.org +Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") +Signed-off-by: Randy Dunlap +Suggested-by: Christoph Hellwig +Reviewed-by: Christoph Hellwig +Reviewed-by: "Eric W. Biederman" +Cc: Tony Luck +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + arch/ia64/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/ia64/Kconfig ++++ b/arch/ia64/Kconfig +@@ -461,7 +461,7 @@ config ARCH_PROC_KCORE_TEXT + depends on PROC_KCORE + + config IA64_MCA_RECOVERY +- tristate "MCA recovery from errors other than TLB." ++ bool "MCA recovery from errors other than TLB." + + config PERFMON + bool "Performance monitor support" diff --git a/queue-4.14/objtool-add-a-missing-comma-to-avoid-string-concatenation.patch b/queue-4.14/objtool-add-a-missing-comma-to-avoid-string-concatenation.patch new file mode 100644 index 00000000000..0447dfe48a5 --- /dev/null +++ b/queue-4.14/objtool-add-a-missing-comma-to-avoid-string-concatenation.patch @@ -0,0 +1,45 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:40 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:43 -0800 +Subject: objtool: Add a missing comma to avoid string concatenation +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, kernel test robot +Message-ID: <20230203003354.85691-5-ebiggers@kernel.org> + +From: "Eric W. Biederman" + +commit 1fb466dff904e4a72282af336f2c355f011eec61 upstream. 
+ +Recently the kbuild robot reported two new errors: + +>> lib/kunit/kunit-example-test.o: warning: objtool: .text.unlikely: unexpected end of section +>> arch/x86/kernel/dumpstack.o: warning: objtool: oops_end() falls through to next function show_opcodes() + +I don't know why they did not occur in my test setup but after digging +it I realized I had accidentally dropped a comma in +tools/objtool/check.c when I renamed rewind_stack_do_exit to +rewind_stack_and_make_dead. + +Add that comma back to fix objtool errors. + +Link: https://lkml.kernel.org/r/202112140949.Uq5sFKR1-lkp@intel.com +Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") +Reported-by: kernel test robot +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + tools/objtool/check.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -167,7 +167,7 @@ static int __dead_end_function(struct ob + "lbug_with_loc", + "fortify_panic", + "machine_real_restart", +- "rewind_stack_and_make_dead" ++ "rewind_stack_and_make_dead", + }; + + if (func->bind == STB_WEAK) diff --git a/queue-4.14/panic-consolidate-open-coded-panic_on_warn-checks.patch b/queue-4.14/panic-consolidate-open-coded-panic_on_warn-checks.patch new file mode 100644 index 00000000000..432020eb864 --- /dev/null +++ b/queue-4.14/panic-consolidate-open-coded-panic_on_warn-checks.patch @@ -0,0 +1,118 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:49 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:50 -0800 +Subject: panic: Consolidate open-coded panic_on_warn checks +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . 
Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Marco Elver , Dmitry Vyukov , Ingo Molnar , Peter Zijlstra , Juri Lelli , Vincent Guittot , Dietmar Eggemann , Steven Rostedt , Ben Segall , Mel Gorman , Daniel Bristot de Oliveira , Valentin Schneider , Andrey Ryabinin , Alexander Potapenko , Andrey Konovalov , Vincenzo Frascino , Andrew Morton , David Gow , tangmeng , Shuah Khan , Petr Mladek , "Paul E. McKenney" , Sebastian Andrzej Siewior , "Guilherme G. Piccoli" , Tiezhu Yang , kasan-dev@googlegroups.com, linux-mm@kvack.org, Luis Chamberlain +Message-ID: <20230203003354.85691-12-ebiggers@kernel.org> + +From: Kees Cook + +commit 79cc1ba7badf9e7a12af99695a557e9ce27ee967 upstream. + +Several run-time checkers (KASAN, UBSAN, KFENCE, KCSAN, sched) roll +their own warnings, and each check "panic_on_warn". Consolidate this +into a single function so that future instrumentation can be added in +a single location. + +Cc: Marco Elver +Cc: Dmitry Vyukov +Cc: Ingo Molnar +Cc: Peter Zijlstra +Cc: Juri Lelli +Cc: Vincent Guittot +Cc: Dietmar Eggemann +Cc: Steven Rostedt +Cc: Ben Segall +Cc: Mel Gorman +Cc: Daniel Bristot de Oliveira +Cc: Valentin Schneider +Cc: Andrey Ryabinin +Cc: Alexander Potapenko +Cc: Andrey Konovalov +Cc: Vincenzo Frascino +Cc: Andrew Morton +Cc: David Gow +Cc: tangmeng +Cc: Jann Horn +Cc: Shuah Khan +Cc: Petr Mladek +Cc: "Paul E. McKenney" +Cc: Sebastian Andrzej Siewior +Cc: "Guilherme G. 
Piccoli" +Cc: Tiezhu Yang +Cc: kasan-dev@googlegroups.com +Cc: linux-mm@kvack.org +Reviewed-by: Luis Chamberlain +Signed-off-by: Kees Cook +Reviewed-by: Marco Elver +Reviewed-by: Andrey Konovalov +Link: https://lore.kernel.org/r/20221117234328.594699-4-keescook@chromium.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/kernel.h | 1 + + kernel/panic.c | 9 +++++++-- + kernel/sched/core.c | 3 +-- + mm/kasan/report.c | 3 +-- + 4 files changed, 10 insertions(+), 6 deletions(-) + +--- a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -293,6 +293,7 @@ extern long (*panic_blink)(int state); + __printf(1, 2) + void panic(const char *fmt, ...) __noreturn __cold; + void nmi_panic(struct pt_regs *regs, const char *msg); ++void check_panic_on_warn(const char *origin); + extern void oops_enter(void); + extern void oops_exit(void); + void print_oops_end_marker(void); +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -122,6 +122,12 @@ void nmi_panic(struct pt_regs *regs, con + } + EXPORT_SYMBOL(nmi_panic); + ++void check_panic_on_warn(const char *origin) ++{ ++ if (panic_on_warn) ++ panic("%s: panic_on_warn set ...\n", origin); ++} ++ + /** + * panic - halt the system + * @fmt: The text string to print +@@ -546,8 +552,7 @@ void __warn(const char *file, int line, + if (args) + vprintk(args->fmt, args->args); + +- if (panic_on_warn) +- panic("panic_on_warn set ...\n"); ++ check_panic_on_warn("kernel"); + + print_modules(); + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3185,8 +3185,7 @@ static noinline void __schedule_bug(stru + print_ip_sym(preempt_disable_ip); + pr_cont("\n"); + } +- if (panic_on_warn) +- panic("scheduling while atomic\n"); ++ check_panic_on_warn("scheduling while atomic"); + + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +--- a/mm/kasan/report.c ++++ b/mm/kasan/report.c +@@ -172,8 +172,7 @@ static void kasan_end_report(unsigned lo + 
pr_err("==================================================================\n"); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irqrestore(&report_lock, *flags); +- if (panic_on_warn) +- panic("panic_on_warn set ...\n"); ++ check_panic_on_warn("KASAN"); + kasan_enable_current(); + } + diff --git a/queue-4.14/panic-expose-warn_count-to-sysfs.patch b/queue-4.14/panic-expose-warn_count-to-sysfs.patch new file mode 100644 index 00000000000..8a916424485 --- /dev/null +++ b/queue-4.14/panic-expose-warn_count-to-sysfs.patch @@ -0,0 +1,86 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:46 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:52 -0800 +Subject: panic: Expose "warn_count" to sysfs +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Petr Mladek , Andrew Morton , tangmeng , "Guilherme G. Piccoli" , Sebastian Andrzej Siewior , Tiezhu Yang , Luis Chamberlain +Message-ID: <20230203003354.85691-14-ebiggers@kernel.org> + +From: Kees Cook + +commit 8b05aa26336113c4cea25f1c333ee8cd4fc212a6 upstream. + +Since Warn count is now tracked and is a fairly interesting signal, add +the entry /sys/kernel/warn_count to expose it to userspace. + +Cc: Petr Mladek +Cc: Andrew Morton +Cc: tangmeng +Cc: "Guilherme G. 
Piccoli" +Cc: Sebastian Andrzej Siewior +Cc: Tiezhu Yang +Reviewed-by: Luis Chamberlain +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221117234328.594699-6-keescook@chromium.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/ABI/testing/sysfs-kernel-warn_count | 6 ++++++ + kernel/panic.c | 22 ++++++++++++++++++++-- + 2 files changed, 26 insertions(+), 2 deletions(-) + create mode 100644 Documentation/ABI/testing/sysfs-kernel-warn_count + +--- /dev/null ++++ b/Documentation/ABI/testing/sysfs-kernel-warn_count +@@ -0,0 +1,6 @@ ++What: /sys/kernel/oops_count ++Date: November 2022 ++KernelVersion: 6.2.0 ++Contact: Linux Kernel Hardening List ++Description: ++ Shows how many times the system has Warned since last boot. +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #define PANIC_TIMER_STEP 100 + #define PANIC_BLINK_SPD 18 +@@ -68,6 +69,25 @@ static __init int kernel_panic_sysctls_i + late_initcall(kernel_panic_sysctls_init); + #endif + ++static atomic_t warn_count = ATOMIC_INIT(0); ++ ++#ifdef CONFIG_SYSFS ++static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *page) ++{ ++ return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); ++} ++ ++static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); ++ ++static __init int kernel_panic_sysfs_init(void) ++{ ++ sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); ++ return 0; ++} ++late_initcall(kernel_panic_sysfs_init); ++#endif ++ + static long no_blink(int state) + { + return 0; +@@ -145,8 +165,6 @@ EXPORT_SYMBOL(nmi_panic); + + void check_panic_on_warn(const char *origin) + { +- static atomic_t warn_count = ATOMIC_INIT(0); +- + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); + diff --git a/queue-4.14/panic-introduce-warn_limit.patch b/queue-4.14/panic-introduce-warn_limit.patch new file mode 100644 index 
00000000000..549ef880bb4 --- /dev/null +++ b/queue-4.14/panic-introduce-warn_limit.patch @@ -0,0 +1,115 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:49 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:51 -0800 +Subject: panic: Introduce warn_limit +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Jonathan Corbet , Andrew Morton , Baolin Wang , "Jason A. Donenfeld" , Eric Biggers , Huang Ying , Petr Mladek , tangmeng , "Guilherme G. Piccoli" , Tiezhu Yang , Sebastian Andrzej Siewior , linux-doc@vger.kernel.org, Luis Chamberlain +Message-ID: <20230203003354.85691-13-ebiggers@kernel.org> + +From: Kees Cook + +commit 9fc9e278a5c0b708eeffaf47d6eb0c82aa74ed78 upstream. + +Like oops_limit, add warn_limit for limiting the number of warnings when +panic_on_warn is not set. + +Cc: Jonathan Corbet +Cc: Andrew Morton +Cc: Baolin Wang +Cc: "Jason A. Donenfeld" +Cc: Eric Biggers +Cc: Huang Ying +Cc: Petr Mladek +Cc: tangmeng +Cc: "Guilherme G. Piccoli" +Cc: Tiezhu Yang +Cc: Sebastian Andrzej Siewior +Cc: linux-doc@vger.kernel.org +Reviewed-by: Luis Chamberlain +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221117234328.594699-5-keescook@chromium.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/sysctl/kernel.txt | 10 ++++++++++ + kernel/panic.c | 27 +++++++++++++++++++++++++++ + 2 files changed, 37 insertions(+) + +--- a/Documentation/sysctl/kernel.txt ++++ b/Documentation/sysctl/kernel.txt +@@ -94,6 +94,7 @@ show up in /proc/sys/kernel: + - threads-max + - unprivileged_bpf_disabled + - unknown_nmi_panic ++- warn_limit + - watchdog + - watchdog_thresh + - version +@@ -1072,6 +1073,15 @@ example. 
If a system hangs up, try pres + + ============================================================== + ++warn_limit: ++ ++Number of kernel warnings after which the kernel should panic when ++``panic_on_warn`` is not set. Setting this to 0 disables checking ++the warning count. Setting this to 1 has the same effect as setting ++``panic_on_warn=1``. The default value is 0. ++ ++============================================================== ++ + watchdog: + + This parameter can be used to disable or enable the soft lockup detector +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -39,6 +39,7 @@ static int pause_on_oops_flag; + static DEFINE_SPINLOCK(pause_on_oops_lock); + bool crash_kexec_post_notifiers; + int panic_on_warn __read_mostly; ++static unsigned int warn_limit __read_mostly; + + int panic_timeout = CONFIG_PANIC_TIMEOUT; + EXPORT_SYMBOL_GPL(panic_timeout); +@@ -47,6 +48,26 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list + + EXPORT_SYMBOL(panic_notifier_list); + ++#ifdef CONFIG_SYSCTL ++static struct ctl_table kern_panic_table[] = { ++ { ++ .procname = "warn_limit", ++ .data = &warn_limit, ++ .maxlen = sizeof(warn_limit), ++ .mode = 0644, ++ .proc_handler = proc_douintvec, ++ }, ++ { } ++}; ++ ++static __init int kernel_panic_sysctls_init(void) ++{ ++ register_sysctl_init("kernel", kern_panic_table); ++ return 0; ++} ++late_initcall(kernel_panic_sysctls_init); ++#endif ++ + static long no_blink(int state) + { + return 0; +@@ -124,8 +145,14 @@ EXPORT_SYMBOL(nmi_panic); + + void check_panic_on_warn(const char *origin) + { ++ static atomic_t warn_count = ATOMIC_INIT(0); ++ + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); ++ ++ if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) ++ panic("%s: system warned too often (kernel.warn_limit is %d)", ++ origin, warn_limit); + } + + /** diff --git a/queue-4.14/panic-unset-panic_on_warn-inside-panic.patch b/queue-4.14/panic-unset-panic_on_warn-inside-panic.patch new file mode 100644 index 
00000000000..084398a2b9a --- /dev/null +++ b/queue-4.14/panic-unset-panic_on_warn-inside-panic.patch @@ -0,0 +1,75 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:38 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:41 -0800 +Subject: panic: unset panic_on_warn inside panic() +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Andrey Ryabinin , Baoquan He , Jonathan Corbet , Xuefeng Li , Tiezhu Yang , Marco Elver , Andrew Morton , Linus Torvalds +Message-ID: <20230203003354.85691-3-ebiggers@kernel.org> + +From: Tiezhu Yang + +commit 1a2383e8b84c0451fd9b1eec3b9aab16f30b597c upstream. + +In the current code, the following three places need to unset +panic_on_warn before calling panic() to avoid recursive panics: + +kernel/kcsan/report.c: print_report() +kernel/sched/core.c: __schedule_bug() +mm/kfence/report.c: kfence_report_error() + +In order to avoid copy-pasting "panic_on_warn = 0" all over the places, +it is better to move it inside panic() and then remove it from the other +places. + +Link: https://lkml.kernel.org/r/1644324666-15947-4-git-send-email-yangtiezhu@loongson.cn +Signed-off-by: Tiezhu Yang +Reviewed-by: Marco Elver +Cc: Andrey Ryabinin +Cc: Baoquan He +Cc: Jonathan Corbet +Cc: Xuefeng Li +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + kernel/panic.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -139,6 +139,16 @@ void panic(const char *fmt, ...) + int old_cpu, this_cpu; + bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + ++ if (panic_on_warn) { ++ /* ++ * This thread may hit another WARN() in the panic path. ++ * Resetting this prevents additional WARN() from panicking the ++ * system on this thread. 
Other threads are blocked by the ++ * panic_mutex in panic(). ++ */ ++ panic_on_warn = 0; ++ } ++ + /* + * Disable local interrupts. This will prevent panic_smp_self_stop + * from deadlocking the first cpu that invokes the panic, since +@@ -536,16 +546,8 @@ void __warn(const char *file, int line, + if (args) + vprintk(args->fmt, args->args); + +- if (panic_on_warn) { +- /* +- * This thread may hit another WARN() in the panic path. +- * Resetting this prevents additional WARN() from panicking the +- * system on this thread. Other threads are blocked by the +- * panic_mutex in panic(). +- */ +- panic_on_warn = 0; ++ if (panic_on_warn) + panic("panic_on_warn set ...\n"); +- } + + print_modules(); + diff --git a/queue-4.14/series b/queue-4.14/series index e1f57451244..efa0786c5a8 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -42,3 +42,18 @@ drm-radeon-dp-make-radeon_dp_get_dp_link_config-static.patch scsi-qla2xxx-don-t-break-the-bsg-lib-abstractions.patch x86-asm-fix-an-assembler-warning-with-current-binutils.patch x86-entry-64-add-instruction-suffix-to-sysret.patch +sysctl-add-a-new-register_sysctl_init-interface.patch +panic-unset-panic_on_warn-inside-panic.patch +exit-add-and-use-make_task_dead.patch +objtool-add-a-missing-comma-to-avoid-string-concatenation.patch +hexagon-fix-function-name-in-die.patch +h8300-fix-build-errors-from-do_exit-to-make_task_dead-transition.patch +ia64-make-ia64_mca_recovery-bool-instead-of-tristate.patch +exit-put-an-upper-limit-on-how-often-we-can-oops.patch +exit-expose-oops_count-to-sysfs.patch +exit-allow-oops_limit-to-be-disabled.patch +panic-consolidate-open-coded-panic_on_warn-checks.patch +panic-introduce-warn_limit.patch +panic-expose-warn_count-to-sysfs.patch +docs-fix-path-paste-o-for-sys-kernel-warn_count.patch +exit-use-read_once-for-all-oops-warn-limit-reads.patch diff --git a/queue-4.14/sysctl-add-a-new-register_sysctl_init-interface.patch b/queue-4.14/sysctl-add-a-new-register_sysctl_init-interface.patch new 
file mode 100644 index 00000000000..3d330ab5317 --- /dev/null +++ b/queue-4.14/sysctl-add-a-new-register_sysctl_init-interface.patch @@ -0,0 +1,178 @@ +From stable-owner@vger.kernel.org Fri Feb 3 01:35:41 2023 +From: Eric Biggers +Date: Thu, 2 Feb 2023 16:33:40 -0800 +Subject: sysctl: add a new register_sysctl_init() interface +To: stable@vger.kernel.org +Cc: Harshit Mogalapalli , Kees Cook , SeongJae Park , Seth Jenkins , Jann Horn , "Eric W . Biederman" , linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org, Iurii Zaikin , Peter Zijlstra , Greg Kroah-Hartman , Paul Turner , Andy Shevchenko , Sebastian Reichel , Tetsuo Handa , Petr Mladek , Sergey Senozhatsky , Qing Wang , Benjamin LaHaise , Al Viro , Jan Kara , Amir Goldstein , Stephen Kitt , Antti Palosaari , Arnd Bergmann , Benjamin Herrenschmidt , Clemens Ladisch , David Airlie , Jani Nikula , Joel Becker , Joonas Lahtinen , Joseph Qi , Julia Lawall , Lukas Middendorf , Mark Fasheh , Phillip Potter , Rodrigo Vivi , Douglas Gilbert , "James E . J . Bottomley" , Jani Nikula , John Ogness , "Martin K . Petersen" , "Rafael J. Wysocki" , Steven Rostedt , Suren Baghdasaryan , "Theodore Ts'o" , Xiaoming Ni , Luis Chamberlain , Andrew Morton , Linus Torvalds +Message-ID: <20230203003354.85691-2-ebiggers@kernel.org> + +From: Xiaoming Ni + +commit 3ddd9a808cee7284931312f2f3e854c9617f44b2 upstream. + +Patch series "sysctl: first set of kernel/sysctl cleanups", v2. + +Finally had time to respin the series of the work we had started last +year on cleaning up the kernel/sysctl.c kitchen sink. People keep +stuffing their sysctls in that file and this creates a maintenance +burden. So this effort is aimed at placing sysctls where they actually +belong. + +I'm going to split patches up into series as there is quite a bit of +work.
+ +This first set adds register_sysctl_init() for uses of registering a +sysctl on the init path, adds const where missing to a few places, +generalizes common values so they are easier to share, and starts the +move of a few kernel/sysctl.c out where they belong. + +The majority of rework on v2 in this first patch set is 0-day fixes. +Eric Biederman's feedback is later addressed in subsequent patch sets. + +I'll only post the first two patch sets for now. We can address the +rest once the first two patch sets get completely reviewed / Acked. + +This patch (of 9): + +The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty +dishes, this makes it very difficult to maintain. + +To help with this maintenance let's start by moving sysctls to places +where they actually belong. The proc sysctl maintainers do not want to +know what sysctl knobs you wish to add for your own piece of code, we +just care about the core logic. + +Today though folks heavily rely on tables on kernel/sysctl.c so they can +easily just extend this table with their needed sysctls. In order to +help users move their sysctls out we need to provide a helper which can +be used during code initialization. + +We special-case the initialization use of register_sysctl() since it +*is* safe to fail, given all that sysctls do is provide a dynamic +interface to query or modify at runtime an existing variable. So the +use case of register_sysctl() on init should *not* stop if the sysctls +don't end up getting registered. It would be counter productive to stop +boot if a simple sysctl registration failed. + +Provide a helper for init then, and document the recommended init levels +to use for callers of this routine. We will later use this in +subsequent patches to start slimming down kernel/sysctl.c tables and +moving sysctl registration to the code which actually needs these +sysctls.
+ +[mcgrof@kernel.org: major commit log and documentation rephrasing also moved to fs/proc/proc_sysctl.c ] + +Link: https://lkml.kernel.org/r/20211123202347.818157-1-mcgrof@kernel.org +Link: https://lkml.kernel.org/r/20211123202347.818157-2-mcgrof@kernel.org +Signed-off-by: Xiaoming Ni +Signed-off-by: Luis Chamberlain +Reviewed-by: Kees Cook +Cc: Iurii Zaikin +Cc: "Eric W. Biederman" +Cc: Peter Zijlstra +Cc: Greg Kroah-Hartman +Cc: Paul Turner +Cc: Andy Shevchenko +Cc: Sebastian Reichel +Cc: Tetsuo Handa +Cc: Petr Mladek +Cc: Sergey Senozhatsky +Cc: Qing Wang +Cc: Benjamin LaHaise +Cc: Al Viro +Cc: Jan Kara +Cc: Amir Goldstein +Cc: Stephen Kitt +Cc: Antti Palosaari +Cc: Arnd Bergmann +Cc: Benjamin Herrenschmidt +Cc: Clemens Ladisch +Cc: David Airlie +Cc: Jani Nikula +Cc: Joel Becker +Cc: Joonas Lahtinen +Cc: Joseph Qi +Cc: Julia Lawall +Cc: Lukas Middendorf +Cc: Mark Fasheh +Cc: Phillip Potter +Cc: Rodrigo Vivi +Cc: Douglas Gilbert +Cc: James E.J. Bottomley +Cc: Jani Nikula +Cc: John Ogness +Cc: Martin K. Petersen +Cc: "Rafael J. 
Wysocki" +Cc: Steven Rostedt (VMware) +Cc: Suren Baghdasaryan +Cc: "Theodore Ts'o" +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + fs/proc/proc_sysctl.c | 33 +++++++++++++++++++++++++++++++++ + include/linux/sysctl.h | 3 +++ + 2 files changed, 36 insertions(+) + +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + static const struct dentry_operations proc_sys_dentry_operations; +@@ -1370,6 +1371,38 @@ struct ctl_table_header *register_sysctl + } + EXPORT_SYMBOL(register_sysctl); + ++/** ++ * __register_sysctl_init() - register sysctl table to path ++ * @path: path name for sysctl base ++ * @table: This is the sysctl table that needs to be registered to the path ++ * @table_name: The name of sysctl table, only used for log printing when ++ * registration fails ++ * ++ * The sysctl interface is used by userspace to query or modify at runtime ++ * a predefined value set on a variable. These variables however have default ++ * values pre-set. Code which depends on these variables will always work even ++ * if register_sysctl() fails. If register_sysctl() fails you'd just loose the ++ * ability to query or modify the sysctls dynamically at run time. Chances of ++ * register_sysctl() failing on init are extremely low, and so for both reasons ++ * this function does not return any error as it is used by initialization code. ++ * ++ * Context: Can only be called after your respective sysctl base path has been ++ * registered. So for instance, most base directories are registered early on ++ * init before init levels are processed through proc_sys_init() and ++ * sysctl_init(). 
++ */ ++void __init __register_sysctl_init(const char *path, struct ctl_table *table, ++ const char *table_name) ++{ ++ struct ctl_table_header *hdr = register_sysctl(path, table); ++ ++ if (unlikely(!hdr)) { ++ pr_err("failed when register_sysctl %s to %s\n", table_name, path); ++ return; ++ } ++ kmemleak_not_leak(hdr); ++} ++ + static char *append_path(const char *path, char *pos, const char *name) + { + int namelen; +--- a/include/linux/sysctl.h ++++ b/include/linux/sysctl.h +@@ -198,6 +198,9 @@ struct ctl_table_header *register_sysctl + void unregister_sysctl_table(struct ctl_table_header * table); + + extern int sysctl_init(void); ++extern void __register_sysctl_init(const char *path, struct ctl_table *table, ++ const char *table_name); ++#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) + + extern struct ctl_table sysctl_mount_point[]; +