From d9a566698245aaf314ebccebfb863d1083d2606b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 18 Jul 2017 17:49:59 +0200 Subject: [PATCH] 4.4-stable patches added patches: add-shutdown-to-struct-class.patch arm-move-elf_et_dyn_base-to-4mb.patch arm64-move-elf_et_dyn_base-to-4gb-4mb.patch binfmt_elf-use-elf_et_dyn_base-only-for-pie.patch cfg80211-check-if-pmkid-attribute-is-of-expected-size.patch cfg80211-define-nla_policy-for-nl80211_attr_local_mesh_power_mode.patch cfg80211-validate-frequencies-nested-in-nl80211_attr_scan_frequencies.patch checkpatch-silence-perl-5.26.0-unescaped-left-brace-warnings.patch exec-limit-arg-stack-to-at-most-75-of-_stk_lim.patch fs-dcache.c-fix-spin-lockup-issue-on-nlru-lock.patch irqchip-gic-v3-fix-out-of-bound-access-in-gic_set_affinity.patch kernel-extable.c-mark-core_kernel_text-notrace.patch mm-list_lru.c-fix-list_lru_count_node-to-be-race-free.patch mnt-in-propgate_umount-handle-visiting-mounts-in-any-order.patch mnt-in-umount-propagation-reparent-in-a-separate-pass.patch mnt-make-propagate_umount-less-slow-for-overlapping-mount-propagation-trees.patch parisc-dma-api-return-error-instead-of-bug_on-for-dma-ops-on-non-dma-devs.patch parisc-mm-ensure-irqs-are-off-in-switch_mm.patch parisc-report-sigsegv-instead-of-sigbus-when-running-out-of-stack.patch parisc-use-compat_sys_keyctl.patch powerpc-move-elf_et_dyn_base-to-4gb-4mb.patch s390-reduce-elf_et_dyn_base.patch selftests-capabilities-fix-the-test_execve-test.patch tools-lib-lockdep-reduce-max_lock_depth-to-avoid-overflowing-lock_chain-depth.patch tpm-get-rid-of-chip-pdev.patch tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch tpm-provide-strong-locking-for-device-removal.patch vt-fix-unchecked-__put_user-in-tioclinux-ioctls.patch --- queue-4.4/add-shutdown-to-struct-class.patch | 59 ++ .../arm-move-elf_et_dyn_base-to-4mb.patch | 71 ++ ...rm64-move-elf_et_dyn_base-to-4gb-4mb.patch | 61 ++ ...elf-use-elf_et_dyn_base-only-for-pie.patch | 170 ++++ 
...-pmkid-attribute-is-of-expected-size.patch | 42 + ...r-nl80211_attr_local_mesh_power_mode.patch | 36 + ...ted-in-nl80211_attr_scan_frequencies.patch | 41 + ...5.26.0-unescaped-left-brace-warnings.patch | 67 ++ ...-arg-stack-to-at-most-75-of-_stk_lim.patch | 52 ++ ...c-fix-spin-lockup-issue-on-nlru-lock.patch | 80 ++ ...-of-bound-access-in-gic_set_affinity.patch | 72 ++ ...able.c-mark-core_kernel_text-notrace.patch | 62 ++ ...-list_lru_count_node-to-be-race-free.patch | 87 ++ ...-handle-visiting-mounts-in-any-order.patch | 288 ++++++ ...pagation-reparent-in-a-separate-pass.patch | 171 ++++ ...-overlapping-mount-propagation-trees.patch | 204 +++++ ...f-bug_on-for-dma-ops-on-non-dma-devs.patch | 204 +++++ ...-mm-ensure-irqs-are-off-in-switch_mm.patch | 56 ++ ...-of-sigbus-when-running-out-of-stack.patch | 40 + queue-4.4/parisc-use-compat_sys_keyctl.patch | 33 + ...erpc-move-elf_et_dyn_base-to-4gb-4mb.patch | 63 ++ queue-4.4/s390-reduce-elf_et_dyn_base.patch | 66 ++ ...apabilities-fix-the-test_execve-test.patch | 74 ++ queue-4.4/series | 28 + ...o-avoid-overflowing-lock_chain-depth.patch | 53 ++ queue-4.4/tpm-get-rid-of-chip-pdev.patch | 821 ++++++++++++++++++ ...sue-a-tpm2_shutdown-for-tpm2-devices.patch | 101 +++ ...de-strong-locking-for-device-removal.patch | 307 +++++++ ...ecked-__put_user-in-tioclinux-ioctls.patch | 53 ++ 29 files changed, 3462 insertions(+) create mode 100644 queue-4.4/add-shutdown-to-struct-class.patch create mode 100644 queue-4.4/arm-move-elf_et_dyn_base-to-4mb.patch create mode 100644 queue-4.4/arm64-move-elf_et_dyn_base-to-4gb-4mb.patch create mode 100644 queue-4.4/binfmt_elf-use-elf_et_dyn_base-only-for-pie.patch create mode 100644 queue-4.4/cfg80211-check-if-pmkid-attribute-is-of-expected-size.patch create mode 100644 queue-4.4/cfg80211-define-nla_policy-for-nl80211_attr_local_mesh_power_mode.patch create mode 100644 queue-4.4/cfg80211-validate-frequencies-nested-in-nl80211_attr_scan_frequencies.patch create mode 100644 
queue-4.4/checkpatch-silence-perl-5.26.0-unescaped-left-brace-warnings.patch create mode 100644 queue-4.4/exec-limit-arg-stack-to-at-most-75-of-_stk_lim.patch create mode 100644 queue-4.4/fs-dcache.c-fix-spin-lockup-issue-on-nlru-lock.patch create mode 100644 queue-4.4/irqchip-gic-v3-fix-out-of-bound-access-in-gic_set_affinity.patch create mode 100644 queue-4.4/kernel-extable.c-mark-core_kernel_text-notrace.patch create mode 100644 queue-4.4/mm-list_lru.c-fix-list_lru_count_node-to-be-race-free.patch create mode 100644 queue-4.4/mnt-in-propgate_umount-handle-visiting-mounts-in-any-order.patch create mode 100644 queue-4.4/mnt-in-umount-propagation-reparent-in-a-separate-pass.patch create mode 100644 queue-4.4/mnt-make-propagate_umount-less-slow-for-overlapping-mount-propagation-trees.patch create mode 100644 queue-4.4/parisc-dma-api-return-error-instead-of-bug_on-for-dma-ops-on-non-dma-devs.patch create mode 100644 queue-4.4/parisc-mm-ensure-irqs-are-off-in-switch_mm.patch create mode 100644 queue-4.4/parisc-report-sigsegv-instead-of-sigbus-when-running-out-of-stack.patch create mode 100644 queue-4.4/parisc-use-compat_sys_keyctl.patch create mode 100644 queue-4.4/powerpc-move-elf_et_dyn_base-to-4gb-4mb.patch create mode 100644 queue-4.4/s390-reduce-elf_et_dyn_base.patch create mode 100644 queue-4.4/selftests-capabilities-fix-the-test_execve-test.patch create mode 100644 queue-4.4/tools-lib-lockdep-reduce-max_lock_depth-to-avoid-overflowing-lock_chain-depth.patch create mode 100644 queue-4.4/tpm-get-rid-of-chip-pdev.patch create mode 100644 queue-4.4/tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch create mode 100644 queue-4.4/tpm-provide-strong-locking-for-device-removal.patch create mode 100644 queue-4.4/vt-fix-unchecked-__put_user-in-tioclinux-ioctls.patch diff --git a/queue-4.4/add-shutdown-to-struct-class.patch b/queue-4.4/add-shutdown-to-struct-class.patch new file mode 100644 index 00000000000..d4245ac8e0b --- /dev/null +++ 
b/queue-4.4/add-shutdown-to-struct-class.patch @@ -0,0 +1,59 @@ +From f77af15165847406b15d8f70c382c4cb15846b2a Mon Sep 17 00:00:00 2001 +From: Josh Zimmerman +Date: Sun, 25 Jun 2017 14:53:23 -0700 +Subject: Add "shutdown" to "struct class". + +From: Josh Zimmerman + +commit f77af15165847406b15d8f70c382c4cb15846b2a upstream. + +The TPM class has some common shutdown code that must be executed for +all drivers. This adds some needed functionality for that. + +Signed-off-by: Josh Zimmerman +Acked-by: Greg Kroah-Hartman +Fixes: 74d6b3ceaa17 ("tpm: fix suspend/resume paths for TPM 2.0") +Reviewed-by: Jarkko Sakkinen +Tested-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: James Morris +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/core.c | 6 +++++- + include/linux/device.h | 2 ++ + 2 files changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -2094,7 +2094,11 @@ void device_shutdown(void) + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); + +- if (dev->bus && dev->bus->shutdown) { ++ if (dev->class && dev->class->shutdown) { ++ if (initcall_debug) ++ dev_info(dev, "shutdown\n"); ++ dev->class->shutdown(dev); ++ } else if (dev->bus && dev->bus->shutdown) { + if (initcall_debug) + dev_info(dev, "shutdown\n"); + dev->bus->shutdown(dev); +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -368,6 +368,7 @@ int subsys_virtual_register(struct bus_t + * @suspend: Used to put the device to sleep mode, usually to a low power + * state. + * @resume: Used to bring the device from the sleep mode. ++ * @shutdown: Called at shut-down time to quiesce the device. + * @ns_type: Callbacks so sysfs can detemine namespaces. + * @namespace: Namespace of the device belongs to this class. + * @pm: The default device power management operations of this class. 
+@@ -396,6 +397,7 @@ struct class { + + int (*suspend)(struct device *dev, pm_message_t state); + int (*resume)(struct device *dev); ++ int (*shutdown)(struct device *dev); + + const struct kobj_ns_type_operations *ns_type; + const void *(*namespace)(struct device *dev); diff --git a/queue-4.4/arm-move-elf_et_dyn_base-to-4mb.patch b/queue-4.4/arm-move-elf_et_dyn_base-to-4mb.patch new file mode 100644 index 00000000000..38e7a7c6b59 --- /dev/null +++ b/queue-4.4/arm-move-elf_et_dyn_base-to-4mb.patch @@ -0,0 +1,71 @@ +From 6a9af90a3bcde217a1c053e135f5f43e5d5fafbd Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Mon, 10 Jul 2017 15:52:40 -0700 +Subject: arm: move ELF_ET_DYN_BASE to 4MB + +From: Kees Cook + +commit 6a9af90a3bcde217a1c053e135f5f43e5d5fafbd upstream. + +Now that explicitly executed loaders are loaded in the mmap region, we +have more freedom to decide where we position PIE binaries in the +address space to avoid possible collisions with mmap or stack regions. + +4MB is chosen here mainly to have parity with x86, where this is the +traditional minimum load location, likely to avoid historically +requiring a 4MB page table entry when only a portion of the first 4MB +would be used (since the NULL address is avoided). + +For ARM the position could be 0x8000, the standard ET_EXEC load address, +but that is needlessly close to the NULL address, and anyone running PIE +on 32-bit ARM will have an MMU, so the tight mapping is not needed. + +Link: http://lkml.kernel.org/r/1498154792-49952-2-git-send-email-keescook@chromium.org +Signed-off-by: Kees Cook +Cc: Russell King +Cc: Catalin Marinas +Cc: Will Deacon +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Michael Ellerman +Cc: Martin Schwidefsky +Cc: Heiko Carstens +Cc: James Hogan +Cc: Pratyush Anand +Cc: Ingo Molnar +Cc: "H. 
Peter Anvin" +Cc: Alexander Viro +Cc: Andy Lutomirski +Cc: Daniel Micay +Cc: Dmitry Safonov +Cc: Grzegorz Andrejczuk +Cc: Kees Cook +Cc: Masahiro Yamada +Cc: Qualys Security Advisory +Cc: Rik van Riel +Cc: Thomas Gleixner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/include/asm/elf.h | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/arch/arm/include/asm/elf.h ++++ b/arch/arm/include/asm/elf.h +@@ -112,12 +112,8 @@ int dump_task_regs(struct task_struct *t + #define CORE_DUMP_USE_REGSET + #define ELF_EXEC_PAGESIZE 4096 + +-/* This is the location that an ET_DYN program is loaded if exec'ed. Typical +- use of this is to invoke "./ld.so someprog" to test out a new version of +- the loader. We need to make sure that it is out of the way of the program +- that it will "exec", and that there is sufficient room for the brk. */ +- +-#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) ++/* This is the base location for PIE (ET_DYN with INTERP) loads. */ ++#define ELF_ET_DYN_BASE 0x400000UL + + /* When the program starts, a1 contains a pointer to a function to be + registered with atexit, as per the SVR4 ABI. A value of 0 means we diff --git a/queue-4.4/arm64-move-elf_et_dyn_base-to-4gb-4mb.patch b/queue-4.4/arm64-move-elf_et_dyn_base-to-4gb-4mb.patch new file mode 100644 index 00000000000..7cf8e25c488 --- /dev/null +++ b/queue-4.4/arm64-move-elf_et_dyn_base-to-4gb-4mb.patch @@ -0,0 +1,61 @@ +From 02445990a96e60a67526510d8b00f7e3d14101c3 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Mon, 10 Jul 2017 15:52:44 -0700 +Subject: arm64: move ELF_ET_DYN_BASE to 4GB / 4MB + +From: Kees Cook + +commit 02445990a96e60a67526510d8b00f7e3d14101c3 upstream. + +Now that explicitly executed loaders are loaded in the mmap region, we +have more freedom to decide where we position PIE binaries in the +address space to avoid possible collisions with mmap or stack regions. 
+ +For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit +address space for 32-bit pointers. On 32-bit use 4MB, to match ARM. +This could be 0x8000, the standard ET_EXEC load address, but that is +needlessly close to the NULL address, and anyone running arm compat PIE +will have an MMU, so the tight mapping is not needed. + +Link: http://lkml.kernel.org/r/1498251600-132458-4-git-send-email-keescook@chromium.org +Signed-off-by: Kees Cook +Cc: Ard Biesheuvel +Cc: Catalin Marinas +Cc: Mark Rutland +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/elf.h | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/arm64/include/asm/elf.h ++++ b/arch/arm64/include/asm/elf.h +@@ -120,12 +120,11 @@ typedef struct user_fpsimd_state elf_fpr + #define ELF_EXEC_PAGESIZE PAGE_SIZE + + /* +- * This is the location that an ET_DYN program is loaded if exec'ed. Typical +- * use of this is to invoke "./ld.so someprog" to test out a new version of +- * the loader. We need to make sure that it is out of the way of the program +- * that it will "exec", and that there is sufficient room for the brk. ++ * This is the base location for PIE (ET_DYN with INTERP) loads. On ++ * 64-bit, this is raised to 4GB to leave the entire 32-bit address ++ * space open for things that want to use the area for 32-bit pointers. + */ +-#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) ++#define ELF_ET_DYN_BASE 0x100000000UL + + /* + * When the program starts, a1 contains a pointer to a function to be +@@ -165,7 +164,8 @@ extern int arch_setup_additional_pages(s + + #ifdef CONFIG_COMPAT + +-#define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3) ++/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ ++#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL + + /* AArch32 registers. 
*/ + #define COMPAT_ELF_NGREG 18 diff --git a/queue-4.4/binfmt_elf-use-elf_et_dyn_base-only-for-pie.patch b/queue-4.4/binfmt_elf-use-elf_et_dyn_base-only-for-pie.patch new file mode 100644 index 00000000000..6889e50f5af --- /dev/null +++ b/queue-4.4/binfmt_elf-use-elf_et_dyn_base-only-for-pie.patch @@ -0,0 +1,170 @@ +From eab09532d40090698b05a07c1c87f39fdbc5fab5 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Mon, 10 Jul 2017 15:52:37 -0700 +Subject: binfmt_elf: use ELF_ET_DYN_BASE only for PIE + +From: Kees Cook + +commit eab09532d40090698b05a07c1c87f39fdbc5fab5 upstream. + +The ELF_ET_DYN_BASE position was originally intended to keep loaders +away from ET_EXEC binaries. (For example, running "/lib/ld-linux.so.2 +/bin/cat" might cause the subsequent load of /bin/cat into where the +loader had been loaded.) + +With the advent of PIE (ET_DYN binaries with an INTERP Program Header), +ELF_ET_DYN_BASE continued to be used since the kernel was only looking +at ET_DYN. However, since ELF_ET_DYN_BASE is traditionally set at the +top 1/3rd of the TASK_SIZE, a substantial portion of the address space +is unused. + +For 32-bit tasks when RLIMIT_STACK is set to RLIM_INFINITY, programs are +loaded above the mmap region. This means they can be made to collide +(CVE-2017-1000370) or nearly collide (CVE-2017-1000371) with +pathological stack regions. + +Lowering ELF_ET_DYN_BASE solves both by moving programs below the mmap +region in all cases, and will now additionally avoid programs falling +back to the mmap region by enforcing MAP_FIXED for program loads (i.e. +if it would have collided with the stack, now it will fail to load +instead of falling back to the mmap region). + +To allow for a lower ELF_ET_DYN_BASE, loaders (ET_DYN without INTERP) +are loaded into the mmap region, leaving space available for either an +ET_EXEC binary with a fixed location or PIE being loaded into mmap by +the loader. 
Only PIE programs are loaded offset from ELF_ET_DYN_BASE, +which means architectures can now safely lower their values without risk +of loaders colliding with their subsequently loaded programs. + +For 64-bit, ELF_ET_DYN_BASE is best set to 4GB to allow runtimes to use +the entire 32-bit address space for 32-bit pointers. + +Thanks to PaX Team, Daniel Micay, and Rik van Riel for inspiration and +suggestions on how to implement this solution. + +Fixes: d1fd836dcf00 ("mm: split ET_DYN ASLR from mmap ASLR") +Link: http://lkml.kernel.org/r/20170621173201.GA114489@beast +Signed-off-by: Kees Cook +Acked-by: Rik van Riel +Cc: Daniel Micay +Cc: Qualys Security Advisory +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: "H. Peter Anvin" +Cc: Alexander Viro +Cc: Dmitry Safonov +Cc: Andy Lutomirski +Cc: Grzegorz Andrejczuk +Cc: Masahiro Yamada +Cc: Benjamin Herrenschmidt +Cc: Catalin Marinas +Cc: Heiko Carstens +Cc: James Hogan +Cc: Martin Schwidefsky +Cc: Michael Ellerman +Cc: Paul Mackerras +Cc: Pratyush Anand +Cc: Russell King +Cc: Will Deacon +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/elf.h | 13 +++++---- + fs/binfmt_elf.c | 59 ++++++++++++++++++++++++++++++++++++++------- + 2 files changed, 58 insertions(+), 14 deletions(-) + +--- a/arch/x86/include/asm/elf.h ++++ b/arch/x86/include/asm/elf.h +@@ -245,12 +245,13 @@ extern int force_personality32; + #define CORE_DUMP_USE_REGSET + #define ELF_EXEC_PAGESIZE 4096 + +-/* This is the location that an ET_DYN program is loaded if exec'ed. Typical +- use of this is to invoke "./ld.so someprog" to test out a new version of +- the loader. We need to make sure that it is out of the way of the program +- that it will "exec", and that there is sufficient room for the brk. */ +- +-#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) ++/* ++ * This is the base location for PIE (ET_DYN with INTERP) loads. 
On ++ * 64-bit, this is raised to 4GB to leave the entire 32-bit address ++ * space open for things that want to use the area for 32-bit pointers. ++ */ ++#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ ++ 0x100000000UL) + + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, +--- a/fs/binfmt_elf.c ++++ b/fs/binfmt_elf.c +@@ -905,17 +905,60 @@ static int load_elf_binary(struct linux_ + elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; + + vaddr = elf_ppnt->p_vaddr; ++ /* ++ * If we are loading ET_EXEC or we have already performed ++ * the ET_DYN load_addr calculations, proceed normally. ++ */ + if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { + elf_flags |= MAP_FIXED; + } else if (loc->elf_ex.e_type == ET_DYN) { +- /* Try and get dynamic programs out of the way of the +- * default mmap base, as well as whatever program they +- * might try to exec. This is because the brk will +- * follow the loader, and is not movable. */ +- load_bias = ELF_ET_DYN_BASE - vaddr; +- if (current->flags & PF_RANDOMIZE) +- load_bias += arch_mmap_rnd(); +- load_bias = ELF_PAGESTART(load_bias); ++ /* ++ * This logic is run once for the first LOAD Program ++ * Header for ET_DYN binaries to calculate the ++ * randomization (load_bias) for all the LOAD ++ * Program Headers, and to calculate the entire ++ * size of the ELF mapping (total_size). (Note that ++ * load_addr_set is set to true later once the ++ * initial mapping is performed.) ++ * ++ * There are effectively two types of ET_DYN ++ * binaries: programs (i.e. PIE: ET_DYN with INTERP) ++ * and loaders (ET_DYN without INTERP, since they ++ * _are_ the ELF interpreter). The loaders must ++ * be loaded away from programs since the program ++ * may otherwise collide with the loader (especially ++ * for ET_EXEC which does not have a randomized ++ * position). 
For example to handle invocations of ++ * "./ld.so someprog" to test out a new version of ++ * the loader, the subsequent program that the ++ * loader loads must avoid the loader itself, so ++ * they cannot share the same load range. Sufficient ++ * room for the brk must be allocated with the ++ * loader as well, since brk must be available with ++ * the loader. ++ * ++ * Therefore, programs are loaded offset from ++ * ELF_ET_DYN_BASE and loaders are loaded into the ++ * independently randomized mmap region (0 load_bias ++ * without MAP_FIXED). ++ */ ++ if (elf_interpreter) { ++ load_bias = ELF_ET_DYN_BASE; ++ if (current->flags & PF_RANDOMIZE) ++ load_bias += arch_mmap_rnd(); ++ elf_flags |= MAP_FIXED; ++ } else ++ load_bias = 0; ++ ++ /* ++ * Since load_bias is used for all subsequent loading ++ * calculations, we must lower it by the first vaddr ++ * so that the remaining calculations based on the ++ * ELF vaddrs will be correctly offset. The result ++ * is then page aligned. ++ */ ++ load_bias = ELF_PAGESTART(load_bias - vaddr); ++ + total_size = total_mapping_size(elf_phdata, + loc->elf_ex.e_phnum); + if (!total_size) { diff --git a/queue-4.4/cfg80211-check-if-pmkid-attribute-is-of-expected-size.patch b/queue-4.4/cfg80211-check-if-pmkid-attribute-is-of-expected-size.patch new file mode 100644 index 00000000000..031da429fc9 --- /dev/null +++ b/queue-4.4/cfg80211-check-if-pmkid-attribute-is-of-expected-size.patch @@ -0,0 +1,42 @@ +From 9361df14d1cbf966409d5d6f48bb334384fbe138 Mon Sep 17 00:00:00 2001 +From: Srinivas Dasari +Date: Fri, 7 Jul 2017 01:43:39 +0300 +Subject: cfg80211: Check if PMKID attribute is of expected size + +From: Srinivas Dasari + +commit 9361df14d1cbf966409d5d6f48bb334384fbe138 upstream. + +nla policy checks for only maximum length of the attribute data +when the attribute type is NLA_BINARY. If userspace sends less +data than specified, the wireless drivers may access illegal +memory. 
When type is NLA_UNSPEC, nla policy check ensures that +userspace sends minimum specified length number of bytes. + +Remove type assignment to NLA_BINARY from nla_policy of +NL80211_ATTR_PMKID to make this NLA_UNSPEC and to make sure minimum +WLAN_PMKID_LEN bytes are received from userspace with +NL80211_ATTR_PMKID. + +Fixes: 67fbb16be69d ("nl80211: PMKSA caching support") +Signed-off-by: Srinivas Dasari +Signed-off-by: Jouni Malinen +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/wireless/nl80211.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -302,8 +302,7 @@ static const struct nla_policy nl80211_p + [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, + [NL80211_ATTR_PID] = { .type = NLA_U32 }, + [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, +- [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, +- .len = WLAN_PMKID_LEN }, ++ [NL80211_ATTR_PMKID] = { .len = WLAN_PMKID_LEN }, + [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, + [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, + [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, diff --git a/queue-4.4/cfg80211-define-nla_policy-for-nl80211_attr_local_mesh_power_mode.patch b/queue-4.4/cfg80211-define-nla_policy-for-nl80211_attr_local_mesh_power_mode.patch new file mode 100644 index 00000000000..3d1c93fdb04 --- /dev/null +++ b/queue-4.4/cfg80211-define-nla_policy-for-nl80211_attr_local_mesh_power_mode.patch @@ -0,0 +1,36 @@ +From 8feb69c7bd89513be80eb19198d48f154b254021 Mon Sep 17 00:00:00 2001 +From: Srinivas Dasari +Date: Fri, 7 Jul 2017 01:43:41 +0300 +Subject: cfg80211: Define nla_policy for NL80211_ATTR_LOCAL_MESH_POWER_MODE + +From: Srinivas Dasari + +commit 8feb69c7bd89513be80eb19198d48f154b254021 upstream. 
+ +Buffer overread may happen as nl80211_set_station() reads 4 bytes +from the attribute NL80211_ATTR_LOCAL_MESH_POWER_MODE without +validating the size of data received when userspace sends less +than 4 bytes of data with NL80211_ATTR_LOCAL_MESH_POWER_MODE. +Define nla_policy for NL80211_ATTR_LOCAL_MESH_POWER_MODE to avoid +the buffer overread. + +Fixes: 3b1c5a5307f ("{cfg,nl}80211: mesh power mode primitives and userspace access") +Signed-off-by: Srinivas Dasari +Signed-off-by: Jouni Malinen +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/wireless/nl80211.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -359,6 +359,7 @@ static const struct nla_policy nl80211_p + [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, + [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, + [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, ++ [NL80211_ATTR_LOCAL_MESH_POWER_MODE] = {. type = NLA_U32 }, + [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, + [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, diff --git a/queue-4.4/cfg80211-validate-frequencies-nested-in-nl80211_attr_scan_frequencies.patch b/queue-4.4/cfg80211-validate-frequencies-nested-in-nl80211_attr_scan_frequencies.patch new file mode 100644 index 00000000000..4a2ec6d2396 --- /dev/null +++ b/queue-4.4/cfg80211-validate-frequencies-nested-in-nl80211_attr_scan_frequencies.patch @@ -0,0 +1,41 @@ +From d7f13f7450369281a5d0ea463cc69890a15923ae Mon Sep 17 00:00:00 2001 +From: Srinivas Dasari +Date: Fri, 7 Jul 2017 01:43:42 +0300 +Subject: cfg80211: Validate frequencies nested in NL80211_ATTR_SCAN_FREQUENCIES + +From: Srinivas Dasari + +commit d7f13f7450369281a5d0ea463cc69890a15923ae upstream. 
+ +validate_scan_freqs() retrieves frequencies from attributes +nested in the attribute NL80211_ATTR_SCAN_FREQUENCIES with +nla_get_u32(), which reads 4 bytes from each attribute +without validating the size of data received. Attributes +nested in NL80211_ATTR_SCAN_FREQUENCIES don't have an nla policy. + +Validate size of each attribute before parsing to avoid potential buffer +overread. + +Fixes: 2a519311926 ("cfg80211/nl80211: scanning (and mac80211 update to use it)") +Signed-off-by: Srinivas Dasari +Signed-off-by: Jouni Malinen +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/wireless/nl80211.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -5706,6 +5706,10 @@ static int validate_scan_freqs(struct nl + struct nlattr *attr1, *attr2; + int n_channels = 0, tmp1, tmp2; + ++ nla_for_each_nested(attr1, freqs, tmp1) ++ if (nla_len(attr1) != sizeof(u32)) ++ return 0; ++ + nla_for_each_nested(attr1, freqs, tmp1) { + n_channels++; + /* diff --git a/queue-4.4/checkpatch-silence-perl-5.26.0-unescaped-left-brace-warnings.patch b/queue-4.4/checkpatch-silence-perl-5.26.0-unescaped-left-brace-warnings.patch new file mode 100644 index 00000000000..3115f3ebc16 --- /dev/null +++ b/queue-4.4/checkpatch-silence-perl-5.26.0-unescaped-left-brace-warnings.patch @@ -0,0 +1,67 @@ +From 8d81ae05d0176da1c54aeaed697fa34be5c5575e Mon Sep 17 00:00:00 2001 +From: Cyril Bur +Date: Mon, 10 Jul 2017 15:52:21 -0700 +Subject: checkpatch: silence perl 5.26.0 unescaped left brace warnings + +From: Cyril Bur + +commit 8d81ae05d0176da1c54aeaed697fa34be5c5575e upstream. + +As of perl 5, version 26, subversion 0 (v5.26.0) some new warnings have +occurred when running checkpatch. + +Unescaped left brace in regex is deprecated here (and will be fatal in +Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){ +<-- HERE \s*/ at scripts/checkpatch.pl line 3544. 
+ +Unescaped left brace in regex is deprecated here (and will be fatal in +Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){ +<-- HERE \s*/ at scripts/checkpatch.pl line 3885. + +Unescaped left brace in regex is deprecated here (and will be fatal in +Perl 5.30), passed through in regex; marked by <-- HERE in +m/^(\+.*(?:do|\))){ <-- HERE / at scripts/checkpatch.pl line 4374. + +It seems perfectly reasonable to do as the warning suggests and simply +escape the left brace in these three locations. + +Link: http://lkml.kernel.org/r/20170607060135.17384-1-cyrilbur@gmail.com +Signed-off-by: Cyril Bur +Acked-by: Joe Perches +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + scripts/checkpatch.pl | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/scripts/checkpatch.pl ++++ b/scripts/checkpatch.pl +@@ -3252,7 +3252,7 @@ sub process { + $fixedline =~ s/\s*=\s*$/ = {/; + fix_insert_line($fixlinenr, $fixedline); + $fixedline = $line; +- $fixedline =~ s/^(.\s*){\s*/$1/; ++ $fixedline =~ s/^(.\s*)\{\s*/$1/; + fix_insert_line($fixlinenr, $fixedline); + } + } +@@ -3602,7 +3602,7 @@ sub process { + my $fixedline = rtrim($prevrawline) . " {"; + fix_insert_line($fixlinenr, $fixedline); + $fixedline = $rawline; +- $fixedline =~ s/^(.\s*){\s*/$1\t/; ++ $fixedline =~ s/^(.\s*)\{\s*/$1\t/; + if ($fixedline !~ /^\+\s*$/) { + fix_insert_line($fixlinenr, $fixedline); + } +@@ -4091,7 +4091,7 @@ sub process { + if (ERROR("SPACING", + "space required before the open brace '{'\n" . 
$herecurr) && + $fix) { +- $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/; ++ $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/; + } + } + diff --git a/queue-4.4/exec-limit-arg-stack-to-at-most-75-of-_stk_lim.patch b/queue-4.4/exec-limit-arg-stack-to-at-most-75-of-_stk_lim.patch new file mode 100644 index 00000000000..99890433b3c --- /dev/null +++ b/queue-4.4/exec-limit-arg-stack-to-at-most-75-of-_stk_lim.patch @@ -0,0 +1,52 @@ +From da029c11e6b12f321f36dac8771e833b65cec962 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Fri, 7 Jul 2017 11:57:29 -0700 +Subject: exec: Limit arg stack to at most 75% of _STK_LIM + +From: Kees Cook + +commit da029c11e6b12f321f36dac8771e833b65cec962 upstream. + +To avoid pathological stack usage or the need to special-case setuid +execs, just limit all arg stack usage to at most 75% of _STK_LIM (6MB). + +Signed-off-by: Kees Cook +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -206,8 +206,7 @@ static struct page *get_arg_page(struct + + if (write) { + unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; +- unsigned long ptr_size; +- struct rlimit *rlim; ++ unsigned long ptr_size, limit; + + /* + * Since the stack will hold pointers to the strings, we +@@ -236,14 +235,16 @@ static struct page *get_arg_page(struct + return page; + + /* +- * Limit to 1/4-th the stack size for the argv+env strings. ++ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM ++ * (whichever is smaller) for the argv+env strings. + * This ensures that: + * - the remaining binfmt code will not run out of stack space, + * - the program will have a reasonable amount of stack left + * to work from. 
+ */ +- rlim = current->signal->rlim; +- if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) ++ limit = _STK_LIM / 4 * 3; ++ limit = min(limit, rlimit(RLIMIT_STACK) / 4); ++ if (size > limit) + goto fail; + } + diff --git a/queue-4.4/fs-dcache.c-fix-spin-lockup-issue-on-nlru-lock.patch b/queue-4.4/fs-dcache.c-fix-spin-lockup-issue-on-nlru-lock.patch new file mode 100644 index 00000000000..e0f56655803 --- /dev/null +++ b/queue-4.4/fs-dcache.c-fix-spin-lockup-issue-on-nlru-lock.patch @@ -0,0 +1,80 @@ +From b17c070fb624cf10162cf92ea5e1ec25cd8ac176 Mon Sep 17 00:00:00 2001 +From: Sahitya Tummala +Date: Mon, 10 Jul 2017 15:50:00 -0700 +Subject: fs/dcache.c: fix spin lockup issue on nlru->lock + +From: Sahitya Tummala + +commit b17c070fb624cf10162cf92ea5e1ec25cd8ac176 upstream. + +__list_lru_walk_one() acquires the nlru spin lock (nlru->lock) for a longer +duration if there is a large number of items in the lru list. As per the +current code, it can hold the spin lock for up to a maximum of UINT_MAX +entries at a time. So if there is a large number of items in the lru +list, then "BUG: spinlock lockup suspected" is observed in the below +path: + + spin_bug+0x90 + do_raw_spin_lock+0xfc + _raw_spin_lock+0x28 + list_lru_add+0x28 + dput+0x1c8 + path_put+0x20 + terminate_walk+0x3c + path_lookupat+0x100 + filename_lookup+0x6c + user_path_at_empty+0x54 + SyS_faccessat+0xd0 + el0_svc_naked+0x24 + +This nlru->lock is acquired by another CPU in this path - + + d_lru_shrink_move+0x34 + dentry_lru_isolate_shrink+0x48 + __list_lru_walk_one.isra.10+0x94 + list_lru_walk_node+0x40 + shrink_dcache_sb+0x60 + do_remount_sb+0xbc + do_emergency_remount+0xb0 + process_one_work+0x228 + worker_thread+0x2e0 + kthread+0xf4 + ret_from_fork+0x10 + +Fix this lockup by reducing the number of entries to be shrunk from +the lru list to 1024 at once. Also, add cond_resched() before +processing the lru list again. 
+ +Link: http://marc.info/?t=149722864900001&r=1&w=2 +Link: http://lkml.kernel.org/r/1498707575-2472-1-git-send-email-stummala@codeaurora.org +Signed-off-by: Sahitya Tummala +Suggested-by: Jan Kara +Suggested-by: Vladimir Davydov +Acked-by: Vladimir Davydov +Cc: Alexander Polakov +Cc: Al Viro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dcache.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -1128,11 +1128,12 @@ void shrink_dcache_sb(struct super_block + LIST_HEAD(dispose); + + freed = list_lru_walk(&sb->s_dentry_lru, +- dentry_lru_isolate_shrink, &dispose, UINT_MAX); ++ dentry_lru_isolate_shrink, &dispose, 1024); + + this_cpu_sub(nr_dentry_unused, freed); + shrink_dentry_list(&dispose); +- } while (freed > 0); ++ cond_resched(); ++ } while (list_lru_count(&sb->s_dentry_lru) > 0); + } + EXPORT_SYMBOL(shrink_dcache_sb); + diff --git a/queue-4.4/irqchip-gic-v3-fix-out-of-bound-access-in-gic_set_affinity.patch b/queue-4.4/irqchip-gic-v3-fix-out-of-bound-access-in-gic_set_affinity.patch new file mode 100644 index 00000000000..c01c39a6de0 --- /dev/null +++ b/queue-4.4/irqchip-gic-v3-fix-out-of-bound-access-in-gic_set_affinity.patch @@ -0,0 +1,72 @@ +From 866d7c1b0a3c70387646c4e455e727a58c5d465a Mon Sep 17 00:00:00 2001 +From: Suzuki K Poulose +Date: Fri, 30 Jun 2017 10:58:28 +0100 +Subject: irqchip/gic-v3: Fix out-of-bound access in gic_set_affinity + +From: Suzuki K Poulose + +commit 866d7c1b0a3c70387646c4e455e727a58c5d465a upstream. + +The GICv3 driver doesn't check if the target CPU for gic_set_affinity +is valid before going ahead and making the changes. 
This triggers the +following splat with KASAN: + +[ 141.189434] BUG: KASAN: global-out-of-bounds in gic_set_affinity+0x8c/0x140 +[ 141.189704] Read of size 8 at addr ffff200009741d20 by task swapper/1/0 +[ 141.189958] +[ 141.190158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.12.0-rc7 +[ 141.190458] Hardware name: Foundation-v8A (DT) +[ 141.190658] Call trace: +[ 141.190908] [] dump_backtrace+0x0/0x328 +[ 141.191224] [] show_stack+0x14/0x20 +[ 141.191507] [] dump_stack+0xa4/0xc8 +[ 141.191858] [] print_address_description+0x13c/0x250 +[ 141.192219] [] kasan_report+0x210/0x300 +[ 141.192547] [] __asan_load8+0x84/0x98 +[ 141.192874] [] gic_set_affinity+0x8c/0x140 +[ 141.193158] [] irq_do_set_affinity+0x54/0xb8 +[ 141.193473] [] irq_set_affinity_locked+0x64/0xf0 +[ 141.193828] [] __irq_set_affinity+0x48/0x78 +[ 141.194158] [] arm_perf_starting_cpu+0x104/0x150 +[ 141.194513] [] cpuhp_invoke_callback+0x17c/0x1f8 +[ 141.194783] [] notify_cpu_starting+0x8c/0xb8 +[ 141.195130] [] secondary_start_kernel+0x15c/0x200 +[ 141.195390] [<0000000080db81b4>] 0x80db81b4 +[ 141.195603] +[ 141.195685] The buggy address belongs to the variable: +[ 141.196012] __cpu_logical_map+0x200/0x220 +[ 141.196176] +[ 141.196315] Memory state around the buggy address: +[ 141.196586] ffff200009741c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 141.196913] ffff200009741c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 141.197158] >ffff200009741d00: 00 00 00 00 fa fa fa fa 00 00 00 00 00 00 00 00 +[ 141.197487] ^ +[ 141.197758] ffff200009741d80: 00 00 00 00 00 00 00 00 fa fa fa fa 00 00 00 00 +[ 141.198060] ffff200009741e00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 141.198358] ================================================================== +[ 141.198609] Disabling lock debugging due to kernel taint +[ 141.198961] CPU1: Booted secondary processor [410fd051] + +This patch adds the check to make sure the cpu is valid. 
+ +Fixes: commit 021f653791ad17e03f98 ("irqchip: gic-v3: Initial support for GICv3") +Signed-off-by: Suzuki K Poulose +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/irqchip/irq-gic-v3.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/irqchip/irq-gic-v3.c ++++ b/drivers/irqchip/irq-gic-v3.c +@@ -632,6 +632,9 @@ static int gic_set_affinity(struct irq_d + int enabled; + u64 val; + ++ if (cpu >= nr_cpu_ids) ++ return -EINVAL; ++ + if (gic_irq_in_rdist(d)) + return -EINVAL; + diff --git a/queue-4.4/kernel-extable.c-mark-core_kernel_text-notrace.patch b/queue-4.4/kernel-extable.c-mark-core_kernel_text-notrace.patch new file mode 100644 index 00000000000..3ff9e38db93 --- /dev/null +++ b/queue-4.4/kernel-extable.c-mark-core_kernel_text-notrace.patch @@ -0,0 +1,62 @@ +From c0d80ddab89916273cb97114889d3f337bc370ae Mon Sep 17 00:00:00 2001 +From: Marcin Nowakowski +Date: Thu, 6 Jul 2017 15:35:31 -0700 +Subject: kernel/extable.c: mark core_kernel_text notrace + +From: Marcin Nowakowski + +commit c0d80ddab89916273cb97114889d3f337bc370ae upstream. + +core_kernel_text is used by MIPS in its function graph trace processing, +so having this method traced leads to an infinite set of recursive calls +such as: + + Call Trace: + ftrace_return_to_handler+0x50/0x128 + core_kernel_text+0x10/0x1b8 + prepare_ftrace_return+0x6c/0x114 + ftrace_graph_caller+0x20/0x44 + return_to_handler+0x10/0x30 + return_to_handler+0x0/0x30 + return_to_handler+0x0/0x30 + ftrace_ops_no_ops+0x114/0x1bc + core_kernel_text+0x10/0x1b8 + core_kernel_text+0x10/0x1b8 + core_kernel_text+0x10/0x1b8 + ftrace_ops_no_ops+0x114/0x1bc + core_kernel_text+0x10/0x1b8 + prepare_ftrace_return+0x6c/0x114 + ftrace_graph_caller+0x20/0x44 + (...) + +Mark the function notrace to avoid it being traced. 
+ +Link: http://lkml.kernel.org/r/1498028607-6765-1-git-send-email-marcin.nowakowski@imgtec.com +Signed-off-by: Marcin Nowakowski +Reviewed-by: Masami Hiramatsu +Cc: Peter Zijlstra +Cc: Thomas Meyer +Cc: Ingo Molnar +Cc: Steven Rostedt +Cc: Daniel Borkmann +Cc: Paul Gortmaker +Cc: Thomas Gleixner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/extable.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/extable.c ++++ b/kernel/extable.c +@@ -66,7 +66,7 @@ static inline int init_kernel_text(unsig + return 0; + } + +-int core_kernel_text(unsigned long addr) ++int notrace core_kernel_text(unsigned long addr) + { + if (addr >= (unsigned long)_stext && + addr < (unsigned long)_etext) diff --git a/queue-4.4/mm-list_lru.c-fix-list_lru_count_node-to-be-race-free.patch b/queue-4.4/mm-list_lru.c-fix-list_lru_count_node-to-be-race-free.patch new file mode 100644 index 00000000000..3ac68f1e9d9 --- /dev/null +++ b/queue-4.4/mm-list_lru.c-fix-list_lru_count_node-to-be-race-free.patch @@ -0,0 +1,87 @@ +From 2c80cd57c74339889a8752b20862a16c28929c3a Mon Sep 17 00:00:00 2001 +From: Sahitya Tummala +Date: Mon, 10 Jul 2017 15:49:57 -0700 +Subject: mm/list_lru.c: fix list_lru_count_node() to be race free + +From: Sahitya Tummala + +commit 2c80cd57c74339889a8752b20862a16c28929c3a upstream. + +list_lru_count_node() iterates over all memcgs to get the total number of +entries on the node but it can race with memcg_drain_all_list_lrus(), +which migrates the entries from a dead cgroup to another. This can return +incorrect number of entries from list_lru_count_node(). + +Fix this by keeping track of entries per node and simply return it in +list_lru_count_node(). 
+ +Link: http://lkml.kernel.org/r/1498707555-30525-1-git-send-email-stummala@codeaurora.org +Signed-off-by: Sahitya Tummala +Acked-by: Vladimir Davydov +Cc: Jan Kara +Cc: Alexander Polakov +Cc: Al Viro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/list_lru.h | 1 + + mm/list_lru.c | 14 ++++++-------- + 2 files changed, 7 insertions(+), 8 deletions(-) + +--- a/include/linux/list_lru.h ++++ b/include/linux/list_lru.h +@@ -44,6 +44,7 @@ struct list_lru_node { + /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ + struct list_lru_memcg *memcg_lrus; + #endif ++ long nr_items; + } ____cacheline_aligned_in_smp; + + struct list_lru { +--- a/mm/list_lru.c ++++ b/mm/list_lru.c +@@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, + l = list_lru_from_kmem(nlru, item); + list_add_tail(item, &l->list); + l->nr_items++; ++ nlru->nr_items++; + spin_unlock(&nlru->lock); + return true; + } +@@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, + l = list_lru_from_kmem(nlru, item); + list_del_init(item); + l->nr_items--; ++ nlru->nr_items--; + spin_unlock(&nlru->lock); + return true; + } +@@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one); + + unsigned long list_lru_count_node(struct list_lru *lru, int nid) + { +- long count = 0; +- int memcg_idx; ++ struct list_lru_node *nlru; + +- count += __list_lru_count_one(lru, nid, -1); +- if (list_lru_memcg_aware(lru)) { +- for_each_memcg_cache_index(memcg_idx) +- count += __list_lru_count_one(lru, nid, memcg_idx); +- } +- return count; ++ nlru = &lru->node[nid]; ++ return nlru->nr_items; + } + EXPORT_SYMBOL_GPL(list_lru_count_node); + +@@ -226,6 +223,7 @@ restart: + assert_spin_locked(&nlru->lock); + case LRU_REMOVED: + isolated++; ++ nlru->nr_items--; + /* + * If the lru lock has been dropped, our list + * traversal is now invalid and so we have to diff --git 
a/queue-4.4/mnt-in-propgate_umount-handle-visiting-mounts-in-any-order.patch b/queue-4.4/mnt-in-propgate_umount-handle-visiting-mounts-in-any-order.patch new file mode 100644 index 00000000000..d1e44a1971a --- /dev/null +++ b/queue-4.4/mnt-in-propgate_umount-handle-visiting-mounts-in-any-order.patch @@ -0,0 +1,288 @@ +From 99b19d16471e9c3faa85cad38abc9cbbe04c6d55 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 24 Oct 2016 16:16:13 -0500 +Subject: mnt: In propgate_umount handle visiting mounts in any order + +From: Eric W. Biederman + +commit 99b19d16471e9c3faa85cad38abc9cbbe04c6d55 upstream. + +While investigating some poor umount performance I realized that in +the case of overlapping mount trees where some of the mounts are locked +the code has been failing to unmount all of the mounts it should +have been unmounting. + +This failure to unmount all of the necessary +mounts can be reproduced with: + +$ cat locked_mounts_test.sh + +mount -t tmpfs test-base /mnt +mount --make-shared /mnt +mkdir -p /mnt/b + +mount -t tmpfs test1 /mnt/b +mount --make-shared /mnt/b +mkdir -p /mnt/b/10 + +mount -t tmpfs test2 /mnt/b/10 +mount --make-shared /mnt/b/10 +mkdir -p /mnt/b/10/20 + +mount --rbind /mnt/b /mnt/b/10/20 + +unshare -Urm --propagation unchanged /bin/sh -c 'sleep 5; if [ $(grep test /proc/self/mountinfo | wc -l) -eq 1 ] ; then echo SUCCESS ; else echo FAILURE ; fi' +sleep 1 +umount -l /mnt/b +wait %% + +$ unshare -Urm ./locked_mounts_test.sh + +This failure is corrected by removing the prepass that marks mounts +that may be umounted. + +A first pass is added that umounts mounts if possible and if not sets +mount mark if they could be unmounted if they weren't locked and adds +them to a list of umount possibilities. This first pass reconsiders +the mount's parent if it is on the list of umount possibilities, ensuring +that information of umountability will pass from child to mount parent.
+ +A second pass then walks through all mounts that are umounted and processes +their children unmounting them or marking them for reparenting. + +A last pass cleans up the state on the mounts that could not be umounted +and if applicable reparents them to their first parent that remained +mounted. + +While a bit longer than the old code this code is much more robust +as it allows information to flow up from the leaves and down +from the trunk making the order in which mounts are encountered +in the umount propagation tree irrelevant. + +Fixes: 0c56fe31420c ("mnt: Don't propagate unmounts to locked mounts") +Reviewed-by: Andrei Vagin +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/mount.h | 2 + fs/namespace.c | 2 + fs/pnode.c | 150 +++++++++++++++++++++++++++++++++------------------------ + 3 files changed, 91 insertions(+), 63 deletions(-) + +--- a/fs/mount.h ++++ b/fs/mount.h +@@ -57,7 +57,7 @@ struct mount { + struct mnt_namespace *mnt_ns; /* containing namespace */ + struct mountpoint *mnt_mp; /* where is it mounted */ + struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ +- struct list_head mnt_reparent; /* reparent list entry */ ++ struct list_head mnt_umounting; /* list entry for umount propagation */ + #ifdef CONFIG_FSNOTIFY + struct hlist_head mnt_fsnotify_marks; + __u32 mnt_fsnotify_mask; +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -237,7 +237,7 @@ static struct mount *alloc_vfsmnt(const + INIT_LIST_HEAD(&mnt->mnt_slave_list); + INIT_LIST_HEAD(&mnt->mnt_slave); + INIT_HLIST_NODE(&mnt->mnt_mp_list); +- INIT_LIST_HEAD(&mnt->mnt_reparent); ++ INIT_LIST_HEAD(&mnt->mnt_umounting); + #ifdef CONFIG_FSNOTIFY + INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); + #endif +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -415,86 +415,95 @@ void propagate_mount_unlock(struct mount + } + } + +-/* +- * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
+- */ +-static void mark_umount_candidates(struct mount *mnt) ++static void umount_one(struct mount *mnt, struct list_head *to_umount) + { +- struct mount *parent = mnt->mnt_parent; +- struct mount *m; +- +- BUG_ON(parent == mnt); +- +- for (m = propagation_next(parent, parent); m; +- m = propagation_next(m, parent)) { +- struct mount *child = __lookup_mnt(&m->mnt, +- mnt->mnt_mountpoint); +- if (!child || (child->mnt.mnt_flags & MNT_UMOUNT)) +- continue; +- if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) { +- SET_MNT_MARK(child); +- } +- } ++ CLEAR_MNT_MARK(mnt); ++ mnt->mnt.mnt_flags |= MNT_UMOUNT; ++ list_del_init(&mnt->mnt_child); ++ list_del_init(&mnt->mnt_umounting); ++ list_move_tail(&mnt->mnt_list, to_umount); + } + + /* + * NOTE: unmounting 'mnt' naturally propagates to all other mounts its + * parent propagates to. + */ +-static void __propagate_umount(struct mount *mnt, struct list_head *to_reparent) ++static bool __propagate_umount(struct mount *mnt, ++ struct list_head *to_umount, ++ struct list_head *to_restore) + { +- struct mount *parent = mnt->mnt_parent; +- struct mount *m; ++ bool progress = false; ++ struct mount *child; + +- BUG_ON(parent == mnt); +- +- for (m = propagation_next(parent, parent); m; +- m = propagation_next(m, parent)) { +- struct mount *topper; +- struct mount *child = __lookup_mnt(&m->mnt, +- mnt->mnt_mountpoint); +- /* +- * umount the child only if the child has no children +- * and the child is marked safe to unmount. +- */ +- if (!child || !IS_MNT_MARKED(child)) ++ /* ++ * The state of the parent won't change if this mount is ++ * already unmounted or marked as without children. ++ */ ++ if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED)) ++ goto out; ++ ++ /* Verify topper is the only grandchild that has not been ++ * speculatively unmounted. 
++ */ ++ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { ++ if (child->mnt_mountpoint == mnt->mnt.mnt_root) ++ continue; ++ if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child)) + continue; +- CLEAR_MNT_MARK(child); ++ /* Found a mounted child */ ++ goto children; ++ } + +- /* If there is exactly one mount covering all of child +- * replace child with that mount. +- */ +- topper = find_topper(child); +- if (topper) +- list_add_tail(&topper->mnt_reparent, to_reparent); +- +- if (topper || list_empty(&child->mnt_mounts)) { +- list_del_init(&child->mnt_child); +- list_del_init(&child->mnt_reparent); +- child->mnt.mnt_flags |= MNT_UMOUNT; +- list_move_tail(&child->mnt_list, &mnt->mnt_list); ++ /* Mark mounts that can be unmounted if not locked */ ++ SET_MNT_MARK(mnt); ++ progress = true; ++ ++ /* If a mount is without children and not locked umount it. */ ++ if (!IS_MNT_LOCKED(mnt)) { ++ umount_one(mnt, to_umount); ++ } else { ++children: ++ list_move_tail(&mnt->mnt_umounting, to_restore); ++ } ++out: ++ return progress; ++} ++ ++static void umount_list(struct list_head *to_umount, ++ struct list_head *to_restore) ++{ ++ struct mount *mnt, *child, *tmp; ++ list_for_each_entry(mnt, to_umount, mnt_list) { ++ list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) { ++ /* topper? 
*/ ++ if (child->mnt_mountpoint == mnt->mnt.mnt_root) ++ list_move_tail(&child->mnt_umounting, to_restore); ++ else ++ umount_one(child, to_umount); + } + } + } + +-static void reparent_mounts(struct list_head *to_reparent) ++static void restore_mounts(struct list_head *to_restore) + { +- while (!list_empty(to_reparent)) { ++ /* Restore mounts to a clean working state */ ++ while (!list_empty(to_restore)) { + struct mount *mnt, *parent; + struct mountpoint *mp; + +- mnt = list_first_entry(to_reparent, struct mount, mnt_reparent); +- list_del_init(&mnt->mnt_reparent); ++ mnt = list_first_entry(to_restore, struct mount, mnt_umounting); ++ CLEAR_MNT_MARK(mnt); ++ list_del_init(&mnt->mnt_umounting); + +- /* Where should this mount be reparented to? */ ++ /* Should this mount be reparented? */ + mp = mnt->mnt_mp; + parent = mnt->mnt_parent; + while (parent->mnt.mnt_flags & MNT_UMOUNT) { + mp = parent->mnt_mp; + parent = parent->mnt_parent; + } +- +- mnt_change_mountpoint(parent, mp, mnt); ++ if (parent != mnt->mnt_parent) ++ mnt_change_mountpoint(parent, mp, mnt); + } + } + +@@ -508,15 +517,34 @@ static void reparent_mounts(struct list_ + int propagate_umount(struct list_head *list) + { + struct mount *mnt; +- LIST_HEAD(to_reparent); +- +- list_for_each_entry_reverse(mnt, list, mnt_list) +- mark_umount_candidates(mnt); ++ LIST_HEAD(to_restore); ++ LIST_HEAD(to_umount); + +- list_for_each_entry(mnt, list, mnt_list) +- __propagate_umount(mnt, &to_reparent); ++ list_for_each_entry(mnt, list, mnt_list) { ++ struct mount *parent = mnt->mnt_parent; ++ struct mount *m; ++ ++ for (m = propagation_next(parent, parent); m; ++ m = propagation_next(m, parent)) { ++ struct mount *child = __lookup_mnt(&m->mnt, ++ mnt->mnt_mountpoint); ++ if (!child) ++ continue; ++ ++ /* Check the child and parents while progress is made */ ++ while (__propagate_umount(child, ++ &to_umount, &to_restore)) { ++ /* Is the parent a umount candidate? 
*/ ++ child = child->mnt_parent; ++ if (list_empty(&child->mnt_umounting)) ++ break; ++ } ++ } ++ } + +- reparent_mounts(&to_reparent); ++ umount_list(&to_umount, &to_restore); ++ restore_mounts(&to_restore); ++ list_splice_tail(&to_umount, list); + + return 0; + } diff --git a/queue-4.4/mnt-in-umount-propagation-reparent-in-a-separate-pass.patch b/queue-4.4/mnt-in-umount-propagation-reparent-in-a-separate-pass.patch new file mode 100644 index 00000000000..3fae0f8e736 --- /dev/null +++ b/queue-4.4/mnt-in-umount-propagation-reparent-in-a-separate-pass.patch @@ -0,0 +1,171 @@ +From 570487d3faf2a1d8a220e6ee10f472163123d7da Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 15 May 2017 14:42:07 -0500 +Subject: mnt: In umount propagation reparent in a separate pass + +From: Eric W. Biederman + +commit 570487d3faf2a1d8a220e6ee10f472163123d7da upstream. + +It was observed that in some pathological cases the current code +does not unmount everything it should. After investigation it +was determined that the issue is that mnt_change_mountpoint can +change which mounts are available to be unmounted during mount +propagation which is wrong.
+ +The trivial reproducer is: +$ cat ./pathological.sh + +mount -t tmpfs test-base /mnt +cd /mnt +mkdir 1 2 1/1 +mount --bind 1 1 +mount --make-shared 1 +mount --bind 1 2 +mount --bind 1/1 1/1 +mount --bind 1/1 1/1 +echo +grep test-base /proc/self/mountinfo +umount 1/1 +echo +grep test-base /proc/self/mountinfo + +$ unshare -Urm ./pathological.sh + +The expected output looks like: +46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 +47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +49 54 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +50 53 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +51 49 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +54 47 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +53 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +52 50 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 + +46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 +47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 + +The output without the fix looks like: +46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 +47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +49 54 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +50 53 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +51 49 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +54 47 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +53 48 
0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +52 50 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 + +46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 +47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 +52 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 + +That last mount in the output was in the propagation tree to be unmounted but +was missed because the mnt_change_mountpoint changed its parent before the walk +through the mount propagation tree observed it. + +Fixes: 1064f874abc0 ("mnt: Tuck mounts under others instead of creating shadow/side mounts.") +Acked-by: Andrei Vagin +Reviewed-by: Ram Pai +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/mount.h | 1 + + fs/namespace.c | 1 + + fs/pnode.c | 35 ++++++++++++++++++++++++++++++----- + 3 files changed, 32 insertions(+), 5 deletions(-) + +--- a/fs/mount.h ++++ b/fs/mount.h +@@ -57,6 +57,7 @@ struct mount { + struct mnt_namespace *mnt_ns; /* containing namespace */ + struct mountpoint *mnt_mp; /* where is it mounted */ + struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ ++ struct list_head mnt_reparent; /* reparent list entry */ + #ifdef CONFIG_FSNOTIFY + struct hlist_head mnt_fsnotify_marks; + __u32 mnt_fsnotify_mask; +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -237,6 +237,7 @@ static struct mount *alloc_vfsmnt(const + INIT_LIST_HEAD(&mnt->mnt_slave_list); + INIT_LIST_HEAD(&mnt->mnt_slave); + INIT_HLIST_NODE(&mnt->mnt_mp_list); ++ INIT_LIST_HEAD(&mnt->mnt_reparent); + #ifdef CONFIG_FSNOTIFY + INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); + #endif +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -441,7 +441,7 @@ static void mark_umount_candidates(struc + * NOTE: unmounting 'mnt' naturally propagates to all other mounts its + * parent
propagates to. + */ +-static void __propagate_umount(struct mount *mnt) ++static void __propagate_umount(struct mount *mnt, struct list_head *to_reparent) + { + struct mount *parent = mnt->mnt_parent; + struct mount *m; +@@ -466,17 +466,38 @@ static void __propagate_umount(struct mo + */ + topper = find_topper(child); + if (topper) +- mnt_change_mountpoint(child->mnt_parent, child->mnt_mp, +- topper); ++ list_add_tail(&topper->mnt_reparent, to_reparent); + +- if (list_empty(&child->mnt_mounts)) { ++ if (topper || list_empty(&child->mnt_mounts)) { + list_del_init(&child->mnt_child); ++ list_del_init(&child->mnt_reparent); + child->mnt.mnt_flags |= MNT_UMOUNT; + list_move_tail(&child->mnt_list, &mnt->mnt_list); + } + } + } + ++static void reparent_mounts(struct list_head *to_reparent) ++{ ++ while (!list_empty(to_reparent)) { ++ struct mount *mnt, *parent; ++ struct mountpoint *mp; ++ ++ mnt = list_first_entry(to_reparent, struct mount, mnt_reparent); ++ list_del_init(&mnt->mnt_reparent); ++ ++ /* Where should this mount be reparented to? */ ++ mp = mnt->mnt_mp; ++ parent = mnt->mnt_parent; ++ while (parent->mnt.mnt_flags & MNT_UMOUNT) { ++ mp = parent->mnt_mp; ++ parent = parent->mnt_parent; ++ } ++ ++ mnt_change_mountpoint(parent, mp, mnt); ++ } ++} ++ + /* + * collect all mounts that receive propagation from the mount in @list, + * and return these additional mounts in the same list. 
+@@ -487,11 +508,15 @@ static void __propagate_umount(struct mo + int propagate_umount(struct list_head *list) + { + struct mount *mnt; ++ LIST_HEAD(to_reparent); + + list_for_each_entry_reverse(mnt, list, mnt_list) + mark_umount_candidates(mnt); + + list_for_each_entry(mnt, list, mnt_list) +- __propagate_umount(mnt); ++ __propagate_umount(mnt, &to_reparent); ++ ++ reparent_mounts(&to_reparent); ++ + return 0; + } diff --git a/queue-4.4/mnt-make-propagate_umount-less-slow-for-overlapping-mount-propagation-trees.patch b/queue-4.4/mnt-make-propagate_umount-less-slow-for-overlapping-mount-propagation-trees.patch new file mode 100644 index 00000000000..e73a71368e5 --- /dev/null +++ b/queue-4.4/mnt-make-propagate_umount-less-slow-for-overlapping-mount-propagation-trees.patch @@ -0,0 +1,204 @@ +From 296990deb389c7da21c78030376ba244dc1badf5 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 24 Oct 2016 17:25:19 -0500 +Subject: mnt: Make propagate_umount less slow for overlapping mount propagation trees + +From: Eric W. Biederman + +commit 296990deb389c7da21c78030376ba244dc1badf5 upstream. + +Andrei Vagin pointed out that the time to execute propagate_umount can go +non-linear (and take a ludicrous amount of time) when the mount +propagation trees of the mounts to be unmounted by a lazy unmount +overlap. + +Make the walk of the mount propagation trees nearly linear by +remembering which mounts have already been visited, allowing +subsequent walks to detect when walking a mount propagation tree or a +subtree of a mount propagation tree would be duplicate work and to skip +them entirely. + +Walk the list of mounts whose propagation trees need to be traversed +from the mount highest in the mount tree to mounts lower in the mount +tree so that odds are higher that the code will walk the largest trees +first, allowing later tree walks to be skipped entirely. + +Add cleanup_umount_visitations to remove the code's memory of which +mounts have been visited.
+ +Add the functions last_slave and skip_propagation_subtree to allow +skipping appropriate parts of the mount propagation tree without +needing to change the logic of the rest of the code. + +A script to generate overlapping mount propagation trees: + +$ cat runs.h +set -e +mount -t tmpfs zdtm /mnt +mkdir -p /mnt/1 /mnt/2 +mount -t tmpfs zdtm /mnt/1 +mount --make-shared /mnt/1 +mkdir /mnt/1/1 + +iteration=10 +if [ -n "$1" ] ; then + iteration=$1 +fi + +for i in $(seq $iteration); do + mount --bind /mnt/1/1 /mnt/1/1 +done + +mount --rbind /mnt/1 /mnt/2 + +TIMEFORMAT='%Rs' +nr=$(( ( 2 ** ( $iteration + 1 ) ) + 1 )) +echo -n "umount -l /mnt/1 -> $nr " +time umount -l /mnt/1 + +nr=$(cat /proc/self/mountinfo | grep zdtm | wc -l ) +time umount -l /mnt/2 + +$ for i in $(seq 9 19); do echo $i; unshare -Urm bash ./run.sh $i; done + +Here are the performance numbers with and without the patch: + + mhash | 8192 | 8192 | 1048576 | 1048576 + mounts | before | after | before | after + ------------------------------------------------ + 1025 | 0.040s | 0.016s | 0.038s | 0.019s + 2049 | 0.094s | 0.017s | 0.080s | 0.018s + 4097 | 0.243s | 0.019s | 0.206s | 0.023s + 8193 | 1.202s | 0.028s | 1.562s | 0.032s + 16385 | 9.635s | 0.036s | 9.952s | 0.041s + 32769 | 60.928s | 0.063s | 44.321s | 0.064s + 65537 | | 0.097s | | 0.097s + 131073 | | 0.233s | | 0.176s + 262145 | | 0.653s | | 0.344s + 524289 | | 2.305s | | 0.735s + 1048577 | | 7.107s | | 2.603s + +Andrei Vagin reports fixing the performance problem is part of the +work to fix CVE-2016-6213. + +Fixes: a05964f3917c ("[PATCH] shared mounts handling: umount") +Reported-by: Andrei Vagin +Reviewed-by: Andrei Vagin +Signed-off-by: "Eric W. 
Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/pnode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 62 insertions(+), 1 deletion(-) + +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -24,6 +24,11 @@ static inline struct mount *first_slave( + return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); + } + ++static inline struct mount *last_slave(struct mount *p) ++{ ++ return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave); ++} ++ + static inline struct mount *next_slave(struct mount *p) + { + return list_entry(p->mnt_slave.next, struct mount, mnt_slave); +@@ -164,6 +169,19 @@ static struct mount *propagation_next(st + } + } + ++static struct mount *skip_propagation_subtree(struct mount *m, ++ struct mount *origin) ++{ ++ /* ++ * Advance m such that propagation_next will not return ++ * the slaves of m. ++ */ ++ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) ++ m = last_slave(m); ++ ++ return m; ++} ++ + static struct mount *next_group(struct mount *m, struct mount *origin) + { + while (1) { +@@ -507,6 +525,15 @@ static void restore_mounts(struct list_h + } + } + ++static void cleanup_umount_visitations(struct list_head *visited) ++{ ++ while (!list_empty(visited)) { ++ struct mount *mnt = ++ list_first_entry(visited, struct mount, mnt_umounting); ++ list_del_init(&mnt->mnt_umounting); ++ } ++} ++ + /* + * collect all mounts that receive propagation from the mount in @list, + * and return these additional mounts in the same list. 
+@@ -519,11 +546,23 @@ int propagate_umount(struct list_head *l + struct mount *mnt; + LIST_HEAD(to_restore); + LIST_HEAD(to_umount); ++ LIST_HEAD(visited); + +- list_for_each_entry(mnt, list, mnt_list) { ++ /* Find candidates for unmounting */ ++ list_for_each_entry_reverse(mnt, list, mnt_list) { + struct mount *parent = mnt->mnt_parent; + struct mount *m; + ++ /* ++ * If this mount has already been visited it is known that it's ++ * entire peer group and all of their slaves in the propagation ++ * tree for the mountpoint has already been visited and there is ++ * no need to visit them again. ++ */ ++ if (!list_empty(&mnt->mnt_umounting)) ++ continue; ++ ++ list_add_tail(&mnt->mnt_umounting, &visited); + for (m = propagation_next(parent, parent); m; + m = propagation_next(m, parent)) { + struct mount *child = __lookup_mnt(&m->mnt, +@@ -531,6 +570,27 @@ int propagate_umount(struct list_head *l + if (!child) + continue; + ++ if (!list_empty(&child->mnt_umounting)) { ++ /* ++ * If the child has already been visited it is ++ * know that it's entire peer group and all of ++ * their slaves in the propgation tree for the ++ * mountpoint has already been visited and there ++ * is no need to visit this subtree again. ++ */ ++ m = skip_propagation_subtree(m, parent); ++ continue; ++ } else if (child->mnt.mnt_flags & MNT_UMOUNT) { ++ /* ++ * We have come accross an partially unmounted ++ * mount in list that has not been visited yet. ++ * Remember it has been visited and continue ++ * about our merry way. 
++ */ ++ list_add_tail(&child->mnt_umounting, &visited); ++ continue; ++ } ++ + /* Check the child and parents while progress is made */ + while (__propagate_umount(child, + &to_umount, &to_restore)) { +@@ -544,6 +604,7 @@ int propagate_umount(struct list_head *l + + umount_list(&to_umount, &to_restore); + restore_mounts(&to_restore); ++ cleanup_umount_visitations(&visited); + list_splice_tail(&to_umount, list); + + return 0; diff --git a/queue-4.4/parisc-dma-api-return-error-instead-of-bug_on-for-dma-ops-on-non-dma-devs.patch b/queue-4.4/parisc-dma-api-return-error-instead-of-bug_on-for-dma-ops-on-non-dma-devs.patch new file mode 100644 index 00000000000..af119554a1e --- /dev/null +++ b/queue-4.4/parisc-dma-api-return-error-instead-of-bug_on-for-dma-ops-on-non-dma-devs.patch @@ -0,0 +1,204 @@ +From 33f9e02495d15a061f0c94ef46f5103a2d0c20f3 Mon Sep 17 00:00:00 2001 +From: Thomas Bogendoerfer +Date: Mon, 3 Jul 2017 10:38:05 +0200 +Subject: parisc: DMA API: return error instead of BUG_ON for dma ops on non dma devs + +From: Thomas Bogendoerfer + +commit 33f9e02495d15a061f0c94ef46f5103a2d0c20f3 upstream. 
+ +Enabling parport pc driver on a B2600 (and probably other 64bit PARISC +systems) produced the following BUG: + +CPU: 0 PID: 1 Comm: swapper Not tainted 4.12.0-rc5-30198-g1132d5e #156 +task: 000000009e050000 task.stack: 000000009e04c000 + + YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI +PSW: 00001000000001101111111100001111 Not tainted +r00-03 000000ff0806ff0f 000000009e04c990 0000000040871b78 000000009e04cac0 +r04-07 0000000040c14de0 ffffffffffffffff 000000009e07f098 000000009d82d200 +r08-11 000000009d82d210 0000000000000378 0000000000000000 0000000040c345e0 +r12-15 0000000000000005 0000000040c345e0 0000000000000000 0000000040c9d5e0 +r16-19 0000000040c345e0 00000000f00001c4 00000000f00001bc 0000000000000061 +r20-23 000000009e04ce28 0000000000000010 0000000000000010 0000000040b89e40 +r24-27 0000000000000003 0000000000ffffff 000000009d82d210 0000000040c14de0 +r28-31 0000000000000000 000000009e04ca90 000000009e04cb40 0000000000000000 +sr00-03 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +sr04-07 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + +IASQ: 0000000000000000 0000000000000000 IAOQ: 00000000404aece0 00000000404aece4 + IIR: 03ffe01f ISR: 0000000010340000 IOR: 000001781304cac8 + CPU: 0 CR30: 000000009e04c000 CR31: 00000000e2976de2 + ORIG_R28: 0000000000000200 + IAOQ[0]: sba_dma_supported+0x80/0xd0 + IAOQ[1]: sba_dma_supported+0x84/0xd0 + RP(r2): parport_pc_probe_port+0x178/0x1200 + +The cause is a call to dma_coerce_mask_and_coherent in parport_pc_probe_port, +which PARISC DMA API doesn't handle very nicely. This commit gives back +DMA_ERROR_CODE for DMA API calls, if device isn't capable of DMA +transaction.
+ +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/include/asm/dma-mapping.h | 11 +++++++---- + drivers/parisc/ccio-dma.c | 12 ++++++++++++ + drivers/parisc/dino.c | 5 ++++- + drivers/parisc/lba_pci.c | 6 ++++-- + drivers/parisc/sba_iommu.c | 14 ++++++++++++++ + 5 files changed, 41 insertions(+), 7 deletions(-) + +--- a/arch/parisc/include/asm/dma-mapping.h ++++ b/arch/parisc/include/asm/dma-mapping.h +@@ -39,6 +39,8 @@ struct hppa_dma_ops { + ** flush/purge and allocate "regular" cacheable pages for everything. + */ + ++#define DMA_ERROR_CODE (~(dma_addr_t)0) ++ + #ifdef CONFIG_PA11 + extern struct hppa_dma_ops pcxl_dma_ops; + extern struct hppa_dma_ops pcx_dma_ops; +@@ -209,12 +211,13 @@ parisc_walk_tree(struct device *dev) + break; + } + } +- BUG_ON(!dev->platform_data); + return dev->platform_data; + } +- +-#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu) +- ++ ++#define GET_IOC(dev) ({ \ ++ void *__pdata = parisc_walk_tree(dev); \ ++ __pdata ? 
HBA_DATA(__pdata)->iommu : NULL; \ ++}) + + #ifdef CONFIG_IOMMU_CCIO + struct parisc_device; +--- a/drivers/parisc/ccio-dma.c ++++ b/drivers/parisc/ccio-dma.c +@@ -741,6 +741,8 @@ ccio_map_single(struct device *dev, void + + BUG_ON(!dev); + ioc = GET_IOC(dev); ++ if (!ioc) ++ return DMA_ERROR_CODE; + + BUG_ON(size <= 0); + +@@ -805,6 +807,10 @@ ccio_unmap_single(struct device *dev, dm + + BUG_ON(!dev); + ioc = GET_IOC(dev); ++ if (!ioc) { ++ WARN_ON(!ioc); ++ return; ++ } + + DBG_RUN("%s() iovp 0x%lx/%x\n", + __func__, (long)iova, size); +@@ -908,6 +914,8 @@ ccio_map_sg(struct device *dev, struct s + + BUG_ON(!dev); + ioc = GET_IOC(dev); ++ if (!ioc) ++ return 0; + + DBG_RUN_SG("%s() START %d entries\n", __func__, nents); + +@@ -980,6 +988,10 @@ ccio_unmap_sg(struct device *dev, struct + + BUG_ON(!dev); + ioc = GET_IOC(dev); ++ if (!ioc) { ++ WARN_ON(!ioc); ++ return; ++ } + + DBG_RUN_SG("%s() START %d entries, %p,%x\n", + __func__, nents, sg_virt(sglist), sglist->length); +--- a/drivers/parisc/dino.c ++++ b/drivers/parisc/dino.c +@@ -154,7 +154,10 @@ struct dino_device + }; + + /* Looks nice and keeps the compiler happy */ +-#define DINO_DEV(d) ((struct dino_device *) d) ++#define DINO_DEV(d) ({ \ ++ void *__pdata = d; \ ++ BUG_ON(!__pdata); \ ++ (struct dino_device *)__pdata; }) + + + /* +--- a/drivers/parisc/lba_pci.c ++++ b/drivers/parisc/lba_pci.c +@@ -111,8 +111,10 @@ static u32 lba_t32; + + + /* Looks nice and keeps the compiler happy */ +-#define LBA_DEV(d) ((struct lba_device *) (d)) +- ++#define LBA_DEV(d) ({ \ ++ void *__pdata = d; \ ++ BUG_ON(!__pdata); \ ++ (struct lba_device *)__pdata; }) + + /* + ** Only allow 8 subsidiary busses per LBA +--- a/drivers/parisc/sba_iommu.c ++++ b/drivers/parisc/sba_iommu.c +@@ -691,6 +691,8 @@ static int sba_dma_supported( struct dev + return 0; + + ioc = GET_IOC(dev); ++ if (!ioc) ++ return 0; + + /* + * check if mask is >= than the current max IO Virt Address +@@ -722,6 +724,8 @@ sba_map_single(struct device *dev, 
void + int pide; + + ioc = GET_IOC(dev); ++ if (!ioc) ++ return DMA_ERROR_CODE; + + /* save offset bits */ + offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK; +@@ -803,6 +807,10 @@ sba_unmap_single(struct device *dev, dma + DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size); + + ioc = GET_IOC(dev); ++ if (!ioc) { ++ WARN_ON(!ioc); ++ return; ++ } + offset = iova & ~IOVP_MASK; + iova ^= offset; /* clear offset bits */ + size += offset; +@@ -942,6 +950,8 @@ sba_map_sg(struct device *dev, struct sc + DBG_RUN_SG("%s() START %d entries\n", __func__, nents); + + ioc = GET_IOC(dev); ++ if (!ioc) ++ return 0; + + /* Fast path single entry scatterlists. */ + if (nents == 1) { +@@ -1027,6 +1037,10 @@ sba_unmap_sg(struct device *dev, struct + __func__, nents, sg_virt(sglist), sglist->length); + + ioc = GET_IOC(dev); ++ if (!ioc) { ++ WARN_ON(!ioc); ++ return; ++ } + + #ifdef SBA_COLLECT_STATS + ioc->usg_calls++; diff --git a/queue-4.4/parisc-mm-ensure-irqs-are-off-in-switch_mm.patch b/queue-4.4/parisc-mm-ensure-irqs-are-off-in-switch_mm.patch new file mode 100644 index 00000000000..a3401373d88 --- /dev/null +++ b/queue-4.4/parisc-mm-ensure-irqs-are-off-in-switch_mm.patch @@ -0,0 +1,56 @@ +From 649aa24254e85bf6bd7807dd372d083707852b1f Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 29 May 2017 17:14:16 +0200 +Subject: parisc/mm: Ensure IRQs are off in switch_mm() + +From: Helge Deller + +commit 649aa24254e85bf6bd7807dd372d083707852b1f upstream. + +This is because of commit f98db6013c55 ("sched/core: Add switch_mm_irqs_off() +and use it in the scheduler") in which switch_mm_irqs_off() is called by the +scheduler, vs switch_mm() which is used by use_mm(). + +This patch lets the parisc code mirror the x86 and powerpc code, ie. it +disables interrupts in switch_mm(), and optimises the scheduler case by +defining switch_mm_irqs_off(). 
+ +Signed-off-by: Helge Deller +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/include/asm/mmu_context.h | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/arch/parisc/include/asm/mmu_context.h ++++ b/arch/parisc/include/asm/mmu_context.h +@@ -49,15 +49,26 @@ static inline void load_context(mm_conte + mtctl(__space_to_prot(context), 8); + } + +-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) ++static inline void switch_mm_irqs_off(struct mm_struct *prev, ++ struct mm_struct *next, struct task_struct *tsk) + { +- + if (prev != next) { + mtctl(__pa(next->pgd), 25); + load_context(next->context); + } + } + ++static inline void switch_mm(struct mm_struct *prev, ++ struct mm_struct *next, struct task_struct *tsk) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ switch_mm_irqs_off(prev, next, tsk); ++ local_irq_restore(flags); ++} ++#define switch_mm_irqs_off switch_mm_irqs_off ++ + #define deactivate_mm(tsk,mm) do { } while (0) + + static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/queue-4.4/parisc-report-sigsegv-instead-of-sigbus-when-running-out-of-stack.patch b/queue-4.4/parisc-report-sigsegv-instead-of-sigbus-when-running-out-of-stack.patch new file mode 100644 index 00000000000..e1e402ac34b --- /dev/null +++ b/queue-4.4/parisc-report-sigsegv-instead-of-sigbus-when-running-out-of-stack.patch @@ -0,0 +1,40 @@ +From 247462316f85a9e0479445c1a4223950b68ffac1 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Sun, 2 Jul 2017 22:00:41 +0200 +Subject: parisc: Report SIGSEGV instead of SIGBUS when running out of stack + +From: Helge Deller + +commit 247462316f85a9e0479445c1a4223950b68ffac1 upstream. + +When a process runs out of stack the parisc kernel wrongly faults with SIGBUS +instead of the expected SIGSEGV signal. 
+ +This example shows how the kernel faults: +do_page_fault() command='a.out' type=15 address=0xfaac2000 in libc-2.24.so[f8308000+16c000] +trap #15: Data TLB miss fault, vm_start = 0xfa2c2000, vm_end = 0xfaac2000 + +The vma->vm_end value is the first address which does not belong to the vma, so +adjust the check to include vma->vm_end to the range for which to send the +SIGSEGV signal. + +This patch unbreaks building the debian libsigsegv package. + +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/mm/fault.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/parisc/mm/fault.c ++++ b/arch/parisc/mm/fault.c +@@ -303,7 +303,7 @@ bad_area: + case 15: /* Data TLB miss fault/Data page fault */ + /* send SIGSEGV when outside of vma */ + if (!vma || +- address < vma->vm_start || address > vma->vm_end) { ++ address < vma->vm_start || address >= vma->vm_end) { + si.si_signo = SIGSEGV; + si.si_code = SEGV_MAPERR; + break; diff --git a/queue-4.4/parisc-use-compat_sys_keyctl.patch b/queue-4.4/parisc-use-compat_sys_keyctl.patch new file mode 100644 index 00000000000..f29432c2873 --- /dev/null +++ b/queue-4.4/parisc-use-compat_sys_keyctl.patch @@ -0,0 +1,33 @@ +From b0f94efd5aa8daa8a07d7601714c2573266cd4c9 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Mon, 12 Jun 2017 23:18:30 -0700 +Subject: parisc: use compat_sys_keyctl() + +From: Eric Biggers + +commit b0f94efd5aa8daa8a07d7601714c2573266cd4c9 upstream. + +Architectures with a compat syscall table must put compat_sys_keyctl() +in it, not sys_keyctl(). The parisc architecture was not doing this; +fix it. 
+ +Signed-off-by: Eric Biggers +Acked-by: Helge Deller +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/kernel/syscall_table.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/parisc/kernel/syscall_table.S ++++ b/arch/parisc/kernel/syscall_table.S +@@ -361,7 +361,7 @@ + ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ + ENTRY_SAME(add_key) + ENTRY_SAME(request_key) /* 265 */ +- ENTRY_SAME(keyctl) ++ ENTRY_COMP(keyctl) + ENTRY_SAME(ioprio_set) + ENTRY_SAME(ioprio_get) + ENTRY_SAME(inotify_init) diff --git a/queue-4.4/powerpc-move-elf_et_dyn_base-to-4gb-4mb.patch b/queue-4.4/powerpc-move-elf_et_dyn_base-to-4gb-4mb.patch new file mode 100644 index 00000000000..97a65a17bf1 --- /dev/null +++ b/queue-4.4/powerpc-move-elf_et_dyn_base-to-4gb-4mb.patch @@ -0,0 +1,63 @@ +From 47ebb09d54856500c5a5e14824781902b3bb738e Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Mon, 10 Jul 2017 15:52:47 -0700 +Subject: powerpc: move ELF_ET_DYN_BASE to 4GB / 4MB + +From: Kees Cook + +commit 47ebb09d54856500c5a5e14824781902b3bb738e upstream. + +Now that explicitly executed loaders are loaded in the mmap region, we +have more freedom to decide where we position PIE binaries in the +address space to avoid possible collisions with mmap or stack regions. + +For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit +address space for 32-bit pointers. On 32-bit use 4MB, which is the +traditional x86 minimum load location, likely to avoid historically +requiring a 4MB page table entry when only a portion of the first 4MB +would be used (since the NULL address is avoided). 
+ +Link: http://lkml.kernel.org/r/1498154792-49952-4-git-send-email-keescook@chromium.org +Signed-off-by: Kees Cook +Tested-by: Michael Ellerman +Acked-by: Michael Ellerman +Cc: Russell King +Cc: Catalin Marinas +Cc: Will Deacon +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Martin Schwidefsky +Cc: Heiko Carstens +Cc: James Hogan +Cc: Pratyush Anand +Cc: Ingo Molnar +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/elf.h | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/arch/powerpc/include/asm/elf.h ++++ b/arch/powerpc/include/asm/elf.h +@@ -23,12 +23,13 @@ + #define CORE_DUMP_USE_REGSET + #define ELF_EXEC_PAGESIZE PAGE_SIZE + +-/* This is the location that an ET_DYN program is loaded if exec'ed. Typical +- use of this is to invoke "./ld.so someprog" to test out a new version of +- the loader. We need to make sure that it is out of the way of the program +- that it will "exec", and that there is sufficient room for the brk. */ +- +-#define ELF_ET_DYN_BASE 0x20000000 ++/* ++ * This is the base location for PIE (ET_DYN with INTERP) loads. On ++ * 64-bit, this is raised to 4GB to leave the entire 32-bit address ++ * space open for things that want to use the area for 32-bit pointers. ++ */ ++#define ELF_ET_DYN_BASE (is_32bit_task() ? 0x000400000UL : \ ++ 0x100000000UL) + + #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) + diff --git a/queue-4.4/s390-reduce-elf_et_dyn_base.patch b/queue-4.4/s390-reduce-elf_et_dyn_base.patch new file mode 100644 index 00000000000..64b516c2d0e --- /dev/null +++ b/queue-4.4/s390-reduce-elf_et_dyn_base.patch @@ -0,0 +1,66 @@ +From a73dc5370e153ac63718d850bddf0c9aa9d871e6 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Mon, 10 Jul 2017 15:52:51 -0700 +Subject: s390: reduce ELF_ET_DYN_BASE + +From: Kees Cook + +commit a73dc5370e153ac63718d850bddf0c9aa9d871e6 upstream. 
+ +Now that explicitly executed loaders are loaded in the mmap region, we +have more freedom to decide where we position PIE binaries in the +address space to avoid possible collisions with mmap or stack regions. + +For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit +address space for 32-bit pointers. On 32-bit use 4MB, which is the +traditional x86 minimum load location, likely to avoid historically +requiring a 4MB page table entry when only a portion of the first 4MB +would be used (since the NULL address is avoided). For s390 the +position could be 0x10000, but that is needlessly close to the NULL +address. + +Link: http://lkml.kernel.org/r/1498154792-49952-5-git-send-email-keescook@chromium.org +Signed-off-by: Kees Cook +Cc: Russell King +Cc: Catalin Marinas +Cc: Will Deacon +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Michael Ellerman +Cc: Martin Schwidefsky +Cc: Heiko Carstens +Cc: James Hogan +Cc: Pratyush Anand +Cc: Ingo Molnar +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/include/asm/elf.h | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +--- a/arch/s390/include/asm/elf.h ++++ b/arch/s390/include/asm/elf.h +@@ -154,14 +154,13 @@ extern unsigned int vdso_enabled; + #define CORE_DUMP_USE_REGSET + #define ELF_EXEC_PAGESIZE 4096 + +-/* This is the location that an ET_DYN program is loaded if exec'ed. Typical +- use of this is to invoke "./ld.so someprog" to test out a new version of +- the loader. We need to make sure that it is out of the way of the program +- that it will "exec", and that there is sufficient room for the brk. 64-bit +- tasks are aligned to 4GB. */ +-#define ELF_ET_DYN_BASE (is_32bit_task() ? \ +- (STACK_TOP / 3 * 2) : \ +- (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) ++/* ++ * This is the base location for PIE (ET_DYN with INTERP) loads. 
On ++ * 64-bit, this is raised to 4GB to leave the entire 32-bit address ++ * space open for things that want to use the area for 32-bit pointers. ++ */ ++#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ ++ 0x100000000UL) + + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. */ diff --git a/queue-4.4/selftests-capabilities-fix-the-test_execve-test.patch b/queue-4.4/selftests-capabilities-fix-the-test_execve-test.patch new file mode 100644 index 00000000000..a5fcd63a788 --- /dev/null +++ b/queue-4.4/selftests-capabilities-fix-the-test_execve-test.patch @@ -0,0 +1,74 @@ +From 796a3bae2fba6810427efdb314a1c126c9490fb3 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 29 Jun 2017 08:46:12 -0700 +Subject: selftests/capabilities: Fix the test_execve test + +From: Andy Lutomirski + +commit 796a3bae2fba6810427efdb314a1c126c9490fb3 upstream. + +test_execve does rather odd mount manipulations to safely create +temporary setuid and setgid executables that aren't visible to the +rest of the system. Those executables end up in the test's cwd, but +that cwd is MNT_DETACHed. + +The core namespace code considers MNT_DETACHed trees to belong to no +mount namespace at all and, in general, MNT_DETACHed trees are only +barely function. This interacted with commit 380cf5ba6b0a ("fs: +Treat foreign mounts as nosuid") to cause all MNT_DETACHed trees to +act as though they're nosuid, breaking the test. + +Fix it by just not detaching the tree. It's still in a private +mount namespace and is therefore still invisible to the rest of the +system (except via /proc, and the same nosuid logic will protect all +other programs on the system from believing in test_execve's setuid +bits). + +While we're at it, fix some blatant whitespace problems. + +Reported-by: Naresh Kamboju +Fixes: 380cf5ba6b0a ("fs: Treat foreign mounts as nosuid") +Cc: "Eric W. 
Biederman" +Cc: Kees Cook +Cc: Shuah Khan +Cc: Greg KH +Cc: linux-kselftest@vger.kernel.org +Signed-off-by: Andy Lutomirski +Acked-by: Greg Kroah-Hartman +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/capabilities/test_execve.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/tools/testing/selftests/capabilities/test_execve.c ++++ b/tools/testing/selftests/capabilities/test_execve.c +@@ -138,9 +138,6 @@ static void chdir_to_tmpfs(void) + + if (chdir(cwd) != 0) + err(1, "chdir to private tmpfs"); +- +- if (umount2(".", MNT_DETACH) != 0) +- err(1, "detach private tmpfs"); + } + + static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) +@@ -248,7 +245,7 @@ static int do_tests(int uid, const char + err(1, "chown"); + if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) + err(1, "chmod"); +-} ++ } + + capng_get_caps_process(); + +@@ -384,7 +381,7 @@ static int do_tests(int uid, const char + } else { + printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); + exec_other_validate_cap("./validate_cap_sgidnonroot", +- false, false, true, false); ++ false, false, true, false); + + if (fork_wait()) { + printf("[RUN]\tNon-root +ia, sgidroot => i\n"); diff --git a/queue-4.4/series b/queue-4.4/series index 12a3e21dd75..b96fc397261 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -12,3 +12,31 @@ net-ipv6-compare-lwstate-in-detecting-duplicate-nexthops.patch vrf-fix-bug_on-triggered-by-rx-when-destroying-a-vrf.patch rds-tcp-use-sock_create_lite-to-create-the-accept-socket.patch brcmfmac-fix-possible-buffer-overflow-in-brcmf_cfg80211_mgmt_tx.patch +cfg80211-define-nla_policy-for-nl80211_attr_local_mesh_power_mode.patch +cfg80211-validate-frequencies-nested-in-nl80211_attr_scan_frequencies.patch +cfg80211-check-if-pmkid-attribute-is-of-expected-size.patch +irqchip-gic-v3-fix-out-of-bound-access-in-gic_set_affinity.patch 
+parisc-report-sigsegv-instead-of-sigbus-when-running-out-of-stack.patch +parisc-use-compat_sys_keyctl.patch +parisc-dma-api-return-error-instead-of-bug_on-for-dma-ops-on-non-dma-devs.patch +parisc-mm-ensure-irqs-are-off-in-switch_mm.patch +tools-lib-lockdep-reduce-max_lock_depth-to-avoid-overflowing-lock_chain-depth.patch +kernel-extable.c-mark-core_kernel_text-notrace.patch +mm-list_lru.c-fix-list_lru_count_node-to-be-race-free.patch +fs-dcache.c-fix-spin-lockup-issue-on-nlru-lock.patch +checkpatch-silence-perl-5.26.0-unescaped-left-brace-warnings.patch +binfmt_elf-use-elf_et_dyn_base-only-for-pie.patch +arm-move-elf_et_dyn_base-to-4mb.patch +arm64-move-elf_et_dyn_base-to-4gb-4mb.patch +powerpc-move-elf_et_dyn_base-to-4gb-4mb.patch +s390-reduce-elf_et_dyn_base.patch +exec-limit-arg-stack-to-at-most-75-of-_stk_lim.patch +vt-fix-unchecked-__put_user-in-tioclinux-ioctls.patch +mnt-in-umount-propagation-reparent-in-a-separate-pass.patch +mnt-in-propgate_umount-handle-visiting-mounts-in-any-order.patch +mnt-make-propagate_umount-less-slow-for-overlapping-mount-propagation-trees.patch +selftests-capabilities-fix-the-test_execve-test.patch +tpm-get-rid-of-chip-pdev.patch +tpm-provide-strong-locking-for-device-removal.patch +add-shutdown-to-struct-class.patch +tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch diff --git a/queue-4.4/tools-lib-lockdep-reduce-max_lock_depth-to-avoid-overflowing-lock_chain-depth.patch b/queue-4.4/tools-lib-lockdep-reduce-max_lock_depth-to-avoid-overflowing-lock_chain-depth.patch new file mode 100644 index 00000000000..f1ac6a1966d --- /dev/null +++ b/queue-4.4/tools-lib-lockdep-reduce-max_lock_depth-to-avoid-overflowing-lock_chain-depth.patch @@ -0,0 +1,53 @@ +From 98dcea0cfd04e083ac74137ceb9a632604740e2d Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Thu, 25 May 2017 12:58:33 +0000 +Subject: tools/lib/lockdep: Reduce MAX_LOCK_DEPTH to avoid overflowing lock_chain::depth + +From: Ben Hutchings + +commit 
98dcea0cfd04e083ac74137ceb9a632604740e2d upstream. + +liblockdep has been broken since commit 75dd602a5198 ("lockdep: Fix +lock_chain::base size"), as that adds a check that MAX_LOCK_DEPTH is +within the range of lock_chain::depth and in liblockdep it is much +too large. + +That should have resulted in a compiler error, but didn't because: + +- the check uses ARRAY_SIZE(), which isn't yet defined in liblockdep + so is assumed to be an (undeclared) function +- putting a function call inside a BUILD_BUG_ON() expression quietly + turns it into some nonsense involving a variable-length array + +It did produce a compiler warning, but I didn't notice because +liblockdep already produces too many warnings if -Wall is enabled +(which I'll fix shortly). + +Even before that commit, which reduced lock_chain::depth from 8 bits +to 6, MAX_LOCK_DEPTH was too large. + +Signed-off-by: Ben Hutchings +Signed-off-by: Sasha Levin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: a.p.zijlstra@chello.nl +Link: http://lkml.kernel.org/r/20170525130005.5947-3-alexander.levin@verizon.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + tools/lib/lockdep/uinclude/linux/lockdep.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/lib/lockdep/uinclude/linux/lockdep.h ++++ b/tools/lib/lockdep/uinclude/linux/lockdep.h +@@ -8,7 +8,7 @@ + #include + #include + +-#define MAX_LOCK_DEPTH 2000UL ++#define MAX_LOCK_DEPTH 255UL + + #define asmlinkage + #define __visible diff --git a/queue-4.4/tpm-get-rid-of-chip-pdev.patch b/queue-4.4/tpm-get-rid-of-chip-pdev.patch new file mode 100644 index 00000000000..0144632e7cf --- /dev/null +++ b/queue-4.4/tpm-get-rid-of-chip-pdev.patch @@ -0,0 +1,821 @@ +From 8cfffc9d4d3786d3b496a021d7224e06328bac7d Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Mon, 29 Feb 2016 12:29:47 -0500 +Subject: tpm: Get rid of chip->pdev + +From: Jason Gunthorpe + +commit 8cfffc9d4d3786d3b496a021d7224e06328bac7d upstream. 
+ +This is a hold over from before the struct device conversion. + +- All prints should be using &chip->dev, which is the Linux + standard. This changes prints to use tpm0 as the device name, + not the PnP/etc ID. +- The few places involving sysfs/modules that really do need the + parent just use chip->dev.parent instead +- We no longer need to get_device(pdev) in any places since it is no + longer used by any of the code. The kref on the parent is held + by the device core during device_add and dropped in device_del + +Signed-off-by: Jason Gunthorpe +Signed-off-by: Stefan Berger +Tested-by: Stefan Berger +Reviewed-by: Jarkko Sakkinen +Tested-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/tpm/tpm-chip.c | 15 ++++++--------- + drivers/char/tpm/tpm-dev.c | 4 +--- + drivers/char/tpm/tpm-interface.c | 30 ++++++++++++++++-------------- + drivers/char/tpm/tpm-sysfs.c | 6 +++--- + drivers/char/tpm/tpm.h | 3 +-- + drivers/char/tpm/tpm2-cmd.c | 8 ++++---- + drivers/char/tpm/tpm_atmel.c | 14 +++++++------- + drivers/char/tpm/tpm_i2c_atmel.c | 16 ++++++++-------- + drivers/char/tpm/tpm_i2c_infineon.c | 6 +++--- + drivers/char/tpm/tpm_i2c_nuvoton.c | 22 +++++++++++----------- + drivers/char/tpm/tpm_infineon.c | 22 +++++++++++----------- + drivers/char/tpm/tpm_nsc.c | 20 ++++++++++---------- + drivers/char/tpm/tpm_tis.c | 16 ++++++++-------- + 13 files changed, 89 insertions(+), 93 deletions(-) + +--- a/drivers/char/tpm/tpm-chip.c ++++ b/drivers/char/tpm/tpm-chip.c +@@ -49,7 +49,7 @@ struct tpm_chip *tpm_chip_find_get(int c + if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) + continue; + +- if (try_module_get(pos->pdev->driver->owner)) { ++ if (try_module_get(pos->dev.parent->driver->owner)) { + chip = pos; + break; + } +@@ -112,13 +112,11 @@ struct tpm_chip *tpmm_chip_alloc(struct + + scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num); + +- chip->pdev = dev; +- + dev_set_drvdata(dev, 
chip); + + chip->dev.class = tpm_class; + chip->dev.release = tpm_dev_release; +- chip->dev.parent = chip->pdev; ++ chip->dev.parent = dev; + #ifdef CONFIG_ACPI + chip->dev.groups = chip->groups; + #endif +@@ -133,7 +131,7 @@ struct tpm_chip *tpmm_chip_alloc(struct + device_initialize(&chip->dev); + + cdev_init(&chip->cdev, &tpm_fops); +- chip->cdev.owner = chip->pdev->driver->owner; ++ chip->cdev.owner = dev->driver->owner; + chip->cdev.kobj.parent = &chip->dev.kobj; + + devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); +@@ -236,9 +234,8 @@ int tpm_chip_register(struct tpm_chip *c + chip->flags |= TPM_CHIP_FLAG_REGISTERED; + + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { +- rc = __compat_only_sysfs_link_entry_to_kobj(&chip->pdev->kobj, +- &chip->dev.kobj, +- "ppi"); ++ rc = __compat_only_sysfs_link_entry_to_kobj( ++ &chip->dev.parent->kobj, &chip->dev.kobj, "ppi"); + if (rc && rc != -ENOENT) { + tpm_chip_unregister(chip); + return rc; +@@ -273,7 +270,7 @@ void tpm_chip_unregister(struct tpm_chip + synchronize_rcu(); + + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) +- sysfs_remove_link(&chip->pdev->kobj, "ppi"); ++ sysfs_remove_link(&chip->dev.parent->kobj, "ppi"); + + tpm1_chip_unregister(chip); + tpm_del_char_device(chip); +--- a/drivers/char/tpm/tpm-dev.c ++++ b/drivers/char/tpm/tpm-dev.c +@@ -61,7 +61,7 @@ static int tpm_open(struct inode *inode, + * by the check of is_open variable, which is protected + * by driver_lock. 
*/ + if (test_and_set_bit(0, &chip->is_open)) { +- dev_dbg(chip->pdev, "Another process owns this TPM\n"); ++ dev_dbg(&chip->dev, "Another process owns this TPM\n"); + return -EBUSY; + } + +@@ -79,7 +79,6 @@ static int tpm_open(struct inode *inode, + INIT_WORK(&priv->work, timeout_work); + + file->private_data = priv; +- get_device(chip->pdev); + return 0; + } + +@@ -166,7 +165,6 @@ static int tpm_release(struct inode *ino + file->private_data = NULL; + atomic_set(&priv->data_pending, 0); + clear_bit(0, &priv->chip->is_open); +- put_device(priv->chip->pdev); + kfree(priv); + return 0; + } +--- a/drivers/char/tpm/tpm-interface.c ++++ b/drivers/char/tpm/tpm-interface.c +@@ -343,7 +343,7 @@ ssize_t tpm_transmit(struct tpm_chip *ch + if (count == 0) + return -ENODATA; + if (count > bufsiz) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "invalid count value %x %zx\n", count, bufsiz); + return -E2BIG; + } +@@ -353,7 +353,7 @@ ssize_t tpm_transmit(struct tpm_chip *ch + + rc = chip->ops->send(chip, (u8 *) buf, count); + if (rc < 0) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "tpm_transmit: tpm_send: error %zd\n", rc); + goto out; + } +@@ -372,7 +372,7 @@ ssize_t tpm_transmit(struct tpm_chip *ch + goto out_recv; + + if (chip->ops->req_canceled(chip, status)) { +- dev_err(chip->pdev, "Operation Canceled\n"); ++ dev_err(&chip->dev, "Operation Canceled\n"); + rc = -ECANCELED; + goto out; + } +@@ -382,14 +382,14 @@ ssize_t tpm_transmit(struct tpm_chip *ch + } while (time_before(jiffies, stop)); + + chip->ops->cancel(chip); +- dev_err(chip->pdev, "Operation Timed out\n"); ++ dev_err(&chip->dev, "Operation Timed out\n"); + rc = -ETIME; + goto out; + + out_recv: + rc = chip->ops->recv(chip, (u8 *) buf, bufsiz); + if (rc < 0) +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "tpm_transmit: tpm_recv: error %zd\n", rc); + out: + if (!(flags & TPM_TRANSMIT_UNLOCKED)) +@@ -416,7 +416,7 @@ ssize_t tpm_transmit_cmd(struct tpm_chip + + err = be32_to_cpu(header->return_code); + if 
(err != 0 && desc) +- dev_err(chip->pdev, "A TPM error (%d) occurred %s\n", err, ++ dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err, + desc); + + return err; +@@ -514,7 +514,7 @@ int tpm_get_timeouts(struct tpm_chip *ch + if (rc == TPM_ERR_INVALID_POSTINIT) { + /* The TPM is not started, we are the first to talk to it. + Execute a startup command. */ +- dev_info(chip->pdev, "Issuing TPM_STARTUP"); ++ dev_info(&chip->dev, "Issuing TPM_STARTUP"); + if (tpm_startup(chip, TPM_ST_CLEAR)) + return rc; + +@@ -526,7 +526,7 @@ int tpm_get_timeouts(struct tpm_chip *ch + 0, NULL); + } + if (rc) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "A TPM error (%zd) occurred attempting to determine the timeouts\n", + rc); + goto duration; +@@ -565,7 +565,7 @@ int tpm_get_timeouts(struct tpm_chip *ch + + /* Report adjusted timeouts */ + if (chip->vendor.timeout_adjusted) { +- dev_info(chip->pdev, ++ dev_info(&chip->dev, + HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n", + old_timeout[0], new_timeout[0], + old_timeout[1], new_timeout[1], +@@ -612,7 +612,7 @@ duration: + chip->vendor.duration[TPM_MEDIUM] *= 1000; + chip->vendor.duration[TPM_LONG] *= 1000; + chip->vendor.duration_adjusted = true; +- dev_info(chip->pdev, "Adjusting TPM timeout parameters."); ++ dev_info(&chip->dev, "Adjusting TPM timeout parameters."); + } + return 0; + } +@@ -802,7 +802,9 @@ int tpm_do_selftest(struct tpm_chip *chi + * around 300ms while the self test is ongoing, keep trying + * until the self test duration expires. 
*/ + if (rc == -ETIME) { +- dev_info(chip->pdev, HW_ERR "TPM command timed out during continue self test"); ++ dev_info( ++ &chip->dev, HW_ERR ++ "TPM command timed out during continue self test"); + msleep(delay_msec); + continue; + } +@@ -812,7 +814,7 @@ int tpm_do_selftest(struct tpm_chip *chi + + rc = be32_to_cpu(cmd.header.out.return_code); + if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) { +- dev_info(chip->pdev, ++ dev_info(&chip->dev, + "TPM is disabled/deactivated (0x%X)\n", rc); + /* TPM is disabled and/or deactivated; driver can + * proceed and TPM does handle commands for +@@ -966,10 +968,10 @@ int tpm_pm_suspend(struct device *dev) + } + + if (rc) +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "Error (%d) sending savestate before suspend\n", rc); + else if (try > 0) +- dev_warn(chip->pdev, "TPM savestate took %dms\n", ++ dev_warn(&chip->dev, "TPM savestate took %dms\n", + try * TPM_TIMEOUT_RETRY); + + return rc; +--- a/drivers/char/tpm/tpm-sysfs.c ++++ b/drivers/char/tpm/tpm-sysfs.c +@@ -284,16 +284,16 @@ static const struct attribute_group tpm_ + int tpm_sysfs_add_device(struct tpm_chip *chip) + { + int err; +- err = sysfs_create_group(&chip->pdev->kobj, ++ err = sysfs_create_group(&chip->dev.parent->kobj, + &tpm_dev_group); + + if (err) +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "failed to create sysfs attributes, %d\n", err); + return err; + } + + void tpm_sysfs_del_device(struct tpm_chip *chip) + { +- sysfs_remove_group(&chip->pdev->kobj, &tpm_dev_group); ++ sysfs_remove_group(&chip->dev.parent->kobj, &tpm_dev_group); + } +--- a/drivers/char/tpm/tpm.h ++++ b/drivers/char/tpm/tpm.h +@@ -171,7 +171,6 @@ enum tpm_chip_flags { + }; + + struct tpm_chip { +- struct device *pdev; /* Device stuff */ + struct device dev; + struct cdev cdev; + +@@ -203,7 +202,7 @@ struct tpm_chip { + + static inline void tpm_chip_put(struct tpm_chip *chip) + { +- module_put(chip->pdev->driver->owner); ++ module_put(chip->dev.parent->driver->owner); + } + + 
static inline int tpm_read_index(int base, int index) +--- a/drivers/char/tpm/tpm2-cmd.c ++++ b/drivers/char/tpm/tpm2-cmd.c +@@ -570,7 +570,7 @@ static void tpm2_flush_context_cmd(struc + + rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_FLUSH_CONTEXT); + if (rc) { +- dev_warn(chip->pdev, "0x%08x was not flushed, out of memory\n", ++ dev_warn(&chip->dev, "0x%08x was not flushed, out of memory\n", + handle); + return; + } +@@ -580,7 +580,7 @@ static void tpm2_flush_context_cmd(struc + rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags, + "flushing context"); + if (rc) +- dev_warn(chip->pdev, "0x%08x was not flushed, rc=%d\n", handle, ++ dev_warn(&chip->dev, "0x%08x was not flushed, rc=%d\n", handle, + rc); + + tpm_buf_destroy(&buf); +@@ -753,7 +753,7 @@ void tpm2_shutdown(struct tpm_chip *chip + * except print the error code on a system failure. + */ + if (rc < 0) +- dev_warn(chip->pdev, "transmit returned %d while stopping the TPM", ++ dev_warn(&chip->dev, "transmit returned %d while stopping the TPM", + rc); + } + EXPORT_SYMBOL_GPL(tpm2_shutdown); +@@ -820,7 +820,7 @@ static int tpm2_start_selftest(struct tp + * immediately. This is a workaround for that. 
+ */ + if (rc == TPM2_RC_TESTING) { +- dev_warn(chip->pdev, "Got RC_TESTING, ignoring\n"); ++ dev_warn(&chip->dev, "Got RC_TESTING, ignoring\n"); + rc = 0; + } + +--- a/drivers/char/tpm/tpm_atmel.c ++++ b/drivers/char/tpm/tpm_atmel.c +@@ -49,7 +49,7 @@ static int tpm_atml_recv(struct tpm_chip + for (i = 0; i < 6; i++) { + status = ioread8(chip->vendor.iobase + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { +- dev_err(chip->pdev, "error reading header\n"); ++ dev_err(&chip->dev, "error reading header\n"); + return -EIO; + } + *buf++ = ioread8(chip->vendor.iobase); +@@ -60,12 +60,12 @@ static int tpm_atml_recv(struct tpm_chip + size = be32_to_cpu(*native_size); + + if (count < size) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "Recv size(%d) less than available space\n", size); + for (; i < size; i++) { /* clear the waiting data anyway */ + status = ioread8(chip->vendor.iobase + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { +- dev_err(chip->pdev, "error reading data\n"); ++ dev_err(&chip->dev, "error reading data\n"); + return -EIO; + } + } +@@ -76,7 +76,7 @@ static int tpm_atml_recv(struct tpm_chip + for (; i < size; i++) { + status = ioread8(chip->vendor.iobase + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { +- dev_err(chip->pdev, "error reading data\n"); ++ dev_err(&chip->dev, "error reading data\n"); + return -EIO; + } + *buf++ = ioread8(chip->vendor.iobase); +@@ -86,7 +86,7 @@ static int tpm_atml_recv(struct tpm_chip + status = ioread8(chip->vendor.iobase + 1); + + if (status & ATML_STATUS_DATA_AVAIL) { +- dev_err(chip->pdev, "data available is stuck\n"); ++ dev_err(&chip->dev, "data available is stuck\n"); + return -EIO; + } + +@@ -97,9 +97,9 @@ static int tpm_atml_send(struct tpm_chip + { + int i; + +- dev_dbg(chip->pdev, "tpm_atml_send:\n"); ++ dev_dbg(&chip->dev, "tpm_atml_send:\n"); + for (i = 0; i < count; i++) { +- dev_dbg(chip->pdev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); ++ dev_dbg(&chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); + 
iowrite8(buf[i], chip->vendor.iobase); + } + +--- a/drivers/char/tpm/tpm_i2c_atmel.c ++++ b/drivers/char/tpm/tpm_i2c_atmel.c +@@ -52,7 +52,7 @@ struct priv_data { + static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) + { + struct priv_data *priv = chip->vendor.priv; +- struct i2c_client *client = to_i2c_client(chip->pdev); ++ struct i2c_client *client = to_i2c_client(chip->dev.parent); + s32 status; + + priv->len = 0; +@@ -62,7 +62,7 @@ static int i2c_atmel_send(struct tpm_chi + + status = i2c_master_send(client, buf, len); + +- dev_dbg(chip->pdev, ++ dev_dbg(&chip->dev, + "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, + (int)min_t(size_t, 64, len), buf, len, status); + return status; +@@ -71,7 +71,7 @@ static int i2c_atmel_send(struct tpm_chi + static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) + { + struct priv_data *priv = chip->vendor.priv; +- struct i2c_client *client = to_i2c_client(chip->pdev); ++ struct i2c_client *client = to_i2c_client(chip->dev.parent); + struct tpm_output_header *hdr = + (struct tpm_output_header *)priv->buffer; + u32 expected_len; +@@ -88,7 +88,7 @@ static int i2c_atmel_recv(struct tpm_chi + return -ENOMEM; + + if (priv->len >= expected_len) { +- dev_dbg(chip->pdev, ++ dev_dbg(&chip->dev, + "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__, + (int)min_t(size_t, 64, expected_len), buf, count, + expected_len); +@@ -97,7 +97,7 @@ static int i2c_atmel_recv(struct tpm_chi + } + + rc = i2c_master_recv(client, buf, expected_len); +- dev_dbg(chip->pdev, ++ dev_dbg(&chip->dev, + "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__, + (int)min_t(size_t, 64, expected_len), buf, count, + expected_len); +@@ -106,13 +106,13 @@ static int i2c_atmel_recv(struct tpm_chi + + static void i2c_atmel_cancel(struct tpm_chip *chip) + { +- dev_err(chip->pdev, "TPM operation cancellation was requested, but is not supported"); ++ dev_err(&chip->dev, "TPM operation cancellation was requested, but is not supported"); + 
} + + static u8 i2c_atmel_read_status(struct tpm_chip *chip) + { + struct priv_data *priv = chip->vendor.priv; +- struct i2c_client *client = to_i2c_client(chip->pdev); ++ struct i2c_client *client = to_i2c_client(chip->dev.parent); + int rc; + + /* The TPM fails the I2C read until it is ready, so we do the entire +@@ -125,7 +125,7 @@ static u8 i2c_atmel_read_status(struct t + /* Once the TPM has completed the command the command remains readable + * until another command is issued. */ + rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer)); +- dev_dbg(chip->pdev, ++ dev_dbg(&chip->dev, + "%s: sts=%d", __func__, rc); + if (rc <= 0) + return 0; +--- a/drivers/char/tpm/tpm_i2c_infineon.c ++++ b/drivers/char/tpm/tpm_i2c_infineon.c +@@ -446,7 +446,7 @@ static int tpm_tis_i2c_recv(struct tpm_c + /* read first 10 bytes, including tag, paramsize, and result */ + size = recv_data(chip, buf, TPM_HEADER_SIZE); + if (size < TPM_HEADER_SIZE) { +- dev_err(chip->pdev, "Unable to read header\n"); ++ dev_err(&chip->dev, "Unable to read header\n"); + goto out; + } + +@@ -459,14 +459,14 @@ static int tpm_tis_i2c_recv(struct tpm_c + size += recv_data(chip, &buf[TPM_HEADER_SIZE], + expected - TPM_HEADER_SIZE); + if (size < expected) { +- dev_err(chip->pdev, "Unable to read remainder of result\n"); ++ dev_err(&chip->dev, "Unable to read remainder of result\n"); + size = -ETIME; + goto out; + } + + wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &status); + if (status & TPM_STS_DATA_AVAIL) { /* retry? 
*/ +- dev_err(chip->pdev, "Error left over data\n"); ++ dev_err(&chip->dev, "Error left over data\n"); + size = -EIO; + goto out; + } +--- a/drivers/char/tpm/tpm_i2c_nuvoton.c ++++ b/drivers/char/tpm/tpm_i2c_nuvoton.c +@@ -96,13 +96,13 @@ static s32 i2c_nuvoton_write_buf(struct + /* read TPM_STS register */ + static u8 i2c_nuvoton_read_status(struct tpm_chip *chip) + { +- struct i2c_client *client = to_i2c_client(chip->pdev); ++ struct i2c_client *client = to_i2c_client(chip->dev.parent); + s32 status; + u8 data; + + status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data); + if (status <= 0) { +- dev_err(chip->pdev, "%s() error return %d\n", __func__, ++ dev_err(&chip->dev, "%s() error return %d\n", __func__, + status); + data = TPM_STS_ERR_VAL; + } +@@ -127,13 +127,13 @@ static s32 i2c_nuvoton_write_status(stru + /* write commandReady to TPM_STS register */ + static void i2c_nuvoton_ready(struct tpm_chip *chip) + { +- struct i2c_client *client = to_i2c_client(chip->pdev); ++ struct i2c_client *client = to_i2c_client(chip->dev.parent); + s32 status; + + /* this causes the current command to be aborted */ + status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY); + if (status < 0) +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "%s() fail to write TPM_STS.commandReady\n", __func__); + } + +@@ -212,7 +212,7 @@ static int i2c_nuvoton_wait_for_stat(str + return 0; + } while (time_before(jiffies, stop)); + } +- dev_err(chip->pdev, "%s(%02x, %02x) -> timeout\n", __func__, mask, ++ dev_err(&chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask, + value); + return -ETIMEDOUT; + } +@@ -240,7 +240,7 @@ static int i2c_nuvoton_recv_data(struct + &chip->vendor.read_queue) == 0) { + burst_count = i2c_nuvoton_get_burstcount(client, chip); + if (burst_count < 0) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "%s() fail to read burstCount=%d\n", __func__, + burst_count); + return -EIO; +@@ -249,12 +249,12 @@ static int i2c_nuvoton_recv_data(struct + rc = 
i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R, + bytes2read, &buf[size]); + if (rc < 0) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "%s() fail on i2c_nuvoton_read_buf()=%d\n", + __func__, rc); + return -EIO; + } +- dev_dbg(chip->pdev, "%s(%d):", __func__, bytes2read); ++ dev_dbg(&chip->dev, "%s(%d):", __func__, bytes2read); + size += bytes2read; + } + +@@ -264,7 +264,7 @@ static int i2c_nuvoton_recv_data(struct + /* Read TPM command results */ + static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) + { +- struct device *dev = chip->pdev; ++ struct device *dev = chip->dev.parent; + struct i2c_client *client = to_i2c_client(dev); + s32 rc; + int expected, status, burst_count, retries, size = 0; +@@ -334,7 +334,7 @@ static int i2c_nuvoton_recv(struct tpm_c + break; + } + i2c_nuvoton_ready(chip); +- dev_dbg(chip->pdev, "%s() -> %d\n", __func__, size); ++ dev_dbg(&chip->dev, "%s() -> %d\n", __func__, size); + return size; + } + +@@ -347,7 +347,7 @@ static int i2c_nuvoton_recv(struct tpm_c + */ + static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) + { +- struct device *dev = chip->pdev; ++ struct device *dev = chip->dev.parent; + struct i2c_client *client = to_i2c_client(dev); + u32 ordinal; + size_t count = 0; +--- a/drivers/char/tpm/tpm_infineon.c ++++ b/drivers/char/tpm/tpm_infineon.c +@@ -195,9 +195,9 @@ static int wait(struct tpm_chip *chip, i + } + if (i == TPM_MAX_TRIES) { /* timeout occurs */ + if (wait_for_bit == STAT_XFE) +- dev_err(chip->pdev, "Timeout in wait(STAT_XFE)\n"); ++ dev_err(&chip->dev, "Timeout in wait(STAT_XFE)\n"); + if (wait_for_bit == STAT_RDA) +- dev_err(chip->pdev, "Timeout in wait(STAT_RDA)\n"); ++ dev_err(&chip->dev, "Timeout in wait(STAT_RDA)\n"); + return -EIO; + } + return 0; +@@ -220,7 +220,7 @@ static void wait_and_send(struct tpm_chi + static void tpm_wtx(struct tpm_chip *chip) + { + number_of_wtx++; +- dev_info(chip->pdev, "Granting WTX (%02d / %02d)\n", ++ dev_info(&chip->dev, "Granting 
WTX (%02d / %02d)\n", + number_of_wtx, TPM_MAX_WTX_PACKAGES); + wait_and_send(chip, TPM_VL_VER); + wait_and_send(chip, TPM_CTRL_WTX); +@@ -231,7 +231,7 @@ static void tpm_wtx(struct tpm_chip *chi + + static void tpm_wtx_abort(struct tpm_chip *chip) + { +- dev_info(chip->pdev, "Aborting WTX\n"); ++ dev_info(&chip->dev, "Aborting WTX\n"); + wait_and_send(chip, TPM_VL_VER); + wait_and_send(chip, TPM_CTRL_WTX_ABORT); + wait_and_send(chip, 0x00); +@@ -257,7 +257,7 @@ recv_begin: + } + + if (buf[0] != TPM_VL_VER) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "Wrong transport protocol implementation!\n"); + return -EIO; + } +@@ -272,7 +272,7 @@ recv_begin: + } + + if ((size == 0x6D00) && (buf[1] == 0x80)) { +- dev_err(chip->pdev, "Error handling on vendor layer!\n"); ++ dev_err(&chip->dev, "Error handling on vendor layer!\n"); + return -EIO; + } + +@@ -284,7 +284,7 @@ recv_begin: + } + + if (buf[1] == TPM_CTRL_WTX) { +- dev_info(chip->pdev, "WTX-package received\n"); ++ dev_info(&chip->dev, "WTX-package received\n"); + if (number_of_wtx < TPM_MAX_WTX_PACKAGES) { + tpm_wtx(chip); + goto recv_begin; +@@ -295,14 +295,14 @@ recv_begin: + } + + if (buf[1] == TPM_CTRL_WTX_ABORT_ACK) { +- dev_info(chip->pdev, "WTX-abort acknowledged\n"); ++ dev_info(&chip->dev, "WTX-abort acknowledged\n"); + return size; + } + + if (buf[1] == TPM_CTRL_ERROR) { +- dev_err(chip->pdev, "ERROR-package received:\n"); ++ dev_err(&chip->dev, "ERROR-package received:\n"); + if (buf[4] == TPM_INF_NAK) +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "-> Negative acknowledgement" + " - retransmit command!\n"); + return -EIO; +@@ -321,7 +321,7 @@ static int tpm_inf_send(struct tpm_chip + + ret = empty_fifo(chip, 1); + if (ret) { +- dev_err(chip->pdev, "Timeout while clearing FIFO\n"); ++ dev_err(&chip->dev, "Timeout while clearing FIFO\n"); + return -EIO; + } + +--- a/drivers/char/tpm/tpm_nsc.c ++++ b/drivers/char/tpm/tpm_nsc.c +@@ -113,7 +113,7 @@ static int nsc_wait_for_ready(struct tpm + } + while 
(time_before(jiffies, stop)); + +- dev_info(chip->pdev, "wait for ready failed\n"); ++ dev_info(&chip->dev, "wait for ready failed\n"); + return -EBUSY; + } + +@@ -129,12 +129,12 @@ static int tpm_nsc_recv(struct tpm_chip + return -EIO; + + if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) { +- dev_err(chip->pdev, "F0 timeout\n"); ++ dev_err(&chip->dev, "F0 timeout\n"); + return -EIO; + } + if ((data = + inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_NORMAL) { +- dev_err(chip->pdev, "not in normal mode (0x%x)\n", ++ dev_err(&chip->dev, "not in normal mode (0x%x)\n", + data); + return -EIO; + } +@@ -143,7 +143,7 @@ static int tpm_nsc_recv(struct tpm_chip + for (p = buffer; p < &buffer[count]; p++) { + if (wait_for_stat + (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "OBF timeout (while reading data)\n"); + return -EIO; + } +@@ -154,11 +154,11 @@ static int tpm_nsc_recv(struct tpm_chip + + if ((data & NSC_STATUS_F0) == 0 && + (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0)) { +- dev_err(chip->pdev, "F0 not set\n"); ++ dev_err(&chip->dev, "F0 not set\n"); + return -EIO; + } + if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_EOC) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "expected end of command(0x%x)\n", data); + return -EIO; + } +@@ -189,19 +189,19 @@ static int tpm_nsc_send(struct tpm_chip + return -EIO; + + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { +- dev_err(chip->pdev, "IBF timeout\n"); ++ dev_err(&chip->dev, "IBF timeout\n"); + return -EIO; + } + + outb(NSC_COMMAND_NORMAL, chip->vendor.base + NSC_COMMAND); + if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) { +- dev_err(chip->pdev, "IBR timeout\n"); ++ dev_err(&chip->dev, "IBR timeout\n"); + return -EIO; + } + + for (i = 0; i < count; i++) { + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + "IBF timeout (while 
writing data)\n"); + return -EIO; + } +@@ -209,7 +209,7 @@ static int tpm_nsc_send(struct tpm_chip + } + + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { +- dev_err(chip->pdev, "IBF timeout\n"); ++ dev_err(&chip->dev, "IBF timeout\n"); + return -EIO; + } + outb(NSC_COMMAND_EOC, chip->vendor.base + NSC_COMMAND); +--- a/drivers/char/tpm/tpm_tis.c ++++ b/drivers/char/tpm/tpm_tis.c +@@ -293,7 +293,7 @@ static int tpm_tis_recv(struct tpm_chip + /* read first 10 bytes, including tag, paramsize, and result */ + if ((size = + recv_data(chip, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) { +- dev_err(chip->pdev, "Unable to read header\n"); ++ dev_err(&chip->dev, "Unable to read header\n"); + goto out; + } + +@@ -306,7 +306,7 @@ static int tpm_tis_recv(struct tpm_chip + if ((size += + recv_data(chip, &buf[TPM_HEADER_SIZE], + expected - TPM_HEADER_SIZE)) < expected) { +- dev_err(chip->pdev, "Unable to read remainder of result\n"); ++ dev_err(&chip->dev, "Unable to read remainder of result\n"); + size = -ETIME; + goto out; + } +@@ -315,7 +315,7 @@ static int tpm_tis_recv(struct tpm_chip + &chip->vendor.int_queue, false); + status = tpm_tis_status(chip); + if (status & TPM_STS_DATA_AVAIL) { /* retry? 
*/ +- dev_err(chip->pdev, "Error left over data\n"); ++ dev_err(&chip->dev, "Error left over data\n"); + size = -EIO; + goto out; + } +@@ -401,7 +401,7 @@ static void disable_interrupts(struct tp + iowrite32(intmask, + chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); +- devm_free_irq(chip->pdev, chip->vendor.irq, chip); ++ devm_free_irq(&chip->dev, chip->vendor.irq, chip); + chip->vendor.irq = 0; + } + +@@ -463,7 +463,7 @@ static int tpm_tis_send(struct tpm_chip + msleep(1); + if (!priv->irq_tested) { + disable_interrupts(chip); +- dev_err(chip->pdev, ++ dev_err(&chip->dev, + FW_BUG "TPM interrupt not working, polling instead\n"); + } + priv->irq_tested = true; +@@ -533,7 +533,7 @@ static int probe_itpm(struct tpm_chip *c + + rc = tpm_tis_send_data(chip, cmd_getticks, len); + if (rc == 0) { +- dev_info(chip->pdev, "Detected an iTPM.\n"); ++ dev_info(&chip->dev, "Detected an iTPM.\n"); + rc = 1; + } else + rc = -EFAULT; +@@ -766,7 +766,7 @@ static int tpm_tis_init(struct device *d + if (devm_request_irq + (dev, i, tis_int_probe, IRQF_SHARED, + chip->devname, chip) != 0) { +- dev_info(chip->pdev, ++ dev_info(&chip->dev, + "Unable to request irq: %d for probe\n", + i); + continue; +@@ -818,7 +818,7 @@ static int tpm_tis_init(struct device *d + if (devm_request_irq + (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED, + chip->devname, chip) != 0) { +- dev_info(chip->pdev, ++ dev_info(&chip->dev, + "Unable to request irq: %d for use\n", + chip->vendor.irq); + chip->vendor.irq = 0; diff --git a/queue-4.4/tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch b/queue-4.4/tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch new file mode 100644 index 00000000000..a5fb3281d85 --- /dev/null +++ b/queue-4.4/tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch @@ -0,0 +1,101 @@ +From d1bd4a792d3961a04e6154118816b00167aad91a Mon Sep 17 00:00:00 2001 +From: Josh Zimmerman +Date: Sun, 25 Jun 2017 14:53:24 -0700 +Subject: tpm: Issue a TPM2_Shutdown for TPM2 devices. 
+ +From: Josh Zimmerman + +commit d1bd4a792d3961a04e6154118816b00167aad91a upstream. + +If a TPM2 loses power without a TPM2_Shutdown command being issued (a +"disorderly reboot"), it may lose some state that has yet to be +persisted to NVRam, and will increment the DA counter. After the DA +counter gets sufficiently large, the TPM will lock the user out. + +NOTE: This only changes behavior on TPM2 devices. Since TPM1 uses sysfs, +and sysfs relies on implicit locking on chip->ops, it is not safe to +allow this code to run in TPM1, or to add sysfs support to TPM2, until +that locking is made explicit. + +Signed-off-by: Josh Zimmerman +Cc: stable@vger.kernel.org +Fixes: 74d6b3ceaa17 ("tpm: fix suspend/resume paths for TPM 2.0") +Reviewed-by: Jarkko Sakkinen +Tested-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: James Morris +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/tpm/tpm-chip.c | 36 ++++++++++++++++++++++++++++++++++++ + drivers/char/tpm/tpm-sysfs.c | 7 +++++++ + 2 files changed, 43 insertions(+) + +--- a/drivers/char/tpm/tpm-chip.c ++++ b/drivers/char/tpm/tpm-chip.c +@@ -124,6 +124,41 @@ static void tpm_dev_release(struct devic + kfree(chip); + } + ++ ++/** ++ * tpm_class_shutdown() - prepare the TPM device for loss of power. ++ * @dev: device to which the chip is associated. ++ * ++ * Issues a TPM2_Shutdown command prior to loss of power, as required by the ++ * TPM 2.0 spec. ++ * Then, calls bus- and device- specific shutdown code. ++ * ++ * XXX: This codepath relies on the fact that sysfs is not enabled for ++ * TPM2: sysfs uses an implicit lock on chip->ops, so this could race if TPM2 ++ * has sysfs support enabled before TPM sysfs's implicit locking is fixed. 
++ */ ++static int tpm_class_shutdown(struct device *dev) ++{ ++ struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev); ++ ++ if (chip->flags & TPM_CHIP_FLAG_TPM2) { ++ down_write(&chip->ops_sem); ++ tpm2_shutdown(chip, TPM2_SU_CLEAR); ++ chip->ops = NULL; ++ up_write(&chip->ops_sem); ++ } ++ /* Allow bus- and device-specific code to run. Note: since chip->ops ++ * is NULL, more-specific shutdown code will not be able to issue TPM ++ * commands. ++ */ ++ if (dev->bus && dev->bus->shutdown) ++ dev->bus->shutdown(dev); ++ else if (dev->driver && dev->driver->shutdown) ++ dev->driver->shutdown(dev); ++ return 0; ++} ++ ++ + /** + * tpmm_chip_alloc() - allocate a new struct tpm_chip instance + * @dev: device to which the chip is associated +@@ -166,6 +201,7 @@ struct tpm_chip *tpmm_chip_alloc(struct + dev_set_drvdata(dev, chip); + + chip->dev.class = tpm_class; ++ chip->dev.class->shutdown = tpm_class_shutdown; + chip->dev.release = tpm_dev_release; + chip->dev.parent = dev; + #ifdef CONFIG_ACPI +--- a/drivers/char/tpm/tpm-sysfs.c ++++ b/drivers/char/tpm/tpm-sysfs.c +@@ -284,6 +284,13 @@ static const struct attribute_group tpm_ + int tpm_sysfs_add_device(struct tpm_chip *chip) + { + int err; ++ ++ /* XXX: If you wish to remove this restriction, you must first update ++ * tpm_sysfs to explicitly lock chip->ops. 
++ */ ++ if (chip->flags & TPM_CHIP_FLAG_TPM2) ++ return 0; ++ + err = sysfs_create_group(&chip->dev.parent->kobj, + &tpm_dev_group); + diff --git a/queue-4.4/tpm-provide-strong-locking-for-device-removal.patch b/queue-4.4/tpm-provide-strong-locking-for-device-removal.patch new file mode 100644 index 00000000000..27bda97cfdf --- /dev/null +++ b/queue-4.4/tpm-provide-strong-locking-for-device-removal.patch @@ -0,0 +1,307 @@ +From 4e26195f240d73150e8308ae42874702e3df8d2c Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Fri, 12 Feb 2016 20:29:53 -0700 +Subject: tpm: Provide strong locking for device removal + +From: Jason Gunthorpe + +commit 4e26195f240d73150e8308ae42874702e3df8d2c upstream. + +Add a read/write semaphore around the ops function pointers so +ops can be set to null when the driver un-registers. + +Previously the tpm core expected module locking to be enough to +ensure that tpm_unregister could not be called during certain times, +however that hasn't been sufficient for a long time. + +Introduce a read/write semaphore around 'ops' so the core can set +it to null when unregistering. This provides a strong fence around +the driver callbacks, guaranteeing to the driver that no callbacks +are running or will run again. + +For now the ops_lock is placed very high in the call stack, it could +be pushed down and made more granular in future if necessary. 
+ +Signed-off-by: Jason Gunthorpe +Reviewed-by: Stefan Berger +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman + + +--- + drivers/char/tpm/tpm-chip.c | 72 +++++++++++++++++++++++++++++++++++---- + drivers/char/tpm/tpm-dev.c | 11 +++++ + drivers/char/tpm/tpm-interface.c | 19 +++++----- + drivers/char/tpm/tpm-sysfs.c | 5 ++ + drivers/char/tpm/tpm.h | 14 ++++--- + 5 files changed, 100 insertions(+), 21 deletions(-) + +--- a/drivers/char/tpm/tpm-chip.c ++++ b/drivers/char/tpm/tpm-chip.c +@@ -36,10 +36,60 @@ static DEFINE_SPINLOCK(driver_lock); + struct class *tpm_class; + dev_t tpm_devt; + +-/* +- * tpm_chip_find_get - return tpm_chip for a given chip number +- * @chip_num the device number for the chip ++/** ++ * tpm_try_get_ops() - Get a ref to the tpm_chip ++ * @chip: Chip to ref ++ * ++ * The caller must already have some kind of locking to ensure that chip is ++ * valid. This function will lock the chip so that the ops member can be ++ * accessed safely. The locking prevents tpm_chip_unregister from ++ * completing, so it should not be held for long periods. ++ * ++ * Returns -ERRNO if the chip could not be got. + */ ++int tpm_try_get_ops(struct tpm_chip *chip) ++{ ++ int rc = -EIO; ++ ++ get_device(&chip->dev); ++ ++ down_read(&chip->ops_sem); ++ if (!chip->ops) ++ goto out_lock; ++ ++ if (!try_module_get(chip->dev.parent->driver->owner)) ++ goto out_lock; ++ ++ return 0; ++out_lock: ++ up_read(&chip->ops_sem); ++ put_device(&chip->dev); ++ return rc; ++} ++EXPORT_SYMBOL_GPL(tpm_try_get_ops); ++ ++/** ++ * tpm_put_ops() - Release a ref to the tpm_chip ++ * @chip: Chip to put ++ * ++ * This is the opposite pair to tpm_try_get_ops(). After this returns chip may ++ * be kfree'd. 
++ */ ++void tpm_put_ops(struct tpm_chip *chip) ++{ ++ module_put(chip->dev.parent->driver->owner); ++ up_read(&chip->ops_sem); ++ put_device(&chip->dev); ++} ++EXPORT_SYMBOL_GPL(tpm_put_ops); ++ ++/** ++ * tpm_chip_find_get() - return tpm_chip for a given chip number ++ * @chip_num: id to find ++ * ++ * The return'd chip has been tpm_try_get_ops'd and must be released via ++ * tpm_put_ops ++ */ + struct tpm_chip *tpm_chip_find_get(int chip_num) + { + struct tpm_chip *pos, *chip = NULL; +@@ -49,10 +99,10 @@ struct tpm_chip *tpm_chip_find_get(int c + if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) + continue; + +- if (try_module_get(pos->dev.parent->driver->owner)) { ++ /* rcu prevents chip from being free'd */ ++ if (!tpm_try_get_ops(pos)) + chip = pos; +- break; +- } ++ break; + } + rcu_read_unlock(); + return chip; +@@ -94,6 +144,7 @@ struct tpm_chip *tpmm_chip_alloc(struct + return ERR_PTR(-ENOMEM); + + mutex_init(&chip->tpm_mutex); ++ init_rwsem(&chip->ops_sem); + INIT_LIST_HEAD(&chip->list); + + chip->ops = ops; +@@ -171,6 +222,12 @@ static int tpm_add_char_device(struct tp + static void tpm_del_char_device(struct tpm_chip *chip) + { + cdev_del(&chip->cdev); ++ ++ /* Make the driver uncallable. */ ++ down_write(&chip->ops_sem); ++ chip->ops = NULL; ++ up_write(&chip->ops_sem); ++ + device_del(&chip->dev); + } + +@@ -256,6 +313,9 @@ EXPORT_SYMBOL_GPL(tpm_chip_register); + * Takes the chip first away from the list of available TPM chips and then + * cleans up all the resources reserved by tpm_chip_register(). + * ++ * Once this function returns the driver call backs in 'op's will not be ++ * running and will no longer start. ++ * + * NOTE: This function should be only called before deinitializing chip + * resources. 
+ */ +--- a/drivers/char/tpm/tpm-dev.c ++++ b/drivers/char/tpm/tpm-dev.c +@@ -136,9 +136,18 @@ static ssize_t tpm_write(struct file *fi + return -EFAULT; + } + +- /* atomic tpm command send and result receive */ ++ /* atomic tpm command send and result receive. We only hold the ops ++ * lock during this period so that the tpm can be unregistered even if ++ * the char dev is held open. ++ */ ++ if (tpm_try_get_ops(priv->chip)) { ++ mutex_unlock(&priv->buffer_mutex); ++ return -EPIPE; ++ } + out_size = tpm_transmit(priv->chip, priv->data_buffer, + sizeof(priv->data_buffer), 0); ++ ++ tpm_put_ops(priv->chip); + if (out_size < 0) { + mutex_unlock(&priv->buffer_mutex); + return out_size; +--- a/drivers/char/tpm/tpm-interface.c ++++ b/drivers/char/tpm/tpm-interface.c +@@ -687,7 +687,7 @@ int tpm_is_tpm2(u32 chip_num) + + rc = (chip->flags & TPM_CHIP_FLAG_TPM2) != 0; + +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + + return rc; + } +@@ -716,7 +716,7 @@ int tpm_pcr_read(u32 chip_num, int pcr_i + rc = tpm2_pcr_read(chip, pcr_idx, res_buf); + else + rc = tpm_pcr_read_dev(chip, pcr_idx, res_buf); +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + return rc; + } + EXPORT_SYMBOL_GPL(tpm_pcr_read); +@@ -751,7 +751,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr + + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + rc = tpm2_pcr_extend(chip, pcr_idx, hash); +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + return rc; + } + +@@ -761,7 +761,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr + rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, 0, + "attempting extend a PCR value"); + +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + return rc; + } + EXPORT_SYMBOL_GPL(tpm_pcr_extend); +@@ -842,7 +842,7 @@ int tpm_send(u32 chip_num, void *cmd, si + + rc = tpm_transmit_cmd(chip, cmd, buflen, 0, "attempting tpm_cmd"); + +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + return rc; + } + EXPORT_SYMBOL_GPL(tpm_send); +@@ -1025,7 +1025,7 @@ int tpm_get_random(u32 chip_num, u8 *out + + if (chip->flags & 
TPM_CHIP_FLAG_TPM2) { + err = tpm2_get_random(chip, out, max); +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + return err; + } + +@@ -1047,7 +1047,7 @@ int tpm_get_random(u32 chip_num, u8 *out + num_bytes -= recd; + } while (retries-- && total < max); + +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + return total ? total : -EIO; + } + EXPORT_SYMBOL_GPL(tpm_get_random); +@@ -1073,7 +1073,7 @@ int tpm_seal_trusted(u32 chip_num, struc + + rc = tpm2_seal_trusted(chip, payload, options); + +- tpm_chip_put(chip); ++ tpm_put_ops(chip); + return rc; + } + EXPORT_SYMBOL_GPL(tpm_seal_trusted); +@@ -1099,7 +1099,8 @@ int tpm_unseal_trusted(u32 chip_num, str + + rc = tpm2_unseal_trusted(chip, payload, options); + +- tpm_chip_put(chip); ++ tpm_put_ops(chip); ++ + return rc; + } + EXPORT_SYMBOL_GPL(tpm_unseal_trusted); +--- a/drivers/char/tpm/tpm-sysfs.c ++++ b/drivers/char/tpm/tpm-sysfs.c +@@ -295,5 +295,10 @@ int tpm_sysfs_add_device(struct tpm_chip + + void tpm_sysfs_del_device(struct tpm_chip *chip) + { ++ /* The sysfs routines rely on an implicit tpm_try_get_ops, this ++ * function is called before ops is null'd and the sysfs core ++ * synchronizes this removal so that no callbacks are running or can ++ * run again ++ */ + sysfs_remove_group(&chip->dev.parent->kobj, &tpm_dev_group); + } +--- a/drivers/char/tpm/tpm.h ++++ b/drivers/char/tpm/tpm.h +@@ -174,7 +174,13 @@ struct tpm_chip { + struct device dev; + struct cdev cdev; + ++ /* A driver callback under ops cannot be run unless ops_sem is held ++ * (sometimes implicitly, eg for the sysfs code). ops becomes null ++ * when the driver is unregistered, see tpm_try_get_ops. 
++ */ ++ struct rw_semaphore ops_sem; + const struct tpm_class_ops *ops; ++ + unsigned int flags; + + int dev_num; /* /dev/tpm# */ +@@ -200,11 +206,6 @@ struct tpm_chip { + + #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) + +-static inline void tpm_chip_put(struct tpm_chip *chip) +-{ +- module_put(chip->dev.parent->driver->owner); +-} +- + static inline int tpm_read_index(int base, int index) + { + outb(index, base); +@@ -516,6 +517,9 @@ extern int wait_for_tpm_stat(struct tpm_ + wait_queue_head_t *, bool); + + struct tpm_chip *tpm_chip_find_get(int chip_num); ++__must_check int tpm_try_get_ops(struct tpm_chip *chip); ++void tpm_put_ops(struct tpm_chip *chip); ++ + extern struct tpm_chip *tpmm_chip_alloc(struct device *dev, + const struct tpm_class_ops *ops); + extern int tpm_chip_register(struct tpm_chip *chip); diff --git a/queue-4.4/vt-fix-unchecked-__put_user-in-tioclinux-ioctls.patch b/queue-4.4/vt-fix-unchecked-__put_user-in-tioclinux-ioctls.patch new file mode 100644 index 00000000000..aa2b965b441 --- /dev/null +++ b/queue-4.4/vt-fix-unchecked-__put_user-in-tioclinux-ioctls.patch @@ -0,0 +1,53 @@ +From 6987dc8a70976561d22450b5858fc9767788cc1c Mon Sep 17 00:00:00 2001 +From: Adam Borowski +Date: Sat, 3 Jun 2017 09:35:06 +0200 +Subject: vt: fix unchecked __put_user() in tioclinux ioctls + +From: Adam Borowski + +commit 6987dc8a70976561d22450b5858fc9767788cc1c upstream. + +Only read access is checked before this call. + +Actually, at the moment this is not an issue, as every in-tree arch does +the same manual checks for VERIFY_READ vs VERIFY_WRITE, relying on the MMU +to tell them apart, but this wasn't the case in the past and may happen +again on some odd arch in the future. + +If anyone cares about 3.7 and earlier, this is a security hole (untested) +on real 80386 CPUs. 
+ +Signed-off-by: Adam Borowski +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/vt/vt.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/tty/vt/vt.c ++++ b/drivers/tty/vt/vt.c +@@ -2708,13 +2708,13 @@ int tioclinux(struct tty_struct *tty, un + * related to the kernel should not use this. + */ + data = vt_get_shift_state(); +- ret = __put_user(data, p); ++ ret = put_user(data, p); + break; + case TIOCL_GETMOUSEREPORTING: + console_lock(); /* May be overkill */ + data = mouse_reporting(); + console_unlock(); +- ret = __put_user(data, p); ++ ret = put_user(data, p); + break; + case TIOCL_SETVESABLANK: + console_lock(); +@@ -2723,7 +2723,7 @@ int tioclinux(struct tty_struct *tty, un + break; + case TIOCL_GETKMSGREDIRECT: + data = vt_get_kmsg_redirect(); +- ret = __put_user(data, p); ++ ret = put_user(data, p); + break; + case TIOCL_SETKMSGREDIRECT: + if (!capable(CAP_SYS_ADMIN)) { -- 2.47.3