From a412bc0fd974f93b8f3dbcc61723e36fbdedbd99 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 2 Oct 2017 14:50:27 +0200 Subject: [PATCH] 4.9-stable patches added patches: arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch arm64-make-sure-spsel-is-always-set.patch bsg-lib-don-t-free-job-in-bsg_prepare_job.patch btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch btrfs-prevent-to-set-invalid-default-subvolid.patch btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch etnaviv-fix-gem-object-list-corruption.patch fix-smb3.1.1-guest-authentication-to-samba.patch iw_cxgb4-put-ep-reference-in-pass_accept_req.patch iw_cxgb4-remove-the-stid-on-listen-create-failure.patch kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch kvm-vmx-extract-__pi_post_block.patch kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch nl80211-check-for-the-required-netlink-attributes-presence.patch pci-fix-race-condition-with-driver_override.patch pm-core-fix-device_pm_check_callbacks.patch powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch s390-mm-fix-write-access-check-in-gup_huge_pmd.patch seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch xfs-validate-bdev-support-for-dax-inode-flag.patch --- 
...tion-faults-via-do_translation_fault.patch | 65 ++++++ .../arm64-make-sure-spsel-is-always-set.patch | 40 ++++ ...ib-don-t-free-job-in-bsg_prepare_job.patch | 31 +++ ...er-dereference-from-free_reloc_roots.patch | 39 ++++ ...vent-to-set-invalid-default-subvolid.patch | 37 +++ ...ror-to-btrfs_cmp_data_prepare-caller.patch | 38 ++++ ...naviv-fix-gem-object-list-corruption.patch | 38 ++++ ...b3.1.1-guest-authentication-to-samba.patch | 32 +++ ...-put-ep-reference-in-pass_accept_req.patch | 35 +++ ...ve-the-stid-on-listen-create-failure.patch | 33 +++ ...-allow-l2-to-access-the-hardware-cr8.patch | 39 ++++ ...list-add-with-vt-d-posted-interrupts.patch | 157 +++++++++++++ ...o-not-bug-on-out-of-bounds-guest-irq.patch | 57 +++++ .../kvm-vmx-extract-__pi_post_block.patch | 118 ++++++++++ ...mx-simplify-and-fix-vmx_vcpu_pi_load.patch | 130 +++++++++++ ...f-in-rcu-read-side-critical-sections.patch | 81 +++++++ ...required-netlink-attributes-presence.patch | 41 ++++ ...-race-condition-with-driver_override.patch | 66 ++++++ ...m-core-fix-device_pm_check_callbacks.patch | 43 ++++ ...pointer-for-dynamic_ftrace_with_regs.patch | 62 +++++ ...ent_dn-reference-leak-in-add_dt_node.patch | 39 ++++ ...-flush-tm-only-if-cpu-has-tm-feature.patch | 45 ++++ ...x-write-access-check-in-gup_huge_pmd.patch | 45 ++++ ...seccomp_filter-in-seccomp_get_filter.patch | 91 ++++++++ ...ccomp-support-glibc-2.26-siginfo_t.h.patch | 61 +++++ queue-4.9/series | 31 +++ ...gainst-downgrade-even-if-signing-off.patch | 56 +++++ ...re-o_sync-o_dsync-and-o_direct-flags.patch | 34 +++ ...nnection-that-authenticated-as-guest.patch | 32 +++ ...negative-seek_hole-seek_data-offsets.patch | 44 ++++ ...-error-path-using-unsafe-vma-pointer.patch | 211 ++++++++++++++++++ ...date-bdev-support-for-dax-inode-flag.patch | 50 +++++ 32 files changed, 1921 insertions(+) create mode 100644 queue-4.9/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch create mode 100644 
queue-4.9/arm64-make-sure-spsel-is-always-set.patch create mode 100644 queue-4.9/bsg-lib-don-t-free-job-in-bsg_prepare_job.patch create mode 100644 queue-4.9/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch create mode 100644 queue-4.9/btrfs-prevent-to-set-invalid-default-subvolid.patch create mode 100644 queue-4.9/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch create mode 100644 queue-4.9/etnaviv-fix-gem-object-list-corruption.patch create mode 100644 queue-4.9/fix-smb3.1.1-guest-authentication-to-samba.patch create mode 100644 queue-4.9/iw_cxgb4-put-ep-reference-in-pass_accept_req.patch create mode 100644 queue-4.9/iw_cxgb4-remove-the-stid-on-listen-create-failure.patch create mode 100644 queue-4.9/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch create mode 100644 queue-4.9/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch create mode 100644 queue-4.9/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch create mode 100644 queue-4.9/kvm-vmx-extract-__pi_post_block.patch create mode 100644 queue-4.9/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch create mode 100644 queue-4.9/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch create mode 100644 queue-4.9/nl80211-check-for-the-required-netlink-attributes-presence.patch create mode 100644 queue-4.9/pci-fix-race-condition-with-driver_override.patch create mode 100644 queue-4.9/pm-core-fix-device_pm_check_callbacks.patch create mode 100644 queue-4.9/powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch create mode 100644 queue-4.9/powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch create mode 100644 queue-4.9/powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch create mode 100644 queue-4.9/s390-mm-fix-write-access-check-in-gup_huge_pmd.patch create mode 100644 queue-4.9/seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch create mode 100644 
queue-4.9/selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch create mode 100644 queue-4.9/smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch create mode 100644 queue-4.9/smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch create mode 100644 queue-4.9/smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch create mode 100644 queue-4.9/vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch create mode 100644 queue-4.9/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch create mode 100644 queue-4.9/xfs-validate-bdev-support-for-dax-inode-flag.patch diff --git a/queue-4.9/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch b/queue-4.9/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch new file mode 100644 index 00000000000..106b523c084 --- /dev/null +++ b/queue-4.9/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch @@ -0,0 +1,65 @@ +From 760bfb47c36a07741a089bf6a28e854ffbee7dc9 Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Fri, 29 Sep 2017 12:27:41 +0100 +Subject: arm64: fault: Route pte translation faults via do_translation_fault + +From: Will Deacon + +commit 760bfb47c36a07741a089bf6a28e854ffbee7dc9 upstream. + +We currently route pte translation faults via do_page_fault, which elides +the address check against TASK_SIZE before invoking the mm fault handling +code. However, this can cause issues with the path walking code in +conjunction with our word-at-a-time implementation because +load_unaligned_zeropad can end up faulting in kernel space if it reads +across a page boundary and runs into a page fault (e.g. by attempting to +read from a guard region). + +In the case of such a fault, load_unaligned_zeropad has registered a +fixup to shift the valid data and pad with zeroes, however the abort is +reported as a level 3 translation fault and we dispatch it straight to +do_page_fault, despite it being a kernel address. 
This results in calling +a sleeping function from atomic context: + + BUG: sleeping function called from invalid context at arch/arm64/mm/fault.c:313 + in_atomic(): 0, irqs_disabled(): 0, pid: 10290 + Internal error: Oops - BUG: 0 [#1] PREEMPT SMP + [...] + [] ___might_sleep+0x134/0x144 + [] __might_sleep+0x7c/0x8c + [] do_page_fault+0x140/0x330 + [] do_mem_abort+0x54/0xb0 + Exception stack(0xfffffffb20247a70 to 0xfffffffb20247ba0) + [...] + [] el1_da+0x18/0x78 + [] path_parentat+0x44/0x88 + [] filename_parentat+0x5c/0xd8 + [] filename_create+0x4c/0x128 + [] SyS_mkdirat+0x50/0xc8 + [] el0_svc_naked+0x24/0x28 + Code: 36380080 d5384100 f9400800 9402566d (d4210000) + ---[ end trace 2d01889f2bca9b9f ]--- + +Fix this by dispatching all translation faults to do_translation_fault, +which avoids invoking the page fault logic for faults on kernel addresses. + +Reported-by: Ankit Jain +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/mm/fault.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -509,7 +509,7 @@ static const struct fault_info fault_inf + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, +- { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, ++ { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_bad, SIGBUS, 0, "unknown 8" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, diff --git a/queue-4.9/arm64-make-sure-spsel-is-always-set.patch b/queue-4.9/arm64-make-sure-spsel-is-always-set.patch new file mode 100644 index 00000000000..79585587afe --- /dev/null +++
b/queue-4.9/arm64-make-sure-spsel-is-always-set.patch @@ -0,0 +1,40 @@ +From 5371513fb338fb9989c569dc071326d369d6ade8 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 26 Sep 2017 15:57:16 +0100 +Subject: arm64: Make sure SPsel is always set + +From: Marc Zyngier + +commit 5371513fb338fb9989c569dc071326d369d6ade8 upstream. + +When the kernel is entered at EL2 on an ARMv8.0 system, we construct +the EL1 pstate and make sure this uses the EL1 stack pointer +(we perform an exception return to EL1h). + +But if the kernel is either entered at EL1 or stays at EL2 (because +we're on a VHE-capable system), we fail to set SPsel, and use whatever +stack selection the higher exception level has chosen for us. + +Let's not take any chance, and make sure that SPsel is set to one +before we decide the mode we're going to run in. + +Acked-by: Mark Rutland +Signed-off-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/head.S | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm64/kernel/head.S ++++ b/arch/arm64/kernel/head.S +@@ -486,6 +486,7 @@ ENTRY(kimage_vaddr) + * booted in EL1 or EL2 respectively. + */ + ENTRY(el2_setup) ++ msr SPsel, #1 // We want to use SP_EL{1,2} + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.ne 1f diff --git a/queue-4.9/bsg-lib-don-t-free-job-in-bsg_prepare_job.patch b/queue-4.9/bsg-lib-don-t-free-job-in-bsg_prepare_job.patch new file mode 100644 index 00000000000..50a9d4a25a6 --- /dev/null +++ b/queue-4.9/bsg-lib-don-t-free-job-in-bsg_prepare_job.patch @@ -0,0 +1,31 @@ +From f507b54dccfd8000c517d740bc45f20c74532d18 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Thu, 7 Sep 2017 13:54:35 +0200 +Subject: bsg-lib: don't free job in bsg_prepare_job + +From: Christoph Hellwig + +commit f507b54dccfd8000c517d740bc45f20c74532d18 upstream.
+ +The job structure is allocated as part of the request, so we should not +free it in the error path of bsg_prepare_job. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Ming Lei +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/bsg-lib.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/block/bsg-lib.c ++++ b/block/bsg-lib.c +@@ -147,7 +147,6 @@ static int bsg_create_job(struct device + failjob_rls_rqst_payload: + kfree(job->request_payload.sg_list); + failjob_rls_job: +- kfree(job); + return -ENOMEM; + } + diff --git a/queue-4.9/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch b/queue-4.9/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch new file mode 100644 index 00000000000..03b64403afa --- /dev/null +++ b/queue-4.9/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch @@ -0,0 +1,39 @@ +From bb166d7207432d3c7d10c45dc052f12ba3a2121d Mon Sep 17 00:00:00 2001 +From: Naohiro Aota +Date: Fri, 25 Aug 2017 14:15:14 +0900 +Subject: btrfs: fix NULL pointer dereference from free_reloc_roots() + +From: Naohiro Aota + +commit bb166d7207432d3c7d10c45dc052f12ba3a2121d upstream. + +__del_reloc_root should be called before freeing up reloc_root->node. +If not, calling __del_reloc_root() dereference reloc_root->node, causing +the system BUG. 
+ +Fixes: 6bdf131fac23 ("Btrfs: don't leak reloc root nodes on error") +Signed-off-by: Naohiro Aota +Reviewed-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/relocation.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -2367,11 +2367,11 @@ void free_reloc_roots(struct list_head * + while (!list_empty(list)) { + reloc_root = list_entry(list->next, struct btrfs_root, + root_list); ++ __del_reloc_root(reloc_root); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + reloc_root->node = NULL; + reloc_root->commit_root = NULL; +- __del_reloc_root(reloc_root); + } + } + diff --git a/queue-4.9/btrfs-prevent-to-set-invalid-default-subvolid.patch b/queue-4.9/btrfs-prevent-to-set-invalid-default-subvolid.patch new file mode 100644 index 00000000000..efa744a7fb1 --- /dev/null +++ b/queue-4.9/btrfs-prevent-to-set-invalid-default-subvolid.patch @@ -0,0 +1,37 @@ +From 6d6d282932d1a609e60dc4467677e0e863682f57 Mon Sep 17 00:00:00 2001 +From: satoru takeuchi +Date: Tue, 12 Sep 2017 22:42:52 +0900 +Subject: btrfs: prevent to set invalid default subvolid + +From: satoru takeuchi + +commit 6d6d282932d1a609e60dc4467677e0e863682f57 upstream. + +`btrfs sub set-default` succeeds to set an ID which isn't corresponding to any +fs/file tree. If such the bad ID is set to a filesystem, we can't mount this +filesystem without specifying `subvol` or `subvolid` mount options. 
+ +Fixes: 6ef5ed0d386b ("Btrfs: add ioctl and incompat flag to set the default mount subvol") +Signed-off-by: Satoru Takeuchi +Reviewed-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -4082,6 +4082,10 @@ static long btrfs_ioctl_default_subvol(s + ret = PTR_ERR(new_root); + goto out; + } ++ if (!is_fstree(new_root->objectid)) { ++ ret = -ENOENT; ++ goto out; ++ } + + path = btrfs_alloc_path(); + if (!path) { diff --git a/queue-4.9/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch b/queue-4.9/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch new file mode 100644 index 00000000000..f0c36c01982 --- /dev/null +++ b/queue-4.9/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch @@ -0,0 +1,38 @@ +From 78ad4ce014d025f41b8dde3a81876832ead643cf Mon Sep 17 00:00:00 2001 +From: Naohiro Aota +Date: Fri, 8 Sep 2017 17:48:55 +0900 +Subject: btrfs: propagate error to btrfs_cmp_data_prepare caller + +From: Naohiro Aota + +commit 78ad4ce014d025f41b8dde3a81876832ead643cf upstream. + +btrfs_cmp_data_prepare() (almost) always returns 0 i.e. ignoring errors +from gather_extent_pages(). While the pages are freed by +btrfs_cmp_data_free(), cmp->num_pages still has > 0. Then, +btrfs_extent_same() try to access the already freed pages causing faults +(or violates PageLocked assertion). + +This patch just return the error as is so that the caller stop the process. 
+ +Signed-off-by: Naohiro Aota +Fixes: f441460202cb ("btrfs: fix deadlock with extent-same and readpage") +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3052,7 +3052,7 @@ static int btrfs_cmp_data_prepare(struct + out: + if (ret) + btrfs_cmp_data_free(cmp); +- return 0; ++ return ret; + } + + static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, diff --git a/queue-4.9/etnaviv-fix-gem-object-list-corruption.patch b/queue-4.9/etnaviv-fix-gem-object-list-corruption.patch new file mode 100644 index 00000000000..840903a5d83 --- /dev/null +++ b/queue-4.9/etnaviv-fix-gem-object-list-corruption.patch @@ -0,0 +1,38 @@ +From 518417525f3652c12fb5fad6da4ade66c0072fa3 Mon Sep 17 00:00:00 2001 +From: Lucas Stach +Date: Mon, 11 Sep 2017 15:29:31 +0200 +Subject: etnaviv: fix gem object list corruption + +From: Lucas Stach + +commit 518417525f3652c12fb5fad6da4ade66c0072fa3 upstream. + +All manipulations of the gem_object list need to be protected by +the list mutex, as GEM objects can be created and freed in parallel. +This fixes a kernel memory corruption. 
+ +Signed-off-by: Lucas Stach +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/etnaviv/etnaviv_gem.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c +@@ -549,12 +549,15 @@ static const struct etnaviv_gem_ops etna + void etnaviv_gem_free_object(struct drm_gem_object *obj) + { + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); ++ struct etnaviv_drm_private *priv = obj->dev->dev_private; + struct etnaviv_vram_mapping *mapping, *tmp; + + /* object should not be active */ + WARN_ON(is_active(etnaviv_obj)); + ++ mutex_lock(&priv->gem_lock); + list_del(&etnaviv_obj->gem_node); ++ mutex_unlock(&priv->gem_lock); + + list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list, + obj_node) { diff --git a/queue-4.9/fix-smb3.1.1-guest-authentication-to-samba.patch b/queue-4.9/fix-smb3.1.1-guest-authentication-to-samba.patch new file mode 100644 index 00000000000..d41612be265 --- /dev/null +++ b/queue-4.9/fix-smb3.1.1-guest-authentication-to-samba.patch @@ -0,0 +1,32 @@ +From 23586b66d84ba3184b8820277f3fc42761640f87 Mon Sep 17 00:00:00 2001 +From: Steve French +Date: Mon, 18 Sep 2017 18:18:45 -0500 +Subject: Fix SMB3.1.1 guest authentication to Samba + +From: Steve French + +commit 23586b66d84ba3184b8820277f3fc42761640f87 upstream. + +Samba rejects SMB3.1.1 dialect (vers=3.1.1) negotiate requests from +the kernel client due to the two byte pad at the end of the negotiate +contexts. 
+ +Signed-off-by: Steve French +Reviewed-by: Ronnie Sahlberg +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2pdu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -366,7 +366,7 @@ assemble_neg_contexts(struct smb2_negoti + build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); + req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); + req->NegotiateContextCount = cpu_to_le16(2); +- inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2 ++ inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ + } + #else diff --git a/queue-4.9/iw_cxgb4-put-ep-reference-in-pass_accept_req.patch b/queue-4.9/iw_cxgb4-put-ep-reference-in-pass_accept_req.patch new file mode 100644 index 00000000000..5cbd980a55d --- /dev/null +++ b/queue-4.9/iw_cxgb4-put-ep-reference-in-pass_accept_req.patch @@ -0,0 +1,35 @@ +From 3d318605f5e32ff44fb290d9b67573b34213c4c8 Mon Sep 17 00:00:00 2001 +From: Steve Wise +Date: Wed, 13 Sep 2017 09:52:32 -0700 +Subject: iw_cxgb4: put ep reference in pass_accept_req() + +From: Steve Wise + +commit 3d318605f5e32ff44fb290d9b67573b34213c4c8 upstream. + +The listening endpoint should always be dereferenced at the end of +pass_accept_req(). 
+ +Fixes: f86fac79afec ("RDMA/iw_cxgb4: atomic find and reference for listening endpoints") + +Signed-off-by: Steve Wise +Signed-off-by: Doug Ledford +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/cxgb4/cm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/cxgb4/cm.c ++++ b/drivers/infiniband/hw/cxgb4/cm.c +@@ -2577,9 +2577,9 @@ fail: + c4iw_put_ep(&child_ep->com); + reject: + reject_cr(dev, hwtid, skb); ++out: + if (parent_ep) + c4iw_put_ep(&parent_ep->com); +-out: + return 0; + } + diff --git a/queue-4.9/iw_cxgb4-remove-the-stid-on-listen-create-failure.patch b/queue-4.9/iw_cxgb4-remove-the-stid-on-listen-create-failure.patch new file mode 100644 index 00000000000..d8bda6777cb --- /dev/null +++ b/queue-4.9/iw_cxgb4-remove-the-stid-on-listen-create-failure.patch @@ -0,0 +1,33 @@ +From 8b1bbf36b7452c4acb20e91948eaa5e225ea6978 Mon Sep 17 00:00:00 2001 +From: Steve Wise +Date: Tue, 5 Sep 2017 11:52:34 -0700 +Subject: iw_cxgb4: remove the stid on listen create failure + +From: Steve Wise + +commit 8b1bbf36b7452c4acb20e91948eaa5e225ea6978 upstream. + +If a listen create fails, then the server tid (stid) is incorrectly left +in the stid idr table, which can cause a touch-after-free if the stid +is looked up and the already freed endpoint is touched. So make sure +and remove it in the error path. 
+ +Signed-off-by: Steve Wise +Signed-off-by: Doug Ledford +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/cxgb4/cm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/cxgb4/cm.c ++++ b/drivers/infiniband/hw/cxgb4/cm.c +@@ -3441,7 +3441,7 @@ int c4iw_create_listen(struct iw_cm_id * + cm_id->provider_data = ep; + goto out; + } +- ++ remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, + ep->com.local_addr.ss_family); + fail2: diff --git a/queue-4.9/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch b/queue-4.9/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch new file mode 100644 index 00000000000..d8621e9e6d7 --- /dev/null +++ b/queue-4.9/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch @@ -0,0 +1,39 @@ +From 51aa68e7d57e3217192d88ce90fd5b8ef29ec94f Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Tue, 12 Sep 2017 13:02:54 -0700 +Subject: kvm: nVMX: Don't allow L2 to access the hardware CR8 + +From: Jim Mattson + +commit 51aa68e7d57e3217192d88ce90fd5b8ef29ec94f upstream. + +If L1 does not specify the "use TPR shadow" VM-execution control in +vmcs12, then L0 must specify the "CR8-load exiting" and "CR8-store +exiting" VM-execution controls in vmcs02. Failure to do so will give +the L2 VM unrestricted read/write access to the hardware CR8. + +This fixes CVE-2017-12154. 
+ +Signed-off-by: Jim Mattson +Reviewed-by: David Hildenbrand +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -10001,6 +10001,11 @@ static void prepare_vmcs02(struct kvm_vc + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, + page_to_phys(vmx->nested.virtual_apic_page)); + vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); ++ } else { ++#ifdef CONFIG_X86_64 ++ exec_control |= CPU_BASED_CR8_LOAD_EXITING | ++ CPU_BASED_CR8_STORE_EXITING; ++#endif + } + + if (cpu_has_vmx_msr_bitmap() && diff --git a/queue-4.9/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch b/queue-4.9/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch new file mode 100644 index 00000000000..2c9df74a0e0 --- /dev/null +++ b/queue-4.9/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch @@ -0,0 +1,157 @@ +From 8b306e2f3c41939ea528e6174c88cfbfff893ce1 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 6 Jun 2017 12:57:05 +0200 +Subject: KVM: VMX: avoid double list add with VT-d posted interrupts +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paolo Bonzini + +commit 8b306e2f3c41939ea528e6174c88cfbfff893ce1 upstream. + +In some cases, for example involving hot-unplug of assigned +devices, pi_post_block can forget to remove the vCPU from the +blocked_vcpu_list. When this happens, the next call to +pi_pre_block corrupts the list. + +Fix this in two ways. First, check vcpu->pre_pcpu in pi_pre_block +and WARN instead of adding the element twice in the list. Second, +always do the list removal in pi_post_block if vcpu->pre_pcpu is +set (not -1). + +The new code keeps interrupts disabled for the whole duration of +pi_pre_block/pi_post_block. This is not strictly necessary, but +easier to follow. 
For the same reason, PI.ON is checked only +after the cmpxchg, and to handle it we just call the post-block +code. This removes duplication of the list removal code. + +Cc: Huangweidong +Cc: Gonglei +Cc: wangxin +Cc: Radim Krčmář +Tested-by: Longpeng (Mike) +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 62 +++++++++++++++++++++-------------------------------- + 1 file changed, 25 insertions(+), 37 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -11005,10 +11005,11 @@ static void __pi_post_block(struct kvm_v + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; +- unsigned long flags; + + do { + old.control = new.control = pi_desc->control; ++ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, ++ "Wakeup handler not enabled while the VCPU is blocked\n"); + + dest = cpu_physical_id(vcpu->cpu); + +@@ -11025,14 +11026,10 @@ static void __pi_post_block(struct kvm_v + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + +- if(vcpu->pre_pcpu != -1) { +- spin_lock_irqsave( +- &per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); ++ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_del(&vcpu->blocked_vcpu_list); +- spin_unlock_irqrestore( +- &per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + vcpu->pre_pcpu = -1; + } + } +@@ -11052,7 +11049,6 @@ static void __pi_post_block(struct kvm_v + */ + static int pi_pre_block(struct kvm_vcpu *vcpu) + { +- unsigned long flags; + unsigned int dest; + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); +@@ -11062,34 +11058,20 @@ static int pi_pre_block(struct kvm_vcpu + !kvm_vcpu_apicv_active(vcpu)) + return 0; + +- vcpu->pre_pcpu = vcpu->cpu; +- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); 
+- list_add_tail(&vcpu->blocked_vcpu_list, +- &per_cpu(blocked_vcpu_on_cpu, +- vcpu->pre_pcpu)); +- spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); ++ WARN_ON(irqs_disabled()); ++ local_irq_disable(); ++ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { ++ vcpu->pre_pcpu = vcpu->cpu; ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ list_add_tail(&vcpu->blocked_vcpu_list, ++ &per_cpu(blocked_vcpu_on_cpu, ++ vcpu->pre_pcpu)); ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ } + + do { + old.control = new.control = pi_desc->control; + +- /* +- * We should not block the vCPU if +- * an interrupt is posted for it. +- */ +- if (pi_test_on(pi_desc) == 1) { +- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); +- list_del(&vcpu->blocked_vcpu_list); +- spin_unlock_irqrestore( +- &per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); +- vcpu->pre_pcpu = -1; +- +- return 1; +- } +- + WARN((pi_desc->sn == 1), + "Warning: SN field of posted-interrupts " + "is set before blocking\n"); +@@ -11114,7 +11096,12 @@ static int pi_pre_block(struct kvm_vcpu + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + +- return 0; ++ /* We should not block the vCPU if an interrupt is posted for it. 
*/ ++ if (pi_test_on(pi_desc) == 1) ++ __pi_post_block(vcpu); ++ ++ local_irq_enable(); ++ return (vcpu->pre_pcpu == -1); + } + + static int vmx_pre_block(struct kvm_vcpu *vcpu) +@@ -11130,12 +11117,13 @@ static int vmx_pre_block(struct kvm_vcpu + + static void pi_post_block(struct kvm_vcpu *vcpu) + { +- if (!kvm_arch_has_assigned_device(vcpu->kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(vcpu)) ++ if (vcpu->pre_pcpu == -1) + return; + ++ WARN_ON(irqs_disabled()); ++ local_irq_disable(); + __pi_post_block(vcpu); ++ local_irq_enable(); + } + + static void vmx_post_block(struct kvm_vcpu *vcpu) diff --git a/queue-4.9/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch b/queue-4.9/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch new file mode 100644 index 00000000000..7c48399a69b --- /dev/null +++ b/queue-4.9/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch @@ -0,0 +1,57 @@ +From 3a8b0677fc6180a467e26cc32ce6b0c09a32f9bb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= +Date: Thu, 7 Sep 2017 19:02:30 +0100 +Subject: KVM: VMX: Do not BUG() on out-of-bounds guest IRQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jan H. Schönherr + +commit 3a8b0677fc6180a467e26cc32ce6b0c09a32f9bb upstream. + +The value of the guest_irq argument to vmx_update_pi_irte() is +ultimately coming from a KVM_IRQFD API call. Do not BUG() in +vmx_update_pi_irte() if the value is out-of bounds. (Especially, +since KVM as a whole seems to hang after that.) + +Instead, print a message only once if we find that we don't have a +route for a certain IRQ (which can be out-of-bounds or within the +array). + +This fixes CVE-2017-1000252. + +Fixes: efc644048ecde54 ("KVM: x86: Update IRTE for posted-interrupts") +Signed-off-by: Jan H. 
Schönherr +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -11153,7 +11153,7 @@ static int vmx_update_pi_irte(struct kvm + struct kvm_lapic_irq irq; + struct kvm_vcpu *vcpu; + struct vcpu_data vcpu_info; +- int idx, ret = -EINVAL; ++ int idx, ret = 0; + + if (!kvm_arch_has_assigned_device(kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP) || +@@ -11162,7 +11162,12 @@ static int vmx_update_pi_irte(struct kvm + + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); +- BUG_ON(guest_irq >= irq_rt->nr_rt_entries); ++ if (guest_irq >= irq_rt->nr_rt_entries || ++ hlist_empty(&irq_rt->map[guest_irq])) { ++ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", ++ guest_irq, irq_rt->nr_rt_entries); ++ goto out; ++ } + + hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { + if (e->type != KVM_IRQ_ROUTING_MSI) diff --git a/queue-4.9/kvm-vmx-extract-__pi_post_block.patch b/queue-4.9/kvm-vmx-extract-__pi_post_block.patch new file mode 100644 index 00000000000..9213298194d --- /dev/null +++ b/queue-4.9/kvm-vmx-extract-__pi_post_block.patch @@ -0,0 +1,118 @@ +From cd39e1176d320157831ce030b4c869bd2d5eb142 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 6 Jun 2017 12:57:04 +0200 +Subject: KVM: VMX: extract __pi_post_block +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paolo Bonzini + +commit cd39e1176d320157831ce030b4c869bd2d5eb142 upstream. + +Simple code movement patch, preparing for the next one. 
+ +Cc: Huangweidong +Cc: Gonglei +Cc: wangxin +Cc: Radim Krčmář +Tested-by: Longpeng (Mike) +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 71 ++++++++++++++++++++++++++++------------------------- + 1 file changed, 38 insertions(+), 33 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -11000,6 +11000,43 @@ static void vmx_enable_log_dirty_pt_mask + kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); + } + ++static void __pi_post_block(struct kvm_vcpu *vcpu) ++{ ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ struct pi_desc old, new; ++ unsigned int dest; ++ unsigned long flags; ++ ++ do { ++ old.control = new.control = pi_desc->control; ++ ++ dest = cpu_physical_id(vcpu->cpu); ++ ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; ++ ++ /* Allow posting non-urgent interrupts */ ++ new.sn = 0; ++ ++ /* set 'NV' to 'notification vector' */ ++ new.nv = POSTED_INTR_VECTOR; ++ } while (cmpxchg(&pi_desc->control, old.control, ++ new.control) != old.control); ++ ++ if(vcpu->pre_pcpu != -1) { ++ spin_lock_irqsave( ++ &per_cpu(blocked_vcpu_on_cpu_lock, ++ vcpu->pre_pcpu), flags); ++ list_del(&vcpu->blocked_vcpu_list); ++ spin_unlock_irqrestore( ++ &per_cpu(blocked_vcpu_on_cpu_lock, ++ vcpu->pre_pcpu), flags); ++ vcpu->pre_pcpu = -1; ++ } ++} ++ + /* + * This routine does the following things for vCPU which is going + * to be blocked if VT-d PI is enabled. 
+@@ -11093,44 +11130,12 @@ static int vmx_pre_block(struct kvm_vcpu + + static void pi_post_block(struct kvm_vcpu *vcpu) + { +- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); +- struct pi_desc old, new; +- unsigned int dest; +- unsigned long flags; +- + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) + return; + +- do { +- old.control = new.control = pi_desc->control; +- +- dest = cpu_physical_id(vcpu->cpu); +- +- if (x2apic_enabled()) +- new.ndst = dest; +- else +- new.ndst = (dest << 8) & 0xFF00; +- +- /* Allow posting non-urgent interrupts */ +- new.sn = 0; +- +- /* set 'NV' to 'notification vector' */ +- new.nv = POSTED_INTR_VECTOR; +- } while (cmpxchg(&pi_desc->control, old.control, +- new.control) != old.control); +- +- if(vcpu->pre_pcpu != -1) { +- spin_lock_irqsave( +- &per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); +- list_del(&vcpu->blocked_vcpu_list); +- spin_unlock_irqrestore( +- &per_cpu(blocked_vcpu_on_cpu_lock, +- vcpu->pre_pcpu), flags); +- vcpu->pre_pcpu = -1; +- } ++ __pi_post_block(vcpu); + } + + static void vmx_post_block(struct kvm_vcpu *vcpu) diff --git a/queue-4.9/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch b/queue-4.9/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch new file mode 100644 index 00000000000..47625a7c43a --- /dev/null +++ b/queue-4.9/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch @@ -0,0 +1,130 @@ +From 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 6 Jun 2017 12:57:06 +0200 +Subject: KVM: VMX: simplify and fix vmx_vcpu_pi_load +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paolo Bonzini + +commit 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a upstream. + +The simplify part: do not touch pi_desc.nv, we can set it when the +VCPU is first created. 
Likewise, pi_desc.sn is only handled by +vmx_vcpu_pi_load, do not touch it in __pi_post_block. + +The fix part: do not check kvm_arch_has_assigned_device, instead +check the SN bit to figure out whether vmx_vcpu_pi_put ran before. +This matches what the previous patch did in pi_post_block. + +Cc: Huangweidong +Cc: Gonglei +Cc: wangxin +Cc: Radim Krčmář +Tested-by: Longpeng (Mike) +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 68 +++++++++++++++++++++++++++-------------------------- + 1 file changed, 35 insertions(+), 33 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2167,43 +2167,41 @@ static void vmx_vcpu_pi_load(struct kvm_ + struct pi_desc old, new; + unsigned int dest; + +- if (!kvm_arch_has_assigned_device(vcpu->kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(vcpu)) ++ /* ++ * In case of hot-plug or hot-unplug, we may have to undo ++ * vmx_vcpu_pi_put even if there is no assigned device. And we ++ * always keep PI.NDST up to date for simplicity: it makes the ++ * code easier, and CPU migration is not a fast path. ++ */ ++ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) ++ return; ++ ++ /* ++ * First handle the simple case where no cmpxchg is necessary; just ++ * allow posting non-urgent interrupts. ++ * ++ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change ++ * PI.NDST: pi_post_block will do it for us and the wakeup_handler ++ * expects the VCPU to be on the blocked_vcpu_list that matches ++ * PI.NDST. ++ */ ++ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || ++ vcpu->cpu == cpu) { ++ pi_clear_sn(pi_desc); + return; ++ } + ++ /* The full case. */ + do { + old.control = new.control = pi_desc->control; + +- /* +- * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there +- * are two possible cases: +- * 1. After running 'pre_block', context switch +- * happened. For this case, 'sn' was set in +- * vmx_vcpu_put(), so we need to clear it here. +- * 2. 
After running 'pre_block', we were blocked, +- * and woken up by some other guy. For this case, +- * we don't need to do anything, 'pi_post_block' +- * will do everything for us. However, we cannot +- * check whether it is case #1 or case #2 here +- * (maybe, not needed), so we also clear sn here, +- * I think it is not a big deal. +- */ +- if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { +- if (vcpu->cpu != cpu) { +- dest = cpu_physical_id(cpu); +- +- if (x2apic_enabled()) +- new.ndst = dest; +- else +- new.ndst = (dest << 8) & 0xFF00; +- } ++ dest = cpu_physical_id(cpu); + +- /* set 'NV' to 'notification vector' */ +- new.nv = POSTED_INTR_VECTOR; +- } ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; + +- /* Allow posting non-urgent interrupts */ + new.sn = 0; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); +@@ -9187,6 +9185,13 @@ static struct kvm_vcpu *vmx_create_vcpu( + + vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + ++ /* ++ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR ++ * or POSTED_INTR_WAKEUP_VECTOR. 
++ */ ++ vmx->pi_desc.nv = POSTED_INTR_VECTOR; ++ vmx->pi_desc.sn = 1; ++ + return &vmx->vcpu; + + free_vmcs: +@@ -11018,9 +11023,6 @@ static void __pi_post_block(struct kvm_v + else + new.ndst = (dest << 8) & 0xFF00; + +- /* Allow posting non-urgent interrupts */ +- new.sn = 0; +- + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, diff --git a/queue-4.9/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch b/queue-4.9/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch new file mode 100644 index 00000000000..98b325b6373 --- /dev/null +++ b/queue-4.9/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch @@ -0,0 +1,81 @@ +From b862789aa5186d5ea3a024b7cfe0f80c3a38b980 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Fri, 29 Sep 2017 19:01:45 +0800 +Subject: kvm/x86: Handle async PF in RCU read-side critical sections + +From: Boqun Feng + +commit b862789aa5186d5ea3a024b7cfe0f80c3a38b980 upstream. + +Sasha Levin reported a WARNING: + +| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329 +| rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline] +| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329 +| rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458 +... +| CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246 +| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS +| 1.10.1-1ubuntu1 04/01/2014 +| Call Trace: +... 
+| RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline] +| RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458 +| RSP: 0018:ffff88003b2debc8 EFLAGS: 00010002 +| RAX: 0000000000000001 RBX: 1ffff1000765bd85 RCX: 0000000000000000 +| RDX: 1ffff100075d7882 RSI: ffffffffb5c7da20 RDI: ffff88003aebc410 +| RBP: ffff88003b2def30 R08: dffffc0000000000 R09: 0000000000000001 +| R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003b2def08 +| R13: 0000000000000000 R14: ffff88003aebc040 R15: ffff88003aebc040 +| __schedule+0x201/0x2240 kernel/sched/core.c:3292 +| schedule+0x113/0x460 kernel/sched/core.c:3421 +| kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158 +| do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271 +| async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069 +| RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996 +| RSP: 0018:ffff88003b2df520 EFLAGS: 00010283 +| RAX: 000000000000003f RBX: ffffffffb5d1e141 RCX: ffff88003b2df670 +| RDX: 0000000000000001 RSI: dffffc0000000000 RDI: ffffffffb5d1e140 +| RBP: ffff88003b2df560 R08: dffffc0000000000 R09: 0000000000000000 +| R10: ffff88003b2df718 R11: 0000000000000000 R12: ffff88003b2df5d8 +| R13: 0000000000000064 R14: ffffffffb5d1e140 R15: 0000000000000000 +| vsnprintf+0x173/0x1700 lib/vsprintf.c:2136 +| sprintf+0xbe/0xf0 lib/vsprintf.c:2386 +| proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23 +| get_link fs/namei.c:1047 [inline] +| link_path_walk+0x1041/0x1490 fs/namei.c:2127 +... + +This happened when the host hit a page fault, and delivered it as in an +async page fault, while the guest was in an RCU read-side critical +section. The guest then tries to reschedule in kvm_async_pf_task_wait(), +but rcu_preempt_note_context_switch() would treat the reschedule as a +sleep in RCU read-side critical section, which is not allowed (even in +preemptible RCU). Thus the WARN. 
+ +To cure this, make kvm_async_pf_task_wait() go to the halt path if the +PF happens in a RCU read-side critical section. + +Reported-by: Sasha Levin +Cc: "Paul E. McKenney" +Cc: Peter Zijlstra +Signed-off-by: Boqun Feng +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/kvm.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -141,7 +141,8 @@ void kvm_async_pf_task_wait(u32 token) + + n.token = token; + n.cpu = smp_processor_id(); +- n.halted = is_idle_task(current) || preempt_count() > 1; ++ n.halted = is_idle_task(current) || preempt_count() > 1 || ++ rcu_preempt_depth(); + init_swait_queue_head(&n.wq); + hlist_add_head(&n.link, &b->list); + raw_spin_unlock(&b->lock); diff --git a/queue-4.9/nl80211-check-for-the-required-netlink-attributes-presence.patch b/queue-4.9/nl80211-check-for-the-required-netlink-attributes-presence.patch new file mode 100644 index 00000000000..b97057a238b --- /dev/null +++ b/queue-4.9/nl80211-check-for-the-required-netlink-attributes-presence.patch @@ -0,0 +1,41 @@ +From e785fa0a164aa11001cba931367c7f94ffaff888 Mon Sep 17 00:00:00 2001 +From: Vladis Dronov +Date: Wed, 13 Sep 2017 00:21:21 +0200 +Subject: nl80211: check for the required netlink attributes presence + +From: Vladis Dronov + +commit e785fa0a164aa11001cba931367c7f94ffaff888 upstream. + +nl80211_set_rekey_data() does not check if the required attributes +NL80211_REKEY_DATA_{REPLAY_CTR,KEK,KCK} are present when processing +NL80211_CMD_SET_REKEY_OFFLOAD request. This request can be issued by +users with CAP_NET_ADMIN privilege and may result in NULL dereference +and a system crash. Add a check for the required attributes presence. +This patch is based on the patch by bo Zhang. + +This fixes CVE-2017-12153. 
+ +References: https://bugzilla.redhat.com/show_bug.cgi?id=1491046 +Fixes: e5497d766ad ("cfg80211/nl80211: support GTK rekey offload") +Reported-by: bo Zhang +Signed-off-by: Vladis Dronov +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/wireless/nl80211.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -10385,6 +10385,9 @@ static int nl80211_set_rekey_data(struct + if (err) + return err; + ++ if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] || ++ !tb[NL80211_REKEY_DATA_KCK]) ++ return -EINVAL; + if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN) + return -ERANGE; + if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN) diff --git a/queue-4.9/pci-fix-race-condition-with-driver_override.patch b/queue-4.9/pci-fix-race-condition-with-driver_override.patch new file mode 100644 index 00000000000..e47bc3c8137 --- /dev/null +++ b/queue-4.9/pci-fix-race-condition-with-driver_override.patch @@ -0,0 +1,66 @@ +From 9561475db680f7144d2223a409dd3d7e322aca03 Mon Sep 17 00:00:00 2001 +From: Nicolai Stange +Date: Mon, 11 Sep 2017 09:45:40 +0200 +Subject: PCI: Fix race condition with driver_override + +From: Nicolai Stange + +commit 9561475db680f7144d2223a409dd3d7e322aca03 upstream. + +The driver_override implementation is susceptible to a race condition when +different threads are reading vs. storing a different driver override. Add +locking to avoid the race condition. + +This is in close analogy to commit 6265539776a0 ("driver core: platform: +fix race condition with driver_override") from Adrian Salido. 
+ +Fixes: 782a985d7af2 ("PCI: Introduce new device binding path using pci_dev.driver_override") +Signed-off-by: Nicolai Stange +Signed-off-by: Bjorn Helgaas +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/pci-sysfs.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/pci/pci-sysfs.c ++++ b/drivers/pci/pci-sysfs.c +@@ -527,7 +527,7 @@ static ssize_t driver_override_store(str + const char *buf, size_t count) + { + struct pci_dev *pdev = to_pci_dev(dev); +- char *driver_override, *old = pdev->driver_override, *cp; ++ char *driver_override, *old, *cp; + + /* We need to keep extra room for a newline */ + if (count >= (PAGE_SIZE - 1)) +@@ -541,12 +541,15 @@ static ssize_t driver_override_store(str + if (cp) + *cp = '\0'; + ++ device_lock(dev); ++ old = pdev->driver_override; + if (strlen(driver_override)) { + pdev->driver_override = driver_override; + } else { + kfree(driver_override); + pdev->driver_override = NULL; + } ++ device_unlock(dev); + + kfree(old); + +@@ -557,8 +560,12 @@ static ssize_t driver_override_show(stru + struct device_attribute *attr, char *buf) + { + struct pci_dev *pdev = to_pci_dev(dev); ++ ssize_t len; + +- return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override); ++ device_lock(dev); ++ len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override); ++ device_unlock(dev); ++ return len; + } + static DEVICE_ATTR_RW(driver_override); + diff --git a/queue-4.9/pm-core-fix-device_pm_check_callbacks.patch b/queue-4.9/pm-core-fix-device_pm_check_callbacks.patch new file mode 100644 index 00000000000..949e217c952 --- /dev/null +++ b/queue-4.9/pm-core-fix-device_pm_check_callbacks.patch @@ -0,0 +1,43 @@ +From 157c460e10cb6eca29ccbd0f023db159d0c55ec7 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Tue, 19 Sep 2017 02:22:39 +0200 +Subject: PM: core: Fix device_pm_check_callbacks() + +From: Rafael J. Wysocki + +commit 157c460e10cb6eca29ccbd0f023db159d0c55ec7 upstream. 
+ +The device_pm_check_callbacks() function doesn't check legacy +->suspend and ->resume callback pointers under the device's +bus type, class and driver, so in some cases it may set the +no_pm_callbacks flag for the device incorrectly and then the +callbacks may be skipped during system suspend/resume, which +shouldn't happen. + +Fixes: aa8e54b55947 (PM / sleep: Go direct_complete if driver has no callbacks) +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/power/main.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/base/power/main.c ++++ b/drivers/base/power/main.c +@@ -1757,10 +1757,13 @@ void device_pm_check_callbacks(struct de + { + spin_lock_irq(&dev->power.lock); + dev->power.no_pm_callbacks = +- (!dev->bus || pm_ops_is_empty(dev->bus->pm)) && +- (!dev->class || pm_ops_is_empty(dev->class->pm)) && ++ (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && ++ !dev->bus->suspend && !dev->bus->resume)) && ++ (!dev->class || (pm_ops_is_empty(dev->class->pm) && ++ !dev->class->suspend && !dev->class->resume)) && + (!dev->type || pm_ops_is_empty(dev->type->pm)) && + (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && +- (!dev->driver || pm_ops_is_empty(dev->driver->pm)); ++ (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && ++ !dev->driver->suspend && !dev->driver->resume)); + spin_unlock_irq(&dev->power.lock); + } diff --git a/queue-4.9/powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch b/queue-4.9/powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch new file mode 100644 index 00000000000..e44a4ddb153 --- /dev/null +++ b/queue-4.9/powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch @@ -0,0 +1,62 @@ +From a4979a7e71eb8da976cbe4a0a1fa50636e76b04f Mon Sep 17 00:00:00 2001 +From: "Naveen N. 
Rao" +Date: Thu, 1 Jun 2017 16:18:16 +0530 +Subject: powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS + +From: Naveen N. Rao + +commit a4979a7e71eb8da976cbe4a0a1fa50636e76b04f upstream. + +For DYNAMIC_FTRACE_WITH_REGS, we should be passing-in the original set +of registers in pt_regs, to capture the state _before_ ftrace_caller. +However, we are instead passing the stack pointer *after* allocating a +stack frame in ftrace_caller. Fix this by saving the proper value of r1 +in pt_regs. Also, use SAVE_10GPRS() to simplify the code. + +Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI") +Cc: stable@vger.kernel.org # v4.6+ +Signed-off-by: Naveen N. Rao +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + + +--- + arch/powerpc/kernel/entry_64.S | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +--- a/arch/powerpc/kernel/entry_64.S ++++ b/arch/powerpc/kernel/entry_64.S +@@ -1235,10 +1235,14 @@ _GLOBAL(ftrace_caller) + stdu r1,-SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ +- SAVE_8GPRS(0,r1) +- SAVE_8GPRS(8,r1) +- SAVE_8GPRS(16,r1) +- SAVE_8GPRS(24,r1) ++ SAVE_GPR(0, r1) ++ SAVE_10GPRS(2, r1) ++ SAVE_10GPRS(12, r1) ++ SAVE_10GPRS(22, r1) ++ ++ /* Save previous stack pointer (r1) */ ++ addi r8, r1, SWITCH_FRAME_SIZE ++ std r8, GPR1(r1) + + /* Load special regs for save below */ + mfmsr r8 +@@ -1292,10 +1296,10 @@ ftrace_call: + #endif + + /* Restore gprs */ +- REST_8GPRS(0,r1) +- REST_8GPRS(8,r1) +- REST_8GPRS(16,r1) +- REST_8GPRS(24,r1) ++ REST_GPR(0,r1) ++ REST_10GPRS(2,r1) ++ REST_10GPRS(12,r1) ++ REST_10GPRS(22,r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) diff --git a/queue-4.9/powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch b/queue-4.9/powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch new file mode 100644 index 00000000000..d6d0ed50a63 --- /dev/null +++ 
b/queue-4.9/powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch @@ -0,0 +1,39 @@ +From b537ca6fede69a281dc524983e5e633d79a10a08 Mon Sep 17 00:00:00 2001 +From: Tyrel Datwyler +Date: Wed, 20 Sep 2017 17:02:52 -0400 +Subject: powerpc/pseries: Fix parent_dn reference leak in add_dt_node() + +From: Tyrel Datwyler + +commit b537ca6fede69a281dc524983e5e633d79a10a08 upstream. + +A reference to the parent device node is held by add_dt_node() for the +node to be added. If the call to dlpar_configure_connector() fails +add_dt_node() returns ENOENT and that reference is not freed. + +Add a call to of_node_put(parent_dn) prior to bailing out after a +failed dlpar_configure_connector() call. + +Fixes: 8d5ff320766f ("powerpc/pseries: Make dlpar_configure_connector parent node aware") +Signed-off-by: Tyrel Datwyler +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/platforms/pseries/mobility.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/platforms/pseries/mobility.c ++++ b/arch/powerpc/platforms/pseries/mobility.c +@@ -225,8 +225,10 @@ static int add_dt_node(__be32 parent_pha + return -ENOENT; + + dn = dlpar_configure_connector(drc_index, parent_dn); +- if (!dn) ++ if (!dn) { ++ of_node_put(parent_dn); + return -ENOENT; ++ } + + rc = dlpar_attach_node(dn); + if (rc) diff --git a/queue-4.9/powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch b/queue-4.9/powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch new file mode 100644 index 00000000000..b400cddbcbb --- /dev/null +++ b/queue-4.9/powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch @@ -0,0 +1,45 @@ +From c1fa0768a8713b135848f78fd43ffc208d8ded70 Mon Sep 17 00:00:00 2001 +From: Gustavo Romero +Date: Wed, 13 Sep 2017 22:13:48 -0400 +Subject: powerpc/tm: Flush TM only if CPU has TM feature + +From: Gustavo Romero + +commit c1fa0768a8713b135848f78fd43ffc208d8ded70 upstream. 
+ +Commit cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump") +added code to access TM SPRs in flush_tmregs_to_thread(). However +flush_tmregs_to_thread() does not check if TM feature is available on +CPU before trying to access TM SPRs in order to copy live state to +thread structures. flush_tmregs_to_thread() is indeed guarded by +CONFIG_PPC_TRANSACTIONAL_MEM but it might be the case that kernel +was compiled with CONFIG_PPC_TRANSACTIONAL_MEM enabled and ran on +a CPU without TM feature available, thus rendering the execution +of TM instructions that are treated by the CPU as illegal instructions. + +The fix is just to add proper checking in flush_tmregs_to_thread() +if CPU has the TM feature before accessing any TM-specific resource, +returning immediately if TM is not available on the CPU. Adding +that checking in flush_tmregs_to_thread() instead of in places +where it is called, like in vsr_get() and vsr_set(), is better because +it avoids the same problem cropping up elsewhere. + +Fixes: cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump") +Signed-off-by: Gustavo Romero +Reviewed-by: Cyril Bur +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c +index 07cd22e35405..f52ad5bb7109 100644 +--- a/arch/powerpc/kernel/ptrace.c ++++ b/arch/powerpc/kernel/ptrace.c +@@ -131,7 +131,7 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) + * in the appropriate thread structures from live. 
+ */ + +- if (tsk != current) ++ if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current)) + return; + + if (MSR_TM_SUSPENDED(mfmsr())) { diff --git a/queue-4.9/s390-mm-fix-write-access-check-in-gup_huge_pmd.patch b/queue-4.9/s390-mm-fix-write-access-check-in-gup_huge_pmd.patch new file mode 100644 index 00000000000..5b441a63be8 --- /dev/null +++ b/queue-4.9/s390-mm-fix-write-access-check-in-gup_huge_pmd.patch @@ -0,0 +1,45 @@ +From ba385c0594e723d41790ecfb12c610e6f90c7785 Mon Sep 17 00:00:00 2001 +From: Gerald Schaefer +Date: Mon, 18 Sep 2017 16:51:51 +0200 +Subject: s390/mm: fix write access check in gup_huge_pmd() + +From: Gerald Schaefer + +commit ba385c0594e723d41790ecfb12c610e6f90c7785 upstream. + +The check for the _SEGMENT_ENTRY_PROTECT bit in gup_huge_pmd() is the +wrong way around. It must not be set for write==1, and not be checked for +write==0. Fix this similar to how it was fixed for ptes long time ago in +commit 25591b070336 ("[S390] fix get_user_pages_fast"). + +One impact of this bug would be unnecessarily using the gup slow path for +write==0 on r/w mappings. A potentially more severe impact would be that +gup_huge_pmd() will succeed for write==1 on r/o mappings. + +Signed-off-by: Gerald Schaefer +Signed-off-by: Martin Schwidefsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/mm/gup.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/arch/s390/mm/gup.c ++++ b/arch/s390/mm/gup.c +@@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *p + static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) + { +- unsigned long mask, result; + struct page *head, *page; ++ unsigned long mask; + int refs; + +- result = write ? 0 : _SEGMENT_ENTRY_PROTECT; +- mask = result | _SEGMENT_ENTRY_INVALID; +- if ((pmd_val(pmd) & mask) != result) ++ mask = (write ? 
_SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID; ++ if ((pmd_val(pmd) & mask) != 0) + return 0; + VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); + diff --git a/queue-4.9/seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch b/queue-4.9/seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch new file mode 100644 index 00000000000..1184bfcb84c --- /dev/null +++ b/queue-4.9/seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch @@ -0,0 +1,91 @@ +From 66a733ea6b611aecf0119514d2dddab5f9d6c01e Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Wed, 27 Sep 2017 09:25:30 -0600 +Subject: seccomp: fix the usage of get/put_seccomp_filter() in seccomp_get_filter() + +From: Oleg Nesterov + +commit 66a733ea6b611aecf0119514d2dddab5f9d6c01e upstream. + +As Chris explains, get_seccomp_filter() and put_seccomp_filter() can end +up using different filters. Once we drop ->siglock it is possible for +task->seccomp.filter to have been replaced by SECCOMP_FILTER_FLAG_TSYNC. + +Fixes: f8e529ed941b ("seccomp, ptrace: add support for dumping seccomp filters") +Reported-by: Chris Salls +Signed-off-by: Oleg Nesterov +[tycho: add __get_seccomp_filter vs. open coding refcount_inc()] +Signed-off-by: Tycho Andersen +[kees: tweak commit log] +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/seccomp.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -457,14 +457,19 @@ static long seccomp_attach_filter(unsign + return 0; + } + ++void __get_seccomp_filter(struct seccomp_filter *filter) ++{ ++ /* Reference count is bounded by the number of total processes. 
*/ ++ atomic_inc(&filter->usage); ++} ++ + /* get_seccomp_filter - increments the reference count of the filter on @tsk */ + void get_seccomp_filter(struct task_struct *tsk) + { + struct seccomp_filter *orig = tsk->seccomp.filter; + if (!orig) + return; +- /* Reference count is bounded by the number of total processes. */ +- atomic_inc(&orig->usage); ++ __get_seccomp_filter(orig); + } + + static inline void seccomp_filter_free(struct seccomp_filter *filter) +@@ -475,10 +480,8 @@ static inline void seccomp_filter_free(s + } + } + +-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ +-void put_seccomp_filter(struct task_struct *tsk) ++static void __put_seccomp_filter(struct seccomp_filter *orig) + { +- struct seccomp_filter *orig = tsk->seccomp.filter; + /* Clean up single-reference branches iteratively. */ + while (orig && atomic_dec_and_test(&orig->usage)) { + struct seccomp_filter *freeme = orig; +@@ -487,6 +490,12 @@ void put_seccomp_filter(struct task_stru + } + } + ++/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ ++void put_seccomp_filter(struct task_struct *tsk) ++{ ++ __put_seccomp_filter(tsk->seccomp.filter); ++} ++ + /** + * seccomp_send_sigsys - signals the task to allow in-process syscall emulation + * @syscall: syscall number to send to userland +@@ -892,13 +901,13 @@ long seccomp_get_filter(struct task_stru + if (!data) + goto out; + +- get_seccomp_filter(task); ++ __get_seccomp_filter(filter); + spin_unlock_irq(&task->sighand->siglock); + + if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) + ret = -EFAULT; + +- put_seccomp_filter(task); ++ __put_seccomp_filter(filter); + return ret; + + out: diff --git a/queue-4.9/selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch b/queue-4.9/selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch new file mode 100644 index 00000000000..afc25d671d9 --- /dev/null +++ b/queue-4.9/selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch @@ -0,0 +1,61 @@ 
+From 10859f3855db4c6f10dc7974ff4b3a292f3de8e0 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 7 Sep 2017 16:32:46 -0700 +Subject: selftests/seccomp: Support glibc 2.26 siginfo_t.h + +From: Kees Cook + +commit 10859f3855db4c6f10dc7974ff4b3a292f3de8e0 upstream. + +The 2.26 release of glibc changed how siginfo_t is defined, and the earlier +work-around of using the kernel definition is no longer needed. The old +way needs to stay around for a while, though. + +Reported-by: Seth Forshee +Cc: Andy Lutomirski +Cc: Will Drewry +Cc: Shuah Khan +Cc: linux-kselftest@vger.kernel.org +Signed-off-by: Kees Cook +Tested-by: Seth Forshee +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/seccomp/seccomp_bpf.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/tools/testing/selftests/seccomp/seccomp_bpf.c ++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c +@@ -6,10 +6,18 @@ + */ + + #include +-#include +-#define __have_siginfo_t 1 +-#define __have_sigval_t 1 +-#define __have_sigevent_t 1 ++ ++/* ++ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, ++ * we need to use the kernel's siginfo.h file and trick glibc ++ * into accepting it. 
++ */ ++#if !__GLIBC_PREREQ(2, 26) ++# include ++# define __have_siginfo_t 1 ++# define __have_sigval_t 1 ++# define __have_sigevent_t 1 ++#endif + + #include + #include +@@ -676,7 +684,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS) + syscall(__NR_getpid); + } + +-static struct siginfo TRAP_info; ++static siginfo_t TRAP_info; + static volatile int TRAP_nr; + static void TRAP_action(int nr, siginfo_t *info, void *void_context) + { diff --git a/queue-4.9/series b/queue-4.9/series index 4905a38cac1..27b1a6488b8 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -22,3 +22,34 @@ security-keys-rewrite-all-of-big_key-crypto.patch keys-fix-writing-past-end-of-user-supplied-buffer-in-keyring_read.patch keys-prevent-creating-a-different-user-s-keyrings.patch keys-prevent-keyctl_read-on-negative-key.patch +powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch +powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch +powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch +s390-mm-fix-write-access-check-in-gup_huge_pmd.patch +pm-core-fix-device_pm_check_callbacks.patch +fix-smb3.1.1-guest-authentication-to-samba.patch +smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch +smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch +smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch +vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch +nl80211-check-for-the-required-netlink-attributes-presence.patch +bsg-lib-don-t-free-job-in-bsg_prepare_job.patch +iw_cxgb4-remove-the-stid-on-listen-create-failure.patch +iw_cxgb4-put-ep-reference-in-pass_accept_req.patch +selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch +seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch +arm64-make-sure-spsel-is-always-set.patch +arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch +kvm-vmx-extract-__pi_post_block.patch 
+kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch +kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch +kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch +kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch +kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch +xfs-validate-bdev-support-for-dax-inode-flag.patch +etnaviv-fix-gem-object-list-corruption.patch +pci-fix-race-condition-with-driver_override.patch +btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch +btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch +btrfs-prevent-to-set-invalid-default-subvolid.patch +x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch diff --git a/queue-4.9/smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch b/queue-4.9/smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch new file mode 100644 index 00000000000..bba74597dca --- /dev/null +++ b/queue-4.9/smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch @@ -0,0 +1,56 @@ +From 0603c96f3af50e2f9299fa410c224ab1d465e0f9 Mon Sep 17 00:00:00 2001 +From: Steve French +Date: Wed, 20 Sep 2017 19:57:18 -0500 +Subject: SMB: Validate negotiate (to protect against downgrade) even if signing off + +From: Steve French + +commit 0603c96f3af50e2f9299fa410c224ab1d465e0f9 upstream. + +As long as signing is supported (ie not a guest user connection) and +connection is SMB3 or SMB3.02, then validate negotiate (protect +against man in the middle downgrade attacks). We had been doing this +only when signing was required, not when signing was just enabled, +but this more closely matches recommended SMB3 behavior and is +better security. Suggested by Metze. 
+ +Signed-off-by: Steve French +Reviewed-by: Jeremy Allison +Acked-by: Stefan Metzmacher +Reviewed-by: Ronnie Sahlberg +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2pdu.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -531,15 +531,22 @@ int smb3_validate_negotiate(const unsign + + /* + * validation ioctl must be signed, so no point sending this if we +- * can not sign it. We could eventually change this to selectively ++ * can not sign it (ie are not known user). Even if signing is not ++ * required (enabled but not negotiated), in those cases we selectively + * sign just this, the first and only signed request on a connection. +- * This is good enough for now since a user who wants better security +- * would also enable signing on the mount. Having validation of +- * negotiate info for signed connections helps reduce attack vectors ++ * Having validation of negotiate info helps reduce attack vectors. 
+ */ +- if (tcon->ses->server->sign == false) ++ if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) + return 0; /* validation requires signing */ + ++ if (tcon->ses->user_name == NULL) { ++ cifs_dbg(FYI, "Can't validate negotiate: null user mount\n"); ++ return 0; /* validation requires signing */ ++ } ++ ++ if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) ++ cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n"); ++ + vneg_inbuf.Capabilities = + cpu_to_le32(tcon->ses->server->vals->req_capabilities); + memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, diff --git a/queue-4.9/smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch b/queue-4.9/smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch new file mode 100644 index 00000000000..d63c58f4d79 --- /dev/null +++ b/queue-4.9/smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch @@ -0,0 +1,34 @@ +From 1013e760d10e614dc10b5624ce9fc41563ba2e65 Mon Sep 17 00:00:00 2001 +From: Steve French +Date: Fri, 22 Sep 2017 01:40:27 -0500 +Subject: SMB3: Don't ignore O_SYNC/O_DSYNC and O_DIRECT flags + +From: Steve French + +commit 1013e760d10e614dc10b5624ce9fc41563ba2e65 upstream. 
+ +Signed-off-by: Steve French +Reviewed-by: Ronnie Sahlberg +Reviewed-by: Pavel Shilovsky +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/file.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct ino + if (backup_cred(cifs_sb)) + create_options |= CREATE_OPEN_BACKUP_INTENT; + ++ /* O_SYNC also has bit for O_DSYNC so following check picks up either */ ++ if (f_flags & O_SYNC) ++ create_options |= CREATE_WRITE_THROUGH; ++ ++ if (f_flags & O_DIRECT) ++ create_options |= CREATE_NO_BUFFER; ++ + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = desired_access; diff --git a/queue-4.9/smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch b/queue-4.9/smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch new file mode 100644 index 00000000000..0a0fe732ff4 --- /dev/null +++ b/queue-4.9/smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch @@ -0,0 +1,32 @@ +From c721c38957fb19982416f6be71aae7b30630d83b Mon Sep 17 00:00:00 2001 +From: Steve French +Date: Tue, 19 Sep 2017 18:40:03 -0500 +Subject: SMB3: Warn user if trying to sign connection that authenticated as guest + +From: Steve French + +commit c721c38957fb19982416f6be71aae7b30630d83b upstream. + +It can be confusing if user ends up authenticated as guest but they +requested signing (server will return error validating signed packets) +so add log message for this. 
+ +Signed-off-by: Steve French +Reviewed-by: Ronnie Sahlberg +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2pdu.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -1010,6 +1010,8 @@ SMB2_sess_setup(const unsigned int xid, + while (sess_data->func) + sess_data->func(sess_data); + ++ if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign)) ++ cifs_dbg(VFS, "signing requested but authenticated as guest\n"); + rc = sess_data->result; + out: + kfree(sess_data); diff --git a/queue-4.9/vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch b/queue-4.9/vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch new file mode 100644 index 00000000000..b4efe721353 --- /dev/null +++ b/queue-4.9/vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch @@ -0,0 +1,44 @@ +From fc46820b27a2d9a46f7e90c9ceb4a64a1bc5fab8 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Mon, 25 Sep 2017 12:23:03 +0200 +Subject: vfs: Return -ENXIO for negative SEEK_HOLE / SEEK_DATA offsets + +From: Andreas Gruenbacher + +commit fc46820b27a2d9a46f7e90c9ceb4a64a1bc5fab8 upstream. + +In generic_file_llseek_size, return -ENXIO for negative offsets as well +as offsets beyond EOF. This affects filesystems which don't implement +SEEK_HOLE / SEEK_DATA internally, possibly because they don't support +holes. + +Fixes xfstest generic/448. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/read_write.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/read_write.c ++++ b/fs/read_write.c +@@ -114,7 +114,7 @@ generic_file_llseek_size(struct file *fi + * In the generic case the entire file is data, so as long as + * offset isn't at the end of the file then the offset is data. 
+ */ +- if (offset >= eof) ++ if ((unsigned long long)offset >= eof) + return -ENXIO; + break; + case SEEK_HOLE: +@@ -122,7 +122,7 @@ generic_file_llseek_size(struct file *fi + * There is a virtual hole at the end of the file, so as long as + * offset isn't i_size or larger, return i_size. + */ +- if (offset >= eof) ++ if ((unsigned long long)offset >= eof) + return -ENXIO; + offset = eof; + break; diff --git a/queue-4.9/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch b/queue-4.9/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch new file mode 100644 index 00000000000..de37d41c59d --- /dev/null +++ b/queue-4.9/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch @@ -0,0 +1,211 @@ +From a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 Mon Sep 17 00:00:00 2001 +From: Laurent Dufour +Date: Mon, 4 Sep 2017 10:32:15 +0200 +Subject: x86/mm: Fix fault error path using unsafe vma pointer + +From: Laurent Dufour + +commit a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 upstream. + +commit 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal +generation code") passes down a vma pointer to the error path, but that is +done once the mmap_sem is released when calling mm_fault_error() from +__do_page_fault(). + +This is dangerous as the vma structure is no more safe to be used once the +mmap_sem has been released. As only the protection key value is required in +the error processing, we could just pass down this value. + +Fix it by passing a pointer to a protection key value down to the fault +signal generation code. The use of a pointer allows to keep the check +generating a warning message in fill_sig_info_pkey() when the vma was not +known. If the pointer is valid, the protection value can be accessed by +dereferencing the pointer. 
+ +[ tglx: Made *pkey u32 as that's the type which is passed in siginfo ] + +Fixes: 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal generation code") +Signed-off-by: Laurent Dufour +Signed-off-by: Thomas Gleixner +Cc: linux-mm@kvack.org +Cc: Dave Hansen +Link: http://lkml.kernel.org/r/1504513935-12742-1-git-send-email-ldufour@linux.vnet.ibm.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/fault.c | 47 ++++++++++++++++++++++++----------------------- + 1 file changed, 24 insertions(+), 23 deletions(-) + +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -191,8 +191,7 @@ is_prefetch(struct pt_regs *regs, unsign + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ +-static void fill_sig_info_pkey(int si_code, siginfo_t *info, +- struct vm_area_struct *vma) ++static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) + { + /* This is effectively an #ifdef */ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) +@@ -208,7 +207,7 @@ static void fill_sig_info_pkey(int si_co + * valid VMA, so we should never reach this without a + * valid VMA. + */ +- if (!vma) { ++ if (!pkey) { + WARN_ONCE(1, "PKU fault with no VMA passed in"); + info->si_pkey = 0; + return; +@@ -218,13 +217,12 @@ static void fill_sig_info_pkey(int si_co + * absolutely guranteed to be 100% accurate because of + * the race explained above. 
+ */ +- info->si_pkey = vma_pkey(vma); ++ info->si_pkey = *pkey; + } + + static void + force_sig_info_fault(int si_signo, int si_code, unsigned long address, +- struct task_struct *tsk, struct vm_area_struct *vma, +- int fault) ++ struct task_struct *tsk, u32 *pkey, int fault) + { + unsigned lsb = 0; + siginfo_t info; +@@ -239,7 +237,7 @@ force_sig_info_fault(int si_signo, int s + lsb = PAGE_SHIFT; + info.si_addr_lsb = lsb; + +- fill_sig_info_pkey(si_code, &info, vma); ++ fill_sig_info_pkey(si_code, &info, pkey); + + force_sig_info(si_signo, &info, tsk); + } +@@ -718,8 +716,6 @@ no_context(struct pt_regs *regs, unsigne + struct task_struct *tsk = current; + unsigned long flags; + int sig; +- /* No context means no VMA to pass down */ +- struct vm_area_struct *vma = NULL; + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs, X86_TRAP_PF)) { +@@ -744,7 +740,7 @@ no_context(struct pt_regs *regs, unsigne + + /* XXX: hwpoison faults will set the wrong code. */ + force_sig_info_fault(signal, si_code, address, +- tsk, vma, 0); ++ tsk, NULL, 0); + } + + /* +@@ -853,8 +849,7 @@ show_signal_msg(struct pt_regs *regs, un + + static void + __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, +- unsigned long address, struct vm_area_struct *vma, +- int si_code) ++ unsigned long address, u32 *pkey, int si_code) + { + struct task_struct *tsk = current; + +@@ -902,7 +897,7 @@ __bad_area_nosemaphore(struct pt_regs *r + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_PF; + +- force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0); ++ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); + + return; + } +@@ -915,9 +910,9 @@ __bad_area_nosemaphore(struct pt_regs *r + + static noinline void + bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, +- unsigned long address, struct vm_area_struct *vma) ++ unsigned long address, u32 *pkey) + { +- __bad_area_nosemaphore(regs, error_code, 
address, vma, SEGV_MAPERR); ++ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); + } + + static void +@@ -925,6 +920,10 @@ __bad_area(struct pt_regs *regs, unsigne + unsigned long address, struct vm_area_struct *vma, int si_code) + { + struct mm_struct *mm = current->mm; ++ u32 pkey; ++ ++ if (vma) ++ pkey = vma_pkey(vma); + + /* + * Something tried to access memory that isn't in our memory map.. +@@ -932,7 +931,8 @@ __bad_area(struct pt_regs *regs, unsigne + */ + up_read(&mm->mmap_sem); + +- __bad_area_nosemaphore(regs, error_code, address, vma, si_code); ++ __bad_area_nosemaphore(regs, error_code, address, ++ (vma) ? &pkey : NULL, si_code); + } + + static noinline void +@@ -975,7 +975,7 @@ bad_area_access_error(struct pt_regs *re + + static void + do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, +- struct vm_area_struct *vma, unsigned int fault) ++ u32 *pkey, unsigned int fault) + { + struct task_struct *tsk = current; + int code = BUS_ADRERR; +@@ -1002,13 +1002,12 @@ do_sigbus(struct pt_regs *regs, unsigned + code = BUS_MCEERR_AR; + } + #endif +- force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault); ++ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); + } + + static noinline void + mm_fault_error(struct pt_regs *regs, unsigned long error_code, +- unsigned long address, struct vm_area_struct *vma, +- unsigned int fault) ++ unsigned long address, u32 *pkey, unsigned int fault) + { + if (fatal_signal_pending(current) && !(error_code & PF_USER)) { + no_context(regs, error_code, address, 0, 0); +@@ -1032,9 +1031,9 @@ mm_fault_error(struct pt_regs *regs, uns + } else { + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| + VM_FAULT_HWPOISON_LARGE)) +- do_sigbus(regs, error_code, address, vma, fault); ++ do_sigbus(regs, error_code, address, pkey, fault); + else if (fault & VM_FAULT_SIGSEGV) +- bad_area_nosemaphore(regs, error_code, address, vma); ++ bad_area_nosemaphore(regs, error_code, address, 
pkey); + else + BUG(); + } +@@ -1220,6 +1219,7 @@ __do_page_fault(struct pt_regs *regs, un + struct mm_struct *mm; + int fault, major = 0; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; ++ u32 pkey; + + tsk = current; + mm = tsk->mm; +@@ -1420,9 +1420,10 @@ good_area: + return; + } + ++ pkey = vma_pkey(vma); + up_read(&mm->mmap_sem); + if (unlikely(fault & VM_FAULT_ERROR)) { +- mm_fault_error(regs, error_code, address, vma, fault); ++ mm_fault_error(regs, error_code, address, &pkey, fault); + return; + } + diff --git a/queue-4.9/xfs-validate-bdev-support-for-dax-inode-flag.patch b/queue-4.9/xfs-validate-bdev-support-for-dax-inode-flag.patch new file mode 100644 index 00000000000..28a59e6a76f --- /dev/null +++ b/queue-4.9/xfs-validate-bdev-support-for-dax-inode-flag.patch @@ -0,0 +1,50 @@ +From 6851a3db7e224bbb85e23b3c64a506c9e0904382 Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Mon, 18 Sep 2017 14:46:03 -0700 +Subject: xfs: validate bdev support for DAX inode flag + +From: Ross Zwisler + +commit 6851a3db7e224bbb85e23b3c64a506c9e0904382 upstream. + +Currently only the blocksize is checked, but we should really be calling +bdev_dax_supported() which also tests to make sure we can get a +struct dax_device and that the dax_direct_access() path is working. + +This is the same check that we do for the "-o dax" mount option in +xfs_fs_fill_super(). + +This does not fix the race issues that caused the XFS DAX inode option to +be disabled, so that option will still be disabled. If/when we re-enable +it, though, I think we will want this issue to have been fixed. I also do +think that we want to fix this in stable kernels. + +Signed-off-by: Ross Zwisler +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_ioctl.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -1085,6 +1085,7 @@ xfs_ioctl_setattr_dax_invalidate( + int *join_flags) + { + struct inode *inode = VFS_I(ip); ++ struct super_block *sb = inode->i_sb; + int error; + + *join_flags = 0; +@@ -1097,7 +1098,7 @@ xfs_ioctl_setattr_dax_invalidate( + if (fa->fsx_xflags & FS_XFLAG_DAX) { + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) + return -EINVAL; +- if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) ++ if (bdev_dax_supported(sb, sb->s_blocksize) < 0) + return -EINVAL; + } + -- 2.47.3