]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 2 Oct 2017 12:50:27 +0000 (14:50 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 2 Oct 2017 12:50:27 +0000 (14:50 +0200)
added patches:
arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
arm64-make-sure-spsel-is-always-set.patch
bsg-lib-don-t-free-job-in-bsg_prepare_job.patch
btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
btrfs-prevent-to-set-invalid-default-subvolid.patch
btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
etnaviv-fix-gem-object-list-corruption.patch
fix-smb3.1.1-guest-authentication-to-samba.patch
iw_cxgb4-put-ep-reference-in-pass_accept_req.patch
iw_cxgb4-remove-the-stid-on-listen-create-failure.patch
kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
kvm-vmx-extract-__pi_post_block.patch
kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
nl80211-check-for-the-required-netlink-attributes-presence.patch
pci-fix-race-condition-with-driver_override.patch
pm-core-fix-device_pm_check_callbacks.patch
powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch
powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch
powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch
s390-mm-fix-write-access-check-in-gup_huge_pmd.patch
seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch
selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch
smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch
smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch
smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch
vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch
x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
xfs-validate-bdev-support-for-dax-inode-flag.patch

32 files changed:
queue-4.9/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch [new file with mode: 0644]
queue-4.9/arm64-make-sure-spsel-is-always-set.patch [new file with mode: 0644]
queue-4.9/bsg-lib-don-t-free-job-in-bsg_prepare_job.patch [new file with mode: 0644]
queue-4.9/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch [new file with mode: 0644]
queue-4.9/btrfs-prevent-to-set-invalid-default-subvolid.patch [new file with mode: 0644]
queue-4.9/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch [new file with mode: 0644]
queue-4.9/etnaviv-fix-gem-object-list-corruption.patch [new file with mode: 0644]
queue-4.9/fix-smb3.1.1-guest-authentication-to-samba.patch [new file with mode: 0644]
queue-4.9/iw_cxgb4-put-ep-reference-in-pass_accept_req.patch [new file with mode: 0644]
queue-4.9/iw_cxgb4-remove-the-stid-on-listen-create-failure.patch [new file with mode: 0644]
queue-4.9/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch [new file with mode: 0644]
queue-4.9/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch [new file with mode: 0644]
queue-4.9/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch [new file with mode: 0644]
queue-4.9/kvm-vmx-extract-__pi_post_block.patch [new file with mode: 0644]
queue-4.9/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch [new file with mode: 0644]
queue-4.9/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch [new file with mode: 0644]
queue-4.9/nl80211-check-for-the-required-netlink-attributes-presence.patch [new file with mode: 0644]
queue-4.9/pci-fix-race-condition-with-driver_override.patch [new file with mode: 0644]
queue-4.9/pm-core-fix-device_pm_check_callbacks.patch [new file with mode: 0644]
queue-4.9/powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch [new file with mode: 0644]
queue-4.9/powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch [new file with mode: 0644]
queue-4.9/powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch [new file with mode: 0644]
queue-4.9/s390-mm-fix-write-access-check-in-gup_huge_pmd.patch [new file with mode: 0644]
queue-4.9/seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch [new file with mode: 0644]
queue-4.9/selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch [new file with mode: 0644]
queue-4.9/smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch [new file with mode: 0644]
queue-4.9/smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch [new file with mode: 0644]
queue-4.9/vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch [new file with mode: 0644]
queue-4.9/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch [new file with mode: 0644]
queue-4.9/xfs-validate-bdev-support-for-dax-inode-flag.patch [new file with mode: 0644]

diff --git a/queue-4.9/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch b/queue-4.9/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
new file mode 100644 (file)
index 0000000..106b523
--- /dev/null
@@ -0,0 +1,65 @@
+From 760bfb47c36a07741a089bf6a28e854ffbee7dc9 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 29 Sep 2017 12:27:41 +0100
+Subject: arm64: fault: Route pte translation faults via do_translation_fault
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit 760bfb47c36a07741a089bf6a28e854ffbee7dc9 upstream.
+
+We currently route pte translation faults via do_page_fault, which elides
+the address check against TASK_SIZE before invoking the mm fault handling
+code. However, this can cause issues with the path walking code in
+conjunction with our word-at-a-time implementation because
+load_unaligned_zeropad can end up faulting in kernel space if it reads
+across a page boundary and runs into a page fault (e.g. by attempting to
+read from a guard region).
+
+In the case of such a fault, load_unaligned_zeropad has registered a
+fixup to shift the valid data and pad with zeroes, however the abort is
+reported as a level 3 translation fault and we dispatch it straight to
+do_page_fault, despite it being a kernel address. This results in calling
+a sleeping function from atomic context:
+
+  BUG: sleeping function called from invalid context at arch/arm64/mm/fault.c:313
+  in_atomic(): 0, irqs_disabled(): 0, pid: 10290
+  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
+  [...]
+  [<ffffff8e016cd0cc>] ___might_sleep+0x134/0x144
+  [<ffffff8e016cd158>] __might_sleep+0x7c/0x8c
+  [<ffffff8e016977f0>] do_page_fault+0x140/0x330
+  [<ffffff8e01681328>] do_mem_abort+0x54/0xb0
+  Exception stack(0xfffffffb20247a70 to 0xfffffffb20247ba0)
+  [...]
+  [<ffffff8e016844fc>] el1_da+0x18/0x78
+  [<ffffff8e017f399c>] path_parentat+0x44/0x88
+  [<ffffff8e017f4c9c>] filename_parentat+0x5c/0xd8
+  [<ffffff8e017f5044>] filename_create+0x4c/0x128
+  [<ffffff8e017f59e4>] SyS_mkdirat+0x50/0xc8
+  [<ffffff8e01684e30>] el0_svc_naked+0x24/0x28
+  Code: 36380080 d5384100 f9400800 9402566d (d4210000)
+  ---[ end trace 2d01889f2bca9b9f ]---
+
+Fix this by dispatching all translation faults to do_translation_fault,
+which avoids invoking the page fault logic for faults on kernel addresses.
+
+Reported-by: Ankit Jain <ankijain@codeaurora.org>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/fault.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -509,7 +509,7 @@ static const struct fault_info fault_inf
+       { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 0 translation fault"     },
+       { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 1 translation fault"     },
+       { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 2 translation fault"     },
+-      { do_page_fault,        SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
++      { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
+       { do_bad,               SIGBUS,  0,             "unknown 8"                     },
+       { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 access flag fault"     },
+       { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 access flag fault"     },
diff --git a/queue-4.9/arm64-make-sure-spsel-is-always-set.patch b/queue-4.9/arm64-make-sure-spsel-is-always-set.patch
new file mode 100644 (file)
index 0000000..7958558
--- /dev/null
@@ -0,0 +1,40 @@
+From 5371513fb338fb9989c569dc071326d369d6ade8 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <marc.zyngier@arm.com>
+Date: Tue, 26 Sep 2017 15:57:16 +0100
+Subject: arm64: Make sure SPsel is always set
+
+From: Marc Zyngier <marc.zyngier@arm.com>
+
+commit 5371513fb338fb9989c569dc071326d369d6ade8 upstream.
+
+When the kernel is entered at EL2 on an ARMv8.0 system, we construct
+the EL1 pstate and make sure this uses the EL1 stack pointer
+(we perform an exception return to EL1h).
+
+But if the kernel is either entered at EL1 or stays at EL2 (because
+we're on a VHE-capable system), we fail to set SPsel, and use whatever
+stack selection the higher exception level has chosen for us.
+
+Let's not take any chance, and make sure that SPsel is set to one
+before we decide the mode we're going to run in.
+
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/head.S |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm64/kernel/head.S
++++ b/arch/arm64/kernel/head.S
+@@ -486,6 +486,7 @@ ENTRY(kimage_vaddr)
+  * booted in EL1 or EL2 respectively.
+  */
+ ENTRY(el2_setup)
++      msr     SPsel, #1                       // We want to use SP_EL{1,2}
+       mrs     x0, CurrentEL
+       cmp     x0, #CurrentEL_EL2
+       b.ne    1f
diff --git a/queue-4.9/bsg-lib-don-t-free-job-in-bsg_prepare_job.patch b/queue-4.9/bsg-lib-don-t-free-job-in-bsg_prepare_job.patch
new file mode 100644 (file)
index 0000000..50a9d4a
--- /dev/null
@@ -0,0 +1,31 @@
+From f507b54dccfd8000c517d740bc45f20c74532d18 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 7 Sep 2017 13:54:35 +0200
+Subject: bsg-lib: don't free job in bsg_prepare_job
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit f507b54dccfd8000c517d740bc45f20c74532d18 upstream.
+
+The job structure is allocated as part of the request, so we should not
+free it in the error path of bsg_prepare_job.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/bsg-lib.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/block/bsg-lib.c
++++ b/block/bsg-lib.c
+@@ -147,7 +147,6 @@ static int bsg_create_job(struct device
+ failjob_rls_rqst_payload:
+       kfree(job->request_payload.sg_list);
+ failjob_rls_job:
+-      kfree(job);
+       return -ENOMEM;
+ }
diff --git a/queue-4.9/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch b/queue-4.9/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
new file mode 100644 (file)
index 0000000..03b6440
--- /dev/null
@@ -0,0 +1,39 @@
+From bb166d7207432d3c7d10c45dc052f12ba3a2121d Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 25 Aug 2017 14:15:14 +0900
+Subject: btrfs: fix NULL pointer dereference from free_reloc_roots()
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit bb166d7207432d3c7d10c45dc052f12ba3a2121d upstream.
+
+__del_reloc_root should be called before freeing up reloc_root->node.
+If not, calling __del_reloc_root() dereferences reloc_root->node, causing
+a system BUG.
+
+Fixes: 6bdf131fac23 ("Btrfs: don't leak reloc root nodes on error")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -2367,11 +2367,11 @@ void free_reloc_roots(struct list_head *
+       while (!list_empty(list)) {
+               reloc_root = list_entry(list->next, struct btrfs_root,
+                                       root_list);
++              __del_reloc_root(reloc_root);
+               free_extent_buffer(reloc_root->node);
+               free_extent_buffer(reloc_root->commit_root);
+               reloc_root->node = NULL;
+               reloc_root->commit_root = NULL;
+-              __del_reloc_root(reloc_root);
+       }
+ }
diff --git a/queue-4.9/btrfs-prevent-to-set-invalid-default-subvolid.patch b/queue-4.9/btrfs-prevent-to-set-invalid-default-subvolid.patch
new file mode 100644 (file)
index 0000000..efa744a
--- /dev/null
@@ -0,0 +1,37 @@
+From 6d6d282932d1a609e60dc4467677e0e863682f57 Mon Sep 17 00:00:00 2001
+From: satoru takeuchi <satoru.takeuchi@gmail.com>
+Date: Tue, 12 Sep 2017 22:42:52 +0900
+Subject: btrfs: prevent to set invalid default subvolid
+
+From: satoru takeuchi <satoru.takeuchi@gmail.com>
+
+commit 6d6d282932d1a609e60dc4467677e0e863682f57 upstream.
+
+`btrfs sub set-default` succeeds in setting an ID which doesn't correspond to any
+fs/file tree. If such a bad ID is set on a filesystem, we can't mount this
+filesystem without specifying `subvol` or `subvolid` mount options.
+
+Fixes: 6ef5ed0d386b ("Btrfs: add ioctl and incompat flag to set the default mount subvol")
+Signed-off-by: Satoru Takeuchi <satoru.takeuchi@gmail.com>
+Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -4082,6 +4082,10 @@ static long btrfs_ioctl_default_subvol(s
+               ret = PTR_ERR(new_root);
+               goto out;
+       }
++      if (!is_fstree(new_root->objectid)) {
++              ret = -ENOENT;
++              goto out;
++      }
+       path = btrfs_alloc_path();
+       if (!path) {
diff --git a/queue-4.9/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch b/queue-4.9/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
new file mode 100644 (file)
index 0000000..f0c36c0
--- /dev/null
@@ -0,0 +1,38 @@
+From 78ad4ce014d025f41b8dde3a81876832ead643cf Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 8 Sep 2017 17:48:55 +0900
+Subject: btrfs: propagate error to btrfs_cmp_data_prepare caller
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 78ad4ce014d025f41b8dde3a81876832ead643cf upstream.
+
+btrfs_cmp_data_prepare() (almost) always returns 0 i.e. ignoring errors
+from gather_extent_pages(). While the pages are freed by
+btrfs_cmp_data_free(), cmp->num_pages is still > 0. Then,
+btrfs_extent_same() tries to access the already freed pages, causing faults
+(or violating the PageLocked assertion).
+
+This patch just returns the error as is so that the caller stops the process.
+
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Fixes: f441460202cb ("btrfs: fix deadlock with extent-same and readpage")
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3052,7 +3052,7 @@ static int btrfs_cmp_data_prepare(struct
+ out:
+       if (ret)
+               btrfs_cmp_data_free(cmp);
+-      return 0;
++      return ret;
+ }
+ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
diff --git a/queue-4.9/etnaviv-fix-gem-object-list-corruption.patch b/queue-4.9/etnaviv-fix-gem-object-list-corruption.patch
new file mode 100644 (file)
index 0000000..840903a
--- /dev/null
@@ -0,0 +1,38 @@
+From 518417525f3652c12fb5fad6da4ade66c0072fa3 Mon Sep 17 00:00:00 2001
+From: Lucas Stach <l.stach@pengutronix.de>
+Date: Mon, 11 Sep 2017 15:29:31 +0200
+Subject: etnaviv: fix gem object list corruption
+
+From: Lucas Stach <l.stach@pengutronix.de>
+
+commit 518417525f3652c12fb5fad6da4ade66c0072fa3 upstream.
+
+All manipulations of the gem_object list need to be protected by
+the list mutex, as GEM objects can be created and freed in parallel.
+This fixes a kernel memory corruption.
+
+Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/etnaviv/etnaviv_gem.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+@@ -549,12 +549,15 @@ static const struct etnaviv_gem_ops etna
+ void etnaviv_gem_free_object(struct drm_gem_object *obj)
+ {
+       struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
++      struct etnaviv_drm_private *priv = obj->dev->dev_private;
+       struct etnaviv_vram_mapping *mapping, *tmp;
+       /* object should not be active */
+       WARN_ON(is_active(etnaviv_obj));
++      mutex_lock(&priv->gem_lock);
+       list_del(&etnaviv_obj->gem_node);
++      mutex_unlock(&priv->gem_lock);
+       list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
+                                obj_node) {
diff --git a/queue-4.9/fix-smb3.1.1-guest-authentication-to-samba.patch b/queue-4.9/fix-smb3.1.1-guest-authentication-to-samba.patch
new file mode 100644 (file)
index 0000000..d41612b
--- /dev/null
@@ -0,0 +1,32 @@
+From 23586b66d84ba3184b8820277f3fc42761640f87 Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Mon, 18 Sep 2017 18:18:45 -0500
+Subject: Fix SMB3.1.1 guest authentication to Samba
+
+From: Steve French <smfrench@gmail.com>
+
+commit 23586b66d84ba3184b8820277f3fc42761640f87 upstream.
+
+Samba rejects SMB3.1.1 dialect (vers=3.1.1) negotiate requests from
+the kernel client due to the two byte pad at the end of the negotiate
+contexts.
+
+Signed-off-by: Steve French <smfrench@gmail.com>
+Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2pdu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -366,7 +366,7 @@ assemble_neg_contexts(struct smb2_negoti
+       build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
+       req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+       req->NegotiateContextCount = cpu_to_le16(2);
+-      inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
++      inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
+                       + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
+ }
+ #else
diff --git a/queue-4.9/iw_cxgb4-put-ep-reference-in-pass_accept_req.patch b/queue-4.9/iw_cxgb4-put-ep-reference-in-pass_accept_req.patch
new file mode 100644 (file)
index 0000000..5cbd980
--- /dev/null
@@ -0,0 +1,35 @@
+From 3d318605f5e32ff44fb290d9b67573b34213c4c8 Mon Sep 17 00:00:00 2001
+From: Steve Wise <swise@opengridcomputing.com>
+Date: Wed, 13 Sep 2017 09:52:32 -0700
+Subject: iw_cxgb4: put ep reference in pass_accept_req()
+
+From: Steve Wise <swise@opengridcomputing.com>
+
+commit 3d318605f5e32ff44fb290d9b67573b34213c4c8 upstream.
+
+The listening endpoint should always be dereferenced at the end of
+pass_accept_req().
+
+Fixes: f86fac79afec ("RDMA/iw_cxgb4: atomic find and reference for listening endpoints")
+
+Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/cxgb4/cm.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -2577,9 +2577,9 @@ fail:
+       c4iw_put_ep(&child_ep->com);
+ reject:
+       reject_cr(dev, hwtid, skb);
++out:
+       if (parent_ep)
+               c4iw_put_ep(&parent_ep->com);
+-out:
+       return 0;
+ }
diff --git a/queue-4.9/iw_cxgb4-remove-the-stid-on-listen-create-failure.patch b/queue-4.9/iw_cxgb4-remove-the-stid-on-listen-create-failure.patch
new file mode 100644 (file)
index 0000000..d8bda67
--- /dev/null
@@ -0,0 +1,33 @@
+From 8b1bbf36b7452c4acb20e91948eaa5e225ea6978 Mon Sep 17 00:00:00 2001
+From: Steve Wise <swise@opengridcomputing.com>
+Date: Tue, 5 Sep 2017 11:52:34 -0700
+Subject: iw_cxgb4: remove the stid on listen create failure
+
+From: Steve Wise <swise@opengridcomputing.com>
+
+commit 8b1bbf36b7452c4acb20e91948eaa5e225ea6978 upstream.
+
+If a listen create fails, then the server tid (stid) is incorrectly left
+in the stid idr table, which can cause a touch-after-free if the stid
+is looked up and the already freed endpoint is touched.  So make sure
+and remove it in the error path.
+
+Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/cxgb4/cm.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -3441,7 +3441,7 @@ int c4iw_create_listen(struct iw_cm_id *
+               cm_id->provider_data = ep;
+               goto out;
+       }
+-
++      remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
+       cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
+                       ep->com.local_addr.ss_family);
+ fail2:
diff --git a/queue-4.9/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch b/queue-4.9/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
new file mode 100644 (file)
index 0000000..d8621e9
--- /dev/null
@@ -0,0 +1,39 @@
+From 51aa68e7d57e3217192d88ce90fd5b8ef29ec94f Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Tue, 12 Sep 2017 13:02:54 -0700
+Subject: kvm: nVMX: Don't allow L2 to access the hardware CR8
+
+From: Jim Mattson <jmattson@google.com>
+
+commit 51aa68e7d57e3217192d88ce90fd5b8ef29ec94f upstream.
+
+If L1 does not specify the "use TPR shadow" VM-execution control in
+vmcs12, then L0 must specify the "CR8-load exiting" and "CR8-store
+exiting" VM-execution controls in vmcs02. Failure to do so will give
+the L2 VM unrestricted read/write access to the hardware CR8.
+
+This fixes CVE-2017-12154.
+
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -10001,6 +10001,11 @@ static void prepare_vmcs02(struct kvm_vc
+               vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
+                               page_to_phys(vmx->nested.virtual_apic_page));
+               vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
++      } else {
++#ifdef CONFIG_X86_64
++              exec_control |= CPU_BASED_CR8_LOAD_EXITING |
++                              CPU_BASED_CR8_STORE_EXITING;
++#endif
+       }
+       if (cpu_has_vmx_msr_bitmap() &&
diff --git a/queue-4.9/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch b/queue-4.9/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
new file mode 100644 (file)
index 0000000..2c9df74
--- /dev/null
@@ -0,0 +1,157 @@
+From 8b306e2f3c41939ea528e6174c88cfbfff893ce1 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 6 Jun 2017 12:57:05 +0200
+Subject: KVM: VMX: avoid double list add with VT-d posted interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 8b306e2f3c41939ea528e6174c88cfbfff893ce1 upstream.
+
+In some cases, for example involving hot-unplug of assigned
+devices, pi_post_block can forget to remove the vCPU from the
+blocked_vcpu_list.  When this happens, the next call to
+pi_pre_block corrupts the list.
+
+Fix this in two ways.  First, check vcpu->pre_pcpu in pi_pre_block
+and WARN instead of adding the element twice in the list.  Second,
+always do the list removal in pi_post_block if vcpu->pre_pcpu is
+set (not -1).
+
+The new code keeps interrupts disabled for the whole duration of
+pi_pre_block/pi_post_block.  This is not strictly necessary, but
+easier to follow.  For the same reason, PI.ON is checked only
+after the cmpxchg, and to handle it we just call the post-block
+code.  This removes duplication of the list removal code.
+
+Cc: Huangweidong <weidong.huang@huawei.com>
+Cc: Gonglei <arei.gonglei@huawei.com>
+Cc: wangxin <wangxinxin.wang@huawei.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   62 +++++++++++++++++++++--------------------------------
+ 1 file changed, 25 insertions(+), 37 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -11005,10 +11005,11 @@ static void __pi_post_block(struct kvm_v
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+       struct pi_desc old, new;
+       unsigned int dest;
+-      unsigned long flags;
+       do {
+               old.control = new.control = pi_desc->control;
++              WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
++                   "Wakeup handler not enabled while the VCPU is blocked\n");
+               dest = cpu_physical_id(vcpu->cpu);
+@@ -11025,14 +11026,10 @@ static void __pi_post_block(struct kvm_v
+       } while (cmpxchg(&pi_desc->control, old.control,
+                       new.control) != old.control);
+-      if(vcpu->pre_pcpu != -1) {
+-              spin_lock_irqsave(
+-                      &per_cpu(blocked_vcpu_on_cpu_lock,
+-                      vcpu->pre_pcpu), flags);
++      if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
++              spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_del(&vcpu->blocked_vcpu_list);
+-              spin_unlock_irqrestore(
+-                      &per_cpu(blocked_vcpu_on_cpu_lock,
+-                      vcpu->pre_pcpu), flags);
++              spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               vcpu->pre_pcpu = -1;
+       }
+ }
+@@ -11052,7 +11049,6 @@ static void __pi_post_block(struct kvm_v
+  */
+ static int pi_pre_block(struct kvm_vcpu *vcpu)
+ {
+-      unsigned long flags;
+       unsigned int dest;
+       struct pi_desc old, new;
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+@@ -11062,34 +11058,20 @@ static int pi_pre_block(struct kvm_vcpu
+               !kvm_vcpu_apicv_active(vcpu))
+               return 0;
+-      vcpu->pre_pcpu = vcpu->cpu;
+-      spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+-                        vcpu->pre_pcpu), flags);
+-      list_add_tail(&vcpu->blocked_vcpu_list,
+-                    &per_cpu(blocked_vcpu_on_cpu,
+-                    vcpu->pre_pcpu));
+-      spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+-                             vcpu->pre_pcpu), flags);
++      WARN_ON(irqs_disabled());
++      local_irq_disable();
++      if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
++              vcpu->pre_pcpu = vcpu->cpu;
++              spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++              list_add_tail(&vcpu->blocked_vcpu_list,
++                            &per_cpu(blocked_vcpu_on_cpu,
++                                     vcpu->pre_pcpu));
++              spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++      }
+       do {
+               old.control = new.control = pi_desc->control;
+-              /*
+-               * We should not block the vCPU if
+-               * an interrupt is posted for it.
+-               */
+-              if (pi_test_on(pi_desc) == 1) {
+-                      spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+-                                        vcpu->pre_pcpu), flags);
+-                      list_del(&vcpu->blocked_vcpu_list);
+-                      spin_unlock_irqrestore(
+-                                      &per_cpu(blocked_vcpu_on_cpu_lock,
+-                                      vcpu->pre_pcpu), flags);
+-                      vcpu->pre_pcpu = -1;
+-
+-                      return 1;
+-              }
+-
+               WARN((pi_desc->sn == 1),
+                    "Warning: SN field of posted-interrupts "
+                    "is set before blocking\n");
+@@ -11114,7 +11096,12 @@ static int pi_pre_block(struct kvm_vcpu
+       } while (cmpxchg(&pi_desc->control, old.control,
+                       new.control) != old.control);
+-      return 0;
++      /* We should not block the vCPU if an interrupt is posted for it.  */
++      if (pi_test_on(pi_desc) == 1)
++              __pi_post_block(vcpu);
++
++      local_irq_enable();
++      return (vcpu->pre_pcpu == -1);
+ }
+ static int vmx_pre_block(struct kvm_vcpu *vcpu)
+@@ -11130,12 +11117,13 @@ static int vmx_pre_block(struct kvm_vcpu
+ static void pi_post_block(struct kvm_vcpu *vcpu)
+ {
+-      if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+-              !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+-              !kvm_vcpu_apicv_active(vcpu))
++      if (vcpu->pre_pcpu == -1)
+               return;
++      WARN_ON(irqs_disabled());
++      local_irq_disable();
+       __pi_post_block(vcpu);
++      local_irq_enable();
+ }
+ static void vmx_post_block(struct kvm_vcpu *vcpu)
diff --git a/queue-4.9/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch b/queue-4.9/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
new file mode 100644 (file)
index 0000000..7c48399
--- /dev/null
@@ -0,0 +1,57 @@
+From 3a8b0677fc6180a467e26cc32ce6b0c09a32f9bb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= <jschoenh@amazon.de>
+Date: Thu, 7 Sep 2017 19:02:30 +0100
+Subject: KVM: VMX: Do not BUG() on out-of-bounds guest IRQ
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jan H. Schönherr <jschoenh@amazon.de>
+
+commit 3a8b0677fc6180a467e26cc32ce6b0c09a32f9bb upstream.
+
+The value of the guest_irq argument to vmx_update_pi_irte() is
+ultimately coming from a KVM_IRQFD API call. Do not BUG() in
+vmx_update_pi_irte() if the value is out-of bounds. (Especially,
+since KVM as a whole seems to hang after that.)
+
+Instead, print a message only once if we find that we don't have a
+route for a certain IRQ (which can be out-of-bounds or within the
+array).
+
+This fixes CVE-2017-1000252.
+
+Fixes: efc644048ecde54 ("KVM: x86: Update IRTE for posted-interrupts")
+Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -11153,7 +11153,7 @@ static int vmx_update_pi_irte(struct kvm
+       struct kvm_lapic_irq irq;
+       struct kvm_vcpu *vcpu;
+       struct vcpu_data vcpu_info;
+-      int idx, ret = -EINVAL;
++      int idx, ret = 0;
+       if (!kvm_arch_has_assigned_device(kvm) ||
+               !irq_remapping_cap(IRQ_POSTING_CAP) ||
+@@ -11162,7 +11162,12 @@ static int vmx_update_pi_irte(struct kvm
+       idx = srcu_read_lock(&kvm->irq_srcu);
+       irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+-      BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
++      if (guest_irq >= irq_rt->nr_rt_entries ||
++          hlist_empty(&irq_rt->map[guest_irq])) {
++              pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
++                           guest_irq, irq_rt->nr_rt_entries);
++              goto out;
++      }
+       hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+               if (e->type != KVM_IRQ_ROUTING_MSI)
diff --git a/queue-4.9/kvm-vmx-extract-__pi_post_block.patch b/queue-4.9/kvm-vmx-extract-__pi_post_block.patch
new file mode 100644 (file)
index 0000000..9213298
--- /dev/null
@@ -0,0 +1,118 @@
+From cd39e1176d320157831ce030b4c869bd2d5eb142 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 6 Jun 2017 12:57:04 +0200
+Subject: KVM: VMX: extract __pi_post_block
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit cd39e1176d320157831ce030b4c869bd2d5eb142 upstream.
+
+Simple code movement patch, preparing for the next one.
+
+Cc: Huangweidong <weidong.huang@huawei.com>
+Cc: Gonglei <arei.gonglei@huawei.com>
+Cc: wangxin <wangxinxin.wang@huawei.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   71 ++++++++++++++++++++++++++++-------------------------
+ 1 file changed, 38 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -11000,6 +11000,43 @@ static void vmx_enable_log_dirty_pt_mask
+       kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+ }
++static void __pi_post_block(struct kvm_vcpu *vcpu)
++{
++      struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
++      struct pi_desc old, new;
++      unsigned int dest;
++      unsigned long flags;
++
++      do {
++              old.control = new.control = pi_desc->control;
++
++              dest = cpu_physical_id(vcpu->cpu);
++
++              if (x2apic_enabled())
++                      new.ndst = dest;
++              else
++                      new.ndst = (dest << 8) & 0xFF00;
++
++              /* Allow posting non-urgent interrupts */
++              new.sn = 0;
++
++              /* set 'NV' to 'notification vector' */
++              new.nv = POSTED_INTR_VECTOR;
++      } while (cmpxchg(&pi_desc->control, old.control,
++                      new.control) != old.control);
++
++      if(vcpu->pre_pcpu != -1) {
++              spin_lock_irqsave(
++                      &per_cpu(blocked_vcpu_on_cpu_lock,
++                      vcpu->pre_pcpu), flags);
++              list_del(&vcpu->blocked_vcpu_list);
++              spin_unlock_irqrestore(
++                      &per_cpu(blocked_vcpu_on_cpu_lock,
++                      vcpu->pre_pcpu), flags);
++              vcpu->pre_pcpu = -1;
++      }
++}
++
+ /*
+  * This routine does the following things for vCPU which is going
+  * to be blocked if VT-d PI is enabled.
+@@ -11093,44 +11130,12 @@ static int vmx_pre_block(struct kvm_vcpu
+ static void pi_post_block(struct kvm_vcpu *vcpu)
+ {
+-      struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+-      struct pi_desc old, new;
+-      unsigned int dest;
+-      unsigned long flags;
+-
+       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+               !kvm_vcpu_apicv_active(vcpu))
+               return;
+-      do {
+-              old.control = new.control = pi_desc->control;
+-
+-              dest = cpu_physical_id(vcpu->cpu);
+-
+-              if (x2apic_enabled())
+-                      new.ndst = dest;
+-              else
+-                      new.ndst = (dest << 8) & 0xFF00;
+-
+-              /* Allow posting non-urgent interrupts */
+-              new.sn = 0;
+-
+-              /* set 'NV' to 'notification vector' */
+-              new.nv = POSTED_INTR_VECTOR;
+-      } while (cmpxchg(&pi_desc->control, old.control,
+-                      new.control) != old.control);
+-
+-      if(vcpu->pre_pcpu != -1) {
+-              spin_lock_irqsave(
+-                      &per_cpu(blocked_vcpu_on_cpu_lock,
+-                      vcpu->pre_pcpu), flags);
+-              list_del(&vcpu->blocked_vcpu_list);
+-              spin_unlock_irqrestore(
+-                      &per_cpu(blocked_vcpu_on_cpu_lock,
+-                      vcpu->pre_pcpu), flags);
+-              vcpu->pre_pcpu = -1;
+-      }
++      __pi_post_block(vcpu);
+ }
+ static void vmx_post_block(struct kvm_vcpu *vcpu)
diff --git a/queue-4.9/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch b/queue-4.9/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
new file mode 100644 (file)
index 0000000..47625a7
--- /dev/null
@@ -0,0 +1,130 @@
+From 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 6 Jun 2017 12:57:06 +0200
+Subject: KVM: VMX: simplify and fix vmx_vcpu_pi_load
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a upstream.
+
+The simplify part: do not touch pi_desc.nv, we can set it when the
+VCPU is first created.  Likewise, pi_desc.sn is only handled by
+vmx_vcpu_pi_load, do not touch it in __pi_post_block.
+
+The fix part: do not check kvm_arch_has_assigned_device, instead
+check the SN bit to figure out whether vmx_vcpu_pi_put ran before.
+This matches what the previous patch did in pi_post_block.
+
+Cc: Huangweidong <weidong.huang@huawei.com>
+Cc: Gonglei <arei.gonglei@huawei.com>
+Cc: wangxin <wangxinxin.wang@huawei.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   68 +++++++++++++++++++++++++++--------------------------
+ 1 file changed, 35 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2167,43 +2167,41 @@ static void vmx_vcpu_pi_load(struct kvm_
+       struct pi_desc old, new;
+       unsigned int dest;
+-      if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+-              !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+-              !kvm_vcpu_apicv_active(vcpu))
++      /*
++       * In case of hot-plug or hot-unplug, we may have to undo
++       * vmx_vcpu_pi_put even if there is no assigned device.  And we
++       * always keep PI.NDST up to date for simplicity: it makes the
++       * code easier, and CPU migration is not a fast path.
++       */
++      if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
++              return;
++
++      /*
++       * First handle the simple case where no cmpxchg is necessary; just
++       * allow posting non-urgent interrupts.
++       *
++       * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
++       * PI.NDST: pi_post_block will do it for us and the wakeup_handler
++       * expects the VCPU to be on the blocked_vcpu_list that matches
++       * PI.NDST.
++       */
++      if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
++          vcpu->cpu == cpu) {
++              pi_clear_sn(pi_desc);
+               return;
++      }
++      /* The full case.  */
+       do {
+               old.control = new.control = pi_desc->control;
+-              /*
+-               * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+-               * are two possible cases:
+-               * 1. After running 'pre_block', context switch
+-               *    happened. For this case, 'sn' was set in
+-               *    vmx_vcpu_put(), so we need to clear it here.
+-               * 2. After running 'pre_block', we were blocked,
+-               *    and woken up by some other guy. For this case,
+-               *    we don't need to do anything, 'pi_post_block'
+-               *    will do everything for us. However, we cannot
+-               *    check whether it is case #1 or case #2 here
+-               *    (maybe, not needed), so we also clear sn here,
+-               *    I think it is not a big deal.
+-               */
+-              if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
+-                      if (vcpu->cpu != cpu) {
+-                              dest = cpu_physical_id(cpu);
+-
+-                              if (x2apic_enabled())
+-                                      new.ndst = dest;
+-                              else
+-                                      new.ndst = (dest << 8) & 0xFF00;
+-                      }
++              dest = cpu_physical_id(cpu);
+-                      /* set 'NV' to 'notification vector' */
+-                      new.nv = POSTED_INTR_VECTOR;
+-              }
++              if (x2apic_enabled())
++                      new.ndst = dest;
++              else
++                      new.ndst = (dest << 8) & 0xFF00;
+-              /* Allow posting non-urgent interrupts */
+               new.sn = 0;
+       } while (cmpxchg(&pi_desc->control, old.control,
+                       new.control) != old.control);
+@@ -9187,6 +9185,13 @@ static struct kvm_vcpu *vmx_create_vcpu(
+       vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
++      /*
++       * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
++       * or POSTED_INTR_WAKEUP_VECTOR.
++       */
++      vmx->pi_desc.nv = POSTED_INTR_VECTOR;
++      vmx->pi_desc.sn = 1;
++
+       return &vmx->vcpu;
+ free_vmcs:
+@@ -11018,9 +11023,6 @@ static void __pi_post_block(struct kvm_v
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
+-              /* Allow posting non-urgent interrupts */
+-              new.sn = 0;
+-
+               /* set 'NV' to 'notification vector' */
+               new.nv = POSTED_INTR_VECTOR;
+       } while (cmpxchg(&pi_desc->control, old.control,
diff --git a/queue-4.9/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch b/queue-4.9/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
new file mode 100644 (file)
index 0000000..98b325b
--- /dev/null
@@ -0,0 +1,81 @@
+From b862789aa5186d5ea3a024b7cfe0f80c3a38b980 Mon Sep 17 00:00:00 2001
+From: Boqun Feng <boqun.feng@gmail.com>
+Date: Fri, 29 Sep 2017 19:01:45 +0800
+Subject: kvm/x86: Handle async PF in RCU read-side critical sections
+
+From: Boqun Feng <boqun.feng@gmail.com>
+
+commit b862789aa5186d5ea3a024b7cfe0f80c3a38b980 upstream.
+
+Sasha Levin reported a WARNING:
+
+| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
+| rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
+| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
+| rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
+...
+| CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246
+| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+| 1.10.1-1ubuntu1 04/01/2014
+| Call Trace:
+...
+| RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
+| RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
+| RSP: 0018:ffff88003b2debc8 EFLAGS: 00010002
+| RAX: 0000000000000001 RBX: 1ffff1000765bd85 RCX: 0000000000000000
+| RDX: 1ffff100075d7882 RSI: ffffffffb5c7da20 RDI: ffff88003aebc410
+| RBP: ffff88003b2def30 R08: dffffc0000000000 R09: 0000000000000001
+| R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003b2def08
+| R13: 0000000000000000 R14: ffff88003aebc040 R15: ffff88003aebc040
+| __schedule+0x201/0x2240 kernel/sched/core.c:3292
+| schedule+0x113/0x460 kernel/sched/core.c:3421
+| kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158
+| do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271
+| async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069
+| RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996
+| RSP: 0018:ffff88003b2df520 EFLAGS: 00010283
+| RAX: 000000000000003f RBX: ffffffffb5d1e141 RCX: ffff88003b2df670
+| RDX: 0000000000000001 RSI: dffffc0000000000 RDI: ffffffffb5d1e140
+| RBP: ffff88003b2df560 R08: dffffc0000000000 R09: 0000000000000000
+| R10: ffff88003b2df718 R11: 0000000000000000 R12: ffff88003b2df5d8
+| R13: 0000000000000064 R14: ffffffffb5d1e140 R15: 0000000000000000
+| vsnprintf+0x173/0x1700 lib/vsprintf.c:2136
+| sprintf+0xbe/0xf0 lib/vsprintf.c:2386
+| proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23
+| get_link fs/namei.c:1047 [inline]
+| link_path_walk+0x1041/0x1490 fs/namei.c:2127
+...
+
+This happened when the host hit a page fault, and delivered it as an
+async page fault, while the guest was in an RCU read-side critical
+section.  The guest then tries to reschedule in kvm_async_pf_task_wait(),
+but rcu_preempt_note_context_switch() would treat the reschedule as a
+sleep in RCU read-side critical section, which is not allowed (even in
+preemptible RCU).  Thus the WARN.
+
+To cure this, make kvm_async_pf_task_wait() go to the halt path if the
+PF happens in an RCU read-side critical section.
+
+Reported-by: Sasha Levin <levinsasha928@gmail.com>
+Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/kvm.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -141,7 +141,8 @@ void kvm_async_pf_task_wait(u32 token)
+       n.token = token;
+       n.cpu = smp_processor_id();
+-      n.halted = is_idle_task(current) || preempt_count() > 1;
++      n.halted = is_idle_task(current) || preempt_count() > 1 ||
++                 rcu_preempt_depth();
+       init_swait_queue_head(&n.wq);
+       hlist_add_head(&n.link, &b->list);
+       raw_spin_unlock(&b->lock);
diff --git a/queue-4.9/nl80211-check-for-the-required-netlink-attributes-presence.patch b/queue-4.9/nl80211-check-for-the-required-netlink-attributes-presence.patch
new file mode 100644 (file)
index 0000000..b97057a
--- /dev/null
@@ -0,0 +1,41 @@
+From e785fa0a164aa11001cba931367c7f94ffaff888 Mon Sep 17 00:00:00 2001
+From: Vladis Dronov <vdronov@redhat.com>
+Date: Wed, 13 Sep 2017 00:21:21 +0200
+Subject: nl80211: check for the required netlink attributes presence
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+commit e785fa0a164aa11001cba931367c7f94ffaff888 upstream.
+
+nl80211_set_rekey_data() does not check if the required attributes
+NL80211_REKEY_DATA_{REPLAY_CTR,KEK,KCK} are present when processing
+NL80211_CMD_SET_REKEY_OFFLOAD request. This request can be issued by
+users with CAP_NET_ADMIN privilege and may result in NULL dereference
+and a system crash. Add a check for the required attributes presence.
+This patch is based on the patch by bo Zhang.
+
+This fixes CVE-2017-12153.
+
+References: https://bugzilla.redhat.com/show_bug.cgi?id=1491046
+Fixes: e5497d766ad ("cfg80211/nl80211: support GTK rekey offload")
+Reported-by: bo Zhang <zhangbo5891001@gmail.com>
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/wireless/nl80211.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -10385,6 +10385,9 @@ static int nl80211_set_rekey_data(struct
+       if (err)
+               return err;
++      if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
++          !tb[NL80211_REKEY_DATA_KCK])
++              return -EINVAL;
+       if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
+               return -ERANGE;
+       if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
diff --git a/queue-4.9/pci-fix-race-condition-with-driver_override.patch b/queue-4.9/pci-fix-race-condition-with-driver_override.patch
new file mode 100644 (file)
index 0000000..e47bc3c
--- /dev/null
@@ -0,0 +1,66 @@
+From 9561475db680f7144d2223a409dd3d7e322aca03 Mon Sep 17 00:00:00 2001
+From: Nicolai Stange <nstange@suse.de>
+Date: Mon, 11 Sep 2017 09:45:40 +0200
+Subject: PCI: Fix race condition with driver_override
+
+From: Nicolai Stange <nstange@suse.de>
+
+commit 9561475db680f7144d2223a409dd3d7e322aca03 upstream.
+
+The driver_override implementation is susceptible to a race condition when
+different threads are reading vs. storing a different driver override.  Add
+locking to avoid the race condition.
+
+This is in close analogy to commit 6265539776a0 ("driver core: platform:
+fix race condition with driver_override") from Adrian Salido.
+
+Fixes: 782a985d7af2 ("PCI: Introduce new device binding path using pci_dev.driver_override")
+Signed-off-by: Nicolai Stange <nstange@suse.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/pci-sysfs.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/drivers/pci/pci-sysfs.c
++++ b/drivers/pci/pci-sysfs.c
+@@ -527,7 +527,7 @@ static ssize_t driver_override_store(str
+                                    const char *buf, size_t count)
+ {
+       struct pci_dev *pdev = to_pci_dev(dev);
+-      char *driver_override, *old = pdev->driver_override, *cp;
++      char *driver_override, *old, *cp;
+       /* We need to keep extra room for a newline */
+       if (count >= (PAGE_SIZE - 1))
+@@ -541,12 +541,15 @@ static ssize_t driver_override_store(str
+       if (cp)
+               *cp = '\0';
++      device_lock(dev);
++      old = pdev->driver_override;
+       if (strlen(driver_override)) {
+               pdev->driver_override = driver_override;
+       } else {
+               kfree(driver_override);
+               pdev->driver_override = NULL;
+       }
++      device_unlock(dev);
+       kfree(old);
+@@ -557,8 +560,12 @@ static ssize_t driver_override_show(stru
+                                   struct device_attribute *attr, char *buf)
+ {
+       struct pci_dev *pdev = to_pci_dev(dev);
++      ssize_t len;
+-      return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
++      device_lock(dev);
++      len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
++      device_unlock(dev);
++      return len;
+ }
+ static DEVICE_ATTR_RW(driver_override);
diff --git a/queue-4.9/pm-core-fix-device_pm_check_callbacks.patch b/queue-4.9/pm-core-fix-device_pm_check_callbacks.patch
new file mode 100644 (file)
index 0000000..949e217
--- /dev/null
@@ -0,0 +1,43 @@
+From 157c460e10cb6eca29ccbd0f023db159d0c55ec7 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Tue, 19 Sep 2017 02:22:39 +0200
+Subject: PM: core: Fix device_pm_check_callbacks()
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 157c460e10cb6eca29ccbd0f023db159d0c55ec7 upstream.
+
+The device_pm_check_callbacks() function doesn't check legacy
+->suspend and ->resume callback pointers under the device's
+bus type, class and driver, so in some cases it may set the
+no_pm_callbacks flag for the device incorrectly and then the
+callbacks may be skipped during system suspend/resume, which
+shouldn't happen.
+
+Fixes: aa8e54b55947 (PM / sleep: Go direct_complete if driver has no callbacks)
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/power/main.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/base/power/main.c
++++ b/drivers/base/power/main.c
+@@ -1757,10 +1757,13 @@ void device_pm_check_callbacks(struct de
+ {
+       spin_lock_irq(&dev->power.lock);
+       dev->power.no_pm_callbacks =
+-              (!dev->bus || pm_ops_is_empty(dev->bus->pm)) &&
+-              (!dev->class || pm_ops_is_empty(dev->class->pm)) &&
++              (!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
++               !dev->bus->suspend && !dev->bus->resume)) &&
++              (!dev->class || (pm_ops_is_empty(dev->class->pm) &&
++               !dev->class->suspend && !dev->class->resume)) &&
+               (!dev->type || pm_ops_is_empty(dev->type->pm)) &&
+               (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
+-              (!dev->driver || pm_ops_is_empty(dev->driver->pm));
++              (!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
++               !dev->driver->suspend && !dev->driver->resume));
+       spin_unlock_irq(&dev->power.lock);
+ }
diff --git a/queue-4.9/powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch b/queue-4.9/powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch
new file mode 100644 (file)
index 0000000..e44a4dd
--- /dev/null
@@ -0,0 +1,62 @@
+From a4979a7e71eb8da976cbe4a0a1fa50636e76b04f Mon Sep 17 00:00:00 2001
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+Date: Thu, 1 Jun 2017 16:18:16 +0530
+Subject: powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+commit a4979a7e71eb8da976cbe4a0a1fa50636e76b04f upstream.
+
+For DYNAMIC_FTRACE_WITH_REGS, we should be passing-in the original set
+of registers in pt_regs, to capture the state _before_ ftrace_caller.
+However, we are instead passing the stack pointer *after* allocating a
+stack frame in ftrace_caller. Fix this by saving the proper value of r1
+in pt_regs. Also, use SAVE_10GPRS() to simplify the code.
+
+Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI")
+Cc: stable@vger.kernel.org # v4.6+
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ arch/powerpc/kernel/entry_64.S |   20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/arch/powerpc/kernel/entry_64.S
++++ b/arch/powerpc/kernel/entry_64.S
+@@ -1235,10 +1235,14 @@ _GLOBAL(ftrace_caller)
+       stdu    r1,-SWITCH_FRAME_SIZE(r1)
+       /* Save all gprs to pt_regs */
+-      SAVE_8GPRS(0,r1)
+-      SAVE_8GPRS(8,r1)
+-      SAVE_8GPRS(16,r1)
+-      SAVE_8GPRS(24,r1)
++      SAVE_GPR(0, r1)
++      SAVE_10GPRS(2, r1)
++      SAVE_10GPRS(12, r1)
++      SAVE_10GPRS(22, r1)
++
++      /* Save previous stack pointer (r1) */
++      addi    r8, r1, SWITCH_FRAME_SIZE
++      std     r8, GPR1(r1)
+       /* Load special regs for save below */
+       mfmsr   r8
+@@ -1292,10 +1296,10 @@ ftrace_call:
+ #endif
+       /* Restore gprs */
+-      REST_8GPRS(0,r1)
+-      REST_8GPRS(8,r1)
+-      REST_8GPRS(16,r1)
+-      REST_8GPRS(24,r1)
++      REST_GPR(0,r1)
++      REST_10GPRS(2,r1)
++      REST_10GPRS(12,r1)
++      REST_10GPRS(22,r1)
+       /* Restore callee's TOC */
+       ld      r2, 24(r1)
diff --git a/queue-4.9/powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch b/queue-4.9/powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch
new file mode 100644 (file)
index 0000000..d6d0ed5
--- /dev/null
@@ -0,0 +1,39 @@
+From b537ca6fede69a281dc524983e5e633d79a10a08 Mon Sep 17 00:00:00 2001
+From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
+Date: Wed, 20 Sep 2017 17:02:52 -0400
+Subject: powerpc/pseries: Fix parent_dn reference leak in add_dt_node()
+
+From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
+
+commit b537ca6fede69a281dc524983e5e633d79a10a08 upstream.
+
+A reference to the parent device node is held by add_dt_node() for the
+node to be added. If the call to dlpar_configure_connector() fails
+add_dt_node() returns ENOENT and that reference is not freed.
+
+Add a call to of_node_put(parent_dn) prior to bailing out after a
+failed dlpar_configure_connector() call.
+
+Fixes: 8d5ff320766f ("powerpc/pseries: Make dlpar_configure_connector parent node aware")
+Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/pseries/mobility.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/platforms/pseries/mobility.c
++++ b/arch/powerpc/platforms/pseries/mobility.c
+@@ -225,8 +225,10 @@ static int add_dt_node(__be32 parent_pha
+               return -ENOENT;
+       dn = dlpar_configure_connector(drc_index, parent_dn);
+-      if (!dn)
++      if (!dn) {
++              of_node_put(parent_dn);
+               return -ENOENT;
++      }
+       rc = dlpar_attach_node(dn);
+       if (rc)
diff --git a/queue-4.9/powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch b/queue-4.9/powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch
new file mode 100644 (file)
index 0000000..b400cdd
--- /dev/null
@@ -0,0 +1,45 @@
+From c1fa0768a8713b135848f78fd43ffc208d8ded70 Mon Sep 17 00:00:00 2001
+From: Gustavo Romero <gromero@linux.vnet.ibm.com>
+Date: Wed, 13 Sep 2017 22:13:48 -0400
+Subject: powerpc/tm: Flush TM only if CPU has TM feature
+
+From: Gustavo Romero <gromero@linux.vnet.ibm.com>
+
+commit c1fa0768a8713b135848f78fd43ffc208d8ded70 upstream.
+
+Commit cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump")
+added code to access TM SPRs in flush_tmregs_to_thread(). However
+flush_tmregs_to_thread() does not check if TM feature is available on
+CPU before trying to access TM SPRs in order to copy live state to
+thread structures. flush_tmregs_to_thread() is indeed guarded by
+CONFIG_PPC_TRANSACTIONAL_MEM but it might be the case that kernel
+was compiled with CONFIG_PPC_TRANSACTIONAL_MEM enabled and ran on
+a CPU without TM feature available, thus resulting in the execution
+of TM instructions that are treated by the CPU as illegal instructions.
+
+The fix is just to add proper checking in flush_tmregs_to_thread()
+if CPU has the TM feature before accessing any TM-specific resource,
+returning immediately if TM is not available on the CPU. Adding
+that checking in flush_tmregs_to_thread() instead of in places
+where it is called, like in vsr_get() and vsr_set(), is better because
+it avoids the same problem cropping up elsewhere.
+
+Fixes: cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump")
+Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
+Reviewed-by: Cyril Bur <cyrilbur@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
+index 07cd22e35405..f52ad5bb7109 100644
+--- a/arch/powerpc/kernel/ptrace.c
++++ b/arch/powerpc/kernel/ptrace.c
+@@ -131,7 +131,7 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
+        * in the appropriate thread structures from live.
+        */
+-      if (tsk != current)
++      if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
+               return;
+       if (MSR_TM_SUSPENDED(mfmsr())) {
diff --git a/queue-4.9/s390-mm-fix-write-access-check-in-gup_huge_pmd.patch b/queue-4.9/s390-mm-fix-write-access-check-in-gup_huge_pmd.patch
new file mode 100644 (file)
index 0000000..5b441a6
--- /dev/null
@@ -0,0 +1,45 @@
+From ba385c0594e723d41790ecfb12c610e6f90c7785 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Date: Mon, 18 Sep 2017 16:51:51 +0200
+Subject: s390/mm: fix write access check in gup_huge_pmd()
+
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+
+commit ba385c0594e723d41790ecfb12c610e6f90c7785 upstream.
+
+The check for the _SEGMENT_ENTRY_PROTECT bit in gup_huge_pmd() is the
+wrong way around. It must not be set for write==1, and not be checked for
+write==0. Fix this similar to how it was fixed for ptes long time ago in
+commit 25591b070336 ("[S390] fix get_user_pages_fast").
+
+One impact of this bug would be unnecessarily using the gup slow path for
+write==0 on r/w mappings. A potentially more severe impact would be that
+gup_huge_pmd() will succeed for write==1 on r/o mappings.
+
+Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/mm/gup.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/s390/mm/gup.c
++++ b/arch/s390/mm/gup.c
+@@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *p
+ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+ {
+-      unsigned long mask, result;
+       struct page *head, *page;
++      unsigned long mask;
+       int refs;
+-      result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
+-      mask = result | _SEGMENT_ENTRY_INVALID;
+-      if ((pmd_val(pmd) & mask) != result)
++      mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
++      if ((pmd_val(pmd) & mask) != 0)
+               return 0;
+       VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
diff --git a/queue-4.9/seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch b/queue-4.9/seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch
new file mode 100644 (file)
index 0000000..1184bfc
--- /dev/null
@@ -0,0 +1,91 @@
+From 66a733ea6b611aecf0119514d2dddab5f9d6c01e Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Wed, 27 Sep 2017 09:25:30 -0600
+Subject: seccomp: fix the usage of get/put_seccomp_filter() in seccomp_get_filter()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 66a733ea6b611aecf0119514d2dddab5f9d6c01e upstream.
+
+As Chris explains, get_seccomp_filter() and put_seccomp_filter() can end
+up using different filters. Once we drop ->siglock it is possible for
+task->seccomp.filter to have been replaced by SECCOMP_FILTER_FLAG_TSYNC.
+
+Fixes: f8e529ed941b ("seccomp, ptrace: add support for dumping seccomp filters")
+Reported-by: Chris Salls <chrissalls5@gmail.com>
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+[tycho: add __get_seccomp_filter vs. open coding refcount_inc()]
+Signed-off-by: Tycho Andersen <tycho@docker.com>
+[kees: tweak commit log]
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/seccomp.c |   23 ++++++++++++++++-------
+ 1 file changed, 16 insertions(+), 7 deletions(-)
+
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -457,14 +457,19 @@ static long seccomp_attach_filter(unsign
+       return 0;
+ }
++void __get_seccomp_filter(struct seccomp_filter *filter)
++{
++      /* Reference count is bounded by the number of total processes. */
++      atomic_inc(&filter->usage);
++}
++
+ /* get_seccomp_filter - increments the reference count of the filter on @tsk */
+ void get_seccomp_filter(struct task_struct *tsk)
+ {
+       struct seccomp_filter *orig = tsk->seccomp.filter;
+       if (!orig)
+               return;
+-      /* Reference count is bounded by the number of total processes. */
+-      atomic_inc(&orig->usage);
++      __get_seccomp_filter(orig);
+ }
+ static inline void seccomp_filter_free(struct seccomp_filter *filter)
+@@ -475,10 +480,8 @@ static inline void seccomp_filter_free(s
+       }
+ }
+-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
+-void put_seccomp_filter(struct task_struct *tsk)
++static void __put_seccomp_filter(struct seccomp_filter *orig)
+ {
+-      struct seccomp_filter *orig = tsk->seccomp.filter;
+       /* Clean up single-reference branches iteratively. */
+       while (orig && atomic_dec_and_test(&orig->usage)) {
+               struct seccomp_filter *freeme = orig;
+@@ -487,6 +490,12 @@ void put_seccomp_filter(struct task_stru
+       }
+ }
++/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
++void put_seccomp_filter(struct task_struct *tsk)
++{
++      __put_seccomp_filter(tsk->seccomp.filter);
++}
++
+ /**
+  * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
+  * @syscall: syscall number to send to userland
+@@ -892,13 +901,13 @@ long seccomp_get_filter(struct task_stru
+       if (!data)
+               goto out;
+-      get_seccomp_filter(task);
++      __get_seccomp_filter(filter);
+       spin_unlock_irq(&task->sighand->siglock);
+       if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
+               ret = -EFAULT;
+-      put_seccomp_filter(task);
++      __put_seccomp_filter(filter);
+       return ret;
+ out:
diff --git a/queue-4.9/selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch b/queue-4.9/selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch
new file mode 100644 (file)
index 0000000..afc25d6
--- /dev/null
@@ -0,0 +1,61 @@
+From 10859f3855db4c6f10dc7974ff4b3a292f3de8e0 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Thu, 7 Sep 2017 16:32:46 -0700
+Subject: selftests/seccomp: Support glibc 2.26 siginfo_t.h
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 10859f3855db4c6f10dc7974ff4b3a292f3de8e0 upstream.
+
+The 2.26 release of glibc changed how siginfo_t is defined, and the earlier
+work-around of using the kernel definition is no longer needed. The old
+way needs to stay around for a while, though.
+
+Reported-by: Seth Forshee <seth.forshee@canonical.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Will Drewry <wad@chromium.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: linux-kselftest@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Tested-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/seccomp/seccomp_bpf.c |   18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
+@@ -6,10 +6,18 @@
+  */
+ #include <sys/types.h>
+-#include <asm/siginfo.h>
+-#define __have_siginfo_t 1
+-#define __have_sigval_t 1
+-#define __have_sigevent_t 1
++
++/*
++ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
++ * we need to use the kernel's siginfo.h file and trick glibc
++ * into accepting it.
++ */
++#if !__GLIBC_PREREQ(2, 26)
++# include <asm/siginfo.h>
++# define __have_siginfo_t 1
++# define __have_sigval_t 1
++# define __have_sigevent_t 1
++#endif
+ #include <errno.h>
+ #include <linux/filter.h>
+@@ -676,7 +684,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS)
+       syscall(__NR_getpid);
+ }
+-static struct siginfo TRAP_info;
++static siginfo_t TRAP_info;
+ static volatile int TRAP_nr;
+ static void TRAP_action(int nr, siginfo_t *info, void *void_context)
+ {
index 4905a38cac1e6a78bd75fba45407732e04a1d6dd..27b1a6488b80c85f807ef13e12770b718e0dc5f3 100644 (file)
@@ -22,3 +22,34 @@ security-keys-rewrite-all-of-big_key-crypto.patch
 keys-fix-writing-past-end-of-user-supplied-buffer-in-keyring_read.patch
 keys-prevent-creating-a-different-user-s-keyrings.patch
 keys-prevent-keyctl_read-on-negative-key.patch
+powerpc-pseries-fix-parent_dn-reference-leak-in-add_dt_node.patch
+powerpc-tm-flush-tm-only-if-cpu-has-tm-feature.patch
+powerpc-ftrace-pass-the-correct-stack-pointer-for-dynamic_ftrace_with_regs.patch
+s390-mm-fix-write-access-check-in-gup_huge_pmd.patch
+pm-core-fix-device_pm_check_callbacks.patch
+fix-smb3.1.1-guest-authentication-to-samba.patch
+smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch
+smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch
+smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch
+vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch
+nl80211-check-for-the-required-netlink-attributes-presence.patch
+bsg-lib-don-t-free-job-in-bsg_prepare_job.patch
+iw_cxgb4-remove-the-stid-on-listen-create-failure.patch
+iw_cxgb4-put-ep-reference-in-pass_accept_req.patch
+selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch
+seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch
+arm64-make-sure-spsel-is-always-set.patch
+arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
+kvm-vmx-extract-__pi_post_block.patch
+kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
+kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
+kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
+kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
+kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
+xfs-validate-bdev-support-for-dax-inode-flag.patch
+etnaviv-fix-gem-object-list-corruption.patch
+pci-fix-race-condition-with-driver_override.patch
+btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
+btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
+btrfs-prevent-to-set-invalid-default-subvolid.patch
+x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
diff --git a/queue-4.9/smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch b/queue-4.9/smb-validate-negotiate-to-protect-against-downgrade-even-if-signing-off.patch
new file mode 100644 (file)
index 0000000..bba7459
--- /dev/null
@@ -0,0 +1,56 @@
+From 0603c96f3af50e2f9299fa410c224ab1d465e0f9 Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Wed, 20 Sep 2017 19:57:18 -0500
+Subject: SMB: Validate negotiate (to protect against downgrade) even if signing off
+
+From: Steve French <smfrench@gmail.com>
+
+commit 0603c96f3af50e2f9299fa410c224ab1d465e0f9 upstream.
+
+As long as signing is supported (ie not a guest user connection) and
+connection is SMB3 or SMB3.02, then validate negotiate (protect
+against man in the middle downgrade attacks).  We had been doing this
+only when signing was required, not when signing was just enabled,
+but this more closely matches recommended SMB3 behavior and is
+better security.  Suggested by Metze.
+
+Signed-off-by: Steve French <smfrench@gmail.com>
+Reviewed-by: Jeremy Allison <jra@samba.org>
+Acked-by: Stefan Metzmacher <metze@samba.org>
+Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2pdu.c |   17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -531,15 +531,22 @@ int smb3_validate_negotiate(const unsign
+       /*
+        * validation ioctl must be signed, so no point sending this if we
+-       * can not sign it.  We could eventually change this to selectively
++       * can not sign it (ie are not known user).  Even if signing is not
++       * required (enabled but not negotiated), in those cases we selectively
+        * sign just this, the first and only signed request on a connection.
+-       * This is good enough for now since a user who wants better security
+-       * would also enable signing on the mount. Having validation of
+-       * negotiate info for signed connections helps reduce attack vectors
++       * Having validation of negotiate info  helps reduce attack vectors.
+        */
+-      if (tcon->ses->server->sign == false)
++      if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
+               return 0; /* validation requires signing */
++      if (tcon->ses->user_name == NULL) {
++              cifs_dbg(FYI, "Can't validate negotiate: null user mount\n");
++              return 0; /* validation requires signing */
++      }
++
++      if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
++              cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
++
+       vneg_inbuf.Capabilities =
+                       cpu_to_le32(tcon->ses->server->vals->req_capabilities);
+       memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
diff --git a/queue-4.9/smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch b/queue-4.9/smb3-don-t-ignore-o_sync-o_dsync-and-o_direct-flags.patch
new file mode 100644 (file)
index 0000000..d63c58f
--- /dev/null
@@ -0,0 +1,34 @@
+From 1013e760d10e614dc10b5624ce9fc41563ba2e65 Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Fri, 22 Sep 2017 01:40:27 -0500
+Subject: SMB3: Don't ignore O_SYNC/O_DSYNC and O_DIRECT flags
+
+From: Steve French <smfrench@gmail.com>
+
+commit 1013e760d10e614dc10b5624ce9fc41563ba2e65 upstream.
+
+Signed-off-by: Steve French <smfrench@gmail.com>
+Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/file.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct ino
+       if (backup_cred(cifs_sb))
+               create_options |= CREATE_OPEN_BACKUP_INTENT;
++      /* O_SYNC also has bit for O_DSYNC so following check picks up either */
++      if (f_flags & O_SYNC)
++              create_options |= CREATE_WRITE_THROUGH;
++
++      if (f_flags & O_DIRECT)
++              create_options |= CREATE_NO_BUFFER;
++
+       oparms.tcon = tcon;
+       oparms.cifs_sb = cifs_sb;
+       oparms.desired_access = desired_access;
diff --git a/queue-4.9/smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch b/queue-4.9/smb3-warn-user-if-trying-to-sign-connection-that-authenticated-as-guest.patch
new file mode 100644 (file)
index 0000000..0a0fe73
--- /dev/null
@@ -0,0 +1,32 @@
+From c721c38957fb19982416f6be71aae7b30630d83b Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Tue, 19 Sep 2017 18:40:03 -0500
+Subject: SMB3: Warn user if trying to sign connection that authenticated as guest
+
+From: Steve French <smfrench@gmail.com>
+
+commit c721c38957fb19982416f6be71aae7b30630d83b upstream.
+
+It can be confusing if user ends up authenticated as guest but they
+requested signing (server will return error validating signed packets)
+so add log message for this.
+
+Signed-off-by: Steve French <smfrench@gmail.com>
+Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2pdu.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -1010,6 +1010,8 @@ SMB2_sess_setup(const unsigned int xid,
+       while (sess_data->func)
+               sess_data->func(sess_data);
++      if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign))
++              cifs_dbg(VFS, "signing requested but authenticated as guest\n");
+       rc = sess_data->result;
+ out:
+       kfree(sess_data);
diff --git a/queue-4.9/vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch b/queue-4.9/vfs-return-enxio-for-negative-seek_hole-seek_data-offsets.patch
new file mode 100644 (file)
index 0000000..b4efe72
--- /dev/null
@@ -0,0 +1,44 @@
+From fc46820b27a2d9a46f7e90c9ceb4a64a1bc5fab8 Mon Sep 17 00:00:00 2001
+From: Andreas Gruenbacher <agruenba@redhat.com>
+Date: Mon, 25 Sep 2017 12:23:03 +0200
+Subject: vfs: Return -ENXIO for negative SEEK_HOLE / SEEK_DATA offsets
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+commit fc46820b27a2d9a46f7e90c9ceb4a64a1bc5fab8 upstream.
+
+In generic_file_llseek_size, return -ENXIO for negative offsets as well
+as offsets beyond EOF.  This affects filesystems which don't implement
+SEEK_HOLE / SEEK_DATA internally, possibly because they don't support
+holes.
+
+Fixes xfstest generic/448.
+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/read_write.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/read_write.c
++++ b/fs/read_write.c
+@@ -114,7 +114,7 @@ generic_file_llseek_size(struct file *fi
+                * In the generic case the entire file is data, so as long as
+                * offset isn't at the end of the file then the offset is data.
+                */
+-              if (offset >= eof)
++              if ((unsigned long long)offset >= eof)
+                       return -ENXIO;
+               break;
+       case SEEK_HOLE:
+@@ -122,7 +122,7 @@ generic_file_llseek_size(struct file *fi
+                * There is a virtual hole at the end of the file, so as long as
+                * offset isn't i_size or larger, return i_size.
+                */
+-              if (offset >= eof)
++              if ((unsigned long long)offset >= eof)
+                       return -ENXIO;
+               offset = eof;
+               break;
diff --git a/queue-4.9/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch b/queue-4.9/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
new file mode 100644 (file)
index 0000000..de37d41
--- /dev/null
@@ -0,0 +1,211 @@
+From a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+Date: Mon, 4 Sep 2017 10:32:15 +0200
+Subject: x86/mm: Fix fault error path using unsafe vma pointer
+
+From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+
+commit a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 upstream.
+
+commit 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal
+generation code") passes down a vma pointer to the error path, but that is
+done once the mmap_sem is released when calling mm_fault_error() from
+__do_page_fault().
+
+This is dangerous as the vma structure is no longer safe to use once the
+mmap_sem has been released. As only the protection key value is required in
+the error processing, we could just pass down this value.
+
+Fix it by passing a pointer to a protection key value down to the fault
+signal generation code. Using a pointer allows keeping the check that
+generates a warning message in fill_sig_info_pkey() when the vma was not
+known. If the pointer is valid, the protection value can be accessed by
+dereferencing the pointer.
+
+[ tglx: Made *pkey u32 as that's the type which is passed in siginfo ]
+
+Fixes: 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal generation code")
+Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Link: http://lkml.kernel.org/r/1504513935-12742-1-git-send-email-ldufour@linux.vnet.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c |   47 ++++++++++++++++++++++++-----------------------
+ 1 file changed, 24 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -191,8 +191,7 @@ is_prefetch(struct pt_regs *regs, unsign
+  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+  *         faulted on a pte with its pkey=4.
+  */
+-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
+-              struct vm_area_struct *vma)
++static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+ {
+       /* This is effectively an #ifdef */
+       if (!boot_cpu_has(X86_FEATURE_OSPKE))
+@@ -208,7 +207,7 @@ static void fill_sig_info_pkey(int si_co
+        * valid VMA, so we should never reach this without a
+        * valid VMA.
+        */
+-      if (!vma) {
++      if (!pkey) {
+               WARN_ONCE(1, "PKU fault with no VMA passed in");
+               info->si_pkey = 0;
+               return;
+@@ -218,13 +217,12 @@ static void fill_sig_info_pkey(int si_co
+        * absolutely guranteed to be 100% accurate because of
+        * the race explained above.
+        */
+-      info->si_pkey = vma_pkey(vma);
++      info->si_pkey = *pkey;
+ }
+ static void
+ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
+-                   struct task_struct *tsk, struct vm_area_struct *vma,
+-                   int fault)
++                   struct task_struct *tsk, u32 *pkey, int fault)
+ {
+       unsigned lsb = 0;
+       siginfo_t info;
+@@ -239,7 +237,7 @@ force_sig_info_fault(int si_signo, int s
+               lsb = PAGE_SHIFT;
+       info.si_addr_lsb = lsb;
+-      fill_sig_info_pkey(si_code, &info, vma);
++      fill_sig_info_pkey(si_code, &info, pkey);
+       force_sig_info(si_signo, &info, tsk);
+ }
+@@ -718,8 +716,6 @@ no_context(struct pt_regs *regs, unsigne
+       struct task_struct *tsk = current;
+       unsigned long flags;
+       int sig;
+-      /* No context means no VMA to pass down */
+-      struct vm_area_struct *vma = NULL;
+       /* Are we prepared to handle this kernel fault? */
+       if (fixup_exception(regs, X86_TRAP_PF)) {
+@@ -744,7 +740,7 @@ no_context(struct pt_regs *regs, unsigne
+                       /* XXX: hwpoison faults will set the wrong code. */
+                       force_sig_info_fault(signal, si_code, address,
+-                                           tsk, vma, 0);
++                                           tsk, NULL, 0);
+               }
+               /*
+@@ -853,8 +849,7 @@ show_signal_msg(struct pt_regs *regs, un
+ static void
+ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+-                     unsigned long address, struct vm_area_struct *vma,
+-                     int si_code)
++                     unsigned long address, u32 *pkey, int si_code)
+ {
+       struct task_struct *tsk = current;
+@@ -902,7 +897,7 @@ __bad_area_nosemaphore(struct pt_regs *r
+               tsk->thread.error_code  = error_code;
+               tsk->thread.trap_nr     = X86_TRAP_PF;
+-              force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
++              force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
+               return;
+       }
+@@ -915,9 +910,9 @@ __bad_area_nosemaphore(struct pt_regs *r
+ static noinline void
+ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+-                   unsigned long address, struct vm_area_struct *vma)
++                   unsigned long address, u32 *pkey)
+ {
+-      __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
++      __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
+ }
+ static void
+@@ -925,6 +920,10 @@ __bad_area(struct pt_regs *regs, unsigne
+          unsigned long address,  struct vm_area_struct *vma, int si_code)
+ {
+       struct mm_struct *mm = current->mm;
++      u32 pkey;
++
++      if (vma)
++              pkey = vma_pkey(vma);
+       /*
+        * Something tried to access memory that isn't in our memory map..
+@@ -932,7 +931,8 @@ __bad_area(struct pt_regs *regs, unsigne
+        */
+       up_read(&mm->mmap_sem);
+-      __bad_area_nosemaphore(regs, error_code, address, vma, si_code);
++      __bad_area_nosemaphore(regs, error_code, address,
++                             (vma) ? &pkey : NULL, si_code);
+ }
+ static noinline void
+@@ -975,7 +975,7 @@ bad_area_access_error(struct pt_regs *re
+ static void
+ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+-        struct vm_area_struct *vma, unsigned int fault)
++        u32 *pkey, unsigned int fault)
+ {
+       struct task_struct *tsk = current;
+       int code = BUS_ADRERR;
+@@ -1002,13 +1002,12 @@ do_sigbus(struct pt_regs *regs, unsigned
+               code = BUS_MCEERR_AR;
+       }
+ #endif
+-      force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
++      force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
+ }
+ static noinline void
+ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+-             unsigned long address, struct vm_area_struct *vma,
+-             unsigned int fault)
++             unsigned long address, u32 *pkey, unsigned int fault)
+ {
+       if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+               no_context(regs, error_code, address, 0, 0);
+@@ -1032,9 +1031,9 @@ mm_fault_error(struct pt_regs *regs, uns
+       } else {
+               if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+                            VM_FAULT_HWPOISON_LARGE))
+-                      do_sigbus(regs, error_code, address, vma, fault);
++                      do_sigbus(regs, error_code, address, pkey, fault);
+               else if (fault & VM_FAULT_SIGSEGV)
+-                      bad_area_nosemaphore(regs, error_code, address, vma);
++                      bad_area_nosemaphore(regs, error_code, address, pkey);
+               else
+                       BUG();
+       }
+@@ -1220,6 +1219,7 @@ __do_page_fault(struct pt_regs *regs, un
+       struct mm_struct *mm;
+       int fault, major = 0;
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
++      u32 pkey;
+       tsk = current;
+       mm = tsk->mm;
+@@ -1420,9 +1420,10 @@ good_area:
+               return;
+       }
++      pkey = vma_pkey(vma);
+       up_read(&mm->mmap_sem);
+       if (unlikely(fault & VM_FAULT_ERROR)) {
+-              mm_fault_error(regs, error_code, address, vma, fault);
++              mm_fault_error(regs, error_code, address, &pkey, fault);
+               return;
+       }
diff --git a/queue-4.9/xfs-validate-bdev-support-for-dax-inode-flag.patch b/queue-4.9/xfs-validate-bdev-support-for-dax-inode-flag.patch
new file mode 100644 (file)
index 0000000..28a59e6
--- /dev/null
@@ -0,0 +1,50 @@
+From 6851a3db7e224bbb85e23b3c64a506c9e0904382 Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Mon, 18 Sep 2017 14:46:03 -0700
+Subject: xfs: validate bdev support for DAX inode flag
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit 6851a3db7e224bbb85e23b3c64a506c9e0904382 upstream.
+
+Currently only the blocksize is checked, but we should really be calling
+bdev_dax_supported() which also tests to make sure we can get a
+struct dax_device and that the dax_direct_access() path is working.
+
+This is the same check that we do for the "-o dax" mount option in
+xfs_fs_fill_super().
+
+This does not fix the race issues that caused the XFS DAX inode option to
+be disabled, so that option will still be disabled.  If/when we re-enable
+it, though, I think we will want this issue to have been fixed.  I also do
+think that we want to fix this in stable kernels.
+
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_ioctl.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -1085,6 +1085,7 @@ xfs_ioctl_setattr_dax_invalidate(
+       int                     *join_flags)
+ {
+       struct inode            *inode = VFS_I(ip);
++      struct super_block      *sb = inode->i_sb;
+       int                     error;
+       *join_flags = 0;
+@@ -1097,7 +1098,7 @@ xfs_ioctl_setattr_dax_invalidate(
+       if (fa->fsx_xflags & FS_XFLAG_DAX) {
+               if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+                       return -EINVAL;
+-              if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
++              if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
+                       return -EINVAL;
+       }