From f9578d5644104a1e18475972a5f556773a7915b9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 2 Oct 2017 14:30:00 +0200
Subject: [PATCH] 4.13-stable patches

added patches:
	arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
	arm64-make-sure-spsel-is-always-set.patch
	arm64-mm-use-read_once-when-dereferencing-pointer-to-pte-table.patch
	btrfs-clear-ordered-flag-on-cleaning-up-ordered-extents.patch
	btrfs-finish-ordered-extent-cleaning-if-no-progress-is-found.patch
	btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
	btrfs-prevent-to-set-invalid-default-subvolid.patch
	btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
	etnaviv-fix-gem-object-list-corruption.patch
	etnaviv-fix-submit-error-path.patch
	fix-infoleak-in-waitid-2.patch
	futex-fix-pi_state-owner-serialization.patch
	irq-generic-chip-don-t-replace-domain-s-name.patch
	kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
	kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
	kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
	kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
	kvm-vmx-extract-__pi_post_block.patch
	kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
	kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
	md-fix-a-race-condition-for-flush-request-handling.patch
	md-separate-request-handling.patch
	mtd-fix-partition-alignment-check-on-multi-erasesize-devices.patch
	mtd-nand-atmel-fix-buffer-overflow-in-atmel_pmecc_user.patch
	pci-fix-race-condition-with-driver_override.patch
	platform-x86-fujitsu-laptop-don-t-oops-when-fuj02e3-is-not-presnt.patch
	pm-opp-call-notifier-without-holding-opp_table-lock.patch
	sched-sysctl-check-user-input-value-of-sysctl_sched_time_avg.patch
	x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
	xfs-validate-bdev-support-for-dax-inode-flag.patch
---
 ...tion-faults-via-do_translation_fault.patch |  65 ++++++
 .../arm64-make-sure-spsel-is-always-set.patch |  40 ++++
 ...n-dereferencing-pointer-to-pte-table.patch |  77 +++++++
 ...-flag-on-cleaning-up-ordered-extents.patch |  58 +++++
 ...ent-cleaning-if-no-progress-is-found.patch |  64 ++++++
 ...er-dereference-from-free_reloc_roots.patch |  39 ++++
 ...vent-to-set-invalid-default-subvolid.patch |  37 +++
 ...ror-to-btrfs_cmp_data_prepare-caller.patch |  38 ++++
 ...naviv-fix-gem-object-list-corruption.patch |  38 ++++
 .../etnaviv-fix-submit-error-path.patch       |  34 +++
 queue-4.13/fix-infoleak-in-waitid-2.patch     |  65 ++++++
 ...tex-fix-pi_state-owner-serialization.patch | 124 ++++++++++
 ...ric-chip-don-t-replace-domain-s-name.patch |  39 ++++
 ...-allow-l2-to-access-the-hardware-cr8.patch |  39 ++++
 ...kvm-nvmx-fix-host_cr3-host_cr4-cache.patch |  83 +++++++
 ...list-add-with-vt-d-posted-interrupts.patch | 157 +++++++++++++
 ...o-not-bug-on-out-of-bounds-guest-irq.patch |  57 +++++
 .../kvm-vmx-extract-__pi_post_block.patch     | 118 ++++++++++
 ...mx-simplify-and-fix-vmx_vcpu_pi_load.patch | 130 +++++++++++
 ...f-in-rcu-read-side-critical-sections.patch |  81 +++++++
 ...condition-for-flush-request-handling.patch |  52 +++++
 queue-4.13/md-separate-request-handling.patch | 122 ++++++++++
 ...ent-check-on-multi-erasesize-devices.patch |  47 ++++
 ...-buffer-overflow-in-atmel_pmecc_user.patch |  37 +++
 ...-race-condition-with-driver_override.patch |  66 ++++++
 ...on-t-oops-when-fuj02e3-is-not-presnt.patch |  48 ++++
 ...ifier-without-holding-opp_table-lock.patch |  58 +++++
 ...input-value-of-sysctl_sched_time_avg.patch |  83 +++++++
 queue-4.13/series                             |  30 +++
 ...-error-path-using-unsafe-vma-pointer.patch | 211 ++++++++++++++++++
 ...date-bdev-support-for-dax-inode-flag.patch |  50 +++++
 31 files changed, 2187 insertions(+)
 create mode 100644 queue-4.13/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
 create mode 100644 queue-4.13/arm64-make-sure-spsel-is-always-set.patch
 create mode 100644 queue-4.13/arm64-mm-use-read_once-when-dereferencing-pointer-to-pte-table.patch
 create mode 100644 queue-4.13/btrfs-clear-ordered-flag-on-cleaning-up-ordered-extents.patch
 create mode 100644 queue-4.13/btrfs-finish-ordered-extent-cleaning-if-no-progress-is-found.patch
 create mode 100644 queue-4.13/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
 create mode 100644 queue-4.13/btrfs-prevent-to-set-invalid-default-subvolid.patch
 create mode 100644 queue-4.13/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
 create mode 100644 queue-4.13/etnaviv-fix-gem-object-list-corruption.patch
 create mode 100644 queue-4.13/etnaviv-fix-submit-error-path.patch
 create mode 100644 queue-4.13/fix-infoleak-in-waitid-2.patch
 create mode 100644 queue-4.13/futex-fix-pi_state-owner-serialization.patch
 create mode 100644 queue-4.13/irq-generic-chip-don-t-replace-domain-s-name.patch
 create mode 100644 queue-4.13/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
 create mode 100644 queue-4.13/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
 create mode 100644 queue-4.13/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
 create mode 100644 queue-4.13/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
 create mode 100644 queue-4.13/kvm-vmx-extract-__pi_post_block.patch
 create mode 100644 queue-4.13/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
 create mode 100644 queue-4.13/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
 create mode 100644 queue-4.13/md-fix-a-race-condition-for-flush-request-handling.patch
 create mode 100644 queue-4.13/md-separate-request-handling.patch
 create mode 100644 queue-4.13/mtd-fix-partition-alignment-check-on-multi-erasesize-devices.patch
 create mode 100644 queue-4.13/mtd-nand-atmel-fix-buffer-overflow-in-atmel_pmecc_user.patch
 create mode 100644 queue-4.13/pci-fix-race-condition-with-driver_override.patch
 create mode 100644 queue-4.13/platform-x86-fujitsu-laptop-don-t-oops-when-fuj02e3-is-not-presnt.patch
 create mode 100644 queue-4.13/pm-opp-call-notifier-without-holding-opp_table-lock.patch
 create mode 100644 queue-4.13/sched-sysctl-check-user-input-value-of-sysctl_sched_time_avg.patch
 create mode 100644 queue-4.13/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
 create mode 100644 queue-4.13/xfs-validate-bdev-support-for-dax-inode-flag.patch

diff --git a/queue-4.13/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch b/queue-4.13/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
new file mode 100644
index 00000000000..bede0c894f9
--- /dev/null
+++ b/queue-4.13/arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
@@ -0,0 +1,65 @@
+From 760bfb47c36a07741a089bf6a28e854ffbee7dc9 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 29 Sep 2017 12:27:41 +0100
+Subject: arm64: fault: Route pte translation faults via do_translation_fault
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit 760bfb47c36a07741a089bf6a28e854ffbee7dc9 upstream.
+
+We currently route pte translation faults via do_page_fault, which elides
+the address check against TASK_SIZE before invoking the mm fault handling
+code. However, this can cause issues with the path walking code in
+conjunction with our word-at-a-time implementation because
+load_unaligned_zeropad can end up faulting in kernel space if it reads
+across a page boundary and runs into a page fault (e.g. by attempting to
+read from a guard region).
+
+In the case of such a fault, load_unaligned_zeropad has registered a
+fixup to shift the valid data and pad with zeroes, however the abort is
+reported as a level 3 translation fault and we dispatch it straight to
+do_page_fault, despite it being a kernel address. This results in calling
+a sleeping function from atomic context:
+
+  BUG: sleeping function called from invalid context at arch/arm64/mm/fault.c:313
+  in_atomic(): 0, irqs_disabled(): 0, pid: 10290
+  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
+  [...]
+  [<ffffff8e016cd0cc>] ___might_sleep+0x134/0x144
+  [<ffffff8e016cd158>] __might_sleep+0x7c/0x8c
+  [<ffffff8e016977f0>] do_page_fault+0x140/0x330
+  [<ffffff8e01681328>] do_mem_abort+0x54/0xb0
+  Exception stack(0xfffffffb20247a70 to 0xfffffffb20247ba0)
+  [...]
+  [<ffffff8e016844fc>] el1_da+0x18/0x78
+  [<ffffff8e017f399c>] path_parentat+0x44/0x88
+  [<ffffff8e017f4c9c>] filename_parentat+0x5c/0xd8
+  [<ffffff8e017f5044>] filename_create+0x4c/0x128
+  [<ffffff8e017f59e4>] SyS_mkdirat+0x50/0xc8
+  [<ffffff8e01684e30>] el0_svc_naked+0x24/0x28
+  Code: 36380080 d5384100 f9400800 9402566d (d4210000)
+  ---[ end trace 2d01889f2bca9b9f ]---
+
+Fix this by dispatching all translation faults to do_translation_fault,
+which avoids invoking the page fault logic for faults on kernel addresses.
+
+Reported-by: Ankit Jain <ankijain@codeaurora.org>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/fault.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -614,7 +614,7 @@ static const struct fault_info fault_inf
+ 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},
+ 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
+ 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
+-	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
++	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
+ 	{ do_bad,		SIGBUS,  0,		"unknown 8"			},
+ 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
+ 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
diff --git a/queue-4.13/arm64-make-sure-spsel-is-always-set.patch b/queue-4.13/arm64-make-sure-spsel-is-always-set.patch
new file mode 100644
index 00000000000..9aa07d7a523
--- /dev/null
+++ b/queue-4.13/arm64-make-sure-spsel-is-always-set.patch
@@ -0,0 +1,40 @@
+From 5371513fb338fb9989c569dc071326d369d6ade8 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <marc.zyngier@arm.com>
+Date: Tue, 26 Sep 2017 15:57:16 +0100
+Subject: arm64: Make sure SPsel is always set
+
+From: Marc Zyngier <marc.zyngier@arm.com>
+
+commit 5371513fb338fb9989c569dc071326d369d6ade8 upstream.
+
+When the kernel is entered at EL2 on an ARMv8.0 system, we construct
+the EL1 pstate and make sure this uses the EL1 stack pointer
+(we perform an exception return to EL1h).
+
+But if the kernel is either entered at EL1 or stays at EL2 (because
+we're on a VHE-capable system), we fail to set SPsel, and use whatever
+stack selection the higher exception level has chosen for us.
+
+Let's not take any chance, and make sure that SPsel is set to one
+before we decide the mode we're going to run in.
+
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/head.S |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm64/kernel/head.S
++++ b/arch/arm64/kernel/head.S
+@@ -381,6 +381,7 @@ ENTRY(kimage_vaddr)
+  * booted in EL1 or EL2 respectively.
+  */
+ ENTRY(el2_setup)
++	msr	SPsel, #1			// We want to use SP_EL{1,2}
+ 	mrs	x0, CurrentEL
+ 	cmp	x0, #CurrentEL_EL2
+ 	b.eq	1f
diff --git a/queue-4.13/arm64-mm-use-read_once-when-dereferencing-pointer-to-pte-table.patch b/queue-4.13/arm64-mm-use-read_once-when-dereferencing-pointer-to-pte-table.patch
new file mode 100644
index 00000000000..13b2e91617a
--- /dev/null
+++ b/queue-4.13/arm64-mm-use-read_once-when-dereferencing-pointer-to-pte-table.patch
@@ -0,0 +1,77 @@
+From f069faba688701c4d56b6c3452a130f97bf02e95 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 29 Sep 2017 11:29:55 +0100
+Subject: arm64: mm: Use READ_ONCE when dereferencing pointer to pte table
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit f069faba688701c4d56b6c3452a130f97bf02e95 upstream.
+
+On kernels built with support for transparent huge pages, different CPUs
+can access the PMD concurrently due to e.g. fast GUP or page_vma_mapped_walk
+and they must take care to use READ_ONCE to avoid value tearing or caching
+of stale values by the compiler. Unfortunately, these functions call into
+our pgtable macros, which don't use READ_ONCE, and compiler caching has
+been observed to cause the following crash during ext4 writeback:
+
+PC is at check_pte+0x20/0x170
+LR is at page_vma_mapped_walk+0x2e0/0x540
+[...]
+Process doio (pid: 2463, stack limit = 0xffff00000f2e8000)
+Call trace:
+[<ffff000008233328>] check_pte+0x20/0x170
+[<ffff000008233758>] page_vma_mapped_walk+0x2e0/0x540
+[<ffff000008234adc>] page_mkclean_one+0xac/0x278
+[<ffff000008234d98>] rmap_walk_file+0xf0/0x238
+[<ffff000008236e74>] rmap_walk+0x64/0xa0
+[<ffff0000082370c8>] page_mkclean+0x90/0xa8
+[<ffff0000081f3c64>] clear_page_dirty_for_io+0x84/0x2a8
+[<ffff00000832f984>] mpage_submit_page+0x34/0x98
+[<ffff00000832fb4c>] mpage_process_page_bufs+0x164/0x170
+[<ffff00000832fc8c>] mpage_prepare_extent_to_map+0x134/0x2b8
+[<ffff00000833530c>] ext4_writepages+0x484/0xe30
+[<ffff0000081f6ab4>] do_writepages+0x44/0xe8
+[<ffff0000081e5bd4>] __filemap_fdatawrite_range+0xbc/0x110
+[<ffff0000081e5e68>] file_write_and_wait_range+0x48/0xd8
+[<ffff000008324310>] ext4_sync_file+0x80/0x4b8
+[<ffff0000082bd434>] vfs_fsync_range+0x64/0xc0
+[<ffff0000082332b4>] SyS_msync+0x194/0x1e8
+
+This is because page_vma_mapped_walk loads the PMD twice before calling
+pte_offset_map: the first time without READ_ONCE (where it gets all zeroes
+due to a concurrent pmdp_invalidate) and the second time with READ_ONCE
+(where it sees a valid table pointer due to a concurrent pmd_populate).
+However, the compiler inlines everything and caches the first value in
+a register, which is subsequently used in pte_offset_phys which returns
+a junk pointer that is later dereferenced when attempting to access the
+relevant pte.
+
+This patch fixes the issue by using READ_ONCE in pte_offset_phys to ensure
+that a stale value is not used. Whilst this is a point fix for a known
+failure (and simple to backport), a full fix moving all of our page table
+accessors over to {READ,WRITE}_ONCE and consistently using READ_ONCE in
+page_vma_mapped_walk is in the works for a future kernel release.
+
+Cc: Jon Masters <jcm@redhat.com>
+Cc: Timur Tabi <timur@codeaurora.org>
+Fixes: f27176cfc363 ("mm: convert page_mkclean_one() to use page_vma_mapped_walk()")
+Tested-by: Richard Ruigrok <rruigrok@codeaurora.org>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/pgtable.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -412,7 +412,7 @@ static inline phys_addr_t pmd_page_paddr
+ /* Find an entry in the third-level page table. */
+ #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ 
+-#define pte_offset_phys(dir,addr)	(pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
++#define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
+ #define pte_offset_kernel(dir,addr)	((pte_t *)__va(pte_offset_phys((dir), (addr))))
+ 
+ #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
diff --git a/queue-4.13/btrfs-clear-ordered-flag-on-cleaning-up-ordered-extents.patch b/queue-4.13/btrfs-clear-ordered-flag-on-cleaning-up-ordered-extents.patch
new file mode 100644
index 00000000000..b3e6a61c312
--- /dev/null
+++ b/queue-4.13/btrfs-clear-ordered-flag-on-cleaning-up-ordered-extents.patch
@@ -0,0 +1,58 @@
+From 63d71450c8d817649a79e37d685523f988b9cc98 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 1 Sep 2017 17:58:47 +0900
+Subject: btrfs: clear ordered flag on cleaning up ordered extents
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 63d71450c8d817649a79e37d685523f988b9cc98 upstream.
+
+Commit 524272607e88 ("btrfs: Handle delalloc error correctly to avoid
+ordered extent hang") introduced btrfs_cleanup_ordered_extents() to cleanup
+submitted ordered extents. However, it does not clear the ordered bit
+(Private2) of corresponding pages. Thus, the following BUG occurs from
+free_pages_check_bad() (on btrfs/125 with nospace_cache).
+
+BUG: Bad page state in process btrfs  pfn:3fa787
+page:ffffdf2acfe9e1c0 count:0 mapcount:0 mapping:          (null) index:0xd
+flags: 0x8000000000002008(uptodate|private_2)
+raw: 8000000000002008 0000000000000000 000000000000000d 00000000ffffffff
+raw: ffffdf2acf5c1b20 ffffb443802238b0 0000000000000000 0000000000000000
+page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
+bad because of flags: 0x2000(private_2)
+
+This patch clears the flag same as other places calling
+btrfs_dec_test_ordered_pending() for every page in the specified range.
+
+Fixes: 524272607e88 ("btrfs: Handle delalloc error correctly to avoid ordered extent hang")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered
+ 						 const u64 offset,
+ 						 const u64 bytes)
+ {
++	unsigned long index = offset >> PAGE_SHIFT;
++	unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
++	struct page *page;
++
++	while (index <= end_index) {
++		page = find_get_page(inode->i_mapping, index);
++		index++;
++		if (!page)
++			continue;
++		ClearPagePrivate2(page);
++		put_page(page);
++	}
+ 	return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
+ 					    bytes - PAGE_SIZE, false);
+ }
diff --git a/queue-4.13/btrfs-finish-ordered-extent-cleaning-if-no-progress-is-found.patch b/queue-4.13/btrfs-finish-ordered-extent-cleaning-if-no-progress-is-found.patch
new file mode 100644
index 00000000000..90355381414
--- /dev/null
+++ b/queue-4.13/btrfs-finish-ordered-extent-cleaning-if-no-progress-is-found.patch
@@ -0,0 +1,64 @@
+From 67c003f90fd68062d92a7ffade36f9b2a9098bd8 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 1 Sep 2017 17:59:07 +0900
+Subject: btrfs: finish ordered extent cleaning if no progress is found
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 67c003f90fd68062d92a7ffade36f9b2a9098bd8 upstream.
+
+__endio_write_update_ordered() repeats the search until it reaches the end
+of the specified range. This works well with direct IO path, because before
+the function is called, it's ensured that there are ordered extents filling
+the whole range. It's not the case, however, when it's called from
+run_delalloc_range(): it is possible to have an error in the middle of the
+loop in e.g. run_delalloc_nocow(), so that there exists a range not covered
+by any ordered extents. When cleaning such an "incomplete" range,
+__endio_write_update_ordered() gets stuck at an offset where there are no
+ordered extents.
+
+Since the ordered extents are created from head to tail, we can stop the
+search if there is no offset progress.
+
+Fixes: 524272607e88 ("btrfs: Handle delalloc error correctly to avoid ordered extent hang")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -8309,6 +8309,7 @@ static void __endio_write_update_ordered
+ 	btrfs_work_func_t func;
+ 	u64 ordered_offset = offset;
+ 	u64 ordered_bytes = bytes;
++	u64 last_offset;
+ 	int ret;
+ 
+ 	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+@@ -8320,6 +8321,7 @@ static void __endio_write_update_ordered
+ 	}
+ 
+ again:
++	last_offset = ordered_offset;
+ 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
+ 						   &ordered_offset,
+ 						   ordered_bytes,
+@@ -8331,6 +8333,12 @@ again:
+ 	btrfs_queue_work(wq, &ordered->work);
+ out_test:
+ 	/*
++	 * If btrfs_dec_test_ordered_pending does not find any ordered extent
++	 * in the range, we can exit.
++	 */
++	if (ordered_offset == last_offset)
++		return;
++	/*
+ 	 * our bio might span multiple ordered extents.  If we haven't
+ 	 * completed the accounting for the whole dio, go back and try again
+ 	 */
diff --git a/queue-4.13/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch b/queue-4.13/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
new file mode 100644
index 00000000000..a3dab80fab2
--- /dev/null
+++ b/queue-4.13/btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
@@ -0,0 +1,39 @@
+From bb166d7207432d3c7d10c45dc052f12ba3a2121d Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 25 Aug 2017 14:15:14 +0900
+Subject: btrfs: fix NULL pointer dereference from free_reloc_roots()
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit bb166d7207432d3c7d10c45dc052f12ba3a2121d upstream.
+
+__del_reloc_root should be called before freeing up reloc_root->node.
+If not, calling __del_reloc_root() dereferences reloc_root->node, causing
+the system BUG.
+
+Fixes: 6bdf131fac23 ("Btrfs: don't leak reloc root nodes on error")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -2393,11 +2393,11 @@ void free_reloc_roots(struct list_head *
+ 	while (!list_empty(list)) {
+ 		reloc_root = list_entry(list->next, struct btrfs_root,
+ 					root_list);
++		__del_reloc_root(reloc_root);
+ 		free_extent_buffer(reloc_root->node);
+ 		free_extent_buffer(reloc_root->commit_root);
+ 		reloc_root->node = NULL;
+ 		reloc_root->commit_root = NULL;
+-		__del_reloc_root(reloc_root);
+ 	}
+ }
+ 
diff --git a/queue-4.13/btrfs-prevent-to-set-invalid-default-subvolid.patch b/queue-4.13/btrfs-prevent-to-set-invalid-default-subvolid.patch
new file mode 100644
index 00000000000..b1cfc8bdcc8
--- /dev/null
+++ b/queue-4.13/btrfs-prevent-to-set-invalid-default-subvolid.patch
@@ -0,0 +1,37 @@
+From 6d6d282932d1a609e60dc4467677e0e863682f57 Mon Sep 17 00:00:00 2001
+From: satoru takeuchi <satoru.takeuchi@gmail.com>
+Date: Tue, 12 Sep 2017 22:42:52 +0900
+Subject: btrfs: prevent to set invalid default subvolid
+
+From: satoru takeuchi <satoru.takeuchi@gmail.com>
+
+commit 6d6d282932d1a609e60dc4467677e0e863682f57 upstream.
+
+`btrfs sub set-default` succeeds to set an ID which isn't corresponding to any
+fs/file tree. If such a bad ID is set on a filesystem, we can't mount this
+filesystem without specifying `subvol` or `subvolid` mount options.
+
+Fixes: 6ef5ed0d386b ("Btrfs: add ioctl and incompat flag to set the default mount subvol")
+Signed-off-by: Satoru Takeuchi <satoru.takeuchi@gmail.com>
+Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -4072,6 +4072,10 @@ static long btrfs_ioctl_default_subvol(s
+ 		ret = PTR_ERR(new_root);
+ 		goto out;
+ 	}
++	if (!is_fstree(new_root->objectid)) {
++		ret = -ENOENT;
++		goto out;
++	}
+ 
+ 	path = btrfs_alloc_path();
+ 	if (!path) {
diff --git a/queue-4.13/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch b/queue-4.13/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
new file mode 100644
index 00000000000..ed4b66595e7
--- /dev/null
+++ b/queue-4.13/btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
@@ -0,0 +1,38 @@
+From 78ad4ce014d025f41b8dde3a81876832ead643cf Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 8 Sep 2017 17:48:55 +0900
+Subject: btrfs: propagate error to btrfs_cmp_data_prepare caller
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 78ad4ce014d025f41b8dde3a81876832ead643cf upstream.
+
+btrfs_cmp_data_prepare() (almost) always returns 0 i.e. ignoring errors
+from gather_extent_pages(). While the pages are freed by
+btrfs_cmp_data_free(), cmp->num_pages is still > 0. Then,
+btrfs_extent_same() tries to access the already freed pages, causing faults
+(or violates PageLocked assertion).
+
+This patch just returns the error as is so that the caller stops the process.
+
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Fixes: f441460202cb ("btrfs: fix deadlock with extent-same and readpage")
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3063,7 +3063,7 @@ static int btrfs_cmp_data_prepare(struct
+ out:
+ 	if (ret)
+ 		btrfs_cmp_data_free(cmp);
+-	return 0;
++	return ret;
+ }
+ 
+ static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
diff --git a/queue-4.13/etnaviv-fix-gem-object-list-corruption.patch b/queue-4.13/etnaviv-fix-gem-object-list-corruption.patch
new file mode 100644
index 00000000000..9cafa938fbd
--- /dev/null
+++ b/queue-4.13/etnaviv-fix-gem-object-list-corruption.patch
@@ -0,0 +1,38 @@
+From 518417525f3652c12fb5fad6da4ade66c0072fa3 Mon Sep 17 00:00:00 2001
+From: Lucas Stach <l.stach@pengutronix.de>
+Date: Mon, 11 Sep 2017 15:29:31 +0200
+Subject: etnaviv: fix gem object list corruption
+
+From: Lucas Stach <l.stach@pengutronix.de>
+
+commit 518417525f3652c12fb5fad6da4ade66c0072fa3 upstream.
+
+All manipulations of the gem_object list need to be protected by
+the list mutex, as GEM objects can be created and freed in parallel.
+This fixes a kernel memory corruption.
+
+Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/etnaviv/etnaviv_gem.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+@@ -551,12 +551,15 @@ static const struct etnaviv_gem_ops etna
+ void etnaviv_gem_free_object(struct drm_gem_object *obj)
+ {
+ 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
++	struct etnaviv_drm_private *priv = obj->dev->dev_private;
+ 	struct etnaviv_vram_mapping *mapping, *tmp;
+ 
+ 	/* object should not be active */
+ 	WARN_ON(is_active(etnaviv_obj));
+ 
++	mutex_lock(&priv->gem_lock);
+ 	list_del(&etnaviv_obj->gem_node);
++	mutex_unlock(&priv->gem_lock);
+ 
+ 	list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
+ 				 obj_node) {
diff --git a/queue-4.13/etnaviv-fix-submit-error-path.patch b/queue-4.13/etnaviv-fix-submit-error-path.patch
new file mode 100644
index 00000000000..29256dfdba8
--- /dev/null
+++ b/queue-4.13/etnaviv-fix-submit-error-path.patch
@@ -0,0 +1,34 @@
+From 5a642e6bc49f59922e19ebd639e74f72753fc77b Mon Sep 17 00:00:00 2001
+From: Lucas Stach <l.stach@pengutronix.de>
+Date: Fri, 8 Sep 2017 16:24:32 +0200
+Subject: etnaviv: fix submit error path
+
+From: Lucas Stach <l.stach@pengutronix.de>
+
+commit 5a642e6bc49f59922e19ebd639e74f72753fc77b upstream.
+
+If the gpu submit fails, bail out to avoid accessing a potentially
+uninitialized fence.
+
+Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+@@ -445,8 +445,10 @@ int etnaviv_ioctl_gem_submit(struct drm_
+ 	cmdbuf->user_size = ALIGN(args->stream_size, 8);
+ 
+ 	ret = etnaviv_gpu_submit(gpu, submit, cmdbuf);
+-	if (ret == 0)
+-		cmdbuf = NULL;
++	if (ret)
++		goto out;
++
++	cmdbuf = NULL;
+ 
+ 	if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
+ 		/*
diff --git a/queue-4.13/fix-infoleak-in-waitid-2.patch b/queue-4.13/fix-infoleak-in-waitid-2.patch
new file mode 100644
index 00000000000..e18e71c5c63
--- /dev/null
+++ b/queue-4.13/fix-infoleak-in-waitid-2.patch
@@ -0,0 +1,65 @@
+From 6c85501f2fabcfc4fc6ed976543d252c4eaf4be9 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Fri, 29 Sep 2017 13:43:15 -0400
+Subject: fix infoleak in waitid(2)
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 6c85501f2fabcfc4fc6ed976543d252c4eaf4be9 upstream.
+
+kernel_waitid() can return a PID, an error or 0.  rusage is filled in the first
+case and waitid(2) rusage should've been copied out exactly in that case, *not*
+whenever kernel_waitid() has not returned an error.  Compat variant shares that
+braino; none of kernel_wait4() callers do, so the below ought to fix it.
+
+Reported-and-tested-by: Alexander Potapenko <glider@google.com>
+Fixes: ce72a16fa705 ("wait4(2)/waitid(2): separate copying rusage to userland")
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/exit.c |   23 ++++++++++-------------
+ 1 file changed, 10 insertions(+), 13 deletions(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1601,12 +1601,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_
+ 	struct waitid_info info = {.status = 0};
+ 	long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
+ 	int signo = 0;
++
+ 	if (err > 0) {
+ 		signo = SIGCHLD;
+ 		err = 0;
+-	}
+-
+-	if (!err) {
+ 		if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
+ 			return -EFAULT;
+ 	}
+@@ -1724,16 +1722,15 @@ COMPAT_SYSCALL_DEFINE5(waitid,
+ 	if (err > 0) {
+ 		signo = SIGCHLD;
+ 		err = 0;
+-	}
+-
+-	if (!err && uru) {
+-		/* kernel_waitid() overwrites everything in ru */
+-		if (COMPAT_USE_64BIT_TIME)
+-			err = copy_to_user(uru, &ru, sizeof(ru));
+-		else
+-			err = put_compat_rusage(&ru, uru);
+-		if (err)
+-			return -EFAULT;
++		if (uru) {
++			/* kernel_waitid() overwrites everything in ru */
++			if (COMPAT_USE_64BIT_TIME)
++				err = copy_to_user(uru, &ru, sizeof(ru));
++			else
++				err = put_compat_rusage(&ru, uru);
++			if (err)
++				return -EFAULT;
++		}
+ 	}
+ 
+ 	if (!infop)
diff --git a/queue-4.13/futex-fix-pi_state-owner-serialization.patch b/queue-4.13/futex-fix-pi_state-owner-serialization.patch
new file mode 100644
index 00000000000..7ca4c3426c8
--- /dev/null
+++ b/queue-4.13/futex-fix-pi_state-owner-serialization.patch
@@ -0,0 +1,124 @@
+From c74aef2d06a9f59cece89093eecc552933cba72a Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 22 Sep 2017 17:48:06 +0200
+Subject: futex: Fix pi_state->owner serialization
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c74aef2d06a9f59cece89093eecc552933cba72a upstream.
+
+There was a reported suspicion about a race between exit_pi_state_list()
+and put_pi_state(). The same report mentioned the comment with
+put_pi_state() said it should be called with hb->lock held, and it no
+longer is in all places.
+
+As it turns out, the pi_state->owner serialization is indeed broken. As per
+the new rules:
+
+  734009e96d19 ("futex: Change locking rules")
+
+pi_state->owner should be serialized by pi_state->pi_mutex.wait_lock.
+For the sites setting pi_state->owner we already hold wait_lock (where
+required) but exit_pi_state_list() and put_pi_state() were not and
+raced on clearing it.
+
+Fixes: 734009e96d19 ("futex: Change locking rules")
+Reported-by: Gratian Crisan <gratian.crisan@ni.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: dvhart@infradead.org
+Link: https://lkml.kernel.org/r/20170922154806.jd3ffltfk24m4o4y@hirez.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c |   33 ++++++++++++++++++++++-----------
+ 1 file changed, 22 insertions(+), 11 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -821,8 +821,6 @@ static void get_pi_state(struct futex_pi
+ /*
+  * Drops a reference to the pi_state object and frees or caches it
+  * when the last reference is gone.
+- *
+- * Must be called with the hb lock held.
+  */
+ static void put_pi_state(struct futex_pi_state *pi_state)
+ {
+@@ -837,16 +835,22 @@ static void put_pi_state(struct futex_pi
+ 	 * and has cleaned up the pi_state already
+ 	 */
+ 	if (pi_state->owner) {
+-		raw_spin_lock_irq(&pi_state->owner->pi_lock);
+-		list_del_init(&pi_state->list);
+-		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
++		struct task_struct *owner;
+ 
+-		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
++		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++		owner = pi_state->owner;
++		if (owner) {
++			raw_spin_lock(&owner->pi_lock);
++			list_del_init(&pi_state->list);
++			raw_spin_unlock(&owner->pi_lock);
++		}
++		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
++		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ 	}
+ 
+-	if (current->pi_state_cache)
++	if (current->pi_state_cache) {
+ 		kfree(pi_state);
+-	else {
++	} else {
+ 		/*
+ 		 * pi_state->list is already empty.
+ 		 * clear pi_state->owner.
+@@ -905,13 +909,14 @@ void exit_pi_state_list(struct task_stru
+ 		raw_spin_unlock_irq(&curr->pi_lock);
+ 
+ 		spin_lock(&hb->lock);
+-
+-		raw_spin_lock_irq(&curr->pi_lock);
++		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++		raw_spin_lock(&curr->pi_lock);
+ 		/*
+ 		 * We dropped the pi-lock, so re-check whether this
+ 		 * task still owns the PI-state:
+ 		 */
+ 		if (head->next != next) {
++			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+ 			spin_unlock(&hb->lock);
+ 			continue;
+ 		}
+@@ -920,9 +925,10 @@ void exit_pi_state_list(struct task_stru
+ 		WARN_ON(list_empty(&pi_state->list));
+ 		list_del_init(&pi_state->list);
+ 		pi_state->owner = NULL;
+-		raw_spin_unlock_irq(&curr->pi_lock);
++		raw_spin_unlock(&curr->pi_lock);
+ 
+ 		get_pi_state(pi_state);
++		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ 		spin_unlock(&hb->lock);
+ 
+ 		rt_mutex_futex_unlock(&pi_state->pi_mutex);
+@@ -1204,6 +1210,10 @@ static int attach_to_pi_owner(u32 uval,
+ 
+ 	WARN_ON(!list_empty(&pi_state->list));
+ 	list_add(&pi_state->list, &p->pi_state_list);
++	/*
++	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
++	 * because there is no concurrency as the object is not published yet.
++	 */
+ 	pi_state->owner = p;
+ 	raw_spin_unlock_irq(&p->pi_lock);
+ 
+@@ -2820,6 +2830,7 @@ retry:
+ 		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ 		spin_unlock(&hb->lock);
+ 
++		/* drops pi_state->pi_mutex.wait_lock */
+ 		ret = wake_futex_pi(uaddr, uval, pi_state);
+ 
+ 		put_pi_state(pi_state);
diff --git a/queue-4.13/irq-generic-chip-don-t-replace-domain-s-name.patch b/queue-4.13/irq-generic-chip-don-t-replace-domain-s-name.patch
new file mode 100644
index 00000000000..96dbe054825
--- /dev/null
+++ b/queue-4.13/irq-generic-chip-don-t-replace-domain-s-name.patch
@@ -0,0 +1,39 @@
+From 72364d320644c12948786962673772f271039a4a Mon Sep 17 00:00:00 2001
+From: Jeffy Chen <jeffy.chen@rock-chips.com>
+Date: Thu, 28 Sep 2017 12:37:31 +0800
+Subject: irq/generic-chip: Don't replace domain's name
+
+From: Jeffy Chen <jeffy.chen@rock-chips.com>
+
+commit 72364d320644c12948786962673772f271039a4a upstream.
+
+When generic irq chips are allocated for an irq domain the domain name is
+set to the irq chip name. That was done to have named domains before the
+recent changes which enforce domain naming were done.
+
+Since then the overwrite causes a memory leak when the domain name is
+dynamically allocated and even worse it would cause the domain free code to
+free the wrong name pointer, which might point to a constant.
+
+Remove the name assignment to prevent this.
+
+Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only")
+Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lkml.kernel.org/r/20170928043731.4764-1-jeffy.chen@rock-chips.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/irq/generic-chip.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/kernel/irq/generic-chip.c
++++ b/kernel/irq/generic-chip.c
+@@ -322,7 +322,6 @@ int __irq_alloc_domain_generic_chips(str
+ 		/* Calc pointer to the next generic chip */
+ 		tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
+ 	}
+-	d->name = name;
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
diff --git a/queue-4.13/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch b/queue-4.13/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
new file mode 100644
index 00000000000..26e4191311e
--- /dev/null
+++ b/queue-4.13/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
@@ -0,0 +1,39 @@
+From 51aa68e7d57e3217192d88ce90fd5b8ef29ec94f Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Tue, 12 Sep 2017 13:02:54 -0700
+Subject: kvm: nVMX: Don't allow L2 to access the hardware CR8
+
+From: Jim Mattson <jmattson@google.com>
+
+commit 51aa68e7d57e3217192d88ce90fd5b8ef29ec94f upstream.
+
+If L1 does not specify the "use TPR shadow" VM-execution control in
+vmcs12, then L0 must specify the "CR8-load exiting" and "CR8-store
+exiting" VM-execution controls in vmcs02. Failure to do so will give
+the L2 VM unrestricted read/write access to the hardware CR8.
+
+This fixes CVE-2017-12154.
+
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -10271,6 +10271,11 @@ static int prepare_vmcs02(struct kvm_vcp
+ 	if (exec_control & CPU_BASED_TPR_SHADOW) {
+ 		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
+ 		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
++	} else {
++#ifdef CONFIG_X86_64
++		exec_control |= CPU_BASED_CR8_LOAD_EXITING |
++				CPU_BASED_CR8_STORE_EXITING;
++#endif
+ 	}
+ 
+ 	/*
diff --git a/queue-4.13/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch b/queue-4.13/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
new file mode 100644
index 00000000000..482ec795afe
--- /dev/null
+++ b/queue-4.13/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
@@ -0,0 +1,83 @@
+From 44889942b6eb356eab27ce25fe10701adfec7776 Mon Sep 17 00:00:00 2001
+From: Ladi Prosek <lprosek@redhat.com>
+Date: Fri, 22 Sep 2017 07:53:15 +0200
+Subject: KVM: nVMX: fix HOST_CR3/HOST_CR4 cache
+
+From: Ladi Prosek <lprosek@redhat.com>
+
+commit 44889942b6eb356eab27ce25fe10701adfec7776 upstream.
+
+For nested virt we maintain multiple VMCS that can run on a vCPU. So it is
+incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching
+the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in
+vCPU-wide data structures.
+
+Hyper-V nested on KVM runs into this consistently for me with PCID enabled.
+CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3)
+fires, and the currently loaded VMCS is updated. Then we switch from L2 to
+L1 and the next exit reverts CR3 to its old value.
+
+Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant")
+Signed-off-by: Ladi Prosek <lprosek@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -200,6 +200,8 @@ struct loaded_vmcs {
+ 	int cpu;
+ 	bool launched;
+ 	bool nmi_known_unmasked;
++	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
++	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
+ 	struct list_head loaded_vmcss_on_cpu_link;
+ };
+ 
+@@ -595,8 +597,6 @@ struct vcpu_vmx {
+ 		int           gs_ldt_reload_needed;
+ 		int           fs_reload_needed;
+ 		u64           msr_host_bndcfgs;
+-		unsigned long vmcs_host_cr3;	/* May not match real cr3 */
+-		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
+ 	} host_state;
+ 	struct {
+ 		int vm86_active;
+@@ -5138,12 +5138,12 @@ static void vmx_set_constant_host_state(
+ 	 */
+ 	cr3 = __read_cr3();
+ 	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
+-	vmx->host_state.vmcs_host_cr3 = cr3;
++	vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+ 
+ 	/* Save the most likely value for this task's CR4 in the VMCS. */
+ 	cr4 = cr4_read_shadow();
+ 	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
+-	vmx->host_state.vmcs_host_cr4 = cr4;
++	vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+ 
+ 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
+ #ifdef CONFIG_X86_64
+@@ -8992,15 +8992,15 @@ static void __noclone vmx_vcpu_run(struc
+ 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ 
+ 	cr3 = __get_current_cr3_fast();
+-	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
++	if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
+ 		vmcs_writel(HOST_CR3, cr3);
+-		vmx->host_state.vmcs_host_cr3 = cr3;
++		vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+ 	}
+ 
+ 	cr4 = cr4_read_shadow();
+-	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
++	if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
+ 		vmcs_writel(HOST_CR4, cr4);
+-		vmx->host_state.vmcs_host_cr4 = cr4;
++		vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+ 	}
+ 
+ 	/* When single-stepping over STI and MOV SS, we must clear the
diff --git a/queue-4.13/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch b/queue-4.13/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
new file mode 100644
index 00000000000..9adfcbcbe2d
--- /dev/null
+++ b/queue-4.13/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
@@ -0,0 +1,157 @@
+From 8b306e2f3c41939ea528e6174c88cfbfff893ce1 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 6 Jun 2017 12:57:05 +0200
+Subject: KVM: VMX: avoid double list add with VT-d posted interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 8b306e2f3c41939ea528e6174c88cfbfff893ce1 upstream.
+
+In some cases, for example involving hot-unplug of assigned
+devices, pi_post_block can forget to remove the vCPU from the
+blocked_vcpu_list.  When this happens, the next call to
+pi_pre_block corrupts the list.
+
+Fix this in two ways.  First, check vcpu->pre_pcpu in pi_pre_block
+and WARN instead of adding the element twice in the list.  Second,
+always do the list removal in pi_post_block if vcpu->pre_pcpu is
+set (not -1).
+
+The new code keeps interrupts disabled for the whole duration of
+pi_pre_block/pi_post_block.  This is not strictly necessary, but
+easier to follow.  For the same reason, PI.ON is checked only
+after the cmpxchg, and to handle it we just call the post-block
+code.  This removes duplication of the list removal code.
+
+Cc: Huangweidong <weidong.huang@huawei.com>
+Cc: Gonglei <arei.gonglei@huawei.com>
+Cc: wangxin <wangxinxin.wang@huawei.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   62 +++++++++++++++++++++--------------------------------
+ 1 file changed, 25 insertions(+), 37 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -11394,10 +11394,11 @@ static void __pi_post_block(struct kvm_v
+ 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ 	struct pi_desc old, new;
+ 	unsigned int dest;
+-	unsigned long flags;
+ 
+ 	do {
+ 		old.control = new.control = pi_desc->control;
++		WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
++		     "Wakeup handler not enabled while the VCPU is blocked\n");
+ 
+ 		dest = cpu_physical_id(vcpu->cpu);
+ 
+@@ -11414,14 +11415,10 @@ static void __pi_post_block(struct kvm_v
+ 	} while (cmpxchg(&pi_desc->control, old.control,
+ 			new.control) != old.control);
+ 
+-	if(vcpu->pre_pcpu != -1) {
+-		spin_lock_irqsave(
+-			&per_cpu(blocked_vcpu_on_cpu_lock,
+-			vcpu->pre_pcpu), flags);
++	if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
++		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ 		list_del(&vcpu->blocked_vcpu_list);
+-		spin_unlock_irqrestore(
+-			&per_cpu(blocked_vcpu_on_cpu_lock,
+-			vcpu->pre_pcpu), flags);
++		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ 		vcpu->pre_pcpu = -1;
+ 	}
+ }
+@@ -11441,7 +11438,6 @@ static void __pi_post_block(struct kvm_v
+  */
+ static int pi_pre_block(struct kvm_vcpu *vcpu)
+ {
+-	unsigned long flags;
+ 	unsigned int dest;
+ 	struct pi_desc old, new;
+ 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+@@ -11451,34 +11447,20 @@ static int pi_pre_block(struct kvm_vcpu
+ 		!kvm_vcpu_apicv_active(vcpu))
+ 		return 0;
+ 
+-	vcpu->pre_pcpu = vcpu->cpu;
+-	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+-			  vcpu->pre_pcpu), flags);
+-	list_add_tail(&vcpu->blocked_vcpu_list,
+-		      &per_cpu(blocked_vcpu_on_cpu,
+-		      vcpu->pre_pcpu));
+-	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+-			       vcpu->pre_pcpu), flags);
++	WARN_ON(irqs_disabled());
++	local_irq_disable();
++	if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
++		vcpu->pre_pcpu = vcpu->cpu;
++		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++		list_add_tail(&vcpu->blocked_vcpu_list,
++			      &per_cpu(blocked_vcpu_on_cpu,
++				       vcpu->pre_pcpu));
++		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++	}
+ 
+ 	do {
+ 		old.control = new.control = pi_desc->control;
+ 
+-		/*
+-		 * We should not block the vCPU if
+-		 * an interrupt is posted for it.
+-		 */
+-		if (pi_test_on(pi_desc) == 1) {
+-			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+-					  vcpu->pre_pcpu), flags);
+-			list_del(&vcpu->blocked_vcpu_list);
+-			spin_unlock_irqrestore(
+-					&per_cpu(blocked_vcpu_on_cpu_lock,
+-					vcpu->pre_pcpu), flags);
+-			vcpu->pre_pcpu = -1;
+-
+-			return 1;
+-		}
+-
+ 		WARN((pi_desc->sn == 1),
+ 		     "Warning: SN field of posted-interrupts "
+ 		     "is set before blocking\n");
+@@ -11503,7 +11485,12 @@ static int pi_pre_block(struct kvm_vcpu
+ 	} while (cmpxchg(&pi_desc->control, old.control,
+ 			new.control) != old.control);
+ 
+-	return 0;
++	/* We should not block the vCPU if an interrupt is posted for it.  */
++	if (pi_test_on(pi_desc) == 1)
++		__pi_post_block(vcpu);
++
++	local_irq_enable();
++	return (vcpu->pre_pcpu == -1);
+ }
+ 
+ static int vmx_pre_block(struct kvm_vcpu *vcpu)
+@@ -11519,12 +11506,13 @@ static int vmx_pre_block(struct kvm_vcpu
+ 
+ static void pi_post_block(struct kvm_vcpu *vcpu)
+ {
+-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
+-		!kvm_vcpu_apicv_active(vcpu))
++	if (vcpu->pre_pcpu == -1)
+ 		return;
+ 
++	WARN_ON(irqs_disabled());
++	local_irq_disable();
+ 	__pi_post_block(vcpu);
++	local_irq_enable();
+ }
+ 
+ static void vmx_post_block(struct kvm_vcpu *vcpu)
diff --git a/queue-4.13/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch b/queue-4.13/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
new file mode 100644
index 00000000000..d2dde79d463
--- /dev/null
+++ b/queue-4.13/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
@@ -0,0 +1,57 @@
+From 3a8b0677fc6180a467e26cc32ce6b0c09a32f9bb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= <jschoenh@amazon.de>
+Date: Thu, 7 Sep 2017 19:02:30 +0100
+Subject: KVM: VMX: Do not BUG() on out-of-bounds guest IRQ
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jan H. Schönherr <jschoenh@amazon.de>
+
+commit 3a8b0677fc6180a467e26cc32ce6b0c09a32f9bb upstream.
+
+The value of the guest_irq argument to vmx_update_pi_irte() is
+ultimately coming from a KVM_IRQFD API call. Do not BUG() in
+vmx_update_pi_irte() if the value is out-of bounds. (Especially,
+since KVM as a whole seems to hang after that.)
+
+Instead, print a message only once if we find that we don't have a
+route for a certain IRQ (which can be out-of-bounds or within the
+array).
+
+This fixes CVE-2017-1000252.
+
+Fixes: efc644048ecde54 ("KVM: x86: Update IRTE for posted-interrupts")
+Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -11542,7 +11542,7 @@ static int vmx_update_pi_irte(struct kvm
+ 	struct kvm_lapic_irq irq;
+ 	struct kvm_vcpu *vcpu;
+ 	struct vcpu_data vcpu_info;
+-	int idx, ret = -EINVAL;
++	int idx, ret = 0;
+ 
+ 	if (!kvm_arch_has_assigned_device(kvm) ||
+ 		!irq_remapping_cap(IRQ_POSTING_CAP) ||
+@@ -11551,7 +11551,12 @@ static int vmx_update_pi_irte(struct kvm
+ 
+ 	idx = srcu_read_lock(&kvm->irq_srcu);
+ 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+-	BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
++	if (guest_irq >= irq_rt->nr_rt_entries ||
++	    hlist_empty(&irq_rt->map[guest_irq])) {
++		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
++			     guest_irq, irq_rt->nr_rt_entries);
++		goto out;
++	}
+ 
+ 	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+ 		if (e->type != KVM_IRQ_ROUTING_MSI)
diff --git a/queue-4.13/kvm-vmx-extract-__pi_post_block.patch b/queue-4.13/kvm-vmx-extract-__pi_post_block.patch
new file mode 100644
index 00000000000..eb0d9cbb4e2
--- /dev/null
+++ b/queue-4.13/kvm-vmx-extract-__pi_post_block.patch
@@ -0,0 +1,118 @@
+From cd39e1176d320157831ce030b4c869bd2d5eb142 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 6 Jun 2017 12:57:04 +0200
+Subject: KVM: VMX: extract __pi_post_block
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit cd39e1176d320157831ce030b4c869bd2d5eb142 upstream.
+
+Simple code movement patch, preparing for the next one.
+
+Cc: Huangweidong <weidong.huang@huawei.com>
+Cc: Gonglei <arei.gonglei@huawei.com>
+Cc: wangxin <wangxinxin.wang@huawei.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   71 ++++++++++++++++++++++++++++-------------------------
+ 1 file changed, 38 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -11389,6 +11389,43 @@ static void vmx_enable_log_dirty_pt_mask
+ 	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+ }
+ 
++static void __pi_post_block(struct kvm_vcpu *vcpu)
++{
++	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
++	struct pi_desc old, new;
++	unsigned int dest;
++	unsigned long flags;
++
++	do {
++		old.control = new.control = pi_desc->control;
++
++		dest = cpu_physical_id(vcpu->cpu);
++
++		if (x2apic_enabled())
++			new.ndst = dest;
++		else
++			new.ndst = (dest << 8) & 0xFF00;
++
++		/* Allow posting non-urgent interrupts */
++		new.sn = 0;
++
++		/* set 'NV' to 'notification vector' */
++		new.nv = POSTED_INTR_VECTOR;
++	} while (cmpxchg(&pi_desc->control, old.control,
++			new.control) != old.control);
++
++	if(vcpu->pre_pcpu != -1) {
++		spin_lock_irqsave(
++			&per_cpu(blocked_vcpu_on_cpu_lock,
++			vcpu->pre_pcpu), flags);
++		list_del(&vcpu->blocked_vcpu_list);
++		spin_unlock_irqrestore(
++			&per_cpu(blocked_vcpu_on_cpu_lock,
++			vcpu->pre_pcpu), flags);
++		vcpu->pre_pcpu = -1;
++	}
++}
++
+ /*
+  * This routine does the following things for vCPU which is going
+  * to be blocked if VT-d PI is enabled.
+@@ -11482,44 +11519,12 @@ static int vmx_pre_block(struct kvm_vcpu
+ 
+ static void pi_post_block(struct kvm_vcpu *vcpu)
+ {
+-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+-	struct pi_desc old, new;
+-	unsigned int dest;
+-	unsigned long flags;
+-
+ 	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+ 		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
+ 		!kvm_vcpu_apicv_active(vcpu))
+ 		return;
+ 
+-	do {
+-		old.control = new.control = pi_desc->control;
+-
+-		dest = cpu_physical_id(vcpu->cpu);
+-
+-		if (x2apic_enabled())
+-			new.ndst = dest;
+-		else
+-			new.ndst = (dest << 8) & 0xFF00;
+-
+-		/* Allow posting non-urgent interrupts */
+-		new.sn = 0;
+-
+-		/* set 'NV' to 'notification vector' */
+-		new.nv = POSTED_INTR_VECTOR;
+-	} while (cmpxchg(&pi_desc->control, old.control,
+-			new.control) != old.control);
+-
+-	if(vcpu->pre_pcpu != -1) {
+-		spin_lock_irqsave(
+-			&per_cpu(blocked_vcpu_on_cpu_lock,
+-			vcpu->pre_pcpu), flags);
+-		list_del(&vcpu->blocked_vcpu_list);
+-		spin_unlock_irqrestore(
+-			&per_cpu(blocked_vcpu_on_cpu_lock,
+-			vcpu->pre_pcpu), flags);
+-		vcpu->pre_pcpu = -1;
+-	}
++	__pi_post_block(vcpu);
+ }
+ 
+ static void vmx_post_block(struct kvm_vcpu *vcpu)
diff --git a/queue-4.13/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch b/queue-4.13/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
new file mode 100644
index 00000000000..ecd19c6f826
--- /dev/null
+++ b/queue-4.13/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
@@ -0,0 +1,130 @@
+From 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 6 Jun 2017 12:57:06 +0200
+Subject: KVM: VMX: simplify and fix vmx_vcpu_pi_load
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a upstream.
+
+The simplify part: do not touch pi_desc.nv, we can set it when the
+VCPU is first created.  Likewise, pi_desc.sn is only handled by
+vmx_vcpu_pi_load, do not touch it in __pi_post_block.
+
+The fix part: do not check kvm_arch_has_assigned_device, instead
+check the SN bit to figure out whether vmx_vcpu_pi_put ran before.
+This matches what the previous patch did in pi_post_block.
+
+Cc: Huangweidong <weidong.huang@huawei.com>
+Cc: Gonglei <arei.gonglei@huawei.com>
+Cc: wangxin <wangxinxin.wang@huawei.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   68 +++++++++++++++++++++++++++--------------------------
+ 1 file changed, 35 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2187,43 +2187,41 @@ static void vmx_vcpu_pi_load(struct kvm_
+ 	struct pi_desc old, new;
+ 	unsigned int dest;
+ 
+-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
+-		!kvm_vcpu_apicv_active(vcpu))
++	/*
++	 * In case of hot-plug or hot-unplug, we may have to undo
++	 * vmx_vcpu_pi_put even if there is no assigned device.  And we
++	 * always keep PI.NDST up to date for simplicity: it makes the
++	 * code easier, and CPU migration is not a fast path.
++	 */
++	if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
++		return;
++
++	/*
++	 * First handle the simple case where no cmpxchg is necessary; just
++	 * allow posting non-urgent interrupts.
++	 *
++	 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
++	 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
++	 * expects the VCPU to be on the blocked_vcpu_list that matches
++	 * PI.NDST.
++	 */
++	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
++	    vcpu->cpu == cpu) {
++		pi_clear_sn(pi_desc);
+ 		return;
++	}
+ 
++	/* The full case.  */
+ 	do {
+ 		old.control = new.control = pi_desc->control;
+ 
+-		/*
+-		 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+-		 * are two possible cases:
+-		 * 1. After running 'pre_block', context switch
+-		 *    happened. For this case, 'sn' was set in
+-		 *    vmx_vcpu_put(), so we need to clear it here.
+-		 * 2. After running 'pre_block', we were blocked,
+-		 *    and woken up by some other guy. For this case,
+-		 *    we don't need to do anything, 'pi_post_block'
+-		 *    will do everything for us. However, we cannot
+-		 *    check whether it is case #1 or case #2 here
+-		 *    (maybe, not needed), so we also clear sn here,
+-		 *    I think it is not a big deal.
+-		 */
+-		if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
+-			if (vcpu->cpu != cpu) {
+-				dest = cpu_physical_id(cpu);
+-
+-				if (x2apic_enabled())
+-					new.ndst = dest;
+-				else
+-					new.ndst = (dest << 8) & 0xFF00;
+-			}
++		dest = cpu_physical_id(cpu);
+ 
+-			/* set 'NV' to 'notification vector' */
+-			new.nv = POSTED_INTR_VECTOR;
+-		}
++		if (x2apic_enabled())
++			new.ndst = dest;
++		else
++			new.ndst = (dest << 8) & 0xFF00;
+ 
+-		/* Allow posting non-urgent interrupts */
+ 		new.sn = 0;
+ 	} while (cmpxchg(&pi_desc->control, old.control,
+ 			new.control) != old.control);
+@@ -9310,6 +9308,13 @@ static struct kvm_vcpu *vmx_create_vcpu(
+ 
+ 	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+ 
++	/*
++	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
++	 * or POSTED_INTR_WAKEUP_VECTOR.
++	 */
++	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
++	vmx->pi_desc.sn = 1;
++
+ 	return &vmx->vcpu;
+ 
+ free_vmcs:
+@@ -11407,9 +11412,6 @@ static void __pi_post_block(struct kvm_v
+ 		else
+ 			new.ndst = (dest << 8) & 0xFF00;
+ 
+-		/* Allow posting non-urgent interrupts */
+-		new.sn = 0;
+-
+ 		/* set 'NV' to 'notification vector' */
+ 		new.nv = POSTED_INTR_VECTOR;
+ 	} while (cmpxchg(&pi_desc->control, old.control,
diff --git a/queue-4.13/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch b/queue-4.13/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
new file mode 100644
index 00000000000..307113ab564
--- /dev/null
+++ b/queue-4.13/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
@@ -0,0 +1,81 @@
+From b862789aa5186d5ea3a024b7cfe0f80c3a38b980 Mon Sep 17 00:00:00 2001
+From: Boqun Feng <boqun.feng@gmail.com>
+Date: Fri, 29 Sep 2017 19:01:45 +0800
+Subject: kvm/x86: Handle async PF in RCU read-side critical sections
+
+From: Boqun Feng <boqun.feng@gmail.com>
+
+commit b862789aa5186d5ea3a024b7cfe0f80c3a38b980 upstream.
+
+Sasha Levin reported a WARNING:
+
+| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
+| rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
+| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
+| rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
+...
+| CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246
+| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+| 1.10.1-1ubuntu1 04/01/2014
+| Call Trace:
+...
+| RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
+| RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
+| RSP: 0018:ffff88003b2debc8 EFLAGS: 00010002
+| RAX: 0000000000000001 RBX: 1ffff1000765bd85 RCX: 0000000000000000
+| RDX: 1ffff100075d7882 RSI: ffffffffb5c7da20 RDI: ffff88003aebc410
+| RBP: ffff88003b2def30 R08: dffffc0000000000 R09: 0000000000000001
+| R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003b2def08
+| R13: 0000000000000000 R14: ffff88003aebc040 R15: ffff88003aebc040
+| __schedule+0x201/0x2240 kernel/sched/core.c:3292
+| schedule+0x113/0x460 kernel/sched/core.c:3421
+| kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158
+| do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271
+| async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069
+| RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996
+| RSP: 0018:ffff88003b2df520 EFLAGS: 00010283
+| RAX: 000000000000003f RBX: ffffffffb5d1e141 RCX: ffff88003b2df670
+| RDX: 0000000000000001 RSI: dffffc0000000000 RDI: ffffffffb5d1e140
+| RBP: ffff88003b2df560 R08: dffffc0000000000 R09: 0000000000000000
+| R10: ffff88003b2df718 R11: 0000000000000000 R12: ffff88003b2df5d8
+| R13: 0000000000000064 R14: ffffffffb5d1e140 R15: 0000000000000000
+| vsnprintf+0x173/0x1700 lib/vsprintf.c:2136
+| sprintf+0xbe/0xf0 lib/vsprintf.c:2386
+| proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23
+| get_link fs/namei.c:1047 [inline]
+| link_path_walk+0x1041/0x1490 fs/namei.c:2127
+...
+
+This happened when the host hit a page fault, and delivered it as in an
+async page fault, while the guest was in an RCU read-side critical
+section.  The guest then tries to reschedule in kvm_async_pf_task_wait(),
+but rcu_preempt_note_context_switch() would treat the reschedule as a
+sleep in RCU read-side critical section, which is not allowed (even in
+preemptible RCU).  Thus the WARN.
+
+To cure this, make kvm_async_pf_task_wait() go to the halt path if the
+PF happens in a RCU read-side critical section.
+
+Reported-by: Sasha Levin <levinsasha928@gmail.com>
+Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/kvm.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
+ 
+ 	n.token = token;
+ 	n.cpu = smp_processor_id();
+-	n.halted = is_idle_task(current) || preempt_count() > 1;
++	n.halted = is_idle_task(current) || preempt_count() > 1 ||
++		   rcu_preempt_depth();
+ 	init_swait_queue_head(&n.wq);
+ 	hlist_add_head(&n.link, &b->list);
+ 	raw_spin_unlock(&b->lock);
diff --git a/queue-4.13/md-fix-a-race-condition-for-flush-request-handling.patch b/queue-4.13/md-fix-a-race-condition-for-flush-request-handling.patch
new file mode 100644
index 00000000000..753568e4d39
--- /dev/null
+++ b/queue-4.13/md-fix-a-race-condition-for-flush-request-handling.patch
@@ -0,0 +1,52 @@
+From 79bf31a3b2a7ca467cfec8ff97d359a77065d01f Mon Sep 17 00:00:00 2001
+From: Shaohua Li <shli@fb.com>
+Date: Thu, 21 Sep 2017 09:55:28 -0700
+Subject: md: fix a race condition for flush request handling
+
+From: Shaohua Li <shli@fb.com>
+
+commit 79bf31a3b2a7ca467cfec8ff97d359a77065d01f upstream.
+
+md_submit_flush_data calls pers->make_request, which missed the suspend check.
+Fix it with the new md_handle_request API.
+
+Reported-by: Nate Dailey <nate.dailey@stratus.com>
+Tested-by: Nate Dailey <nate.dailey@stratus.com>
+Fixes: cc27b0c78c79 ("md: fix deadlock between mddev_suspend() and md_write_start()")
+Reviewed-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -439,16 +439,22 @@ static void md_submit_flush_data(struct
+ 	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
+ 	struct bio *bio = mddev->flush_bio;
+ 
++	/*
++	 * must reset flush_bio before calling into md_handle_request to avoid a
++	 * deadlock, because other bios passed md_handle_request suspend check
++	 * could wait for this and below md_handle_request could wait for those
++	 * bios because of suspend check
++	 */
++	mddev->flush_bio = NULL;
++	wake_up(&mddev->sb_wait);
++
+ 	if (bio->bi_iter.bi_size == 0)
+ 		/* an empty barrier - all done */
+ 		bio_endio(bio);
+ 	else {
+ 		bio->bi_opf &= ~REQ_PREFLUSH;
+-		mddev->pers->make_request(mddev, bio);
++		md_handle_request(mddev, bio);
+ 	}
+-
+-	mddev->flush_bio = NULL;
+-	wake_up(&mddev->sb_wait);
+ }
+ 
+ void md_flush_request(struct mddev *mddev, struct bio *bio)
diff --git a/queue-4.13/md-separate-request-handling.patch b/queue-4.13/md-separate-request-handling.patch
new file mode 100644
index 00000000000..c1f65bf3d53
--- /dev/null
+++ b/queue-4.13/md-separate-request-handling.patch
@@ -0,0 +1,122 @@
+From 393debc23c7820211d1c8253dd6a8408a7628fe7 Mon Sep 17 00:00:00 2001
+From: Shaohua Li <shli@fb.com>
+Date: Thu, 21 Sep 2017 10:23:35 -0700
+Subject: md: separate request handling
+
+From: Shaohua Li <shli@fb.com>
+
+commit 393debc23c7820211d1c8253dd6a8408a7628fe7 upstream.
+
+With commit cc27b0c78c79, pers->make_request could bail out without handling
+the bio. If that happens, we should retry.  The commit fixes md_make_request
+but not other call sites. Separate the request handling part, so other call
+sites can use it.
+
+Reported-by: Nate Dailey <nate.dailey@stratus.com>
+Fixes: cc27b0c78c79 ("md: fix deadlock between mddev_suspend() and md_write_start()")
+Reviewed-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   58 +++++++++++++++++++++++++++++++-------------------------
+ drivers/md/md.h |    1 
+ 2 files changed, 34 insertions(+), 25 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -266,6 +266,37 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
+  * call has finished, the bio has been linked into some internal structure
+  * and so is visible to ->quiesce(), so we don't need the refcount any more.
+  */
++void md_handle_request(struct mddev *mddev, struct bio *bio)
++{
++check_suspended:
++	rcu_read_lock();
++	if (mddev->suspended) {
++		DEFINE_WAIT(__wait);
++		for (;;) {
++			prepare_to_wait(&mddev->sb_wait, &__wait,
++					TASK_UNINTERRUPTIBLE);
++			if (!mddev->suspended)
++				break;
++			rcu_read_unlock();
++			schedule();
++			rcu_read_lock();
++		}
++		finish_wait(&mddev->sb_wait, &__wait);
++	}
++	atomic_inc(&mddev->active_io);
++	rcu_read_unlock();
++
++	if (!mddev->pers->make_request(mddev, bio)) {
++		atomic_dec(&mddev->active_io);
++		wake_up(&mddev->sb_wait);
++		goto check_suspended;
++	}
++
++	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
++		wake_up(&mddev->sb_wait);
++}
++EXPORT_SYMBOL(md_handle_request);
++
+ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
+ {
+ 	const int rw = bio_data_dir(bio);
+@@ -285,23 +316,6 @@ static blk_qc_t md_make_request(struct r
+ 		bio_endio(bio);
+ 		return BLK_QC_T_NONE;
+ 	}
+-check_suspended:
+-	rcu_read_lock();
+-	if (mddev->suspended) {
+-		DEFINE_WAIT(__wait);
+-		for (;;) {
+-			prepare_to_wait(&mddev->sb_wait, &__wait,
+-					TASK_UNINTERRUPTIBLE);
+-			if (!mddev->suspended)
+-				break;
+-			rcu_read_unlock();
+-			schedule();
+-			rcu_read_lock();
+-		}
+-		finish_wait(&mddev->sb_wait, &__wait);
+-	}
+-	atomic_inc(&mddev->active_io);
+-	rcu_read_unlock();
+ 
+ 	/*
+ 	 * save the sectors now since our bio can
+@@ -310,20 +324,14 @@ check_suspended:
+ 	sectors = bio_sectors(bio);
+ 	/* bio could be mergeable after passing to underlayer */
+ 	bio->bi_opf &= ~REQ_NOMERGE;
+-	if (!mddev->pers->make_request(mddev, bio)) {
+-		atomic_dec(&mddev->active_io);
+-		wake_up(&mddev->sb_wait);
+-		goto check_suspended;
+-	}
++
++	md_handle_request(mddev, bio);
+ 
+ 	cpu = part_stat_lock();
+ 	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+ 	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+ 	part_stat_unlock();
+ 
+-	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+-		wake_up(&mddev->sb_wait);
+-
+ 	return BLK_QC_T_NONE;
+ }
+ 
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -686,6 +686,7 @@ extern void md_stop_writes(struct mddev
+ extern int md_rdev_init(struct md_rdev *rdev);
+ extern void md_rdev_clear(struct md_rdev *rdev);
+ 
++extern void md_handle_request(struct mddev *mddev, struct bio *bio);
+ extern void mddev_suspend(struct mddev *mddev);
+ extern void mddev_resume(struct mddev *mddev);
+ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
diff --git a/queue-4.13/mtd-fix-partition-alignment-check-on-multi-erasesize-devices.patch b/queue-4.13/mtd-fix-partition-alignment-check-on-multi-erasesize-devices.patch
new file mode 100644
index 00000000000..a77be0ff342
--- /dev/null
+++ b/queue-4.13/mtd-fix-partition-alignment-check-on-multi-erasesize-devices.patch
@@ -0,0 +1,47 @@
+From 7e439681af82984045efc215437ebb2ca8d33a4c Mon Sep 17 00:00:00 2001
+From: Boris Brezillon <boris.brezillon@free-electrons.com>
+Date: Mon, 25 Sep 2017 10:19:57 +0200
+Subject: mtd: Fix partition alignment check on multi-erasesize devices
+
+From: Boris Brezillon <boris.brezillon@free-electrons.com>
+
+commit 7e439681af82984045efc215437ebb2ca8d33a4c upstream.
+
+Commit 1eeef2d7483a ("mtd: handle partitioning on devices with 0
+erasesize") introduced a regression on heterogeneous erase region
+devices. Alignment of the partition was tested against the master
+eraseblock size which can be bigger than the slave one, thus leading
+to some partitions being marked as read-only.
+
+Update wr_alignment to match this slave erasesize after this erasesize
+has been determined by picking the biggest erasesize of all the regions
+embedded in the MTD partition.
+
+Reported-by: Mathias Thore <Mathias.Thore@infinera.com>
+Fixes: 1eeef2d7483a ("mtd: handle partitioning on devices with 0 erasesize")
+Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Tested-by: Mathias Thore <Mathias.Thore@infinera.com>
+Reviewed-by: Mathias Thore <Mathias.Thore@infinera.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/mtdpart.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -581,6 +581,14 @@ static struct mtd_part *allocate_partiti
+ 		slave->mtd.erasesize = parent->erasesize;
+ 	}
+ 
++	/*
++	 * Slave erasesize might differ from the master one if the master
++	 * exposes several regions with different erasesize. Adjust
++	 * wr_alignment accordingly.
++	 */
++	if (!(slave->mtd.flags & MTD_NO_ERASE))
++		wr_alignment = slave->mtd.erasesize;
++
+ 	tmp = slave->offset;
+ 	remainder = do_div(tmp, wr_alignment);
+ 	if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
diff --git a/queue-4.13/mtd-nand-atmel-fix-buffer-overflow-in-atmel_pmecc_user.patch b/queue-4.13/mtd-nand-atmel-fix-buffer-overflow-in-atmel_pmecc_user.patch
new file mode 100644
index 00000000000..e7f5d1378d2
--- /dev/null
+++ b/queue-4.13/mtd-nand-atmel-fix-buffer-overflow-in-atmel_pmecc_user.patch
@@ -0,0 +1,37 @@
+From 36de80740008e6a4a55115b4a92e2059e47c1cba Mon Sep 17 00:00:00 2001
+From: Richard Genoud <richard.genoud@gmail.com>
+Date: Wed, 27 Sep 2017 14:49:17 +0200
+Subject: mtd: nand: atmel: fix buffer overflow in atmel_pmecc_user
+
+From: Richard Genoud <richard.genoud@gmail.com>
+
+commit 36de80740008e6a4a55115b4a92e2059e47c1cba upstream.
+
+When calculating the size needed by struct atmel_pmecc_user *user,
+the dmu and delta buffer sizes were forgotten.
+This led to memory corruption (especially with a large ecc_strength).
+
+Link: http://lkml.kernel.org/r/1506503157.3016.5.camel@gmail.com
+Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver")
+Reported-by: Richard Genoud <richard.genoud@gmail.com>
+Pointed-at-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
+Reviewed-by: Nicolas Ferre <nicolas.ferre@microchip.com>
+Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/nand/atmel/pmecc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mtd/nand/atmel/pmecc.c
++++ b/drivers/mtd/nand/atmel/pmecc.c
+@@ -363,7 +363,7 @@ atmel_pmecc_create_user(struct atmel_pme
+ 	size += (req->ecc.strength + 1) * sizeof(u16);
+ 	/* Reserve space for mu, dmu and delta. */
+ 	size = ALIGN(size, sizeof(s32));
+-	size += (req->ecc.strength + 1) * sizeof(s32);
++	size += (req->ecc.strength + 1) * sizeof(s32) * 3;
+ 
+ 	user = kzalloc(size, GFP_KERNEL);
+ 	if (!user)
diff --git a/queue-4.13/pci-fix-race-condition-with-driver_override.patch b/queue-4.13/pci-fix-race-condition-with-driver_override.patch
new file mode 100644
index 00000000000..08a35f9d9fb
--- /dev/null
+++ b/queue-4.13/pci-fix-race-condition-with-driver_override.patch
@@ -0,0 +1,66 @@
+From 9561475db680f7144d2223a409dd3d7e322aca03 Mon Sep 17 00:00:00 2001
+From: Nicolai Stange <nstange@suse.de>
+Date: Mon, 11 Sep 2017 09:45:40 +0200
+Subject: PCI: Fix race condition with driver_override
+
+From: Nicolai Stange <nstange@suse.de>
+
+commit 9561475db680f7144d2223a409dd3d7e322aca03 upstream.
+
+The driver_override implementation is susceptible to a race condition when
+different threads are reading vs. storing a different driver override.  Add
+locking to avoid the race condition.
+
+This is in close analogy to commit 6265539776a0 ("driver core: platform:
+fix race condition with driver_override") from Adrian Salido.
+
+Fixes: 782a985d7af2 ("PCI: Introduce new device binding path using pci_dev.driver_override")
+Signed-off-by: Nicolai Stange <nstange@suse.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/pci-sysfs.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/drivers/pci/pci-sysfs.c
++++ b/drivers/pci/pci-sysfs.c
+@@ -686,7 +686,7 @@ static ssize_t driver_override_store(str
+ 				     const char *buf, size_t count)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(dev);
+-	char *driver_override, *old = pdev->driver_override, *cp;
++	char *driver_override, *old, *cp;
+ 
+ 	/* We need to keep extra room for a newline */
+ 	if (count >= (PAGE_SIZE - 1))
+@@ -700,12 +700,15 @@ static ssize_t driver_override_store(str
+ 	if (cp)
+ 		*cp = '\0';
+ 
++	device_lock(dev);
++	old = pdev->driver_override;
+ 	if (strlen(driver_override)) {
+ 		pdev->driver_override = driver_override;
+ 	} else {
+ 		kfree(driver_override);
+ 		pdev->driver_override = NULL;
+ 	}
++	device_unlock(dev);
+ 
+ 	kfree(old);
+ 
+@@ -716,8 +719,12 @@ static ssize_t driver_override_show(stru
+ 				    struct device_attribute *attr, char *buf)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(dev);
++	ssize_t len;
+ 
+-	return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
++	device_lock(dev);
++	len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
++	device_unlock(dev);
++	return len;
+ }
+ static DEVICE_ATTR_RW(driver_override);
+ 
diff --git a/queue-4.13/platform-x86-fujitsu-laptop-don-t-oops-when-fuj02e3-is-not-presnt.patch b/queue-4.13/platform-x86-fujitsu-laptop-don-t-oops-when-fuj02e3-is-not-presnt.patch
new file mode 100644
index 00000000000..31d6e7cf7fd
--- /dev/null
+++ b/queue-4.13/platform-x86-fujitsu-laptop-don-t-oops-when-fuj02e3-is-not-presnt.patch
@@ -0,0 +1,48 @@
+From ce7c47d60bda6c7f09ccf16e978d971c8fa16ff0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
+Date: Mon, 18 Sep 2017 23:00:59 +0300
+Subject: platform/x86: fujitsu-laptop: Don't oops when FUJ02E3 is not presnt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+commit ce7c47d60bda6c7f09ccf16e978d971c8fa16ff0 upstream.
+
+My Fujitsu-Siemens Lifebook S6120 doesn't have the FUJ02E3 device,
+but it does have FUJ02B1. That means we do register the backlight
+device (and it even seems to work), but the code will oops as soon
+as we try to set the backlight brightness because it's trying to
+call call_fext_func() with a NULL device. Let's just skip those
+function calls when the FUJ02E3 device is not present.
+
+Cc: Jonathan Woithe <jwoithe@just42.net>
+Cc: Andy Shevchenko <andy@infradead.org>
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/platform/x86/fujitsu-laptop.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/platform/x86/fujitsu-laptop.c
++++ b/drivers/platform/x86/fujitsu-laptop.c
+@@ -254,10 +254,12 @@ static int bl_update_status(struct backl
+ {
+ 	struct acpi_device *device = bl_get_data(b);
+ 
+-	if (b->props.power == FB_BLANK_POWERDOWN)
+-		call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
+-	else
+-		call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
++	if (fext) {
++		if (b->props.power == FB_BLANK_POWERDOWN)
++			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
++		else
++			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
++	}
+ 
+ 	return set_lcd_level(device, b->props.brightness);
+ }
diff --git a/queue-4.13/pm-opp-call-notifier-without-holding-opp_table-lock.patch b/queue-4.13/pm-opp-call-notifier-without-holding-opp_table-lock.patch
new file mode 100644
index 00000000000..4d505627dcc
--- /dev/null
+++ b/queue-4.13/pm-opp-call-notifier-without-holding-opp_table-lock.patch
@@ -0,0 +1,58 @@
+From e4d8ae00169f7686e1da5a62e5cf797d12bf8822 Mon Sep 17 00:00:00 2001
+From: Viresh Kumar <viresh.kumar@linaro.org>
+Date: Thu, 21 Sep 2017 10:44:36 -0700
+Subject: PM / OPP: Call notifier without holding opp_table->lock
+
+From: Viresh Kumar <viresh.kumar@linaro.org>
+
+commit e4d8ae00169f7686e1da5a62e5cf797d12bf8822 upstream.
+
+The notifier callbacks may want to call some OPP helper routines which
+may try to take the same opp_table->lock again and cause a deadlock. One
+such usecase was reported by Chanwoo Choi, where calling
+dev_pm_opp_disable() leads us to the devfreq's OPP notifier handler,
+which further calls dev_pm_opp_find_freq_floor() and it deadlocks.
+
+We don't really need the opp_table->lock to be held across the notifier
+call though, all we want to make sure is that the 'opp' doesn't get
+freed while being used from within the notifier chain. We can do it with
+help of dev_pm_opp_get/put() as well. Let's do it.
+
+Fixes: 5b650b388844 ("PM / OPP: Take kref from _find_opp_table()")
+Reported-by: Chanwoo Choi <cw00.choi@samsung.com>
+Tested-by: Chanwoo Choi <cw00.choi@samsung.com>
+Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
+Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/power/opp/core.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/base/power/opp/core.c
++++ b/drivers/base/power/opp/core.c
+@@ -1581,6 +1581,9 @@ static int _opp_set_availability(struct
+ 
+ 	opp->available = availability_req;
+ 
++	dev_pm_opp_get(opp);
++	mutex_unlock(&opp_table->lock);
++
+ 	/* Notify the change of the OPP availability */
+ 	if (availability_req)
+ 		blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE,
+@@ -1589,8 +1592,12 @@ static int _opp_set_availability(struct
+ 		blocking_notifier_call_chain(&opp_table->head,
+ 					     OPP_EVENT_DISABLE, opp);
+ 
++	dev_pm_opp_put(opp);
++	goto put_table;
++
+ unlock:
+ 	mutex_unlock(&opp_table->lock);
++put_table:
+ 	dev_pm_opp_put_opp_table(opp_table);
+ 	return r;
+ }
diff --git a/queue-4.13/sched-sysctl-check-user-input-value-of-sysctl_sched_time_avg.patch b/queue-4.13/sched-sysctl-check-user-input-value-of-sysctl_sched_time_avg.patch
new file mode 100644
index 00000000000..85f06a76500
--- /dev/null
+++ b/queue-4.13/sched-sysctl-check-user-input-value-of-sysctl_sched_time_avg.patch
@@ -0,0 +1,83 @@
+From 5ccba44ba118a5000cccc50076b0344632459779 Mon Sep 17 00:00:00 2001
+From: Ethan Zhao <ethan.zhao@oracle.com>
+Date: Mon, 4 Sep 2017 13:59:34 +0800
+Subject: sched/sysctl: Check user input value of sysctl_sched_time_avg
+
+From: Ethan Zhao <ethan.zhao@oracle.com>
+
+commit 5ccba44ba118a5000cccc50076b0344632459779 upstream.
+
+System will hang if user set sysctl_sched_time_avg to 0:
+
+  [root@XXX ~]# sysctl kernel.sched_time_avg_ms=0
+
+  Stack traceback for pid 0
+  0xffff883f6406c600 0 0 1 3 R 0xffff883f6406cf50 *swapper/3
+  ffff883f7ccc3ae8 0000000000000018 ffffffff810c4dd0 0000000000000000
+  0000000000017800 ffff883f7ccc3d78 0000000000000003 ffff883f7ccc3bf8
+  ffffffff810c4fc9 ffff883f7ccc3c08 00000000810c5043 ffff883f7ccc3c08
+  Call Trace:
+  <IRQ> [<ffffffff810c4dd0>] ? update_group_capacity+0x110/0x200
+  [<ffffffff810c4fc9>] ? update_sd_lb_stats+0x109/0x600
+  [<ffffffff810c5507>] ? find_busiest_group+0x47/0x530
+  [<ffffffff810c5b84>] ? load_balance+0x194/0x900
+  [<ffffffff810ad5ca>] ? update_rq_clock.part.83+0x1a/0xe0
+  [<ffffffff810c6d42>] ? rebalance_domains+0x152/0x290
+  [<ffffffff810c6f5c>] ? run_rebalance_domains+0xdc/0x1d0
+  [<ffffffff8108a75b>] ? __do_softirq+0xfb/0x320
+  [<ffffffff8108ac85>] ? irq_exit+0x125/0x130
+  [<ffffffff810b3a17>] ? scheduler_ipi+0x97/0x160
+  [<ffffffff81052709>] ? smp_reschedule_interrupt+0x29/0x30
+  [<ffffffff8173a1be>] ? reschedule_interrupt+0x6e/0x80
+   <EOI> [<ffffffff815bc83c>] ? cpuidle_enter_state+0xcc/0x230
+  [<ffffffff815bc80c>] ? cpuidle_enter_state+0x9c/0x230
+  [<ffffffff815bc9d7>] ? cpuidle_enter+0x17/0x20
+  [<ffffffff810cd6dc>] ? cpu_startup_entry+0x38c/0x420
+  [<ffffffff81053373>] ? start_secondary+0x173/0x1e0
+
+Because divide-by-zero error happens in function:
+
+update_group_capacity()
+  update_cpu_capacity()
+    scale_rt_capacity()
+     {
+          ...
+          total = sched_avg_period() + delta;
+          used = div_u64(avg, total);
+          ...
+     }
+
+To fix this issue, check user input value of sysctl_sched_time_avg, keep
+it unchanged when hitting invalid input, and set the minimum limit of
+sysctl_sched_time_avg to 1 ms.
+
+Reported-by: James Puthukattukaran <james.puthukattukaran@oracle.com>
+Signed-off-by: Ethan Zhao <ethan.zhao@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: efault@gmx.de
+Cc: ethan.kernel@gmail.com
+Cc: keescook@chromium.org
+Cc: mcgrof@kernel.org
+Link: http://lkml.kernel.org/r/1504504774-18253-1-git-send-email-ethan.zhao@oracle.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sysctl.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -367,7 +367,8 @@ static struct ctl_table kern_table[] = {
+ 		.data		= &sysctl_sched_time_avg,
+ 		.maxlen		= sizeof(unsigned int),
+ 		.mode		= 0644,
+-		.proc_handler	= proc_dointvec,
++		.proc_handler	= proc_dointvec_minmax,
++		.extra1		= &one,
+ 	},
+ #ifdef CONFIG_SCHEDSTATS
+ 	{
diff --git a/queue-4.13/series b/queue-4.13/series
index 44184652780..5c7ecc85572 100644
--- a/queue-4.13/series
+++ b/queue-4.13/series
@@ -73,3 +73,33 @@ extable-consolidate-kernel_text_address-functions.patch
 extable-enable-rcu-if-it-is-not-watching-in-kernel_text_address.patch
 selftests-seccomp-support-glibc-2.26-siginfo_t.h.patch
 seccomp-fix-the-usage-of-get-put_seccomp_filter-in-seccomp_get_filter.patch
+arm64-make-sure-spsel-is-always-set.patch
+arm64-mm-use-read_once-when-dereferencing-pointer-to-pte-table.patch
+arm64-fault-route-pte-translation-faults-via-do_translation_fault.patch
+kvm-vmx-extract-__pi_post_block.patch
+kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch
+kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch
+kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
+kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch
+kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch
+kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch
+xfs-validate-bdev-support-for-dax-inode-flag.patch
+fix-infoleak-in-waitid-2.patch
+sched-sysctl-check-user-input-value-of-sysctl_sched_time_avg.patch
+irq-generic-chip-don-t-replace-domain-s-name.patch
+mtd-fix-partition-alignment-check-on-multi-erasesize-devices.patch
+mtd-nand-atmel-fix-buffer-overflow-in-atmel_pmecc_user.patch
+etnaviv-fix-submit-error-path.patch
+etnaviv-fix-gem-object-list-corruption.patch
+futex-fix-pi_state-owner-serialization.patch
+md-separate-request-handling.patch
+md-fix-a-race-condition-for-flush-request-handling.patch
+pci-fix-race-condition-with-driver_override.patch
+btrfs-fix-null-pointer-dereference-from-free_reloc_roots.patch
+btrfs-clear-ordered-flag-on-cleaning-up-ordered-extents.patch
+btrfs-finish-ordered-extent-cleaning-if-no-progress-is-found.patch
+btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch
+btrfs-prevent-to-set-invalid-default-subvolid.patch
+platform-x86-fujitsu-laptop-don-t-oops-when-fuj02e3-is-not-presnt.patch
+pm-opp-call-notifier-without-holding-opp_table-lock.patch
+x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
diff --git a/queue-4.13/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch b/queue-4.13/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
new file mode 100644
index 00000000000..0c7db6b89b5
--- /dev/null
+++ b/queue-4.13/x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch
@@ -0,0 +1,211 @@
+From a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+Date: Mon, 4 Sep 2017 10:32:15 +0200
+Subject: x86/mm: Fix fault error path using unsafe vma pointer
+
+From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+
+commit a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 upstream.
+
+commit 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal
+generation code") passes down a vma pointer to the error path, but that is
+done once the mmap_sem is released when calling mm_fault_error() from
+__do_page_fault().
+
+This is dangerous as the vma structure is no longer safe to use once the
+mmap_sem has been released. As only the protection key value is required in
+the error processing, we could just pass down this value.
+
+Fix it by passing a pointer to a protection key value down to the fault
+signal generation code. The use of a pointer allows keeping the check
+generating a warning message in fill_sig_info_pkey() when the vma was not
+known. If the pointer is valid, the protection value can be accessed by
+dereferencing the pointer.
+
+[ tglx: Made *pkey u32 as that's the type which is passed in siginfo ]
+
+Fixes: 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal generation code")
+Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Link: http://lkml.kernel.org/r/1504513935-12742-1-git-send-email-ldufour@linux.vnet.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c |   47 ++++++++++++++++++++++++-----------------------
+ 1 file changed, 24 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsign
+  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+  *	     faulted on a pte with its pkey=4.
+  */
+-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
+-		struct vm_area_struct *vma)
++static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+ {
+ 	/* This is effectively an #ifdef */
+ 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
+@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_co
+ 	 * valid VMA, so we should never reach this without a
+ 	 * valid VMA.
+ 	 */
+-	if (!vma) {
++	if (!pkey) {
+ 		WARN_ONCE(1, "PKU fault with no VMA passed in");
+ 		info->si_pkey = 0;
+ 		return;
+@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_co
+ 	 * absolutely guranteed to be 100% accurate because of
+ 	 * the race explained above.
+ 	 */
+-	info->si_pkey = vma_pkey(vma);
++	info->si_pkey = *pkey;
+ }
+ 
+ static void
+ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
+-		     struct task_struct *tsk, struct vm_area_struct *vma,
+-		     int fault)
++		     struct task_struct *tsk, u32 *pkey, int fault)
+ {
+ 	unsigned lsb = 0;
+ 	siginfo_t info;
+@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int s
+ 		lsb = PAGE_SHIFT;
+ 	info.si_addr_lsb = lsb;
+ 
+-	fill_sig_info_pkey(si_code, &info, vma);
++	fill_sig_info_pkey(si_code, &info, pkey);
+ 
+ 	force_sig_info(si_signo, &info, tsk);
+ }
+@@ -758,8 +756,6 @@ no_context(struct pt_regs *regs, unsigne
+ 	struct task_struct *tsk = current;
+ 	unsigned long flags;
+ 	int sig;
+-	/* No context means no VMA to pass down */
+-	struct vm_area_struct *vma = NULL;
+ 
+ 	/* Are we prepared to handle this kernel fault? */
+ 	if (fixup_exception(regs, X86_TRAP_PF)) {
+@@ -784,7 +780,7 @@ no_context(struct pt_regs *regs, unsigne
+ 
+ 			/* XXX: hwpoison faults will set the wrong code. */
+ 			force_sig_info_fault(signal, si_code, address,
+-					     tsk, vma, 0);
++					     tsk, NULL, 0);
+ 		}
+ 
+ 		/*
+@@ -893,8 +889,7 @@ show_signal_msg(struct pt_regs *regs, un
+ 
+ static void
+ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+-		       unsigned long address, struct vm_area_struct *vma,
+-		       int si_code)
++		       unsigned long address, u32 *pkey, int si_code)
+ {
+ 	struct task_struct *tsk = current;
+ 
+@@ -942,7 +937,7 @@ __bad_area_nosemaphore(struct pt_regs *r
+ 		tsk->thread.error_code	= error_code;
+ 		tsk->thread.trap_nr	= X86_TRAP_PF;
+ 
+-		force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
++		force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
+ 
+ 		return;
+ 	}
+@@ -955,9 +950,9 @@ __bad_area_nosemaphore(struct pt_regs *r
+ 
+ static noinline void
+ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+-		     unsigned long address, struct vm_area_struct *vma)
++		     unsigned long address, u32 *pkey)
+ {
+-	__bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
++	__bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
+ }
+ 
+ static void
+@@ -965,6 +960,10 @@ __bad_area(struct pt_regs *regs, unsigne
+ 	   unsigned long address,  struct vm_area_struct *vma, int si_code)
+ {
+ 	struct mm_struct *mm = current->mm;
++	u32 pkey;
++
++	if (vma)
++		pkey = vma_pkey(vma);
+ 
+ 	/*
+ 	 * Something tried to access memory that isn't in our memory map..
+@@ -972,7 +971,8 @@ __bad_area(struct pt_regs *regs, unsigne
+ 	 */
+ 	up_read(&mm->mmap_sem);
+ 
+-	__bad_area_nosemaphore(regs, error_code, address, vma, si_code);
++	__bad_area_nosemaphore(regs, error_code, address,
++			       (vma) ? &pkey : NULL, si_code);
+ }
+ 
+ static noinline void
+@@ -1015,7 +1015,7 @@ bad_area_access_error(struct pt_regs *re
+ 
+ static void
+ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+-	  struct vm_area_struct *vma, unsigned int fault)
++	  u32 *pkey, unsigned int fault)
+ {
+ 	struct task_struct *tsk = current;
+ 	int code = BUS_ADRERR;
+@@ -1042,13 +1042,12 @@ do_sigbus(struct pt_regs *regs, unsigned
+ 		code = BUS_MCEERR_AR;
+ 	}
+ #endif
+-	force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
++	force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
+ }
+ 
+ static noinline void
+ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+-	       unsigned long address, struct vm_area_struct *vma,
+-	       unsigned int fault)
++	       unsigned long address, u32 *pkey, unsigned int fault)
+ {
+ 	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+ 		no_context(regs, error_code, address, 0, 0);
+@@ -1072,9 +1071,9 @@ mm_fault_error(struct pt_regs *regs, uns
+ 	} else {
+ 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ 			     VM_FAULT_HWPOISON_LARGE))
+-			do_sigbus(regs, error_code, address, vma, fault);
++			do_sigbus(regs, error_code, address, pkey, fault);
+ 		else if (fault & VM_FAULT_SIGSEGV)
+-			bad_area_nosemaphore(regs, error_code, address, vma);
++			bad_area_nosemaphore(regs, error_code, address, pkey);
+ 		else
+ 			BUG();
+ 	}
+@@ -1268,6 +1267,7 @@ __do_page_fault(struct pt_regs *regs, un
+ 	struct mm_struct *mm;
+ 	int fault, major = 0;
+ 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
++	u32 pkey;
+ 
+ 	tsk = current;
+ 	mm = tsk->mm;
+@@ -1468,9 +1468,10 @@ good_area:
+ 		return;
+ 	}
+ 
++	pkey = vma_pkey(vma);
+ 	up_read(&mm->mmap_sem);
+ 	if (unlikely(fault & VM_FAULT_ERROR)) {
+-		mm_fault_error(regs, error_code, address, vma, fault);
++		mm_fault_error(regs, error_code, address, &pkey, fault);
+ 		return;
+ 	}
+ 
diff --git a/queue-4.13/xfs-validate-bdev-support-for-dax-inode-flag.patch b/queue-4.13/xfs-validate-bdev-support-for-dax-inode-flag.patch
new file mode 100644
index 00000000000..513d66724f8
--- /dev/null
+++ b/queue-4.13/xfs-validate-bdev-support-for-dax-inode-flag.patch
@@ -0,0 +1,50 @@
+From 6851a3db7e224bbb85e23b3c64a506c9e0904382 Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Mon, 18 Sep 2017 14:46:03 -0700
+Subject: xfs: validate bdev support for DAX inode flag
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit 6851a3db7e224bbb85e23b3c64a506c9e0904382 upstream.
+
+Currently only the blocksize is checked, but we should really be calling
+bdev_dax_supported() which also tests to make sure we can get a
+struct dax_device and that the dax_direct_access() path is working.
+
+This is the same check that we do for the "-o dax" mount option in
+xfs_fs_fill_super().
+
+This does not fix the race issues that caused the XFS DAX inode option to
+be disabled, so that option will still be disabled.  If/when we re-enable
+it, though, I think we will want this issue to have been fixed.  I also do
+think that we want to fix this in stable kernels.
+
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_ioctl.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
+ 	int			*join_flags)
+ {
+ 	struct inode		*inode = VFS_I(ip);
++	struct super_block	*sb = inode->i_sb;
+ 	int			error;
+ 
+ 	*join_flags = 0;
+@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
+ 	if (fa->fsx_xflags & FS_XFLAG_DAX) {
+ 		if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+ 			return -EINVAL;
+-		if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
++		if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
+ 			return -EINVAL;
+ 	}
+ 
-- 
2.47.3