--- /dev/null
+From ac34f15e0c6d2fd58480052b6985f6991fb53bcc Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 29 Dec 2015 14:02:29 -0800
+Subject: block: fix del_gendisk() vs blkdev_ioctl crash
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit ac34f15e0c6d2fd58480052b6985f6991fb53bcc upstream.
+
+When tearing down a block device early in its lifetime, userspace may
+still be performing discovery actions like blkdev_ioctl() to re-read
+partitions.
+
+The nvdimm_revalidate_disk() implementation depends on
+disk->driverfs_dev being valid at entry. However, it is set to NULL in
+del_gendisk(), and, fatally, this happens *before* the disk device is
+deleted from userspace view.
+
+There's no reason for del_gendisk() to clear ->driverfs_dev. That
+device is the parent of the disk and is guaranteed not to be freed
+until the disk, as a child, drops its ->parent reference.
+
+We could also fix this issue locally in nvdimm_revalidate_disk() by
+using disk_to_dev(disk)->parent, but let's fix it globally, since
+->driverfs_dev follows the lifetime of the parent. Longer term we
+should probably just add a @parent parameter to add_disk() and stop
+carrying this pointer in the gendisk.
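+
+For reference, the local alternative mentioned above would have amounted
+to something like the following in nvdimm_revalidate_disk() (a sketch
+only, not the change made here; the variable name is illustrative):
+
+	struct device *parent = disk_to_dev(disk)->parent;	/* instead of disk->driverfs_dev */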
+
+ BUG: unable to handle kernel NULL pointer dereference at (null)
+ IP: [<ffffffffa00340a8>] nvdimm_revalidate_disk+0x18/0x90 [libnvdimm]
+ CPU: 2 PID: 538 Comm: systemd-udevd Tainted: G O 4.4.0-rc5 #2257
+ [..]
+ Call Trace:
+ [<ffffffff8143e5c7>] rescan_partitions+0x87/0x2c0
+ [<ffffffff810f37f9>] ? __lock_is_held+0x49/0x70
+ [<ffffffff81438c62>] __blkdev_reread_part+0x72/0xb0
+ [<ffffffff81438cc5>] blkdev_reread_part+0x25/0x40
+ [<ffffffff8143982d>] blkdev_ioctl+0x4fd/0x9c0
+ [<ffffffff811246c9>] ? current_kernel_time64+0x69/0xd0
+ [<ffffffff812916dd>] block_ioctl+0x3d/0x50
+ [<ffffffff81264c38>] do_vfs_ioctl+0x308/0x560
+ [<ffffffff8115dbd1>] ? __audit_syscall_entry+0xb1/0x100
+ [<ffffffff810031d6>] ? do_audit_syscall_entry+0x66/0x70
+ [<ffffffff81264f09>] SyS_ioctl+0x79/0x90
+ [<ffffffff81902672>] entry_SYSCALL_64_fastpath+0x12/0x76
+
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jens Axboe <axboe@fb.com>
+Reported-by: Robert Hu <robert.hu@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/genhd.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/block/genhd.c
++++ b/block/genhd.c
+@@ -664,7 +664,6 @@ void del_gendisk(struct gendisk *disk)
+
+ kobject_put(disk->part0.holder_dir);
+ kobject_put(disk->slave_dir);
+- disk->driverfs_dev = NULL;
+ if (!sysfs_deprecated)
+ sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
+ pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
--- /dev/null
+From cb7a5724c7e1bfb5766ad1c3beba14cc715991cf Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Wed, 24 Aug 2016 16:23:10 -0700
+Subject: Drivers: hv: balloon: account for gaps in hot add regions
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit cb7a5724c7e1bfb5766ad1c3beba14cc715991cf upstream.
+
+I'm observing the following hot add requests from the WS2012 host:
+
+hot_add_req: start_pfn = 0x108200 count = 330752
+hot_add_req: start_pfn = 0x158e00 count = 193536
+hot_add_req: start_pfn = 0x188400 count = 239616
+
+As the host doesn't specify hot add regions, we try to create a
+128Mb-aligned region covering the first request: we create the 0x108000 -
+0x160000 region and add 0x108000 - 0x158e00 memory. The second request
+passes the pfn_covered() check, so we enlarge the region to 0x108000 -
+0x190000 and add 0x158e00 - 0x188200 memory. The problem emerges with the
+third request, as it starts at 0x188400, leaving a 0x200 gap which is not
+covered. Since the end of our region is now 0x190000, the request again
+passes the pfn_covered() check, where we simply adjust covered_end_pfn and
+make it 0x188400 instead of 0x188200. This means we'll try to online the
+0x188200-0x188400 pages, but these pages were never assigned to us, and we
+crash.
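+
+For reference, the boundaries above follow directly from the request
+numbers (counts are decimal, PFNs are hex):
+
+  0x108200 + 330752 (0x50c00) = 0x158e00  <- covered end after request 1
+  0x158e00 + 193536 (0x2f400) = 0x188200  <- covered end after request 2
+  0x188400 - 0x188200         = 0x200     <- uncovered gap before request 3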
+
+We can't react to such requests by creating new hot add regions, as the
+whole suggested range may fall into the previously identified
+128Mb-aligned area, so we'd end up adding nothing or creating
+intersecting regions, and our current logic doesn't allow that. Instead,
+create a list of such 'gaps' and check for them in the page online
+callback.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hv/hv_balloon.c | 131 ++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 94 insertions(+), 37 deletions(-)
+
+--- a/drivers/hv/hv_balloon.c
++++ b/drivers/hv/hv_balloon.c
+@@ -441,6 +441,16 @@ struct hv_hotadd_state {
+ unsigned long covered_end_pfn;
+ unsigned long ha_end_pfn;
+ unsigned long end_pfn;
++ /*
++ * A list of gaps.
++ */
++ struct list_head gap_list;
++};
++
++struct hv_hotadd_gap {
++ struct list_head list;
++ unsigned long start_pfn;
++ unsigned long end_pfn;
+ };
+
+ struct balloon_state {
+@@ -596,18 +606,46 @@ static struct notifier_block hv_memory_n
+ .priority = 0
+ };
+
++/* Check if the particular page is backed and can be onlined and online it. */
++static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
++{
++ unsigned long cur_start_pgp;
++ unsigned long cur_end_pgp;
++ struct hv_hotadd_gap *gap;
++
++ cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
++ cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
++
++ /* The page is not backed. */
++ if (((unsigned long)pg < cur_start_pgp) ||
++ ((unsigned long)pg >= cur_end_pgp))
++ return;
++
++ /* Check for gaps. */
++ list_for_each_entry(gap, &has->gap_list, list) {
++ cur_start_pgp = (unsigned long)
++ pfn_to_page(gap->start_pfn);
++ cur_end_pgp = (unsigned long)
++ pfn_to_page(gap->end_pfn);
++ if (((unsigned long)pg >= cur_start_pgp) &&
++ ((unsigned long)pg < cur_end_pgp)) {
++ return;
++ }
++ }
+
+-static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
++ /* This frame is currently backed; online the page. */
++ __online_page_set_limits(pg);
++ __online_page_increment_counters(pg);
++ __online_page_free(pg);
++}
++
++static void hv_bring_pgs_online(struct hv_hotadd_state *has,
++ unsigned long start_pfn, unsigned long size)
+ {
+ int i;
+
+- for (i = 0; i < size; i++) {
+- struct page *pg;
+- pg = pfn_to_page(start_pfn + i);
+- __online_page_set_limits(pg);
+- __online_page_increment_counters(pg);
+- __online_page_free(pg);
+- }
++ for (i = 0; i < size; i++)
++ hv_page_online_one(has, pfn_to_page(start_pfn + i));
+ }
+
+ static void hv_mem_hot_add(unsigned long start, unsigned long size,
+@@ -684,26 +722,24 @@ static void hv_online_page(struct page *
+ list_for_each(cur, &dm_device.ha_region_list) {
+ has = list_entry(cur, struct hv_hotadd_state, list);
+ cur_start_pgp = (unsigned long)
+- pfn_to_page(has->covered_start_pfn);
+- cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
++ pfn_to_page(has->start_pfn);
++ cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
+
+- if (((unsigned long)pg >= cur_start_pgp) &&
+- ((unsigned long)pg < cur_end_pgp)) {
+- /*
+- * This frame is currently backed; online the
+- * page.
+- */
+- __online_page_set_limits(pg);
+- __online_page_increment_counters(pg);
+- __online_page_free(pg);
+- }
++ /* The page belongs to a different HAS. */
++ if (((unsigned long)pg < cur_start_pgp) ||
++ ((unsigned long)pg >= cur_end_pgp))
++ continue;
++
++ hv_page_online_one(has, pg);
++ break;
+ }
+ }
+
+-static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
++static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+ {
+ struct list_head *cur;
+ struct hv_hotadd_state *has;
++ struct hv_hotadd_gap *gap;
+ unsigned long residual, new_inc;
+
+ if (list_empty(&dm_device.ha_region_list))
+@@ -718,6 +754,24 @@ static bool pfn_covered(unsigned long st
+ */
+ if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
+ continue;
++
++ /*
++ * If the current start pfn is not where the covered_end
++ * is, create a gap and update covered_end_pfn.
++ */
++ if (has->covered_end_pfn != start_pfn) {
++ gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
++ if (!gap)
++ return -ENOMEM;
++
++ INIT_LIST_HEAD(&gap->list);
++ gap->start_pfn = has->covered_end_pfn;
++ gap->end_pfn = start_pfn;
++ list_add_tail(&gap->list, &has->gap_list);
++
++ has->covered_end_pfn = start_pfn;
++ }
++
+ /*
+ * If the current hot add-request extends beyond
+ * our current limit; extend it.
+@@ -734,19 +788,10 @@ static bool pfn_covered(unsigned long st
+ has->end_pfn += new_inc;
+ }
+
+- /*
+- * If the current start pfn is not where the covered_end
+- * is, update it.
+- */
+-
+- if (has->covered_end_pfn != start_pfn)
+- has->covered_end_pfn = start_pfn;
+-
+- return true;
+-
++ return 1;
+ }
+
+- return false;
++ return 0;
+ }
+
+ static unsigned long handle_pg_range(unsigned long pg_start,
+@@ -785,6 +830,8 @@ static unsigned long handle_pg_range(uns
+ if (pgs_ol > pfn_cnt)
+ pgs_ol = pfn_cnt;
+
++ has->covered_end_pfn += pgs_ol;
++ pfn_cnt -= pgs_ol;
+ /*
+ * Check if the corresponding memory block is already
+ * online by checking its last previously backed page.
+@@ -793,10 +840,8 @@ static unsigned long handle_pg_range(uns
+ */
+ if (start_pfn > has->start_pfn &&
+ !PageReserved(pfn_to_page(start_pfn - 1)))
+- hv_bring_pgs_online(start_pfn, pgs_ol);
++ hv_bring_pgs_online(has, start_pfn, pgs_ol);
+
+- has->covered_end_pfn += pgs_ol;
+- pfn_cnt -= pgs_ol;
+ }
+
+ if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
+@@ -834,13 +879,19 @@ static unsigned long process_hot_add(uns
+ unsigned long rg_size)
+ {
+ struct hv_hotadd_state *ha_region = NULL;
++ int covered;
+
+ if (pfn_cnt == 0)
+ return 0;
+
+- if (!dm_device.host_specified_ha_region)
+- if (pfn_covered(pg_start, pfn_cnt))
++ if (!dm_device.host_specified_ha_region) {
++ covered = pfn_covered(pg_start, pfn_cnt);
++ if (covered < 0)
++ return 0;
++
++ if (covered)
+ goto do_pg_range;
++ }
+
+ /*
+ * If the host has specified a hot-add range; deal with it first.
+@@ -852,6 +903,7 @@ static unsigned long process_hot_add(uns
+ return 0;
+
+ INIT_LIST_HEAD(&ha_region->list);
++ INIT_LIST_HEAD(&ha_region->gap_list);
+
+ list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+ ha_region->start_pfn = rg_start;
+@@ -1584,6 +1636,7 @@ static int balloon_remove(struct hv_devi
+ struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+ struct list_head *cur, *tmp;
+ struct hv_hotadd_state *has;
++ struct hv_hotadd_gap *gap, *tmp_gap;
+
+ if (dm->num_pages_ballooned != 0)
+ pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
+@@ -1600,6 +1653,10 @@ static int balloon_remove(struct hv_devi
+ #endif
+ list_for_each_safe(cur, tmp, &dm->ha_region_list) {
+ has = list_entry(cur, struct hv_hotadd_state, list);
++ list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
++ list_del(&gap->list);
++ kfree(gap);
++ }
+ list_del(&has->list);
+ kfree(has);
+ }
--- /dev/null
+From 7cf3b79ec85ee1a5bbaaf936bb1d050dc652983b Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Wed, 24 Aug 2016 16:23:09 -0700
+Subject: Drivers: hv: balloon: keep track of where ha_region starts
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 7cf3b79ec85ee1a5bbaaf936bb1d050dc652983b upstream.
+
+Windows 2012 (non-R2) does not specify a hot add region in its hot add
+requests, and the logic in hot_add_req() tries to find a 128Mb-aligned
+region covering the request. It may also happen that the host's requests
+are not 128Mb aligned, in which case the created ha_region will start
+before the first specified PFN. We can't online these non-present pages,
+but we don't remember the real start of the region.
+
+This is a regression introduced by commit 5abbbb75d733 ("Drivers: hv:
+hv_balloon: don't lose memory when onlining order is not natural"). While
+the idea of keeping a 'moving window' was wrong (as there is no guarantee
+that hot add requests come ordered), we should still keep track of
+covered_start_pfn. This is not a revert; the logic is different.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hv/hv_balloon.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/hv/hv_balloon.c
++++ b/drivers/hv/hv_balloon.c
+@@ -430,13 +430,14 @@ struct dm_info_msg {
+ * currently hot added. We hot add in multiples of 128M
+ * chunks; it is possible that we may not be able to bring
+ * online all the pages in the region. The range
+- * covered_end_pfn defines the pages that can
++ * covered_start_pfn:covered_end_pfn defines the pages that can
+ * be brough online.
+ */
+
+ struct hv_hotadd_state {
+ struct list_head list;
+ unsigned long start_pfn;
++ unsigned long covered_start_pfn;
+ unsigned long covered_end_pfn;
+ unsigned long ha_end_pfn;
+ unsigned long end_pfn;
+@@ -682,7 +683,8 @@ static void hv_online_page(struct page *
+
+ list_for_each(cur, &dm_device.ha_region_list) {
+ has = list_entry(cur, struct hv_hotadd_state, list);
+- cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
++ cur_start_pgp = (unsigned long)
++ pfn_to_page(has->covered_start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+ if (((unsigned long)pg >= cur_start_pgp) &&
+@@ -854,6 +856,7 @@ static unsigned long process_hot_add(uns
+ list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+ ha_region->start_pfn = rg_start;
+ ha_region->ha_end_pfn = rg_start;
++ ha_region->covered_start_pfn = pg_start;
+ ha_region->covered_end_pfn = pg_start;
+ ha_region->end_pfn = rg_start + rg_size;
+ }
--- /dev/null
+From 56ef6718a1d8d77745033c5291e025ce18504159 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Wed, 7 Dec 2016 01:16:27 -0800
+Subject: hv: don't reset hv_context.tsc_page on crash
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 56ef6718a1d8d77745033c5291e025ce18504159 upstream.
+
+It may happen that secondary CPUs are still alive when we reset
+hv_context.tsc_page, and resetting it will cause a subsequent crash in
+read_hv_clock_tsc(), as we don't check for it being NULL there. Leaving
+it in place is safe, as we're not freeing this page anyway.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hv/hv.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/hv/hv.c
++++ b/drivers/hv/hv.c
+@@ -305,9 +305,10 @@ void hv_cleanup(bool crash)
+
+ hypercall_msr.as_uint64 = 0;
+ wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
+- if (!crash)
++ if (!crash) {
+ vfree(hv_context.tsc_page);
+- hv_context.tsc_page = NULL;
++ hv_context.tsc_page = NULL;
++ }
+ }
+ #endif
+ }
--- /dev/null
+From 8b3405e345b5a098101b0c31b264c812bba045d9 Mon Sep 17 00:00:00 2001
+From: Suzuki K Poulose <suzuki.poulose@arm.com>
+Date: Mon, 3 Apr 2017 15:12:43 +0100
+Subject: kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd
+
+From: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+commit 8b3405e345b5a098101b0c31b264c812bba045d9 upstream.
+
+In kvm_free_stage2_pgd() we don't hold the kvm->mmu_lock while calling
+unmap_stage2_range() on the entire memory range for the guest. This could
+cause problems for other callers (e.g., munmap on a memslot) trying to
+unmap a range. And since we have to unmap the entire guest memory range
+while holding a spinlock, make sure we yield the lock, if necessary, after
+we unmap each PUD range.
+
+Fixes: commit d5d8184d35c9 ("KVM: ARM: Memory virtualization setup")
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Cc: Christoffer Dall <christoffer.dall@linaro.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+[ Avoid vCPU starvation and lockup detector warnings ]
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Signed-off-by: Christoffer Dall <cdall@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kvm/mmu.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/arch/arm/kvm/mmu.c
++++ b/arch/arm/kvm/mmu.c
+@@ -300,6 +300,14 @@ static void unmap_range(struct kvm *kvm,
+ next = kvm_pgd_addr_end(addr, end);
+ if (!pgd_none(*pgd))
+ unmap_puds(kvm, pgd, addr, next);
++ /*
++ * If we are dealing with a large range in
++ * stage2 table, release the kvm->mmu_lock
++ * to prevent starvation and lockup detector
++ * warnings.
++ */
++ if (kvm && (next != end))
++ cond_resched_lock(&kvm->mmu_lock);
+ } while (pgd++, addr = next, addr != end);
+ }
+
+@@ -738,6 +746,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm
+ */
+ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+ {
++ assert_spin_locked(&kvm->mmu_lock);
+ unmap_range(kvm, kvm->arch.pgd, start, size);
+ }
+
+@@ -824,7 +833,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm
+ if (kvm->arch.pgd == NULL)
+ return;
+
++ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
++ spin_unlock(&kvm->mmu_lock);
++
+ kvm_free_hwpgd(kvm_get_hwpgd(kvm));
+ if (KVM_PREALLOC_LEVEL > 0)
+ kfree(kvm->arch.pgd);
--- /dev/null
+From 8f5f525d5b83f7d76a6baf9c4e94d4bf312ea7f6 Mon Sep 17 00:00:00 2001
+From: Oliver O'Halloran <oohall@gmail.com>
+Date: Mon, 3 Apr 2017 13:25:12 +1000
+Subject: powerpc/64: Fix flush_(d|i)cache_range() called from modules
+
+From: Oliver O'Halloran <oohall@gmail.com>
+
+commit 8f5f525d5b83f7d76a6baf9c4e94d4bf312ea7f6 upstream.
+
+When the kernel is compiled to use the 64-bit ABIv2, the _GLOBAL() macro
+does not include a global entry point. A function's global entry point is
+used when the function is called from a different TOC context, and in the
+kernel this typically means a call from a module into the vmlinux (or
+vice-versa).
+
+There are a few exported asm functions declared with _GLOBAL(), and
+calling them from a module will likely crash the kernel, since any
+TOC-relative load will yield garbage.
+
+flush_icache_range() and flush_dcache_range() are both exported to
+modules and use the TOC, so they must use _GLOBAL_TOC().
+
+Fixes: 721aeaa9fdf3 ("powerpc: Build little endian ppc64 kernel with ABIv2")
+Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/misc_64.S | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kernel/misc_64.S
++++ b/arch/powerpc/kernel/misc_64.S
+@@ -67,6 +67,9 @@ PPC64_CACHES:
+ */
+
+ _KPROBE(flush_icache_range)
++0: addis r2,r12,(.TOC. - 0b)@ha
++ addi r2, r2,(.TOC. - 0b)@l
++ .localentry flush_icache_range, . - flush_icache_range
+ BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
+ blr
+@@ -117,7 +120,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_I
+ *
+ * flush all bytes from start to stop-1 inclusive
+ */
+-_GLOBAL(flush_dcache_range)
++_GLOBAL_TOC(flush_dcache_range)
+
+ /*
+ * Flush the data cache to memory
mac80211-reject-tods-broadcast-data-frames.patch
ubi-upd-always-flush-after-prepared-for-an-update.patch
powerpc-kprobe-fix-oops-when-kprobed-on-stdu-instruction.patch
+x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch
+kvm-arm-arm64-fix-locking-for-kvm_free_stage2_pgd.patch
+powerpc-64-fix-flush_-d-i-cache_range-called-from-modules.patch
+tools-hv-kvp-ensure-kvp-device-fd-is-closed-on-exec.patch
+drivers-hv-balloon-keep-track-of-where-ha_region-starts.patch
+drivers-hv-balloon-account-for-gaps-in-hot-add-regions.patch
+hv-don-t-reset-hv_context.tsc_page-on-crash.patch
+x86-pmem-fix-broken-__copy_user_nocache-cache-bypass-assumptions.patch
+block-fix-del_gendisk-vs-blkdev_ioctl-crash.patch
+tipc-fix-crash-during-node-removal.patch
--- /dev/null
+From d25a01257e422a4bdeb426f69529d57c73b235fe Mon Sep 17 00:00:00 2001
+From: Jon Paul Maloy <jon.maloy@ericsson.com>
+Date: Wed, 24 Feb 2016 11:10:48 -0500
+Subject: tipc: fix crash during node removal
+
+From: Jon Paul Maloy <jon.maloy@ericsson.com>
+
+commit d25a01257e422a4bdeb426f69529d57c73b235fe upstream.
+
+We have identified a race condition during TIPC module unload that
+allows a node reference counter to go to zero and the node instance to
+be freed before the node timer has finished accessing it. This leads to
+occasional crashes, especially in multi-namespace environments.
+
+The scenario goes as follows:
+
+CPU0:(node_stop) CPU1:(node_timeout) // ref == 2
+
+1: if(!mod_timer())
+2: if (del_timer())
+3: tipc_node_put() // ref -> 1
+4: tipc_node_put() // ref -> 0
+5: kfree_rcu(node);
+6: tipc_node_get(node)
+7: // BOOM!
+
+We now clean up this functionality as follows:
+
+1) We remove the node pointer from the node lookup table before we
+ attempt deactivating the timer. This way, we reduce the risk that
+ tipc_node_find() may obtain a valid pointer to an instance marked
+ for deletion; a harmless but undesirable situation.
+
+2) We use del_timer_sync() instead of del_timer() to safely deactivate
+ the node timer without any risk that it might be reactivated by the
+ timeout handler. There is no risk of deadlock here, since the two
+ functions never touch the same spinlocks.
+
+3) We remove a pointless tipc_node_get() + tipc_node_put() from the
+ timeout handler.
+
+Reported-by: Zhijiang Hu <huzhijiang@gmail.com>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/tipc/node.c | 24 +++++++++++-------------
+ 1 file changed, 11 insertions(+), 13 deletions(-)
+
+--- a/net/tipc/node.c
++++ b/net/tipc/node.c
+@@ -102,9 +102,10 @@ static unsigned int tipc_hashfn(u32 addr
+
+ static void tipc_node_kref_release(struct kref *kref)
+ {
+- struct tipc_node *node = container_of(kref, struct tipc_node, kref);
++ struct tipc_node *n = container_of(kref, struct tipc_node, kref);
+
+- tipc_node_delete(node);
++ kfree(n->bc_entry.link);
++ kfree_rcu(n, rcu);
+ }
+
+ void tipc_node_put(struct tipc_node *node)
+@@ -216,21 +217,20 @@ static void tipc_node_delete(struct tipc
+ {
+ list_del_rcu(&node->list);
+ hlist_del_rcu(&node->hash);
+- kfree(node->bc_entry.link);
+- kfree_rcu(node, rcu);
++ tipc_node_put(node);
++
++ del_timer_sync(&node->timer);
++ tipc_node_put(node);
+ }
+
+ void tipc_node_stop(struct net *net)
+ {
+- struct tipc_net *tn = net_generic(net, tipc_net_id);
++ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *node, *t_node;
+
+ spin_lock_bh(&tn->node_list_lock);
+- list_for_each_entry_safe(node, t_node, &tn->node_list, list) {
+- if (del_timer(&node->timer))
+- tipc_node_put(node);
+- tipc_node_put(node);
+- }
++ list_for_each_entry_safe(node, t_node, &tn->node_list, list)
++ tipc_node_delete(node);
+ spin_unlock_bh(&tn->node_list_lock);
+ }
+
+@@ -313,9 +313,7 @@ static void tipc_node_timeout(unsigned l
+ if (rc & TIPC_LINK_DOWN_EVT)
+ tipc_node_link_down(n, bearer_id, false);
+ }
+- if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
+- tipc_node_get(n);
+- tipc_node_put(n);
++ mod_timer(&n->timer, jiffies + n->keepalive_intv);
+ }
+
+ /**
--- /dev/null
+From 26840437cbd6d3625ea6ab34e17cd34bb810c861 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Wed, 6 Jul 2016 18:24:10 -0700
+Subject: Tools: hv: kvp: ensure kvp device fd is closed on exec
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 26840437cbd6d3625ea6ab34e17cd34bb810c861 upstream.
+
+The KVP daemon does fork()/exec() (via popen()), so we need to close our
+fds to avoid sharing them with child processes. The immediate implication
+of not doing so that I have observed is SELinux complaining about 'ip'
+trying to access '/dev/vmbus/hv_kvp'.
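+
+An equivalent fix (a sketch only, not what this patch does) would have
+been to set the close-on-exec flag right after the open, mirroring the
+daemon's existing error reporting style:
+
+	if (fcntl(kvp_fd, F_SETFD, FD_CLOEXEC) < 0)
+		syslog(LOG_ERR, "fcntl(FD_CLOEXEC) failed; error: %d %s",
+		       errno, strerror(errno));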
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/hv/hv_kvp_daemon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/hv/hv_kvp_daemon.c
++++ b/tools/hv/hv_kvp_daemon.c
+@@ -1433,7 +1433,7 @@ int main(int argc, char *argv[])
+ openlog("KVP", 0, LOG_USER);
+ syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
+
+- kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR);
++ kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
+
+ if (kvp_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
--- /dev/null
+From 29f72ce3e4d18066ec75c79c857bee0618a3504b Mon Sep 17 00:00:00 2001
+From: Yazen Ghannam <yazen.ghannam@amd.com>
+Date: Thu, 30 Mar 2017 13:17:14 +0200
+Subject: x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs
+
+From: Yazen Ghannam <yazen.ghannam@amd.com>
+
+commit 29f72ce3e4d18066ec75c79c857bee0618a3504b upstream.
+
+MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name.
+However, MCA bank 3 is defined on Fam17h systems and can be accessed
+using legacy MSRs. Without a name we get a stack trace on Fam17h systems
+when trying to register sysfs files for bank 3 on kernels that don't
+recognize Scalable MCA.
+
+Call MCA bank 3 "decode_unit" since this is what it represents on
+Fam17h. This will allow kernels without SMCA support to see this bank on
+Fam17h+ and prevent the stack trace. This will not affect older systems
+since this bank is reserved on them, i.e. it'll be ignored.
+
+Tested on AMD Fam15h and Fam17h systems.
+
+ WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal
+ kobject: (ffff88085bb256c0): attempted to be registered with empty name!
+ ...
+ Call Trace:
+ kobject_add_internal
+ kobject_add
+ kobject_create_and_add
+ threshold_create_device
+ threshold_init_device
+
+Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+@@ -53,7 +53,7 @@ static const char * const th_names[] = {
+ "load_store",
+ "insn_fetch",
+ "combined_unit",
+- "",
++ "decode_unit",
+ "northbridge",
+ "execution_unit",
+ };
--- /dev/null
+From 11e63f6d920d6f2dfd3cd421e939a4aec9a58dcd Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 6 Apr 2017 09:04:31 -0700
+Subject: x86, pmem: fix broken __copy_user_nocache cache-bypass assumptions
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 11e63f6d920d6f2dfd3cd421e939a4aec9a58dcd upstream.
+
+Before we rework the "pmem api" to stop abusing __copy_user_nocache()
+for memcpy_to_pmem(), we need to fix cases where we may strand dirty data
+in the CPU cache. The problem occurs when copy_from_iter_pmem() is used
+for arbitrary data transfers from userspace. There is no guarantee that
+these transfers, performed by dax_iomap_actor(), will have aligned
+destinations or aligned transfer lengths. Backstop the use of
+__copy_user_nocache() with explicit cache management in these unaligned
+cases.
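+
+A few illustrative cases under the alignment rules spelled out in the
+comment added below (a sketch; "head"/"tail" mean the cache lines
+containing the start and end of the destination):
+
+  bytes = 4,  dest 4-byte aligned  -> non-temporal copy, no flush needed
+  bytes < 8,  bytes != 4           -> cached copy, write back head line
+  bytes = 4,  dest 2-byte aligned  -> cached copy, write back head line
+  bytes = 20, dest 8-byte aligned  -> tail not a multiple of 8, write back tail line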
+
+Yes, copy_from_iter_pmem() is now too big for an inline, but addressing
+that is saved for a later patch that moves the entirety of the "pmem
+api" into the pmem driver directly.
+
+Fixes: 5de490daec8b ("pmem: add copy_from_iter_pmem() and clear_pmem()")
+Cc: <x86@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jeff Moyer <jmoyer@redhat.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Matthew Wilcox <mawilcox@microsoft.com>
+Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pmem.h | 45 +++++++++++++++++++++++++++++++-------------
+ 1 file changed, 32 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/pmem.h
++++ b/arch/x86/include/asm/pmem.h
+@@ -72,8 +72,8 @@ static inline void arch_wmb_pmem(void)
+ * @size: number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+- * instruction. This function requires explicit ordering with an
+- * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation.
++ * instruction. Note that @size is internally rounded up to be cache
++ * line size aligned.
+ */
+ static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+ {
+@@ -87,15 +87,6 @@ static inline void __arch_wb_cache_pmem(
+ clwb(p);
+ }
+
+-/*
+- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
+- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
+- */
+-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
+-{
+- return iter_is_iovec(i) == false;
+-}
+-
+ /**
+ * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
+ * @addr: PMEM destination address
+@@ -114,8 +105,36 @@ static inline size_t arch_copy_from_iter
+ /* TODO: skip the write-back by always using non-temporal stores */
+ len = copy_from_iter_nocache(vaddr, bytes, i);
+
+- if (__iter_needs_pmem_wb(i))
+- __arch_wb_cache_pmem(vaddr, bytes);
++ /*
++ * In the iovec case on x86_64 copy_from_iter_nocache() uses
++ * non-temporal stores for the bulk of the transfer, but we need
++ * to manually flush if the transfer is unaligned. A cached
++ * memory copy is used when destination or size is not naturally
++ * aligned. That is:
++ * - Require 8-byte alignment when size is 8 bytes or larger.
++ * - Require 4-byte alignment when size is 4 bytes.
++ *
++ * In the non-iovec case the entire destination needs to be
++ * flushed.
++ */
++ if (iter_is_iovec(i)) {
++ unsigned long flushed, dest = (unsigned long) addr;
++
++ if (bytes < 8) {
++ if (!IS_ALIGNED(dest, 4) || (bytes != 4))
++ __arch_wb_cache_pmem(addr, 1);
++ } else {
++ if (!IS_ALIGNED(dest, 8)) {
++ dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
++ __arch_wb_cache_pmem(addr, 1);
++ }
++
++ flushed = dest - (unsigned long) addr;
++ if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
++ __arch_wb_cache_pmem(addr + bytes - 1, 1);
++ }
++ } else
++ __arch_wb_cache_pmem(addr, bytes);
+
+ return len;
+ }