--- /dev/null
+From 7cf321d118a825c1541b43ca45294126fd474efa Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 24 Oct 2016 15:37:48 +1000
+Subject: drm/drivers: add support for using the arch wc mapping API.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Dave Airlie <airlied@redhat.com>
+
+commit 7cf321d118a825c1541b43ca45294126fd474efa upstream.
+
+This fixes a regression in all these drivers since the cache
+mode tracking was fixed for mixed mappings. It uses the new
+arch API to add the VRAM range to the PAT mapping tracking
+tables.
+
+Fixes: 87744ab3832 (mm: fix cache mode tracking in vm_insert_mixed())
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++++
+ drivers/gpu/drm/ast/ast_ttm.c | 6 ++++++
+ drivers/gpu/drm/cirrus/cirrus_ttm.c | 7 +++++++
+ drivers/gpu/drm/mgag200/mgag200_ttm.c | 7 +++++++
+ drivers/gpu/drm/nouveau/nouveau_ttm.c | 8 ++++++++
+ drivers/gpu/drm/radeon/radeon_object.c | 5 +++++
+ 6 files changed, 38 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -492,6 +492,10 @@ void amdgpu_bo_force_delete(struct amdgp
+
+ int amdgpu_bo_init(struct amdgpu_device *adev)
+ {
++ /* reserve PAT memory space to WC for VRAM */
++ arch_io_reserve_memtype_wc(adev->mc.aper_base,
++ adev->mc.aper_size);
++
+ /* Add an MTRR for the VRAM */
+ adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
+ adev->mc.aper_size);
+@@ -507,6 +511,7 @@ void amdgpu_bo_fini(struct amdgpu_device
+ {
+ amdgpu_ttm_fini(adev);
+ arch_phys_wc_del(adev->mc.vram_mtrr);
++ arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
+ }
+
+ int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
+--- a/drivers/gpu/drm/ast/ast_ttm.c
++++ b/drivers/gpu/drm/ast/ast_ttm.c
+@@ -275,6 +275,8 @@ int ast_mm_init(struct ast_private *ast)
+ return ret;
+ }
+
++ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ ast->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
+@@ -283,11 +285,15 @@ int ast_mm_init(struct ast_private *ast)
+
+ void ast_mm_fini(struct ast_private *ast)
+ {
++ struct drm_device *dev = ast->dev;
++
+ ttm_bo_device_release(&ast->ttm.bdev);
+
+ ast_ttm_global_release(ast);
+
+ arch_phys_wc_del(ast->fb_mtrr);
++ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ }
+
+ void ast_ttm_placement(struct ast_bo *bo, int domain)
+--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
++++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
+@@ -275,6 +275,9 @@ int cirrus_mm_init(struct cirrus_device
+ return ret;
+ }
+
++ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
++
+ cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
+@@ -284,6 +287,8 @@ int cirrus_mm_init(struct cirrus_device
+
+ void cirrus_mm_fini(struct cirrus_device *cirrus)
+ {
++ struct drm_device *dev = cirrus->dev;
++
+ if (!cirrus->mm_inited)
+ return;
+
+@@ -293,6 +298,8 @@ void cirrus_mm_fini(struct cirrus_device
+
+ arch_phys_wc_del(cirrus->fb_mtrr);
+ cirrus->fb_mtrr = 0;
++ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ }
+
+ void cirrus_ttm_placement(struct cirrus_bo *bo, int domain)
+--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
++++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
+@@ -274,6 +274,9 @@ int mgag200_mm_init(struct mga_device *m
+ return ret;
+ }
+
++ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
++
+ mdev->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
+@@ -282,10 +285,14 @@ int mgag200_mm_init(struct mga_device *m
+
+ void mgag200_mm_fini(struct mga_device *mdev)
+ {
++ struct drm_device *dev = mdev->dev;
++
+ ttm_bo_device_release(&mdev->ttm.bdev);
+
+ mgag200_ttm_global_release(mdev);
+
++ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ arch_phys_wc_del(mdev->fb_mtrr);
+ mdev->fb_mtrr = 0;
+ }
+--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
+@@ -397,6 +397,9 @@ nouveau_ttm_init(struct nouveau_drm *drm
+ /* VRAM init */
+ drm->gem.vram_available = drm->device.info.ram_user;
+
++ arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1),
++ device->func->resource_size(device, 1));
++
+ ret = ttm_bo_init_mm(&drm->ttm.bdev, TTM_PL_VRAM,
+ drm->gem.vram_available >> PAGE_SHIFT);
+ if (ret) {
+@@ -429,6 +432,8 @@ nouveau_ttm_init(struct nouveau_drm *drm
+ void
+ nouveau_ttm_fini(struct nouveau_drm *drm)
+ {
++ struct nvkm_device *device = nvxx_device(&drm->device);
++
+ ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
+ ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
+
+@@ -438,4 +443,7 @@ nouveau_ttm_fini(struct nouveau_drm *drm
+
+ arch_phys_wc_del(drm->ttm.mtrr);
+ drm->ttm.mtrr = 0;
++ arch_io_free_memtype_wc(device->func->resource_addr(device, 1),
++ device->func->resource_size(device, 1));
++
+ }
+--- a/drivers/gpu/drm/radeon/radeon_object.c
++++ b/drivers/gpu/drm/radeon/radeon_object.c
+@@ -447,6 +447,10 @@ void radeon_bo_force_delete(struct radeo
+
+ int radeon_bo_init(struct radeon_device *rdev)
+ {
++ /* reserve PAT memory space to WC for VRAM */
++ arch_io_reserve_memtype_wc(rdev->mc.aper_base,
++ rdev->mc.aper_size);
++
+ /* Add an MTRR for the VRAM */
+ if (!rdev->fastfb_working) {
+ rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
+@@ -464,6 +468,7 @@ void radeon_bo_fini(struct radeon_device
+ {
+ radeon_ttm_fini(rdev);
+ arch_phys_wc_del(rdev->mc.vram_mtrr);
++ arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
+ }
+
+ /* Returns how many bytes TTM can move per IB.
---
arch/powerpc/include/asm/fadump.h | 3 -
- arch/powerpc/kernel/fadump.c | 91 ++++++++++++++++++++++++++++++++------
- 2 files changed, 77 insertions(+), 17 deletions(-)
+ arch/powerpc/kernel/fadump.c | 92 ++++++++++++++++++++++++++++++++------
+ 2 files changed, 78 insertions(+), 17 deletions(-)
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
unsigned long long size;
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
-@@ -48,8 +48,10 @@ static struct fadump_mem_struct fdm;
+@@ -35,6 +35,7 @@
+ #include <linux/crash_dump.h>
+ #include <linux/kobject.h>
+ #include <linux/sysfs.h>
++#include <linux/slab.h>
+
+ #include <asm/page.h>
+ #include <asm/prom.h>
+@@ -48,8 +49,10 @@ static struct fadump_mem_struct fdm;
static const struct fadump_mem_struct *fdm_active;
static DEFINE_MUTEX(fadump_mutex);
/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node,
-@@ -726,38 +728,88 @@ static int __init process_fadump(const s
+@@ -726,38 +729,88 @@ static int __init process_fadump(const s
return 0;
}
}
static int fadump_init_elfcore_header(char *bufp)
-@@ -793,10 +845,11 @@ static int fadump_init_elfcore_header(ch
+@@ -793,10 +846,11 @@ static int fadump_init_elfcore_header(ch
* Traverse through memblock structure and setup crash memory ranges. These
* ranges will be used create PT_LOAD program headers in elfcore header.
*/
pr_debug("Setup crash memory ranges.\n");
crash_mem_ranges = 0;
-@@ -807,7 +860,9 @@ static void fadump_setup_crash_memory_ra
+@@ -807,7 +861,9 @@ static void fadump_setup_crash_memory_ra
* specified during fadump registration. We need to create a separate
* program header for this chunk with the correct offset.
*/
for_each_memblock(memory, reg) {
start = (unsigned long long)reg->base;
-@@ -816,8 +871,12 @@ static void fadump_setup_crash_memory_ra
+@@ -816,8 +872,12 @@ static void fadump_setup_crash_memory_ra
start = fw_dump.boot_memory_size;
/* add this range excluding the reserved dump area. */
}
/*
-@@ -941,6 +1000,7 @@ static void register_fadump(void)
+@@ -941,6 +1001,7 @@ static void register_fadump(void)
{
unsigned long addr;
void *vaddr;
/*
* If no memory is reserved then we can not register for firmware-
-@@ -949,7 +1009,9 @@ static void register_fadump(void)
+@@ -949,7 +1010,9 @@ static void register_fadump(void)
if (!fw_dump.reserve_dump_area_size)
return;
addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
/* Initialize fadump crash info header. */
-@@ -1028,6 +1090,7 @@ void fadump_cleanup(void)
+@@ -1028,6 +1091,7 @@ void fadump_cleanup(void)
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
fadump_unregister_dump(&fdm);
--- /dev/null
+From 8ef4227615e158faa4ee85a1d6466782f7e22f2f Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 24 Oct 2016 15:27:59 +1000
+Subject: x86/io: add interface to reserve io memtype for a resource range. (v1.1)
+
+From: Dave Airlie <airlied@redhat.com>
+
+commit 8ef4227615e158faa4ee85a1d6466782f7e22f2f upstream.
+
+A recent change to the mm code in:
+87744ab3832b mm: fix cache mode tracking in vm_insert_mixed()
+
+started enforcing checking the memory type against the registered list for
+mixed pfn insertion mappings. It happens that the drm drivers for a number
+of gpus relied on this being broken. Currently the driver only inserted
+VRAM mappings into the tracking table when they came from the kernel,
+and userspace mappings never landed in the table. This led to a regression
+where all the mapping end up as UC instead of WC now.
+
+I've considered a number of solutions but since this needs to be fixed
+in fixes and not next, and some of the solutions were going to introduce
+overhead that hadn't been there before I didn't consider them viable at
+this stage. These mainly concerned hooking into the TTM io reserve APIs,
+but these API have a bunch of fast paths I didn't want to unwind to add
+this to.
+
+The solution I've decided on is to add a new API like the arch_phys_wc
+APIs (these would have worked but wc_del didn't take a range), and
+use them from the drivers to add a WC compatible mapping to the table
+for all VRAM on those GPUs. This means we can then create userspace
+mappings that won't get degraded to UC.
+
+v1.1: use CONFIG_X86_PAT + add some comments in io.h
+
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: x86@kernel.org
+Cc: mcgrof@suse.com
+Cc: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/io.h | 6 ++++++
+ arch/x86/mm/pat.c | 14 ++++++++++++++
+ include/linux/io.h | 22 ++++++++++++++++++++++
+ 3 files changed, 42 insertions(+)
+
+--- a/arch/x86/include/asm/io.h
++++ b/arch/x86/include/asm/io.h
+@@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle)
+ #define arch_phys_wc_add arch_phys_wc_add
+ #endif
+
++#ifdef CONFIG_X86_PAT
++extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size);
++extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size);
++#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
++#endif
++
+ #endif /* _ASM_X86_IO_H */
+--- a/arch/x86/mm/pat.c
++++ b/arch/x86/mm/pat.c
+@@ -726,6 +726,20 @@ void io_free_memtype(resource_size_t sta
+ free_memtype(start, end);
+ }
+
++int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
++{
++ enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
++
++ return io_reserve_memtype(start, start + size, &type);
++}
++EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
++
++void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
++{
++ io_free_memtype(start, start + size);
++}
++EXPORT_SYMBOL(arch_io_free_memtype_wc);
++
+ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot)
+ {
+--- a/include/linux/io.h
++++ b/include/linux/io.h
+@@ -154,4 +154,26 @@ enum {
+ void *memremap(resource_size_t offset, size_t size, unsigned long flags);
+ void memunmap(void *addr);
+
++/*
++ * On x86 PAT systems we have memory tracking that keeps track of
++ * the allowed mappings on memory ranges. This tracking works for
++ * all the in-kernel mapping APIs (ioremap*), but where the user
++ * wishes to map a range from a physical device into user memory
++ * the tracking won't be updated. This API is to be used by
++ * drivers which remap physical device pages into userspace,
++ * and wants to make sure they are mapped WC and not UC.
++ */
++#ifndef arch_io_reserve_memtype_wc
++static inline int arch_io_reserve_memtype_wc(resource_size_t base,
++ resource_size_t size)
++{
++ return 0;
++}
++
++static inline void arch_io_free_memtype_wc(resource_size_t base,
++ resource_size_t size)
++{
++}
++#endif
++
+ #endif /* _LINUX_IO_H */