--- /dev/null
+From a828b79637a2608b9872e1a283ef12436dce0fc4 Mon Sep 17 00:00:00 2001
+From: Vincent Whitchurch <vincent.whitchurch@axis.com>
+Date: Fri, 13 Jul 2018 11:12:22 +0100
+Subject: ARM: 8781/1: Fix Thumb-2 syscall return for binutils 2.29+
+
+[ Upstream commit afc9f65e01cd114cb2cedf544d22239116ce0cc6 ]
+
+When building the kernel as Thumb-2 with binutils 2.29 or newer, if the
+assembler has seen the .type directive (via ENDPROC()) for a symbol, it
+automatically handles the setting of the lowest bit when the symbol is
+used with ADR. The badr macro on the other hand handles this lowest bit
+manually. This leads to a jump to a wrong address in the wrong state
+in the syscall return path:
+
+ Internal error: Oops - undefined instruction: 0 [#2] SMP THUMB2
+ Modules linked in:
+ CPU: 0 PID: 652 Comm: modprobe Tainted: G D 4.18.0-rc3+ #8
+ PC is at ret_fast_syscall+0x4/0x62
+ LR is at sys_brk+0x109/0x128
+ pc : [<80101004>] lr : [<801c8a35>] psr: 60000013
+ Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
+ Control: 50c5387d Table: 9e82006a DAC: 00000051
+ Process modprobe (pid: 652, stack limit = 0x(ptrval))
+
+ 80101000 <ret_fast_syscall>:
+ 80101000: b672 cpsid i
+ 80101002: f8d9 2008 ldr.w r2, [r9, #8]
+ 80101006: f1b2 4ffe cmp.w r2, #2130706432 ; 0x7f000000
+
+ 80101184 <local_restart>:
+ 80101184: f8d9 a000 ldr.w sl, [r9]
+ 80101188: e92d 0030 stmdb sp!, {r4, r5}
+ 8010118c: f01a 0ff0 tst.w sl, #240 ; 0xf0
+ 80101190: d117 bne.n 801011c2 <__sys_trace>
+ 80101192: 46ba mov sl, r7
+ 80101194: f5ba 7fc8 cmp.w sl, #400 ; 0x190
+ 80101198: bf28 it cs
+ 8010119a: f04f 0a00 movcs.w sl, #0
+ 8010119e: f3af 8014 nop.w {20}
+ 801011a2: f2af 1ea2 subw lr, pc, #418 ; 0x1a2
+
+To fix this, add a new symbol name which doesn't have ENDPROC used on it
+and use that with badr. We can't remove the badr usage since that would
+cause breakage with older binutils.
+
+Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/kernel/entry-common.S | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
+index 56be67ecf0fa..d69adfb3d79e 100644
+--- a/arch/arm/kernel/entry-common.S
++++ b/arch/arm/kernel/entry-common.S
+@@ -32,6 +32,7 @@
+ * features make this path too inefficient.
+ */
+ ret_fast_syscall:
++__ret_fast_syscall:
+ UNWIND(.fnstart )
+ UNWIND(.cantunwind )
+ disable_irq_notrace @ disable interrupts
+@@ -57,6 +58,7 @@ fast_work_pending:
+ * r0 first to avoid needing to save registers around each C function call.
+ */
+ ret_fast_syscall:
++__ret_fast_syscall:
+ UNWIND(.fnstart )
+ UNWIND(.cantunwind )
+ str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
+@@ -223,7 +225,7 @@ local_restart:
+ tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
+ bne __sys_trace
+
+- invoke_syscall tbl, scno, r10, ret_fast_syscall
++ invoke_syscall tbl, scno, r10, __ret_fast_syscall
+
+ add r1, sp, #S_OFF
+ 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
+--
+2.19.1
+
--- /dev/null
+From 05bf4164cfe8d811a692aff47149d1bf29ae9065 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Thu, 24 Jan 2019 13:06:58 +0100
+Subject: drm: disable uncached DMA optimization for ARM and arm64
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ]
+
+The DRM driver stack is designed to work with cache coherent devices
+only, but permits an optimization to be enabled in some cases, where
+for some buffers, both the CPU and the GPU use uncached mappings,
+removing the need for DMA snooping and allocation in the CPU caches.
+
+The use of uncached GPU mappings relies on the correct implementation
+of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
+will use cached mappings nonetheless. On x86 platforms, this does not
+seem to matter, as uncached CPU mappings will snoop the caches in any
+case. However, on ARM and arm64, enabling this optimization on a
+platform where NoSnoop is ignored results in loss of coherency, which
+breaks correct operation of the device. Since we have no way of
+detecting whether NoSnoop works or not, just disable this
+optimization entirely for ARM and arm64.
+
+Cc: Christian Koenig <christian.koenig@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: David Zhou <David1.Zhou@amd.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: Junwei Zhang <Jerry.Zhang@amd.com>
+Cc: Michel Daenzer <michel.daenzer@amd.com>
+Cc: David Airlie <airlied@linux.ie>
+Cc: Daniel Vetter <daniel@ffwll.ch>
+Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Cc: Maxime Ripard <maxime.ripard@bootlin.com>
+Cc: Sean Paul <sean@poorly.run>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Robin Murphy <robin.murphy@arm.com>
+Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>
+Cc: dri-devel <dri-devel@lists.freedesktop.org>
+Reported-by: Carsten Haitzler <Carsten.Haitzler@arm.com>
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Link: https://patchwork.kernel.org/patch/10778815/
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/drm/drm_cache.h | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
+index cebecff536a3..c5fb6f871930 100644
+--- a/include/drm/drm_cache.h
++++ b/include/drm/drm_cache.h
+@@ -41,6 +41,24 @@ static inline bool drm_arch_can_wc_memory(void)
+ return false;
+ #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
+ return false;
++#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
++ /*
++ * The DRM driver stack is designed to work with cache coherent devices
++ * only, but permits an optimization to be enabled in some cases, where
++ * for some buffers, both the CPU and the GPU use uncached mappings,
++ * removing the need for DMA snooping and allocation in the CPU caches.
++ *
++ * The use of uncached GPU mappings relies on the correct implementation
++ * of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
++ * will use cached mappings nonetheless. On x86 platforms, this does not
++ * seem to matter, as uncached CPU mappings will snoop the caches in any
++ * case. However, on ARM and arm64, enabling this optimization on a
++ * platform where NoSnoop is ignored results in loss of coherency, which
++ * breaks correct operation of the device. Since we have no way of
++ * detecting whether NoSnoop works or not, just disable this
++ * optimization entirely for ARM and arm64.
++ */
++ return false;
+ #else
+ return true;
+ #endif
+--
+2.19.1
+