--- /dev/null
+From 120cf959308e1bda984e40a9edd25ee2d6262efd Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Wed, 12 Feb 2020 08:51:29 -0500
+Subject: drm/amdgpu/gfx9: disable gfxoff when reading rlc clock
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 120cf959308e1bda984e40a9edd25ee2d6262efd upstream.
+
+Otherwise we read back all ones. This fixes RLC counter
+readback while gfxoff is active.
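+
+As a standalone illustration (a userspace sketch, not part of this
+patch; the helper name and sample values are made up): the counter is
+assembled from two 32-bit MMIO reads, and a register read while the
+GFX block is power-gated returns all ones, so both halves come back
+as 0xffffffff and the composed clock is ~0ULL:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  /* Mirrors the LSB/MSB composition in gfx_v9_0_get_gpu_clock_counter(). */
+  static uint64_t compose_clock(uint32_t lsb, uint32_t msb)
+  {
+          return (uint64_t)lsb | ((uint64_t)msb << 32ULL);
+  }
+
+  int main(void)
+  {
+          /* Plausible halves read with gfxoff disabled. */
+          printf("ok:     0x%016llx\n",
+                 (unsigned long long)compose_clock(0x12345678, 0x1));
+          /* gfxoff active: both MMIO reads return all ones. */
+          printf("gfxoff: 0x%016llx\n",
+                 (unsigned long long)compose_clock(0xffffffff, 0xffffffff));
+          return 0;
+  }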
+
+Reviewed-by: Xiaojie Yuan <xiaojie.yuan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+@@ -3376,11 +3376,13 @@ static uint64_t gfx_v9_0_get_gpu_clock_c
+ {
+ uint64_t clock;
+
++ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
++ amdgpu_gfx_off_ctrl(adev, true);
+ return clock;
+ }
+
--- /dev/null
+From c657b936ea98630ef5ba4f130ab1ad5c534d0165 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Wed, 12 Feb 2020 01:46:16 -0500
+Subject: drm/amdgpu/soc15: fix xclk for raven
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit c657b936ea98630ef5ba4f130ab1ad5c534d0165 upstream.
+
+It's 25 MHz (refclk / 4). This fixes the interpretation
+of the RLC clock counter.
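+
+A quick userspace sketch of why the divisor matters (assuming, for
+illustration only, a 100 MHz reference clock; the variable names and
+sample delta are made up): time computed from a counter delta is off
+by 4x if the counter is assumed to tick at refclk instead of refclk / 4:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          uint64_t ticks = 2500000;            /* delta between two counter reads */
+          uint32_t refclk_khz = 100000;        /* assumed reference clock, 100 MHz */
+          uint32_t xclk_khz = refclk_khz / 4;  /* Raven: counter ticks at 25 MHz */
+
+          /* ns = ticks * 1000000 / khz */
+          printf("assuming refclk: %llu ns\n",
+                 (unsigned long long)(ticks * 1000000ULL / refclk_khz));
+          printf("using xclk:      %llu ns\n",
+                 (unsigned long long)(ticks * 1000000ULL / xclk_khz));
+          return 0;
+  }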
+
+Acked-by: Evan Quan <evan.quan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
+@@ -205,7 +205,12 @@ static u32 soc15_get_config_memsize(stru
+
+ static u32 soc15_get_xclk(struct amdgpu_device *adev)
+ {
+- return adev->clock.spll.reference_freq;
++ u32 reference_clock = adev->clock.spll.reference_freq;
++
++ if (adev->asic_type == CHIP_RAVEN)
++ return reference_clock / 4;
++
++ return reference_clock;
+ }
+
+
--- /dev/null
+From 76073c646f5f4999d763f471df9e38a5a912d70d Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gshan@redhat.com>
+Date: Thu, 20 Feb 2020 20:04:24 -0800
+Subject: mm/vmscan.c: don't round up scan size for online memory cgroup
+
+From: Gavin Shan <gshan@redhat.com>
+
+commit 76073c646f5f4999d763f471df9e38a5a912d70d upstream.
+
+Commit 68600f623d69 ("mm: don't miss the last page because of round-off
+error") makes the scan size calculation round up against @denominator
+regardless of the memory cgroup's state, online or offline. This
+changes the overall reclaiming behavior: with the former formula, the
+corresponding LRU list was eligible for reclaiming only when its size
+logically right shifted by @sc->priority was bigger than zero.
+
+For example, the inactive anonymous LRU list needs at least 0x4000
+pages to be eligible for reclaiming with swappiness/priority of 60/12
+and without taking the scan/rotation ratio into account.
+
+After the roundup is applied, the inactive anonymous LRU list becomes
+eligible for reclaiming under the same conditions as soon as its size
+reaches 0x1000 pages:
+
+  (0x4000 >> 12) * 60 / (60 + 140 + 1) = 1
+  ((0x1000 >> 12) * 60 + 200) / (60 + 140 + 1) = 1
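+
+The arithmetic can be checked with a small standalone program (a
+sketch; div_round_up mimics DIV64_U64_ROUND_UP, and denominator
+60 + 140 + 1 = 201 and priority 12 match the formulas above):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  static uint64_t div_floor(uint64_t n, uint64_t d)    { return n / d; }
+  static uint64_t div_round_up(uint64_t n, uint64_t d) { return (n + d - 1) / d; }
+
+  int main(void)
+  {
+          uint64_t sizes[] = { 0x4000, 0x1000 };  /* LRU list sizes in pages */
+          uint64_t fraction = 60, denominator = 60 + 140 + 1;
+          int priority = 12;
+
+          for (int i = 0; i < 2; i++) {
+                  uint64_t scan = sizes[i] >> priority;
+                  printf("size 0x%04llx: floor=%llu round_up=%llu\n",
+                         (unsigned long long)sizes[i],
+                         (unsigned long long)div_floor(scan * fraction, denominator),
+                         (unsigned long long)div_round_up(scan * fraction, denominator));
+          }
+          return 0;
+  }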
+
+aarch64 has a 512MB huge page size when the base page size is 64KB. A
+memory cgroup that holds a huge page is therefore always eligible for
+reclaiming in that case.
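+
+For instance, a single 512MB huge page is 0x2000 base pages, and with
+the roundup
+
+  ((0x2000 >> 12) * 60 + 200) / (60 + 140 + 1) = 1
+
+while the former formula gives 120 / 201 = 0, so the list would have
+been skipped.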
+
+Reclaiming is likely to stop after the huge page is reclaimed, meaning
+the further iteration on @sc->priority and on the sibling and child
+memory cgroups will be skipped. The overall behavior has changed.
+This patch fixes the issue by applying the roundup to offlined memory
+cgroups only, giving more preference to reclaiming memory from offlined
+memory cgroups. That seems reasonable, as their memory is unlikely to
+be used by anyone.
+
+The issue was found by starting up 8 VMs on an Ampere Mustang machine,
+which has 8 CPUs and 16GB of memory. Each VM is given 2 vCPUs and 2GB
+of memory. It took 264 seconds for all VMs to come up completely, and
+784MB of swap was consumed by then. With this patch applied, it took
+236 seconds and 60MB of swap to do the same thing, a 10% performance
+improvement in this case. Note that KSM was disabled and THP was
+enabled during testing.
+
+Without the patch:
+
+         total   used   free  shared  buff/cache  available
+  Mem:   16196  10065   2049      16        4081       3749
+  Swap:   8175    784   7391
+
+With the patch:
+
+         total   used   free  shared  buff/cache  available
+  Mem:   16196  11324   3656      24        1215       2936
+  Swap:   8175     60   8115
+
+Link: http://lkml.kernel.org/r/20200211024514.8730-1-gshan@redhat.com
+Fixes: 68600f623d69 ("mm: don't miss the last page because of round-off error")
+Signed-off-by: Gavin Shan <gshan@redhat.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org> [4.20+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2446,10 +2446,13 @@ out:
+ /*
+ * Scan types proportional to swappiness and
+ * their relative recent reclaim efficiency.
+- * Make sure we don't miss the last page
+- * because of a round-off error.
++ * Make sure we don't miss the last page on
++ * the offlined memory cgroups because of a
++ * round-off error.
+ */
+- scan = DIV64_U64_ROUND_UP(scan * fraction[file],
++ scan = mem_cgroup_online(memcg) ?
++ div64_u64(scan * fraction[file], denominator) :
++ DIV64_U64_ROUND_UP(scan * fraction[file],
+ denominator);
+ break;
+ case SCAN_FILE: