--- /dev/null
+From 3ca001aff0878546494d7f403334c8d987924977 Mon Sep 17 00:00:00 2001
+From: Aaron Liu <aaron.liu@amd.com>
+Date: Mon, 23 Aug 2021 12:26:50 +0800
+Subject: drm/amd/display: setup system context for APUs
+
+From: Aaron Liu <aaron.liu@amd.com>
+
+commit 3ca001aff0878546494d7f403334c8d987924977 upstream.
+
+Scatter/gather is an APU feature starting from Carrizo.
+adev->apu_flags is not used for all APUs, whereas
+adev->flags & AMD_IS_APU can be used for all APUs.
+
+Signed-off-by: Aaron Liu <aaron.liu@amd.com>
+Reviewed-by: Huang Rui <ray.huang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -1202,7 +1202,7 @@ static int amdgpu_dm_init(struct amdgpu_
+ dc_hardware_init(adev->dm.dc);
+
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
+- if (adev->apu_flags) {
++ if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) {
+ struct dc_phy_addr_space_config pa_config;
+
+ mmhub_read_system_context(adev, &pa_config);
--- /dev/null
+From a7a9d11e12fcc32160d55e8612e72e5ab51b15dc Mon Sep 17 00:00:00 2001
+From: "Jerry (Fangzhi) Zuo" <Jerry.Zuo@amd.com>
+Date: Wed, 17 Jun 2020 20:34:33 -0400
+Subject: drm/amd/display: Update bounding box states (v2)
+
+From: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com>
+
+commit a7a9d11e12fcc32160d55e8612e72e5ab51b15dc upstream.
+
+[Why]
+Drop hardcoded dispclk, dppclk, phyclk
+
+[How]
+Read the corresponding values from clock table entries already populated.
+
+Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1403
+Cc: stable@vger.kernel.org
+Signed-off-by: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com>
+Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 41 +++++++++++++-----
+ 1 file changed, 31 insertions(+), 10 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+@@ -2398,16 +2398,37 @@ void dcn30_update_bw_bounding_box(struct
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ if (bw_params->clk_table.entries[0].memclk_mhz) {
++ int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+- if (bw_params->clk_table.entries[1].dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
++ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
++ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
++ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
++ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
++ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
++ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
++ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
++ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
++ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
++ }
++
++ if (!max_dcfclk_mhz)
++ max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
++ if (!max_dispclk_mhz)
++ max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
++ if (!max_dppclk_mhz)
++ max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
++ if (!max_phyclk_mhz)
++ max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
++
++ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+- dcfclk_sta_targets[num_dcfclk_sta_targets] = bw_params->clk_table.entries[1].dcfclk_mhz;
++ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+- } else if (bw_params->clk_table.entries[1].dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
++ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+- if (dcfclk_sta_targets[i] > bw_params->clk_table.entries[1].dcfclk_mhz) {
+- dcfclk_sta_targets[i] = bw_params->clk_table.entries[1].dcfclk_mhz;
++ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
++ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+@@ -2447,7 +2468,7 @@ void dcn30_update_bw_bounding_box(struct
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+- if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= bw_params->clk_table.entries[1].dcfclk_mhz) {
++ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+@@ -2462,7 +2483,7 @@ void dcn30_update_bw_bounding_box(struct
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+- optimal_dcfclk_for_uclk[j] <= bw_params->clk_table.entries[1].dcfclk_mhz) {
++ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+@@ -2475,9 +2496,9 @@ void dcn30_update_bw_bounding_box(struct
+ dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* Fill all states with max values of all other clocks */
+- dcn3_0_soc.clock_limits[i].dispclk_mhz = bw_params->clk_table.entries[1].dispclk_mhz;
+- dcn3_0_soc.clock_limits[i].dppclk_mhz = bw_params->clk_table.entries[1].dppclk_mhz;
+- dcn3_0_soc.clock_limits[i].phyclk_mhz = bw_params->clk_table.entries[1].phyclk_mhz;
++ dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
++ dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
++ dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
+ /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
+ /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
--- /dev/null
+From 0bbf06d888734041e813b916d7821acd4f72005a Mon Sep 17 00:00:00 2001
+From: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Date: Tue, 24 Aug 2021 15:10:50 -0400
+Subject: drm/amd/display: Update number of DCN3 clock states
+
+From: Aurabindo Pillai <aurabindo.pillai@amd.com>
+
+commit 0bbf06d888734041e813b916d7821acd4f72005a upstream.
+
+[Why & How]
+The DCN3 SoC parameter num_states was calculated but not saved into the
+object.
+
+Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1403
+Cc: stable@vger.kernel.org
+Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+@@ -2467,6 +2467,7 @@ void dcn30_update_bw_bounding_box(struct
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
++ dcn3_0_soc.num_states = num_states;
+ for (i = 0; i < dcn3_0_soc.num_states; i++) {
+ dcn3_0_soc.clock_limits[i].state = i;
+ dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
--- /dev/null
+From c5d3c9a093d353e7f38183a70df425f92e3c329d Mon Sep 17 00:00:00 2001
+From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Date: Thu, 14 Jan 2021 14:49:27 -0500
+Subject: drm/amdgpu: Enable S/G for Yellow Carp
+
+From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+
+commit c5d3c9a093d353e7f38183a70df425f92e3c329d upstream.
+
+Missing code for Yellow Carp to enable scatter gather - follows how
+DCN21 support was added.
+
+Tested that 8k framebuffer allocation and display can now succeed after
+applying the patch.
+
+v2: Add hookup in DM
+
+Reviewed-by: Aaron Liu <aaron.liu@amd.com>
+Acked-by: Huang Rui <ray.huang@amd.com>
+Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+@@ -522,6 +522,7 @@ uint32_t amdgpu_display_supported_domain
+ break;
+ case CHIP_RENOIR:
+ case CHIP_VANGOGH:
++ case CHIP_YELLOW_CARP:
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+ break;
+
--- /dev/null
+From 703677d9345d87d7288ed8a2483ca424af7d4b3b Mon Sep 17 00:00:00 2001
+From: xinhui pan <xinhui.pan@amd.com>
+Date: Tue, 31 Aug 2021 13:49:59 +0800
+Subject: drm/amdgpu: Fix a deadlock if previous GEM object allocation fails
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: xinhui pan <xinhui.pan@amd.com>
+
+commit 703677d9345d87d7288ed8a2483ca424af7d4b3b upstream.
+
+Fall through to handle the error instead of return.
+
+Fixes: f8aab60422c37 ("drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs")
+Cc: stable@vger.kernel.org
+Signed-off-by: xinhui pan <xinhui.pan@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 23 ++++++++++-------------
+ 1 file changed, 10 insertions(+), 13 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+@@ -341,21 +341,18 @@ retry:
+ r = amdgpu_gem_object_create(adev, size, args->in.alignment,
+ initial_domain,
+ flags, ttm_bo_type_device, resv, &gobj);
+- if (r) {
+- if (r != -ERESTARTSYS) {
+- if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+- flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+- goto retry;
+- }
++ if (r && r != -ERESTARTSYS) {
++ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
++ flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
++ goto retry;
++ }
+
+- if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+- initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+- goto retry;
+- }
+- DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
+- size, initial_domain, args->in.alignment, r);
++ if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
++ initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
++ goto retry;
+ }
+- return r;
++ DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
++ size, initial_domain, args->in.alignment, r);
+ }
+
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
--- /dev/null
+From ea7acd7c5967542353430947f3faf699e70602e5 Mon Sep 17 00:00:00 2001
+From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Date: Tue, 22 Jun 2021 12:23:38 -0400
+Subject: drm/amdgpu: Fix BUG_ON assert
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+
+commit ea7acd7c5967542353430947f3faf699e70602e5 upstream.
+
+With the CPU domain added to the placement you can
+now have 3 placements at once.
+
+CC: stable@kernel.org
+Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210622162339.761651-5-andrey.grodzovsky@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -196,7 +196,7 @@ void amdgpu_bo_placement_from_domain(str
+ c++;
+ }
+
+- BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
++ BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS);
+
+ placement->num_placement = c;
+ placement->placement = places;
--- /dev/null
+From d6043581e1d9d0507a8413a302db0e35c8506e0e Mon Sep 17 00:00:00 2001
+From: Alex Sierra <alex.sierra@amd.com>
+Date: Tue, 31 Aug 2021 21:05:02 -0500
+Subject: drm/amdkfd: drop process ref count when xnack disable
+
+From: Alex Sierra <alex.sierra@amd.com>
+
+commit d6043581e1d9d0507a8413a302db0e35c8506e0e upstream.
+
+During the svm restore pages interrupt handler, the kfd_process ref count
+was never dropped when xnack was disabled. Therefore, the object was never
+released.
+
+Fixes: 2383f56bbe4a ("drm/amdkfd: page table restore through svm API")
+Signed-off-by: Alex Sierra <alex.sierra@amd.com>
+Reviewed-by: Philip Yang <philip.yang@amd.com>
+Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+@@ -2426,7 +2426,8 @@ svm_range_restore_pages(struct amdgpu_de
+ }
+ if (!p->xnack_enabled) {
+ pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+- return -EFAULT;
++ r = -EFAULT;
++ goto out;
+ }
+ svms = &p->svms;
+
--- /dev/null
+From 92bd92c44d0d9be5dcbcda315b4be4b909ed9740 Mon Sep 17 00:00:00 2001
+From: Rajkumar Subbiah <rsubbia@codeaurora.org>
+Date: Tue, 6 Jul 2021 08:30:34 -0700
+Subject: drm/dp_mst: Fix return code on sideband message failure
+
+From: Rajkumar Subbiah <rsubbia@codeaurora.org>
+
+commit 92bd92c44d0d9be5dcbcda315b4be4b909ed9740 upstream.
+
+Commit 2f015ec6eab6 ("drm/dp_mst: Add sideband down request tracing +
+selftests") added some debug code for sideband message tracing. But
+it seems to have unintentionally changed the behavior on sideband message
+failure. It catches and returns failure only if DRM_UT_DP is enabled.
+Otherwise it ignores the error code and returns success. So on an MST
+unplug, the caller is unaware that the clear payload message failed and
+ends up waiting for 4 seconds for the response. Fix the issue by
+returning the proper error code.
+
+Changes in V2:
+-- Revise commit text as review comment
+-- add Fixes text
+
+Changes in V3:
+-- remove "unlikely" optimization
+
+Fixes: 2f015ec6eab6 ("drm/dp_mst: Add sideband down request tracing + selftests")
+Cc: <stable@vger.kernel.org> # v5.5+
+Signed-off-by: Rajkumar Subbiah <rsubbia@codeaurora.org>
+Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
+Reviewed-by: Stephen Boyd <swboyd@chromium.org>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/1625585434-9562-1-git-send-email-khsieh@codeaurora.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_dp_mst_topology.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -2872,11 +2872,13 @@ static int process_single_tx_qlock(struc
+ idx += tosend + 1;
+
+ ret = drm_dp_send_sideband_msg(mgr, up, chunk, idx);
+- if (unlikely(ret) && drm_debug_enabled(DRM_UT_DP)) {
+- struct drm_printer p = drm_debug_printer(DBG_PREFIX);
++ if (ret) {
++ if (drm_debug_enabled(DRM_UT_DP)) {
++ struct drm_printer p = drm_debug_printer(DBG_PREFIX);
+
+- drm_printf(&p, "sideband msg failed to send\n");
+- drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
++ drm_printf(&p, "sideband msg failed to send\n");
++ drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
++ }
+ return ret;
+ }
+
--- /dev/null
+From 147696720eca12ae48d020726208b9a61cdd80bc Mon Sep 17 00:00:00 2001
+From: Thomas Zimmermann <tzimmermann@suse.de>
+Date: Wed, 14 Jul 2021 16:22:28 +0200
+Subject: drm/mgag200: Select clock in PLL update functions
+
+From: Thomas Zimmermann <tzimmermann@suse.de>
+
+commit 147696720eca12ae48d020726208b9a61cdd80bc upstream.
+
+Put the clock-selection code into each of the PLL-update functions to
+make them select the correct pixel clock. Instead of copying the code,
+introduce a new helper WREG_MISC_MASKED, which does masked writes into
+<MISC>. Use it from each individual PLL update function.
+
+The pixel clock for video output was not actually set before programming
+the clock's values. It worked because the device had the correct clock
+pre-set.
+
+v2:
+ * don't duplicate <MISC> update code (Sam)
+
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Fixes: db05f8d3dc87 ("drm/mgag200: Split MISC register update into PLL selection, SYNC and I/O")
+Acked-by: Sam Ravnborg <sam@ravnborg.org>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: Emil Velikov <emil.velikov@collabora.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v5.9+
+Link: https://patchwork.freedesktop.org/patch/msgid/20210714142240.21979-2-tzimmermann@suse.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/mgag200/mgag200_drv.h | 16 ++++++++++++++++
+ drivers/gpu/drm/mgag200/mgag200_mode.c | 20 +++++++++++++-------
+ drivers/gpu/drm/mgag200/mgag200_reg.h | 9 ++++-----
+ 3 files changed, 33 insertions(+), 12 deletions(-)
+
+--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
++++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
+@@ -43,6 +43,22 @@
+ #define ATTR_INDEX 0x1fc0
+ #define ATTR_DATA 0x1fc1
+
++#define WREG_MISC(v) \
++ WREG8(MGA_MISC_OUT, v)
++
++#define RREG_MISC(v) \
++ ((v) = RREG8(MGA_MISC_IN))
++
++#define WREG_MISC_MASKED(v, mask) \
++ do { \
++ u8 misc_; \
++ u8 mask_ = (mask); \
++ RREG_MISC(misc_); \
++ misc_ &= ~mask_; \
++ misc_ |= ((v) & mask_); \
++ WREG_MISC(misc_); \
++ } while (0)
++
+ #define WREG_ATTR(reg, v) \
+ do { \
+ RREG8(0x1fda); \
+--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
++++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
+@@ -174,6 +174,8 @@ static int mgag200_g200_set_plls(struct
+ drm_dbg_kms(dev, "clock: %ld vco: %ld m: %d n: %d p: %d s: %d\n",
+ clock, f_vco, m, n, p, s);
+
++ WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK);
++
+ WREG_DAC(MGA1064_PIX_PLLC_M, m);
+ WREG_DAC(MGA1064_PIX_PLLC_N, n);
+ WREG_DAC(MGA1064_PIX_PLLC_P, (p | (s << 3)));
+@@ -289,6 +291,8 @@ static int mga_g200se_set_plls(struct mg
+ return 1;
+ }
+
++ WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK);
++
+ WREG_DAC(MGA1064_PIX_PLLC_M, m);
+ WREG_DAC(MGA1064_PIX_PLLC_N, n);
+ WREG_DAC(MGA1064_PIX_PLLC_P, p);
+@@ -385,6 +389,8 @@ static int mga_g200wb_set_plls(struct mg
+ }
+ }
+
++ WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK);
++
+ for (i = 0; i <= 32 && pll_locked == false; i++) {
+ if (i > 0) {
+ WREG8(MGAREG_CRTC_INDEX, 0x1e);
+@@ -522,6 +528,8 @@ static int mga_g200ev_set_plls(struct mg
+ }
+ }
+
++ WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK);
++
+ WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL);
+ tmp = RREG8(DAC_DATA);
+ tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS;
+@@ -654,6 +662,9 @@ static int mga_g200eh_set_plls(struct mg
+ }
+ }
+ }
++
++ WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK);
++
+ for (i = 0; i <= 32 && pll_locked == false; i++) {
+ WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL);
+ tmp = RREG8(DAC_DATA);
+@@ -754,6 +765,8 @@ static int mga_g200er_set_plls(struct mg
+ }
+ }
+
++ WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK);
++
+ WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL);
+ tmp = RREG8(DAC_DATA);
+ tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS;
+@@ -787,8 +800,6 @@ static int mga_g200er_set_plls(struct mg
+
+ static int mgag200_crtc_set_plls(struct mga_device *mdev, long clock)
+ {
+- u8 misc;
+-
+ switch(mdev->type) {
+ case G200_PCI:
+ case G200_AGP:
+@@ -808,11 +819,6 @@ static int mgag200_crtc_set_plls(struct
+ return mga_g200er_set_plls(mdev, clock);
+ }
+
+- misc = RREG8(MGA_MISC_IN);
+- misc &= ~MGAREG_MISC_CLK_SEL_MASK;
+- misc |= MGAREG_MISC_CLK_SEL_MGA_MSK;
+- WREG8(MGA_MISC_OUT, misc);
+-
+ return 0;
+ }
+
+--- a/drivers/gpu/drm/mgag200/mgag200_reg.h
++++ b/drivers/gpu/drm/mgag200/mgag200_reg.h
+@@ -222,11 +222,10 @@
+
+ #define MGAREG_MISC_IOADSEL (0x1 << 0)
+ #define MGAREG_MISC_RAMMAPEN (0x1 << 1)
+-#define MGAREG_MISC_CLK_SEL_MASK GENMASK(3, 2)
+-#define MGAREG_MISC_CLK_SEL_VGA25 (0x0 << 2)
+-#define MGAREG_MISC_CLK_SEL_VGA28 (0x1 << 2)
+-#define MGAREG_MISC_CLK_SEL_MGA_PIX (0x2 << 2)
+-#define MGAREG_MISC_CLK_SEL_MGA_MSK (0x3 << 2)
++#define MGAREG_MISC_CLKSEL_MASK GENMASK(3, 2)
++#define MGAREG_MISC_CLKSEL_VGA25 (0x0 << 2)
++#define MGAREG_MISC_CLKSEL_VGA28 (0x1 << 2)
++#define MGAREG_MISC_CLKSEL_MGA (0x3 << 2)
+ #define MGAREG_MISC_VIDEO_DIS (0x1 << 4)
+ #define MGAREG_MISC_HIGH_PG_SEL (0x1 << 5)
+ #define MGAREG_MISC_HSYNCPOL BIT(6)
--- /dev/null
+From cb0927ab80d224c9074f53d1a55b087d12ec5a85 Mon Sep 17 00:00:00 2001
+From: David Heidelberg <david@ixit.cz>
+Date: Wed, 11 Aug 2021 19:06:31 +0200
+Subject: drm/msi/mdp4: populate priv->kms in mdp4_kms_init
+
+From: David Heidelberg <david@ixit.cz>
+
+commit cb0927ab80d224c9074f53d1a55b087d12ec5a85 upstream.
+
+Without this fix, boot throws a NULL ptr exception at
+msm_dsi_manager_setup_encoder on devices like the Nexus 7 2013 (MDP4 v4.4).
+
+Fixes: 03436e3ec69c ("drm/msm/dsi: Move setup_encoder to modeset_init")
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: David Heidelberg <david@ixit.cz>
+Link: https://lore.kernel.org/r/20210811170631.39296-1-david@ixit.cz
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
++++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+@@ -399,6 +399,7 @@ struct msm_kms *mdp4_kms_init(struct drm
+ {
+ struct platform_device *pdev = to_platform_device(dev->dev);
+ struct mdp4_platform_config *config = mdp4_get_config(pdev);
++ struct msm_drm_private *priv = dev->dev_private;
+ struct mdp4_kms *mdp4_kms;
+ struct msm_kms *kms = NULL;
+ struct msm_gem_address_space *aspace;
+@@ -418,7 +419,8 @@ struct msm_kms *mdp4_kms_init(struct drm
+ goto fail;
+ }
+
+- kms = &mdp4_kms->base.base;
++ priv->kms = &mdp4_kms->base.base;
++ kms = priv->kms;
+
+ mdp4_kms->dev = dev;
+
--- /dev/null
+From 5bccb945f38b2aff334619b23b50bb0a6a9995a5 Mon Sep 17 00:00:00 2001
+From: Kalyan Thota <kalyan_t@codeaurora.org>
+Date: Wed, 4 Aug 2021 02:40:28 -0700
+Subject: drm/msm/disp/dpu1: add safe lut config in dpu driver
+
+From: Kalyan Thota <kalyan_t@codeaurora.org>
+
+commit 5bccb945f38b2aff334619b23b50bb0a6a9995a5 upstream.
+
+Add safe lut configuration for all the targets in dpu
+driver as per QOS recommendation.
+
+Issue reported on SC7280:
+
+With wait-for-safe feature in smmu enabled, RT client
+buffer levels are checked to be safe before smmu invalidation.
+Since display was always set to unsafe, it was delaying the
+invalidation process, thus impacting the performance on NRT clients
+such as eMMC and NVMe.
+
+Validated this change on SC7280, With this change eMMC performance
+has improved significantly.
+
+Changes in v2:
+- Add fixes tag (Sai)
+- CC stable kernel (Dimtry)
+
+Changes in v3:
+- Correct fixes tag with appropriate hash (stephen)
+- Resend patch adding reviewed by tag
+- Resend patch adding correct format for pushing into stable tree (Greg)
+
+Fixes: 591e34a091d1 ("drm/msm/disp/dpu1: add support for display for SC7280 target")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kalyan Thota <kalyan_t@codeaurora.org>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org> (sc7280, sc7180)
+Link: https://lore.kernel.org/r/1628070028-2616-1-git-send-email-kalyan_t@codeaurora.org
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+@@ -974,6 +974,7 @@ static const struct dpu_perf_cfg sdm845_
+ .amortizable_threshold = 25,
+ .min_prefill_lines = 24,
+ .danger_lut_tbl = {0xf, 0xffff, 0x0},
++ .safe_lut_tbl = {0xfff0, 0xf000, 0xffff},
+ .qos_lut_tbl = {
+ {.nentry = ARRAY_SIZE(sdm845_qos_linear),
+ .entries = sdm845_qos_linear
+@@ -1001,6 +1002,7 @@ static const struct dpu_perf_cfg sc7180_
+ .min_dram_ib = 1600000,
+ .min_prefill_lines = 24,
+ .danger_lut_tbl = {0xff, 0xffff, 0x0},
++ .safe_lut_tbl = {0xfff0, 0xff00, 0xffff},
+ .qos_lut_tbl = {
+ {.nentry = ARRAY_SIZE(sc7180_qos_linear),
+ .entries = sc7180_qos_linear
+@@ -1028,6 +1030,7 @@ static const struct dpu_perf_cfg sm8150_
+ .min_dram_ib = 800000,
+ .min_prefill_lines = 24,
+ .danger_lut_tbl = {0xf, 0xffff, 0x0},
++ .safe_lut_tbl = {0xfff8, 0xf000, 0xffff},
+ .qos_lut_tbl = {
+ {.nentry = ARRAY_SIZE(sm8150_qos_linear),
+ .entries = sm8150_qos_linear
+@@ -1056,6 +1059,7 @@ static const struct dpu_perf_cfg sm8250_
+ .min_dram_ib = 800000,
+ .min_prefill_lines = 35,
+ .danger_lut_tbl = {0xf, 0xffff, 0x0},
++ .safe_lut_tbl = {0xfff0, 0xff00, 0xffff},
+ .qos_lut_tbl = {
+ {.nentry = ARRAY_SIZE(sc7180_qos_linear),
+ .entries = sc7180_qos_linear
+@@ -1084,6 +1088,7 @@ static const struct dpu_perf_cfg sc7280_
+ .min_dram_ib = 1600000,
+ .min_prefill_lines = 24,
+ .danger_lut_tbl = {0xffff, 0xffff, 0x0},
++ .safe_lut_tbl = {0xff00, 0xff00, 0xffff},
+ .qos_lut_tbl = {
+ {.nentry = ARRAY_SIZE(sc7180_qos_macrotile),
+ .entries = sc7180_qos_macrotile
--- /dev/null
+From bd7ffbc3ca12629aeb66fb9e28cf42b7f37e3e3b Mon Sep 17 00:00:00 2001
+From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+Date: Tue, 24 Aug 2021 13:30:27 -0400
+Subject: drm/panfrost: Clamp lock region to Bifrost minimum
+
+From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+
+commit bd7ffbc3ca12629aeb66fb9e28cf42b7f37e3e3b upstream.
+
+When locking a region, we currently clamp to PAGE_SIZE as the minimum
+lock region. While this is valid for Midgard, it is invalid for Bifrost,
+where the minimum locking size is 8x larger than the 4k page size. Add a
+hardware definition for the minimum lock region size (corresponding to
+KBASE_LOCK_REGION_MIN_SIZE_LOG2 in kbase) and respect it.
+
+Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+Tested-by: Chris Morgan <macromorgan@hotmail.com>
+Reviewed-by: Steven Price <steven.price@arm.com>
+Reviewed-by: Rob Herring <robh@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Steven Price <steven.price@arm.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210824173028.7528-4-alyssa.rosenzweig@collabora.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +-
+ drivers/gpu/drm/panfrost/panfrost_regs.h | 2 ++
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+@@ -63,7 +63,7 @@ static void lock_region(struct panfrost_
+ /* The size is encoded as ceil(log2) minus(1), which may be calculated
+ * with fls. The size must be clamped to hardware bounds.
+ */
+- size = max_t(u64, size, PAGE_SIZE);
++ size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE);
+ region_width = fls64(size - 1) - 1;
+ region |= region_width;
+
+--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
++++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
+@@ -319,6 +319,8 @@
+ #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8)
+ #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8)
+
++#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15)
++
+ #define gpu_write(dev, reg, data) writel(data, dev->iomem + reg)
+ #define gpu_read(dev, reg) readl(dev->iomem + reg)
+
--- /dev/null
+From 7fdc48cc63a30fa3480d18bdd8c5fff2b9b15212 Mon Sep 17 00:00:00 2001
+From: Boris Brezillon <boris.brezillon@collabora.com>
+Date: Mon, 21 Jun 2021 15:38:56 +0200
+Subject: drm/panfrost: Make sure MMU context lifetime is not bound to panfrost_priv
+
+From: Boris Brezillon <boris.brezillon@collabora.com>
+
+commit 7fdc48cc63a30fa3480d18bdd8c5fff2b9b15212 upstream.
+
+Jobs can be in-flight when the file descriptor is closed (either because
+the process did not terminate properly, or because it didn't wait for
+all GPU jobs to be finished), and apparently panfrost_job_close() does
+not cancel already running jobs. Let's refcount the MMU context object
+so its lifetime is no longer bound to the FD lifetime and running jobs
+can finish properly without generating spurious page faults.
+
+Reported-by: Icecream95 <ixn@keemail.me>
+Fixes: 7282f7645d06 ("drm/panfrost: Implement per FD address spaces")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
+Reviewed-by: Steven Price <steven.price@arm.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210621133907.1683899-2-boris.brezillon@collabora.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/panfrost/panfrost_device.h | 8 -
+ drivers/gpu/drm/panfrost/panfrost_drv.c | 50 +--------
+ drivers/gpu/drm/panfrost/panfrost_gem.c | 20 +--
+ drivers/gpu/drm/panfrost/panfrost_job.c | 4
+ drivers/gpu/drm/panfrost/panfrost_mmu.c | 160 +++++++++++++++++++----------
+ drivers/gpu/drm/panfrost/panfrost_mmu.h | 5
+ 6 files changed, 136 insertions(+), 111 deletions(-)
+
+--- a/drivers/gpu/drm/panfrost/panfrost_device.h
++++ b/drivers/gpu/drm/panfrost/panfrost_device.h
+@@ -121,8 +121,12 @@ struct panfrost_device {
+ };
+
+ struct panfrost_mmu {
++ struct panfrost_device *pfdev;
++ struct kref refcount;
+ struct io_pgtable_cfg pgtbl_cfg;
+ struct io_pgtable_ops *pgtbl_ops;
++ struct drm_mm mm;
++ spinlock_t mm_lock;
+ int as;
+ atomic_t as_count;
+ struct list_head list;
+@@ -133,9 +137,7 @@ struct panfrost_file_priv {
+
+ struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
+
+- struct panfrost_mmu mmu;
+- struct drm_mm mm;
+- spinlock_t mm_lock;
++ struct panfrost_mmu *mmu;
+ };
+
+ static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
+--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
+@@ -417,7 +417,7 @@ static int panfrost_ioctl_madvise(struct
+ * anyway, so let's not bother.
+ */
+ if (!list_is_singular(&bo->mappings.list) ||
+- WARN_ON_ONCE(first->mmu != &priv->mmu)) {
++ WARN_ON_ONCE(first->mmu != priv->mmu)) {
+ ret = -EINVAL;
+ goto out_unlock_mappings;
+ }
+@@ -449,32 +449,6 @@ int panfrost_unstable_ioctl_check(void)
+ return 0;
+ }
+
+-#define PFN_4G (SZ_4G >> PAGE_SHIFT)
+-#define PFN_4G_MASK (PFN_4G - 1)
+-#define PFN_16M (SZ_16M >> PAGE_SHIFT)
+-
+-static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node,
+- unsigned long color,
+- u64 *start, u64 *end)
+-{
+- /* Executable buffers can't start or end on a 4GB boundary */
+- if (!(color & PANFROST_BO_NOEXEC)) {
+- u64 next_seg;
+-
+- if ((*start & PFN_4G_MASK) == 0)
+- (*start)++;
+-
+- if ((*end & PFN_4G_MASK) == 0)
+- (*end)--;
+-
+- next_seg = ALIGN(*start, PFN_4G);
+- if (next_seg - *start <= PFN_16M)
+- *start = next_seg + 1;
+-
+- *end = min(*end, ALIGN(*start, PFN_4G) - 1);
+- }
+-}
+-
+ static int
+ panfrost_open(struct drm_device *dev, struct drm_file *file)
+ {
+@@ -489,15 +463,11 @@ panfrost_open(struct drm_device *dev, st
+ panfrost_priv->pfdev = pfdev;
+ file->driver_priv = panfrost_priv;
+
+- spin_lock_init(&panfrost_priv->mm_lock);
+-
+- /* 4G enough for now. can be 48-bit */
+- drm_mm_init(&panfrost_priv->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT);
+- panfrost_priv->mm.color_adjust = panfrost_drm_mm_color_adjust;
+-
+- ret = panfrost_mmu_pgtable_alloc(panfrost_priv);
+- if (ret)
+- goto err_pgtable;
++ panfrost_priv->mmu = panfrost_mmu_ctx_create(pfdev);
++ if (IS_ERR(panfrost_priv->mmu)) {
++ ret = PTR_ERR(panfrost_priv->mmu);
++ goto err_free;
++ }
+
+ ret = panfrost_job_open(panfrost_priv);
+ if (ret)
+@@ -506,9 +476,8 @@ panfrost_open(struct drm_device *dev, st
+ return 0;
+
+ err_job:
+- panfrost_mmu_pgtable_free(panfrost_priv);
+-err_pgtable:
+- drm_mm_takedown(&panfrost_priv->mm);
++ panfrost_mmu_ctx_put(panfrost_priv->mmu);
++err_free:
+ kfree(panfrost_priv);
+ return ret;
+ }
+@@ -521,8 +490,7 @@ panfrost_postclose(struct drm_device *de
+ panfrost_perfcnt_close(file);
+ panfrost_job_close(panfrost_priv);
+
+- panfrost_mmu_pgtable_free(panfrost_priv);
+- drm_mm_takedown(&panfrost_priv->mm);
++ panfrost_mmu_ctx_put(panfrost_priv->mmu);
+ kfree(panfrost_priv);
+ }
+
+--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
++++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
+@@ -60,7 +60,7 @@ panfrost_gem_mapping_get(struct panfrost
+
+ mutex_lock(&bo->mappings.lock);
+ list_for_each_entry(iter, &bo->mappings.list, node) {
+- if (iter->mmu == &priv->mmu) {
++ if (iter->mmu == priv->mmu) {
+ kref_get(&iter->refcount);
+ mapping = iter;
+ break;
+@@ -74,16 +74,13 @@ panfrost_gem_mapping_get(struct panfrost
+ static void
+ panfrost_gem_teardown_mapping(struct panfrost_gem_mapping *mapping)
+ {
+- struct panfrost_file_priv *priv;
+-
+ if (mapping->active)
+ panfrost_mmu_unmap(mapping);
+
+- priv = container_of(mapping->mmu, struct panfrost_file_priv, mmu);
+- spin_lock(&priv->mm_lock);
++ spin_lock(&mapping->mmu->mm_lock);
+ if (drm_mm_node_allocated(&mapping->mmnode))
+ drm_mm_remove_node(&mapping->mmnode);
+- spin_unlock(&priv->mm_lock);
++ spin_unlock(&mapping->mmu->mm_lock);
+ }
+
+ static void panfrost_gem_mapping_release(struct kref *kref)
+@@ -94,6 +91,7 @@ static void panfrost_gem_mapping_release
+
+ panfrost_gem_teardown_mapping(mapping);
+ drm_gem_object_put(&mapping->obj->base.base);
++ panfrost_mmu_ctx_put(mapping->mmu);
+ kfree(mapping);
+ }
+
+@@ -143,11 +141,11 @@ int panfrost_gem_open(struct drm_gem_obj
+ else
+ align = size >= SZ_2M ? SZ_2M >> PAGE_SHIFT : 0;
+
+- mapping->mmu = &priv->mmu;
+- spin_lock(&priv->mm_lock);
+- ret = drm_mm_insert_node_generic(&priv->mm, &mapping->mmnode,
++ mapping->mmu = panfrost_mmu_ctx_get(priv->mmu);
++ spin_lock(&mapping->mmu->mm_lock);
++ ret = drm_mm_insert_node_generic(&mapping->mmu->mm, &mapping->mmnode,
+ size >> PAGE_SHIFT, align, color, 0);
+- spin_unlock(&priv->mm_lock);
++ spin_unlock(&mapping->mmu->mm_lock);
+ if (ret)
+ goto err;
+
+@@ -176,7 +174,7 @@ void panfrost_gem_close(struct drm_gem_o
+
+ mutex_lock(&bo->mappings.lock);
+ list_for_each_entry(iter, &bo->mappings.list, node) {
+- if (iter->mmu == &priv->mmu) {
++ if (iter->mmu == priv->mmu) {
+ mapping = iter;
+ list_del(&iter->node);
+ break;
+--- a/drivers/gpu/drm/panfrost/panfrost_job.c
++++ b/drivers/gpu/drm/panfrost/panfrost_job.c
+@@ -165,7 +165,7 @@ static void panfrost_job_hw_submit(struc
+ return;
+ }
+
+- cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu);
++ cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu);
+
+ job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF);
+ job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32);
+@@ -527,7 +527,7 @@ static irqreturn_t panfrost_job_irq_hand
+ if (job) {
+ pfdev->jobs[j] = NULL;
+
+- panfrost_mmu_as_put(pfdev, &job->file_priv->mmu);
++ panfrost_mmu_as_put(pfdev, job->file_priv->mmu);
+ panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
+
+ dma_fence_signal_locked(job->done_fence);
+--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+@@ -1,5 +1,8 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
++
++#include <drm/panfrost_drm.h>
++
+ #include <linux/atomic.h>
+ #include <linux/bitfield.h>
+ #include <linux/delay.h>
+@@ -337,7 +340,7 @@ static void mmu_tlb_inv_context_s1(void
+
+ static void mmu_tlb_sync_context(void *cookie)
+ {
+- //struct panfrost_device *pfdev = cookie;
++ //struct panfrost_mmu *mmu = cookie;
+ // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X
+ }
+
+@@ -352,57 +355,10 @@ static const struct iommu_flush_ops mmu_
+ .tlb_flush_walk = mmu_tlb_flush_walk,
+ };
+
+-int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv)
+-{
+- struct panfrost_mmu *mmu = &priv->mmu;
+- struct panfrost_device *pfdev = priv->pfdev;
+-
+- INIT_LIST_HEAD(&mmu->list);
+- mmu->as = -1;
+-
+- mmu->pgtbl_cfg = (struct io_pgtable_cfg) {
+- .pgsize_bitmap = SZ_4K | SZ_2M,
+- .ias = FIELD_GET(0xff, pfdev->features.mmu_features),
+- .oas = FIELD_GET(0xff00, pfdev->features.mmu_features),
+- .coherent_walk = pfdev->coherent,
+- .tlb = &mmu_tlb_ops,
+- .iommu_dev = pfdev->dev,
+- };
+-
+- mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg,
+- priv);
+- if (!mmu->pgtbl_ops)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+-void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv)
+-{
+- struct panfrost_device *pfdev = priv->pfdev;
+- struct panfrost_mmu *mmu = &priv->mmu;
+-
+- spin_lock(&pfdev->as_lock);
+- if (mmu->as >= 0) {
+- pm_runtime_get_noresume(pfdev->dev);
+- if (pm_runtime_active(pfdev->dev))
+- panfrost_mmu_disable(pfdev, mmu->as);
+- pm_runtime_put_autosuspend(pfdev->dev);
+-
+- clear_bit(mmu->as, &pfdev->as_alloc_mask);
+- clear_bit(mmu->as, &pfdev->as_in_use_mask);
+- list_del(&mmu->list);
+- }
+- spin_unlock(&pfdev->as_lock);
+-
+- free_io_pgtable_ops(mmu->pgtbl_ops);
+-}
+-
+ static struct panfrost_gem_mapping *
+ addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr)
+ {
+ struct panfrost_gem_mapping *mapping = NULL;
+- struct panfrost_file_priv *priv;
+ struct drm_mm_node *node;
+ u64 offset = addr >> PAGE_SHIFT;
+ struct panfrost_mmu *mmu;
+@@ -415,11 +371,10 @@ addr_to_mapping(struct panfrost_device *
+ goto out;
+
+ found_mmu:
+- priv = container_of(mmu, struct panfrost_file_priv, mmu);
+
+- spin_lock(&priv->mm_lock);
++ spin_lock(&mmu->mm_lock);
+
+- drm_mm_for_each_node(node, &priv->mm) {
++ drm_mm_for_each_node(node, &mmu->mm) {
+ if (offset >= node->start &&
+ offset < (node->start + node->size)) {
+ mapping = drm_mm_node_to_panfrost_mapping(node);
+@@ -429,7 +384,7 @@ found_mmu:
+ }
+ }
+
+- spin_unlock(&priv->mm_lock);
++ spin_unlock(&mmu->mm_lock);
+ out:
+ spin_unlock(&pfdev->as_lock);
+ return mapping;
+@@ -542,6 +497,107 @@ err_bo:
+ return ret;
+ }
+
++static void panfrost_mmu_release_ctx(struct kref *kref)
++{
++ struct panfrost_mmu *mmu = container_of(kref, struct panfrost_mmu,
++ refcount);
++ struct panfrost_device *pfdev = mmu->pfdev;
++
++ spin_lock(&pfdev->as_lock);
++ if (mmu->as >= 0) {
++ pm_runtime_get_noresume(pfdev->dev);
++ if (pm_runtime_active(pfdev->dev))
++ panfrost_mmu_disable(pfdev, mmu->as);
++ pm_runtime_put_autosuspend(pfdev->dev);
++
++ clear_bit(mmu->as, &pfdev->as_alloc_mask);
++ clear_bit(mmu->as, &pfdev->as_in_use_mask);
++ list_del(&mmu->list);
++ }
++ spin_unlock(&pfdev->as_lock);
++
++ free_io_pgtable_ops(mmu->pgtbl_ops);
++ drm_mm_takedown(&mmu->mm);
++ kfree(mmu);
++}
++
++void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu)
++{
++ kref_put(&mmu->refcount, panfrost_mmu_release_ctx);
++}
++
++struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu)
++{
++ kref_get(&mmu->refcount);
++
++ return mmu;
++}
++
++#define PFN_4G (SZ_4G >> PAGE_SHIFT)
++#define PFN_4G_MASK (PFN_4G - 1)
++#define PFN_16M (SZ_16M >> PAGE_SHIFT)
++
++static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node,
++ unsigned long color,
++ u64 *start, u64 *end)
++{
++ /* Executable buffers can't start or end on a 4GB boundary */
++ if (!(color & PANFROST_BO_NOEXEC)) {
++ u64 next_seg;
++
++ if ((*start & PFN_4G_MASK) == 0)
++ (*start)++;
++
++ if ((*end & PFN_4G_MASK) == 0)
++ (*end)--;
++
++ next_seg = ALIGN(*start, PFN_4G);
++ if (next_seg - *start <= PFN_16M)
++ *start = next_seg + 1;
++
++ *end = min(*end, ALIGN(*start, PFN_4G) - 1);
++ }
++}
++
++struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev)
++{
++ struct panfrost_mmu *mmu;
++
++ mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
++ if (!mmu)
++ return ERR_PTR(-ENOMEM);
++
++ mmu->pfdev = pfdev;
++ spin_lock_init(&mmu->mm_lock);
++
++ /* 4G enough for now. can be 48-bit */
++ drm_mm_init(&mmu->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT);
++ mmu->mm.color_adjust = panfrost_drm_mm_color_adjust;
++
++ INIT_LIST_HEAD(&mmu->list);
++ mmu->as = -1;
++
++ mmu->pgtbl_cfg = (struct io_pgtable_cfg) {
++ .pgsize_bitmap = SZ_4K | SZ_2M,
++ .ias = FIELD_GET(0xff, pfdev->features.mmu_features),
++ .oas = FIELD_GET(0xff00, pfdev->features.mmu_features),
++ .coherent_walk = pfdev->coherent,
++ .tlb = &mmu_tlb_ops,
++ .iommu_dev = pfdev->dev,
++ };
++
++ mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg,
++ mmu);
++ if (!mmu->pgtbl_ops) {
++ kfree(mmu);
++ return ERR_PTR(-EINVAL);
++ }
++
++ kref_init(&mmu->refcount);
++
++ return mmu;
++}
++
+ static const char *access_type_name(struct panfrost_device *pfdev,
+ u32 fault_status)
+ {
+--- a/drivers/gpu/drm/panfrost/panfrost_mmu.h
++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h
+@@ -18,7 +18,8 @@ void panfrost_mmu_reset(struct panfrost_
+ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu);
+ void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu);
+
+-int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv);
+-void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv);
++struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu);
++void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu);
++struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev);
+
+ #endif
--- /dev/null
+From b5fab345654c603c07525100d744498f28786929 Mon Sep 17 00:00:00 2001
+From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+Date: Tue, 24 Aug 2021 13:30:25 -0400
+Subject: drm/panfrost: Simplify lock_region calculation
+
+From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+
+commit b5fab345654c603c07525100d744498f28786929 upstream.
+
+In lock_region, simplify the calculation of the region_width parameter.
+This field is the size, but encoded as ceil(log2(size)) - 1.
+ceil(log2(size)) may be computed directly as fls(size - 1). However, we
+want to use the 64-bit versions as the amount to lock can exceed
+32-bits.
+
+This avoids undefined (and completely wrong) behaviour when locking all
+memory (size ~0). In this case, the old code would "round up" ~0 to the
+nearest page, overflowing to 0. Since fls(0) == 0, this would calculate
+a region width of 10 + 0 = 10. But then the code would shift by
+(region_width - 11) = -1. As shifting by a negative number is undefined,
+UBSAN flags the bug. Of course, even if it were defined the behaviour is
+wrong, instead of locking all memory almost none would get locked.
+
+The new form of the calculation corrects this special case and avoids
+the undefined behaviour.
+
+Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+Reported-and-tested-by: Chris Morgan <macromorgan@hotmail.com>
+Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Steven Price <steven.price@arm.com>
+Reviewed-by: Rob Herring <robh@kernel.org>
+Signed-off-by: Steven Price <steven.price@arm.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210824173028.7528-2-alyssa.rosenzweig@collabora.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/panfrost/panfrost_mmu.c | 19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+@@ -59,21 +59,12 @@ static void lock_region(struct panfrost_
+ {
+ u8 region_width;
+ u64 region = iova & PAGE_MASK;
+- /*
+- * fls returns:
+- * 1 .. 32
+- *
+- * 10 + fls(num_pages)
+- * results in the range (11 .. 42)
+- */
+-
+- size = round_up(size, PAGE_SIZE);
+
+- region_width = 10 + fls(size >> PAGE_SHIFT);
+- if ((size >> PAGE_SHIFT) != (1ul << (region_width - 11))) {
+- /* not pow2, so must go up to the next pow2 */
+- region_width += 1;
+- }
++ /* The size is encoded as ceil(log2) minus(1), which may be calculated
++ * with fls. The size must be clamped to hardware bounds.
++ */
++ size = max_t(u64, size, PAGE_SIZE);
++ region_width = fls64(size - 1) - 1;
+ region |= region_width;
+
+ /* Lock the region that needs to be updated */
--- /dev/null
+From a77b58825d7221d4a45c47881c35a47ba003aa73 Mon Sep 17 00:00:00 2001
+From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+Date: Tue, 24 Aug 2021 13:30:26 -0400
+Subject: drm/panfrost: Use u64 for size in lock_region
+
+From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+
+commit a77b58825d7221d4a45c47881c35a47ba003aa73 upstream.
+
+Mali virtual addresses are 48-bit. Use a u64 instead of size_t to ensure
+we can express the "lock everything" condition as ~0ULL without
+overflow. This code was silently broken on any platform where a size_t
+is less than 48-bits; in particular, it was broken on 32-bit armv7
+platforms which remain in use with panfrost. (Mainly RK3288)
+
+Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+Suggested-by: Rob Herring <robh@kernel.org>
+Tested-by: Chris Morgan <macromorgan@hotmail.com>
+Reviewed-by: Steven Price <steven.price@arm.com>
+Reviewed-by: Rob Herring <robh@kernel.org>
+Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Steven Price <steven.price@arm.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210824173028.7528-3-alyssa.rosenzweig@collabora.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/panfrost/panfrost_mmu.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+@@ -55,7 +55,7 @@ static int write_cmd(struct panfrost_dev
+ }
+
+ static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
+- u64 iova, size_t size)
++ u64 iova, u64 size)
+ {
+ u8 region_width;
+ u64 region = iova & PAGE_MASK;
+@@ -75,7 +75,7 @@ static void lock_region(struct panfrost_
+
+
+ static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
+- u64 iova, size_t size, u32 op)
++ u64 iova, u64 size, u32 op)
+ {
+ if (as_nr < 0)
+ return 0;
+@@ -92,7 +92,7 @@ static int mmu_hw_do_operation_locked(st
+
+ static int mmu_hw_do_operation(struct panfrost_device *pfdev,
+ struct panfrost_mmu *mmu,
+- u64 iova, size_t size, u32 op)
++ u64 iova, u64 size, u32 op)
+ {
+ int ret;
+
+@@ -109,7 +109,7 @@ static void panfrost_mmu_enable(struct p
+ u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
+ u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
+
+- mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
++ mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
+
+ mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL);
+ mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32);
+@@ -125,7 +125,7 @@ static void panfrost_mmu_enable(struct p
+
+ static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
+ {
+- mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
++ mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
+
+ mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0);
+ mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0);
+@@ -225,7 +225,7 @@ static size_t get_pgsize(u64 addr, size_
+
+ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+ struct panfrost_mmu *mmu,
+- u64 iova, size_t size)
++ u64 iova, u64 size)
+ {
+ if (mmu->as < 0)
+ return;
--- /dev/null
+From efcefc7127290e7e9fa98dea029163ad8eda8fb3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
+Date: Tue, 31 Aug 2021 09:15:36 +0200
+Subject: drm/ttm: Fix ttm_bo_move_memcpy() for subclassed struct ttm_resource
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+
+commit efcefc7127290e7e9fa98dea029163ad8eda8fb3 upstream.
+
+The code was making a copy of a struct ttm_resource. However, struct
+ttm_resource was recently allowed to be subclassed and also to be
+malloced, hence the driver could end up assuming the copy we handed it
+was subclassed and, worse, the original could have been freed at this
+point.
+
+Fix this by using the original struct ttm_resource before it is
+potentially freed in ttm_bo_move_sync_cleanup().
+
+v2: Base on drm-misc-next-fixes rather than drm-tip.
+
+Reported-by: Ben Skeggs <skeggsb@gmail.com>
+Reported-by: Dave Airlie <airlied@gmail.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: <stable@vger.kernel.org>
+Fixes: 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem")
+Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210831071536.80636-1-thomas.hellstrom@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_bo_util.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
+@@ -143,7 +143,6 @@ int ttm_bo_move_memcpy(struct ttm_buffer
+ struct ttm_resource *src_mem = bo->resource;
+ struct ttm_resource_manager *src_man =
+ ttm_manager_type(bdev, src_mem->mem_type);
+- struct ttm_resource src_copy = *src_mem;
+ union {
+ struct ttm_kmap_iter_tt tt;
+ struct ttm_kmap_iter_linear_io io;
+@@ -173,11 +172,11 @@ int ttm_bo_move_memcpy(struct ttm_buffer
+ }
+
+ ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
+- src_copy = *src_mem;
+- ttm_bo_move_sync_cleanup(bo, dst_mem);
+
+ if (!src_iter->ops->maps_tt)
+- ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, &src_copy);
++ ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem);
++ ttm_bo_move_sync_cleanup(bo, dst_mem);
++
+ out_src_iter:
+ if (!dst_iter->ops->maps_tt)
+ ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem);
--- /dev/null
+From 09a26e832705fdb7a9484495b71a05e0bbc65207 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Thu, 2 Sep 2021 14:58:53 -0700
+Subject: hugetlb: fix hugetlb cgroup refcounting during vma split
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 09a26e832705fdb7a9484495b71a05e0bbc65207 upstream.
+
+Guillaume Morin reported hitting the following WARNING followed by a GPF or
+NULL pointer dereference either in cgroups_destroy or in the kill_css path:
+
+ percpu ref (css_release) <= 0 (-1) after switching to atomic
+ WARNING: CPU: 23 PID: 130 at lib/percpu-refcount.c:196 percpu_ref_switch_to_atomic_rcu+0x127/0x130
+ CPU: 23 PID: 130 Comm: ksoftirqd/23 Kdump: loaded Tainted: G O 5.10.60 #1
+ RIP: 0010:percpu_ref_switch_to_atomic_rcu+0x127/0x130
+ Call Trace:
+ rcu_core+0x30f/0x530
+ rcu_core_si+0xe/0x10
+ __do_softirq+0x103/0x2a2
+ run_ksoftirqd+0x2b/0x40
+ smpboot_thread_fn+0x11a/0x170
+ kthread+0x10a/0x140
+ ret_from_fork+0x22/0x30
+
+Upon further examination, it was discovered that the css structure was
+associated with hugetlb reservations.
+
+For private hugetlb mappings the vma points to a reserve map that
+contains a pointer to the css. At mmap time, reservations are set up
+and a reference to the css is taken. This reference is dropped in the
+vma close operation; hugetlb_vm_op_close. However, if a vma is split no
+additional reference to the css is taken yet hugetlb_vm_op_close will be
+called twice for the split vma resulting in an underflow.
+
+Fix by taking another reference in hugetlb_vm_op_open. Note that the
+reference is only taken for the owner of the reserve map. In the more
+common fork case, the pointer to the reserve map is cleared for
+non-owning vmas.
+
+Link: https://lkml.kernel.org/r/20210830215015.155224-1-mike.kravetz@oracle.com
+Fixes: e9fe92ae0cd2 ("hugetlb_cgroup: add reservation accounting for private mappings")
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reported-by: Guillaume Morin <guillaume@morinfr.org>
+Suggested-by: Guillaume Morin <guillaume@morinfr.org>
+Tested-by: Guillaume Morin <guillaume@morinfr.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb_cgroup.h | 12 ++++++++++++
+ mm/hugetlb.c | 4 +++-
+ 2 files changed, 15 insertions(+), 1 deletion(-)
+
+--- a/include/linux/hugetlb_cgroup.h
++++ b/include/linux/hugetlb_cgroup.h
+@@ -121,6 +121,13 @@ static inline void hugetlb_cgroup_put_rs
+ css_put(&h_cg->css);
+ }
+
++static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
++ struct resv_map *resv_map)
++{
++ if (resv_map->css)
++ css_get(resv_map->css);
++}
++
+ extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup **ptr);
+ extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
+@@ -199,6 +206,11 @@ static inline void hugetlb_cgroup_put_rs
+ {
+ }
+
++static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
++ struct resv_map *resv_map)
++{
++}
++
+ static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup **ptr)
+ {
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4033,8 +4033,10 @@ static void hugetlb_vm_op_open(struct vm
+ * after this open call completes. It is therefore safe to take a
+ * new reference here without additional locking.
+ */
+- if (resv && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
++ if (resv && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
++ resv_map_dup_hugetlb_cgroup_uncharge_info(resv);
+ kref_get(&resv->refs);
++ }
+ }
+
+ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
--- /dev/null
+From f9398f15605a50110bf570aaa361163a85113dd1 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Fri, 23 Jul 2021 15:19:31 -0700
+Subject: lib/test_stackinit: Fix static initializer test
+
+From: Kees Cook <keescook@chromium.org>
+
+commit f9398f15605a50110bf570aaa361163a85113dd1 upstream.
+
+The static initializer test got accidentally converted to a dynamic
+initializer. Fix this and retain the giant padding hole without using
+an aligned struct member.
+
+Fixes: 50ceaa95ea09 ("lib: Introduce test_stackinit module")
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210723221933.3431999-2-keescook@chromium.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/test_stackinit.c | 20 +++++++-------------
+ 1 file changed, 7 insertions(+), 13 deletions(-)
+
+--- a/lib/test_stackinit.c
++++ b/lib/test_stackinit.c
+@@ -67,10 +67,10 @@ static bool range_contains(char *haystac
+ #define INIT_STRUCT_none /**/
+ #define INIT_STRUCT_zero = { }
+ #define INIT_STRUCT_static_partial = { .two = 0, }
+-#define INIT_STRUCT_static_all = { .one = arg->one, \
+- .two = arg->two, \
+- .three = arg->three, \
+- .four = arg->four, \
++#define INIT_STRUCT_static_all = { .one = 0, \
++ .two = 0, \
++ .three = 0, \
++ .four = 0, \
+ }
+ #define INIT_STRUCT_dynamic_partial = { .two = arg->two, }
+ #define INIT_STRUCT_dynamic_all = { .one = arg->one, \
+@@ -84,8 +84,7 @@ static bool range_contains(char *haystac
+ var.one = 0; \
+ var.two = 0; \
+ var.three = 0; \
+- memset(&var.four, 0, \
+- sizeof(var.four))
++ var.four = 0
+
+ /*
+ * @name: unique string name for the test
+@@ -210,18 +209,13 @@ struct test_small_hole {
+ unsigned long four;
+ };
+
+-/* Try to trigger unhandled padding in a structure. */
+-struct test_aligned {
+- u32 internal1;
+- u64 internal2;
+-} __aligned(64);
+-
++/* Trigger unhandled padding in a structure. */
+ struct test_big_hole {
+ u8 one;
+ u8 two;
+ u8 three;
+ /* 61 byte padding hole here. */
+- struct test_aligned four;
++ u8 four __aligned(64);
+ } __aligned(64);
+
+ struct test_trailing_hole {
--- /dev/null
+From 32b2397c1e56f33b0b1881def965bb89bd12f448 Mon Sep 17 00:00:00 2001
+From: sumiyawang <sumiyawang@tencent.com>
+Date: Sun, 22 Aug 2021 19:49:09 +0800
+Subject: libnvdimm/pmem: Fix crash triggered when I/O in-flight during unbind
+
+From: sumiyawang <sumiyawang@tencent.com>
+
+commit 32b2397c1e56f33b0b1881def965bb89bd12f448 upstream.
+
+There is a use after free crash when the pmem driver tears down its
+mapping while I/O is still inbound.
+
+This is triggered by driver unbind, "ndctl destroy-namespace", while I/O
+is in flight.
+
+Fix the sequence of blk_cleanup_queue() vs memunmap().
+
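+As an aside (not part of the change itself): devres release actions run
+in reverse order of registration, so registering the queue teardown
+after the mapping makes blk_cleanup_queue() drain in-flight I/O before
+the mapping is released. Schematically:
+
+  devm_memremap(dev, ...);                                /* released last  */
+  devm_add_action_or_reset(dev, pmem_release_queue, ...); /* released first */
+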
+The crash signature is of the form:
+
+ BUG: unable to handle page fault for address: ffffc90080200000
+ CPU: 36 PID: 9606 Comm: systemd-udevd
+ Call Trace:
+ ? pmem_do_bvec+0xf9/0x3a0
+ ? xas_alloc+0x55/0xd0
+ pmem_rw_page+0x4b/0x80
+ bdev_read_page+0x86/0xb0
+ do_mpage_readpage+0x5d4/0x7a0
+ ? lru_cache_add+0xe/0x10
+ mpage_readpages+0xf9/0x1c0
+ ? bd_link_disk_holder+0x1a0/0x1a0
+ blkdev_readpages+0x1d/0x20
+ read_pages+0x67/0x1a0
+
+ ndctl Call Trace in vmcore:
+ PID: 23473 TASK: ffff88c4fbbe8000 CPU: 1 COMMAND: "ndctl"
+ __schedule
+ schedule
+ blk_mq_freeze_queue_wait
+ blk_freeze_queue
+ blk_cleanup_queue
+ pmem_release_queue
+ devm_action_release
+ release_nodes
+ devres_release_all
+ device_release_driver_internal
+ device_driver_detach
+ unbind_store
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: sumiyawang <sumiyawang@tencent.com>
+Reviewed-by: yongduan <yongduan@tencent.com>
+Link: https://lore.kernel.org/r/1629632949-14749-1-git-send-email-sumiyawang@tencent.com
+Fixes: 50f44ee7248a ("mm/devm_memremap_pages: fix final page put race")
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvdimm/pmem.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -450,11 +450,11 @@ static int pmem_attach_disk(struct devic
+ pmem->pfn_flags |= PFN_MAP;
+ bb_range = pmem->pgmap.range;
+ } else {
++ addr = devm_memremap(dev, pmem->phys_addr,
++ pmem->size, ARCH_MEMREMAP_PMEM);
+ if (devm_add_action_or_reset(dev, pmem_release_queue,
+ &pmem->pgmap))
+ return -ENOMEM;
+- addr = devm_memremap(dev, pmem->phys_addr,
+- pmem->size, ARCH_MEMREMAP_PMEM);
+ bb_range.start = res->start;
+ bb_range.end = res->end;
+ }
--- /dev/null
+From fab827dbee8c2e06ca4ba000fa6c48bcf9054aba Mon Sep 17 00:00:00 2001
+From: Vasily Averin <vvs@virtuozzo.com>
+Date: Thu, 2 Sep 2021 14:54:57 -0700
+Subject: memcg: enable accounting for pids in nested pid namespaces
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Vasily Averin <vvs@virtuozzo.com>
+
+commit fab827dbee8c2e06ca4ba000fa6c48bcf9054aba upstream.
+
+Commit 5d097056c9a0 ("kmemcg: account certain kmem allocations to memcg")
+enabled memcg accounting for pids allocated from init_pid_ns.pid_cachep,
+but forgot to adjust the setting for nested pid namespaces. As a result,
+pid memory is not accounted exactly where it is really needed, inside
+memcg-limited containers with their own pid namespaces.
+
+The pid struct was one of the first kernel objects enabled for memcg
+accounting: init_pid_ns.pid_cachep is marked with SLAB_ACCOUNT, so one
+would expect any new pid in the system to be memcg-accounted.
+
+However, this turned out to be wrong. Nested pid namespaces create
+their own slab caches for pid objects, because nested pids are larger:
+they contain an id for every ancestor namespace in addition to their
+own. The problem is that these slab caches are _NOT_ marked with
+SLAB_ACCOUNT, so any pids allocated in nested pid namespaces are not
+memcg-accounted.
+
+A pid struct in a nested pid namespace consumes up to 500 bytes of
+memory; 100000 such objects add up to ~50 MB of unaccounted memory,
+which allows a container to exceed its assigned memcg limits.
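+
+As an illustration (not part of the change), a minimal sketch of what
+the flag buys us; the cache name and size below are made up:
+
+  /* With SLAB_ACCOUNT, objects allocated from this cache are charged
+   * to the memcg of the allocating task rather than going unaccounted.
+   */
+  cache = kmem_cache_create("example_pids", 500, 0,
+                            SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);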
+
+Link: https://lkml.kernel.org/r/8b6de616-fd1a-02c6-cbdb-976ecdcfa604@virtuozzo.com
+Fixes: 5d097056c9a0 ("kmemcg: account certain kmem allocations to memcg")
+Cc: stable@vger.kernel.org
+Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/pid_namespace.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/pid_namespace.c
++++ b/kernel/pid_namespace.c
+@@ -51,7 +51,8 @@ static struct kmem_cache *create_pid_cac
+ mutex_lock(&pid_caches_mutex);
+ /* Name collision forces to do allocation under mutex. */
+ if (!*pkc)
+- *pkc = kmem_cache_create(name, len, 0, SLAB_HWCACHE_ALIGN, 0);
++ *pkc = kmem_cache_create(name, len, 0,
++ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 0);
+ mutex_unlock(&pid_caches_mutex);
+ /* current can fail, but someone else can succeed. */
+ return READ_ONCE(*pkc);
--- /dev/null
+From 4b42fb213678d2b6a9eeea92a9be200f23e49583 Mon Sep 17 00:00:00 2001
+From: Li Zhijian <lizhijian@cn.fujitsu.com>
+Date: Wed, 8 Sep 2021 18:10:02 -0700
+Subject: mm/hmm: bypass devmap pte when all pfn requested flags are fulfilled
+
+From: Li Zhijian <lizhijian@cn.fujitsu.com>
+
+commit 4b42fb213678d2b6a9eeea92a9be200f23e49583 upstream.
+
+Previously, we noticed that one rpma example failed[1] since commit
+36f30e486dce ("IB/core: Improve ODP to use hmm_range_fault()"), where it
+uses the ODP feature to do an RDMA WRITE between fsdax files.
+
+After digging into the code, we found that hmm_vma_handle_pte() still
+returns EFAULT even though all of its requested flags have been
+fulfilled. That's because a DAX page is marked as (_PAGE_SPECIAL |
+_PAGE_DEVMAP) by pte_mkdevmap().
+
+Link: https://github.com/pmem/rpma/issues/1142 [1]
+Link: https://lkml.kernel.org/r/20210830094232.203029-1-lizhijian@cn.fujitsu.com
+Fixes: 405506274922 ("mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL handling")
+Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hmm.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/hmm.c
++++ b/mm/hmm.c
+@@ -295,10 +295,13 @@ static int hmm_vma_handle_pte(struct mm_
+ goto fault;
+
+ /*
++ * Bypass devmap pte such as DAX page when all pfn requested
++ * flags(pfn_req_flags) are fulfilled.
+ * Since each architecture defines a struct page for the zero page, just
+ * fall through and treat it like a normal page.
+ */
+- if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
++ if (pte_special(pte) && !pte_devmap(pte) &&
++ !is_zero_pfn(pte_pfn(pte))) {
+ if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
+ pte_unmap(ptep);
+ return -EFAULT;
--- /dev/null
+From 13db8c50477d83ad3e3b9b0ae247e5cd833a7ae4 Mon Sep 17 00:00:00 2001
+From: Liu Zixian <liuzixian4@huawei.com>
+Date: Wed, 8 Sep 2021 18:10:05 -0700
+Subject: mm/hugetlb: initialize hugetlb_usage in mm_init
+
+From: Liu Zixian <liuzixian4@huawei.com>
+
+commit 13db8c50477d83ad3e3b9b0ae247e5cd833a7ae4 upstream.
+
+After fork, the child process will get an incorrect (2x) hugetlb_usage. If
+a process uses 5 2MB hugetlb pages in an anonymous mapping,
+
+ HugetlbPages: 10240 kB
+
+and then forks, the child will show,
+
+ HugetlbPages: 20480 kB
+
+The amount is doubled because hugetlb_usage is copied from the parent
+and then increased again when the page tables are copied from parent to
+child, so the child ends up with 2x the actual usage.
+
+Fix this by adding hugetlb_count_init in mm_init.
+
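+As an illustration (not part of the fix), a minimal userspace reproducer
+sketch; it assumes a few 2MB huge pages have been reserved via
+vm.nr_hugepages:
+
+  #define _GNU_SOURCE
+  #include <string.h>
+  #include <sys/mman.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          size_t len = 5 * 2 * 1024 * 1024UL;
+          char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+          if (p == MAP_FAILED)
+                  return 1;
+          memset(p, 0, len);      /* fault in the huge pages */
+          if (fork() == 0)
+                  pause();        /* child: read HugetlbPages in /proc/self/status */
+          pause();                /* parent: compare its own HugetlbPages */
+          return 0;
+  }
+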
+Link: https://lkml.kernel.org/r/20210826071742.877-1-liuzixian4@huawei.com
+Fixes: 5d317b2b6536 ("mm: hugetlb: proc: add HugetlbPages field to /proc/PID/status")
+Signed-off-by: Liu Zixian <liuzixian4@huawei.com>
+Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h | 9 +++++++++
+ kernel/fork.c | 1 +
+ 2 files changed, 10 insertions(+)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -858,6 +858,11 @@ static inline spinlock_t *huge_pte_lockp
+
+ void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);
+
++static inline void hugetlb_count_init(struct mm_struct *mm)
++{
++ atomic_long_set(&mm->hugetlb_usage, 0);
++}
++
+ static inline void hugetlb_count_add(long l, struct mm_struct *mm)
+ {
+ atomic_long_add(l, &mm->hugetlb_usage);
+@@ -1042,6 +1047,10 @@ static inline spinlock_t *huge_pte_lockp
+ return &mm->page_table_lock;
+ }
+
++static inline void hugetlb_count_init(struct mm_struct *mm)
++{
++}
++
+ static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
+ {
+ }
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1050,6 +1050,7 @@ static struct mm_struct *mm_init(struct
+ mm->pmd_huge_pte = NULL;
+ #endif
+ mm_init_uprobes_state(mm);
++ hugetlb_count_init(mm);
+
+ if (current->mm) {
+ mm->flags = current->mm->flags & MMF_INIT_MASK;
--- /dev/null
+From 7cf209ba8a86410939a24cb1aeb279479a7e0ca6 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Tue, 7 Sep 2021 19:54:59 -0700
+Subject: mm/memory_hotplug: use "unsigned long" for PFN in zone_for_pfn_range()
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 7cf209ba8a86410939a24cb1aeb279479a7e0ca6 upstream.
+
+Patch series "mm/memory_hotplug: preparatory patches for new online policy and memory"
+
+These are all cleanups and one fix previously sent as part of [1]:
+[PATCH v1 00/12] mm/memory_hotplug: "auto-movable" online policy and memory
+groups.
+
+These patches make sense even without the other series, therefore I pulled
+them out to make the other series easier to digest.
+
+[1] https://lkml.kernel.org/r/20210607195430.48228-1-david@redhat.com
+
+This patch (of 4):
+
+Checkpatch complained on a follow-up patch that we are using "unsigned"
+here, which defaults to "unsigned int", and checkpatch is correct.
+
+As we will search for a fitting zone using the wrong pfn, we might end
+up onlining memory to one of the special kernel zones, such as ZONE_DMA,
+which can end badly as the onlined memory does not satisfy properties of
+these zones.
+
+Use "unsigned long" instead, just as we do in other places when handling
+PFNs. This can bite us once we have physical addresses in the range of
+multiple TB.
+
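+As an illustration (not from the patch), PFNs stop fitting in 32 bits
+once physical addresses exceed 16 TiB (2^32 pages of 4 KiB), so passing
+one through an "unsigned int" parameter silently truncates it:
+
+  unsigned long pfn = 0x400000000000UL >> 12;  /* 64 TiB, 4 KiB pages: 0x400000000 */
+  unsigned int truncated = pfn;                /* truncates to 0 */
+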
+Link: https://lkml.kernel.org/r/20210712124052.26491-2-david@redhat.com
+Fixes: e5e689302633 ("mm, memory_hotplug: display allowed zones in the preferred ordering")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Pankaj Gupta <pankaj.gupta@ionos.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
+Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
+Cc: Len Brown <lenb@kernel.org>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: virtualization@lists.linux-foundation.org
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Cc: Anton Blanchard <anton@ozlabs.org>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: Dave Jiang <dave.jiang@intel.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jia He <justin.he@arm.com>
+Cc: Joe Perches <joe@perches.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Laurent Dufour <ldufour@linux.ibm.com>
+Cc: Michel Lespinasse <michel@lespinasse.org>
+Cc: Nathan Lynch <nathanl@linux.ibm.com>
+Cc: Nicholas Piggin <npiggin@gmail.com>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Pierre Morel <pmorel@linux.ibm.com>
+Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Cc: Rich Felker <dalias@libc.org>
+Cc: Scott Cheloha <cheloha@linux.ibm.com>
+Cc: Sergei Trofimovich <slyfox@gentoo.org>
+Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memory_hotplug.h | 4 ++--
+ mm/memory_hotplug.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/include/linux/memory_hotplug.h
++++ b/include/linux/memory_hotplug.h
+@@ -339,8 +339,8 @@ extern void sparse_remove_section(struct
+ unsigned long map_offset, struct vmem_altmap *altmap);
+ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
+ unsigned long pnum);
+-extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+- unsigned long nr_pages);
++extern struct zone *zone_for_pfn_range(int online_type, int nid,
++ unsigned long start_pfn, unsigned long nr_pages);
+ extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
+ struct mhp_params *params);
+ void arch_remove_linear_mapping(u64 start, u64 size);
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -708,8 +708,8 @@ static inline struct zone *default_zone_
+ return movable_node_enabled ? movable_zone : kernel_zone;
+ }
+
+-struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+- unsigned long nr_pages)
++struct zone *zone_for_pfn_range(int online_type, int nid,
++ unsigned long start_pfn, unsigned long nr_pages)
+ {
+ if (online_type == MMOP_ONLINE_KERNEL)
+ return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
--- /dev/null
+From 276aeee1c5fc00df700f0782060beae126600472 Mon Sep 17 00:00:00 2001
+From: yanghui <yanghui.def@bytedance.com>
+Date: Wed, 8 Sep 2021 18:10:20 -0700
+Subject: mm/mempolicy: fix a race between offset_il_node and mpol_rebind_task
+
+From: yanghui <yanghui.def@bytedance.com>
+
+commit 276aeee1c5fc00df700f0782060beae126600472 upstream.
+
+Servers hit the panic below:
+
+ Kernel version:5.4.56
+ BUG: unable to handle page fault for address: 0000000000002c48
+ RIP: 0010:__next_zones_zonelist+0x1d/0x40
+ Call Trace:
+ __alloc_pages_nodemask+0x277/0x310
+ alloc_page_interleave+0x13/0x70
+ handle_mm_fault+0xf99/0x1390
+ __do_page_fault+0x288/0x500
+ do_page_fault+0x30/0x110
+ page_fault+0x3e/0x50
+
+The reason for the panic is that MAX_NUMNODES is passed as the third
+parameter (preferred_nid) to __alloc_pages_nodemask(). The subsequent
+access to zonelist->zoneref->zone_idx in __next_zones_zonelist() then
+causes the panic.
+
+In offset_il_node(), first_node() returns a nid from pol->v.nodes; after
+this, other threads may change pol->v.nodes before next_node() runs.
+This race condition can make next_node() return MAX_NUMNODES. Fix it by
+putting pol->nodes in a local variable.
+
+The race condition is between offset_il_node and cpuset_change_task_nodemask:
+
+ CPU0: CPU1:
+ alloc_pages_vma()
+ interleave_nid(pol,)
+ offset_il_node(pol,)
+ first_node(pol->v.nodes) cpuset_change_task_nodemask
+ //nodes==0xc mpol_rebind_task
+ mpol_rebind_policy
+ mpol_rebind_nodemask(pol,nodes)
+ //nodes==0x3
+ next_node(nid, pol->v.nodes)//return MAX_NUMNODES
+
+Link: https://lkml.kernel.org/r/20210906034658.48721-1-yanghui.def@bytedance.com
+Signed-off-by: yanghui <yanghui.def@bytedance.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1965,17 +1965,26 @@ unsigned int mempolicy_slab_node(void)
+ */
+ static unsigned offset_il_node(struct mempolicy *pol, unsigned long n)
+ {
+- unsigned nnodes = nodes_weight(pol->nodes);
+- unsigned target;
++ nodemask_t nodemask = pol->nodes;
++ unsigned int target, nnodes;
+ int i;
+ int nid;
++ /*
++ * The barrier will stabilize the nodemask in a register or on
++ * the stack so that it will stop changing under the code.
++ *
++ * Between first_node() and next_node(), pol->nodes could be changed
++ * by other threads. So we put pol->nodes in a local stack.
++ */
++ barrier();
+
++ nnodes = nodes_weight(nodemask);
+ if (!nnodes)
+ return numa_node_id();
+ target = (unsigned int)n % nnodes;
+- nid = first_node(pol->nodes);
++ nid = first_node(nodemask);
+ for (i = 0; i < target; i++)
+- nid = next_node(nid, pol->nodes);
++ nid = next_node(nid, nodemask);
+ return nid;
+ }
+
--- /dev/null
+From 053cfda102306a3394012f9fe2594811c34925e4 Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Wed, 8 Sep 2021 18:10:11 -0700
+Subject: mm/page_alloc.c: avoid accessing uninitialized pcp page migratetype
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit 053cfda102306a3394012f9fe2594811c34925e4 upstream.
+
+If a page is not prepared for freeing (free_unref_page_prepare() fails),
+its pcp migratetype is never set. We would then read garbage from
+get_pcppage_migratetype() and might list_del(&page->lru) again after the
+page has already been deleted from the list, leading to complaints about
+list corruption.
+
+Link: https://lkml.kernel.org/r/20210902115447.57050-1-linmiaohe@huawei.com
+Fixes: df1acc856923 ("mm/page_alloc: avoid conflating IRQs disabled with zone->lock")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3445,8 +3445,10 @@ void free_unref_page_list(struct list_he
+ /* Prepare pages for freeing */
+ list_for_each_entry_safe(page, next, list, lru) {
+ pfn = page_to_pfn(page);
+- if (!free_unref_page_prepare(page, pfn, 0))
++ if (!free_unref_page_prepare(page, pfn, 0)) {
+ list_del(&page->lru);
++ continue;
++ }
+
+ /*
+ * Free isolated pages directly to the allocator, see
--- /dev/null
+From 32d4f4b782bb8f0ceb78c6b5dc46eb577ae25bf7 Mon Sep 17 00:00:00 2001
+From: Rik van Riel <riel@surriel.com>
+Date: Wed, 8 Sep 2021 18:10:08 -0700
+Subject: mm,vmscan: fix divide by zero in get_scan_count
+
+From: Rik van Riel <riel@surriel.com>
+
+commit 32d4f4b782bb8f0ceb78c6b5dc46eb577ae25bf7 upstream.
+
+Commit f56ce412a59d ("mm: memcontrol: fix occasional OOMs due to
+proportional memory.low reclaim") introduced a divide by zero corner
+case when oomd is being used in combination with cgroup memory.low
+protection.
+
+When oomd decides to kill a cgroup, it will force the cgroup memory to
+be reclaimed after killing the tasks, by writing to the memory.max file
+for that cgroup, forcing the remaining page cache and reclaimable slab
+to be reclaimed down to zero.
+
+Previously, on cgroups with some memory.low protection, that write would
+only reclaim memory down to the memory.low limit, or likely not at all,
+with the page cache being reclaimed asynchronously later.
+
+With f56ce412a59d the oomd write to memory.max tries to reclaim all the
+way down to zero, which may race with another reclaimer, to the point of
+ending up with the divide by zero below.
+
+This patch implements the obvious fix.
+
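+As a worked example of the corner case (values assumed for
+illustration): if the racing reclaim has already driven both the cgroup
+usage and the applied protection to zero, then
+
+  cgroup_size = max(cgroup_size, protection) = max(0, 0) = 0
+
+so the old "lruvec_size * protection / cgroup_size" divided by zero,
+while with "+ 1" the denominator is 1 and scan simply stays lruvec_size.
+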
+Link: https://lkml.kernel.org/r/20210826220149.058089c6@imladris.surriel.com
+Fixes: f56ce412a59d ("mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim")
+Signed-off-by: Rik van Riel <riel@surriel.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Chris Down <chris@chrisdown.name>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmscan.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2592,7 +2592,7 @@ out:
+ cgroup_size = max(cgroup_size, protection);
+
+ scan = lruvec_size - lruvec_size * protection /
+- cgroup_size;
++ (cgroup_size + 1);
+
+ /*
+ * Minimally target SWAP_CLUSTER_MAX pages to keep
--- /dev/null
+From 552799f8b3b0074d2617f53a63a088f9514a66e3 Mon Sep 17 00:00:00 2001
+From: Jan Hoffmann <jan@3e8.eu>
+Date: Wed, 1 Sep 2021 20:49:33 +0200
+Subject: net: dsa: lantiq_gswip: fix maximum frame length
+
+From: Jan Hoffmann <jan@3e8.eu>
+
+commit 552799f8b3b0074d2617f53a63a088f9514a66e3 upstream.
+
+Currently, outgoing packets larger than 1496 bytes are dropped when
+tagged VLAN is used on a switch port.
+
+Add the frame check sequence length to the value of the register
+GSWIP_MAC_FLEN to fix this. This matches the lantiq_ppa vendor driver,
+which uses a value consisting of 1518 bytes for the MAC frame, plus the
+lengths of special tag and VLAN tags.
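+
+For reference, assuming the usual definitions VLAN_ETH_FRAME_LEN = 1518
+and ETH_FCS_LEN = 4, with 8 bytes for the special tag, the register is
+now programmed with:
+
+  VLAN_ETH_FRAME_LEN + 8 + ETH_FCS_LEN = 1518 + 8 + 4 = 1530 bytes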
+
+Fixes: 14fceff4771e ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jan Hoffmann <jan@3e8.eu>
+Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/lantiq_gswip.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/dsa/lantiq_gswip.c
++++ b/drivers/net/dsa/lantiq_gswip.c
+@@ -843,7 +843,8 @@ static int gswip_setup(struct dsa_switch
+
+ gswip_switch_mask(priv, 0, GSWIP_MAC_CTRL_2_MLEN,
+ GSWIP_MAC_CTRL_2p(cpu_port));
+- gswip_switch_w(priv, VLAN_ETH_FRAME_LEN + 8, GSWIP_MAC_FLEN);
++ gswip_switch_w(priv, VLAN_ETH_FRAME_LEN + 8 + ETH_FCS_LEN,
++ GSWIP_MAC_FLEN);
+ gswip_switch_mask(priv, 0, GSWIP_BM_QUEUE_GCTRL_GL_MOD,
+ GSWIP_BM_QUEUE_GCTRL);
+
--- /dev/null
+From 81d0885d68ec427e62044cf46a400c9958ea0092 Mon Sep 17 00:00:00 2001
+From: Song Yoong Siang <yoong.siang.song@intel.com>
+Date: Fri, 3 Sep 2021 10:00:26 +0800
+Subject: net: stmmac: Fix overall budget calculation for rxtx_napi
+
+From: Song Yoong Siang <yoong.siang.song@intel.com>
+
+commit 81d0885d68ec427e62044cf46a400c9958ea0092 upstream.
+
+tx_done is not used for napi_complete_done(). Thus, the NAPI busy-polling
+mechanism driven by gro_flush_timeout and napi_defer_hard_irqs cannot be
+triggered after a packet is transmitted when there is no received
+packet.
+
+Fix this by taking the maximum of tx_done and rx_done as the overall
+budget completed by the rxtx NAPI poll, to ensure the XDP Tx ZC
+operation keeps polling for the next Tx frame. This gives the benefit
+of lower packet submission latency and jitter under XDP Tx ZC mode.
+
+Performance of tx-only using xdp-sock on Intel ADL-S platform is
+the same with and without this patch.
+
+root@intel-corei7-64:~# ./xdpsock -i enp0s30f4 -t -z -q 1 -n 10
+ sock0@enp0s30f4:1 txonly xdp-drv
+ pps pkts 10.00
+rx 0 0
+tx 511630 8659520
+
+ sock0@enp0s30f4:1 txonly xdp-drv
+ pps pkts 10.00
+rx 0 0
+tx 511625 13775808
+
+ sock0@enp0s30f4:1 txonly xdp-drv
+ pps pkts 10.00
+rx 0 0
+tx 511619 18892032
+
+Fixes: 132c32ee5bc0 ("net: stmmac: Add TX via XDP zero-copy socket")
+Cc: <stable@vger.kernel.org> # 5.13.x
+Co-developed-by: Ong Boon Leong <boon.leong.ong@intel.com>
+Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
+Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -5342,7 +5342,7 @@ static int stmmac_napi_poll_rxtx(struct
+ struct stmmac_channel *ch =
+ container_of(napi, struct stmmac_channel, rxtx_napi);
+ struct stmmac_priv *priv = ch->priv_data;
+- int rx_done, tx_done;
++ int rx_done, tx_done, rxtx_done;
+ u32 chan = ch->index;
+
+ priv->xstats.napi_poll++;
+@@ -5352,14 +5352,16 @@ static int stmmac_napi_poll_rxtx(struct
+
+ rx_done = stmmac_rx_zc(priv, budget, chan);
+
++ rxtx_done = max(tx_done, rx_done);
++
+ /* If either TX or RX work is not complete, return budget
+ * and keep pooling
+ */
+- if (tx_done >= budget || rx_done >= budget)
++ if (rxtx_done >= budget)
+ return budget;
+
+ /* all work done, exit the polling mode */
+- if (napi_complete_done(napi, rx_done)) {
++ if (napi_complete_done(napi, rxtx_done)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ch->lock, flags);
+@@ -5370,7 +5372,7 @@ static int stmmac_napi_poll_rxtx(struct
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+
+- return min(rx_done, budget - 1);
++ return min(rxtx_done, budget - 1);
+ }
+
+ /**
--- /dev/null
+From 3abc16af57c9939724df92fcbda296b25cc95168 Mon Sep 17 00:00:00 2001
+From: Patryk Duda <pdk@semihalf.com>
+Date: Tue, 18 May 2021 16:07:58 +0200
+Subject: platform/chrome: cros_ec_proto: Send command again when timeout occurs
+
+From: Patryk Duda <pdk@semihalf.com>
+
+commit 3abc16af57c9939724df92fcbda296b25cc95168 upstream.
+
+Sometimes the kernel tries to probe the Fingerprint MCU (FPMCU) before
+it has initialized SPI. This can happen because the FPMCU is restarted
+during system boot and the kernel can send a message in a short window,
+e.g. between the sysjump to RW and SPI initialization.
+
+Cc: <stable@vger.kernel.org> # 4.4+
+Signed-off-by: Patryk Duda <pdk@semihalf.com>
+Link: https://lore.kernel.org/r/20210518140758.29318-1-pdk@semihalf.com
+Signed-off-by: Benson Leung <bleung@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/chrome/cros_ec_proto.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/platform/chrome/cros_ec_proto.c
++++ b/drivers/platform/chrome/cros_ec_proto.c
+@@ -279,6 +279,15 @@ static int cros_ec_host_command_proto_qu
+ msg->insize = sizeof(struct ec_response_get_protocol_info);
+
+ ret = send_command(ec_dev, msg);
++ /*
++ * Send command once again when timeout occurred.
++ * Fingerprint MCU (FPMCU) is restarted during system boot which
++ * introduces small window in which FPMCU won't respond for any
++ * messages sent by kernel. There is no need to wait before next
++ * attempt because we waited at least EC_MSG_DEADLINE_MS.
++ */
++ if (ret == -ETIMEDOUT)
++ ret = send_command(ec_dev, msg);
+
+ if (ret < 0) {
+ dev_dbg(ec_dev->dev,
s390-pv-fix-the-forcing-of-the-swiotlb.patch
s390-topology-fix-topology-information-when-calling-cpu-hotplug-notifiers.patch
mm-fix-panic-caused-by-__page_handle_poison.patch
+hugetlb-fix-hugetlb-cgroup-refcounting-during-vma-split.patch
+mm-memory_hotplug-use-unsigned-long-for-pfn-in-zone_for_pfn_range.patch
+mm-hmm-bypass-devmap-pte-when-all-pfn-requested-flags-are-fulfilled.patch
+mm-hugetlb-initialize-hugetlb_usage-in-mm_init.patch
+mm-vmscan-fix-divide-by-zero-in-get_scan_count.patch
+mm-page_alloc.c-avoid-accessing-uninitialized-pcp-page-migratetype.patch
+mm-mempolicy-fix-a-race-between-offset_il_node-and-mpol_rebind_task.patch
+memcg-enable-accounting-for-pids-in-nested-pid-namespaces.patch
+libnvdimm-pmem-fix-crash-triggered-when-i-o-in-flight-during-unbind.patch
+platform-chrome-cros_ec_proto-send-command-again-when-timeout-occurs.patch
+lib-test_stackinit-fix-static-initializer-test.patch
+net-dsa-lantiq_gswip-fix-maximum-frame-length.patch
+net-stmmac-fix-overall-budget-calculation-for-rxtx_napi.patch
+drm-mgag200-select-clock-in-pll-update-functions.patch
+drm-msi-mdp4-populate-priv-kms-in-mdp4_kms_init.patch
+drm-dp_mst-fix-return-code-on-sideband-message-failure.patch
+drm-panfrost-make-sure-mmu-context-lifetime-is-not-bound-to-panfrost_priv.patch
+drm-amdgpu-fix-bug_on-assert.patch
+drm-amdgpu-enable-s-g-for-yellow-carp.patch
+drm-amdgpu-fix-a-deadlock-if-previous-gem-object-allocation-fails.patch
+drm-amd-display-update-number-of-dcn3-clock-states.patch
+drm-amd-display-update-bounding-box-states-v2.patch
+drm-amdkfd-drop-process-ref-count-when-xnack-disable.patch
+drm-amd-display-setup-system-context-for-apus.patch
+drm-msm-disp-dpu1-add-safe-lut-config-in-dpu-driver.patch
+drm-ttm-fix-ttm_bo_move_memcpy-for-subclassed-struct-ttm_resource.patch
+drm-panfrost-simplify-lock_region-calculation.patch
+drm-panfrost-use-u64-for-size-in-lock_region.patch
+drm-panfrost-clamp-lock-region-to-bifrost-minimum.patch