--- /dev/null
+From 5bd85e4abdcb05b04634879a1a1a015324ff00e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 14:34:32 +0100
+Subject: ACPI: processor: Fix memory leaks in error paths of processor_add()
+
+From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+
+[ Upstream commit 47ec9b417ed9b6b8ec2a941cd84d9de62adc358a ]
+
+If acpi_processor_get_info() returned an error, pr and the associated
+pr->throttling.shared_cpu_map were leaked.
+
+The unwind code was in the wrong order with respect to setup, relying on
+some unwind actions having no effect (clearing variables that were
+never set, etc.). That makes it harder to reason about, so reorder
+and add appropriate labels to only undo what was actually set up
+in the first place.
+
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: Gavin Shan <gshan@redhat.com>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Link: https://lore.kernel.org/r/20240529133446.28446-6-Jonathan.Cameron@huawei.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/acpi_processor.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
+index 5662c157fda7..8bd5c4fa91f2 100644
+--- a/drivers/acpi/acpi_processor.c
++++ b/drivers/acpi/acpi_processor.c
+@@ -373,7 +373,7 @@ static int acpi_processor_add(struct acpi_device *device,
+
+ result = acpi_processor_get_info(device);
+ if (result) /* Processor is not physically present or unavailable */
+- return result;
++ goto err_clear_driver_data;
+
+ BUG_ON(pr->id >= nr_cpu_ids);
+
+@@ -388,7 +388,7 @@ static int acpi_processor_add(struct acpi_device *device,
+ "BIOS reported wrong ACPI id %d for the processor\n",
+ pr->id);
+ /* Give up, but do not abort the namespace scan. */
+- goto err;
++ goto err_clear_driver_data;
+ }
+ /*
+ * processor_device_array is not cleared on errors to allow buggy BIOS
+@@ -400,12 +400,12 @@ static int acpi_processor_add(struct acpi_device *device,
+ dev = get_cpu_device(pr->id);
+ if (!dev) {
+ result = -ENODEV;
+- goto err;
++ goto err_clear_per_cpu;
+ }
+
+ result = acpi_bind_one(dev, device);
+ if (result)
+- goto err;
++ goto err_clear_per_cpu;
+
+ pr->dev = dev;
+
+@@ -416,10 +416,11 @@ static int acpi_processor_add(struct acpi_device *device,
+ dev_err(dev, "Processor driver could not be attached\n");
+ acpi_unbind_one(dev);
+
+- err:
+- free_cpumask_var(pr->throttling.shared_cpu_map);
+- device->driver_data = NULL;
++ err_clear_per_cpu:
+ per_cpu(processors, pr->id) = NULL;
++ err_clear_driver_data:
++ device->driver_data = NULL;
++ free_cpumask_var(pr->throttling.shared_cpu_map);
+ err_free_pr:
+ kfree(pr);
+ return result;
+--
+2.43.0
+
--- /dev/null
+From cb8c7c42180635cc56ded95ad5a63d16300deb2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 14:34:31 +0100
+Subject: ACPI: processor: Return an error if acpi_processor_get_info() fails
+ in processor_add()
+
+From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+
+[ Upstream commit fadf231f0a06a6748a7fc4a2c29ac9ef7bca6bfd ]
+
+Rafael observed [1] that returning 0 from processor_add() will result in
+acpi_default_enumeration() being called which will attempt to create a
+platform device, but that makes little sense when the processor is known
+to be not available. So just return the error code from acpi_processor_get_info()
+instead.
+
+Link: https://lore.kernel.org/all/CAJZ5v0iKU8ra9jR+EmgxbuNm=Uwx2m1-8vn_RAZ+aCiUVLe3Pw@mail.gmail.com/ [1]
+Suggested-by: Rafael J. Wysocki <rafael@kernel.org>
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: Gavin Shan <gshan@redhat.com>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Link: https://lore.kernel.org/r/20240529133446.28446-5-Jonathan.Cameron@huawei.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/acpi_processor.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
+index 6737b1cbf6d6..5662c157fda7 100644
+--- a/drivers/acpi/acpi_processor.c
++++ b/drivers/acpi/acpi_processor.c
+@@ -373,7 +373,7 @@ static int acpi_processor_add(struct acpi_device *device,
+
+ result = acpi_processor_get_info(device);
+ if (result) /* Processor is not physically present or unavailable */
+- return 0;
++ return result;
+
+ BUG_ON(pr->id >= nr_cpu_ids);
+
+--
+2.43.0
+
--- /dev/null
+From 16e090f0b28a2287aab8ac09a5a62f9a9f92bed8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 14:34:39 +0100
+Subject: arm64: acpi: Harden get_cpu_for_acpi_id() against missing CPU entry
+
+From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+
+[ Upstream commit 2488444274c70038eb6b686cba5f1ce48ebb9cdd ]
+
+In a review discussion of the changes to support vCPU hotplug, where
+a check was added on the GICC being enabled if it was online, it was
+noted that there is a need to map back to the CPU and use that to index
+into a cpumask. As such, a valid ID is needed.
+
+If an MPIDR check fails in acpi_map_gic_cpu_interface() it is possible
+for the entry cpu_madt_gicc[cpu] to be NULL. This function would
+then cause a NULL pointer dereference. Whilst a path to trigger
+this has not been established, harden this caller against the
+possibility.
+
+Reviewed-by: Gavin Shan <gshan@redhat.com>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Link: https://lore.kernel.org/r/20240529133446.28446-13-Jonathan.Cameron@huawei.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/acpi.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
+index 0d1da93a5bad..702587fda70c 100644
+--- a/arch/arm64/include/asm/acpi.h
++++ b/arch/arm64/include/asm/acpi.h
+@@ -102,7 +102,8 @@ static inline int get_cpu_for_acpi_id(u32 uid)
+ int cpu;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+- if (uid == get_acpi_id_for_cpu(cpu))
++ if (acpi_cpu_get_madt_gicc(cpu) &&
++ uid == get_acpi_id_for_cpu(cpu))
+ return cpu;
+
+ return -EINVAL;
+--
+2.43.0
+
--- /dev/null
+From 098dd58fc2a49944a669a6a17a0dadb929428ad9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 14:34:38 +0100
+Subject: arm64: acpi: Move get_cpu_for_acpi_id() to a header
+
+From: James Morse <james.morse@arm.com>
+
+[ Upstream commit 8d34b6f17b9ac93faa2791eb037dcb08bdf755de ]
+
+ACPI identifies CPUs by UID. get_cpu_for_acpi_id() maps the ACPI UID
+to the Linux CPU number.
+
+The helper to retrieve this mapping is only available in arm64's NUMA
+code.
+
+Move it to live next to get_acpi_id_for_cpu().
+
+Signed-off-by: James Morse <james.morse@arm.com>
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Reviewed-by: Gavin Shan <gshan@redhat.com>
+Tested-by: Miguel Luis <miguel.luis@oracle.com>
+Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
+Tested-by: Jianyong Wu <jianyong.wu@arm.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Acked-by: Hanjun Guo <guohanjun@huawei.com>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Reviewed-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
+Link: https://lore.kernel.org/r/20240529133446.28446-12-Jonathan.Cameron@huawei.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/acpi.h | 11 +++++++++++
+ arch/arm64/kernel/acpi_numa.c | 11 -----------
+ 2 files changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
+index bd68e1b7f29f..0d1da93a5bad 100644
+--- a/arch/arm64/include/asm/acpi.h
++++ b/arch/arm64/include/asm/acpi.h
+@@ -97,6 +97,17 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
+ return acpi_cpu_get_madt_gicc(cpu)->uid;
+ }
+
++static inline int get_cpu_for_acpi_id(u32 uid)
++{
++ int cpu;
++
++ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
++ if (uid == get_acpi_id_for_cpu(cpu))
++ return cpu;
++
++ return -EINVAL;
++}
++
+ static inline void arch_fix_phys_package_id(int num, u32 slot) { }
+ void __init acpi_init_cpus(void);
+ int apei_claim_sea(struct pt_regs *regs);
+diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
+index ccbff21ce1fa..2465f291c7e1 100644
+--- a/arch/arm64/kernel/acpi_numa.c
++++ b/arch/arm64/kernel/acpi_numa.c
+@@ -34,17 +34,6 @@ int __init acpi_numa_get_nid(unsigned int cpu)
+ return acpi_early_node_map[cpu];
+ }
+
+-static inline int get_cpu_for_acpi_id(u32 uid)
+-{
+- int cpu;
+-
+- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+- if (uid == get_acpi_id_for_cpu(cpu))
+- return cpu;
+-
+- return -EINVAL;
+-}
+-
+ static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header,
+ const unsigned long end)
+ {
+--
+2.43.0
+
--- /dev/null
+From e349484b4bf73b335e230c3451f1113a87231edf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Aug 2024 12:10:04 +0800
+Subject: ASoc: SOF: topology: Clear SOF link platform name upon unload
+
+From: Chen-Yu Tsai <wenst@chromium.org>
+
+[ Upstream commit e0be875c5bf03a9676a6bfed9e0f1766922a7dbd ]
+
+The SOF topology loading function sets the device name for the platform
+component link. This should be unset when unloading the topology,
+otherwise a machine driver unbind/bind or reprobe would complain about
+an invalid component as having both its component name and of_node set:
+
+ mt8186_mt6366 sound: ASoC: Both Component name/of_node are set for AFE_SOF_DL1
+ mt8186_mt6366 sound: error -EINVAL: Cannot register card
+ mt8186_mt6366 sound: probe with driver mt8186_mt6366 failed with error -22
+
+This happens with machine drivers that set the of_node separately.
+
+Clear the SOF link platform name in the topology unload callback.
+
+Fixes: 311ce4fe7637 ("ASoC: SOF: Add support for loading topologies")
+Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
+Link: https://patch.msgid.link/20240821041006.2618855-1-wenst@chromium.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sof/topology.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
+index e7305ce57ea1..374c8b1d6958 100644
+--- a/sound/soc/sof/topology.c
++++ b/sound/soc/sof/topology.c
+@@ -1817,6 +1817,8 @@ static int sof_link_unload(struct snd_soc_component *scomp, struct snd_soc_dobj
+ if (!slink)
+ return 0;
+
++ slink->link->platforms->name = NULL;
++
+ kfree(slink->tuples);
+ list_del(&slink->list);
+ kfree(slink->hw_configs);
+--
+2.43.0
+
--- /dev/null
+From 02a21ff8381518e2b87583c19c71543f7c0f079f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Aug 2024 14:07:19 +0200
+Subject: ASoC: sunxi: sun4i-i2s: fix LRCLK polarity in i2s mode
+
+From: Matteo Martelli <matteomartelli3@gmail.com>
+
+[ Upstream commit 3e83957e8dd7433a69116780d9bad217b00913ea ]
+
+This fixes the LRCLK polarity for sun8i-h3 and sun50i-h6 in i2s mode
+which was wrongly inverted.
+
+The LRCLK was being set in reversed logic compared to the DAI format:
+inverted LRCLK for SND_SOC_DAIFMT_IB_NF and SND_SOC_DAIFMT_NB_NF; normal
+LRCLK for SND_SOC_DAIFMT_IB_IF and SND_SOC_DAIFMT_NB_IF. Such reversed
+logic applies properly for DSP_A, DSP_B, LEFT_J and RIGHT_J modes but
+not for I2S mode, for which the LRCLK signal ends up reversed compared
+to what is expected on the bus. The issue is due to a misinterpretation of the
+LRCLK polarity bit of the H3 and H6 i2s controllers. Such bit in this
+case does not mean "0 => normal" or "1 => inverted" according to the
+expected bus operation, but it means "0 => frame starts on low edge" and
+"1 => frame starts on high edge" (from the User Manuals).
+
+This commit fixes the LRCLK polarity by setting the LRCLK polarity bit
+according to the selected bus mode and renames the LRCLK polarity bit
+definition to avoid further confusion.
+
+Fixes: dd657eae8164 ("ASoC: sun4i-i2s: Fix the LRCK polarity")
+Fixes: 73adf87b7a58 ("ASoC: sun4i-i2s: Add support for H6 I2S")
+Signed-off-by: Matteo Martelli <matteomartelli3@gmail.com>
+Link: https://patch.msgid.link/20240801-asoc-fix-sun4i-i2s-v2-1-a8e4e9daa363@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sunxi/sun4i-i2s.c | 143 ++++++++++++++++++------------------
+ 1 file changed, 73 insertions(+), 70 deletions(-)
+
+diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c
+index 6028871825ba..47faaf849de0 100644
+--- a/sound/soc/sunxi/sun4i-i2s.c
++++ b/sound/soc/sunxi/sun4i-i2s.c
+@@ -100,8 +100,8 @@
+ #define SUN8I_I2S_CTRL_MODE_PCM (0 << 4)
+
+ #define SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK BIT(19)
+-#define SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED (1 << 19)
+-#define SUN8I_I2S_FMT0_LRCLK_POLARITY_NORMAL (0 << 19)
++#define SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH (1 << 19)
++#define SUN8I_I2S_FMT0_LRCLK_POLARITY_START_LOW (0 << 19)
+ #define SUN8I_I2S_FMT0_LRCK_PERIOD_MASK GENMASK(17, 8)
+ #define SUN8I_I2S_FMT0_LRCK_PERIOD(period) ((period - 1) << 8)
+ #define SUN8I_I2S_FMT0_BCLK_POLARITY_MASK BIT(7)
+@@ -727,65 +727,37 @@ static int sun4i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s,
+ static int sun8i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s,
+ unsigned int fmt)
+ {
+- u32 mode, val;
++ u32 mode, lrclk_pol, bclk_pol, val;
+ u8 offset;
+
+- /*
+- * DAI clock polarity
+- *
+- * The setup for LRCK contradicts the datasheet, but under a
+- * scope it's clear that the LRCK polarity is reversed
+- * compared to the expected polarity on the bus.
+- */
+- switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+- case SND_SOC_DAIFMT_IB_IF:
+- /* Invert both clocks */
+- val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED;
+- break;
+- case SND_SOC_DAIFMT_IB_NF:
+- /* Invert bit clock */
+- val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED |
+- SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED;
+- break;
+- case SND_SOC_DAIFMT_NB_IF:
+- /* Invert frame clock */
+- val = 0;
+- break;
+- case SND_SOC_DAIFMT_NB_NF:
+- val = SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED;
+- break;
+- default:
+- return -EINVAL;
+- }
+-
+- regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG,
+- SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK |
+- SUN8I_I2S_FMT0_BCLK_POLARITY_MASK,
+- val);
+-
+ /* DAI Mode */
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_DSP_A:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_PCM;
+ offset = 1;
+ break;
+
+ case SND_SOC_DAIFMT_DSP_B:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_PCM;
+ offset = 0;
+ break;
+
+ case SND_SOC_DAIFMT_I2S:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_LOW;
+ mode = SUN8I_I2S_CTRL_MODE_LEFT;
+ offset = 1;
+ break;
+
+ case SND_SOC_DAIFMT_LEFT_J:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_LEFT;
+ offset = 0;
+ break;
+
+ case SND_SOC_DAIFMT_RIGHT_J:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_RIGHT;
+ offset = 0;
+ break;
+@@ -803,6 +775,35 @@ static int sun8i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s,
+ SUN8I_I2S_TX_CHAN_OFFSET_MASK,
+ SUN8I_I2S_TX_CHAN_OFFSET(offset));
+
++ /* DAI clock polarity */
++ bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_NORMAL;
++
++ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
++ case SND_SOC_DAIFMT_IB_IF:
++ /* Invert both clocks */
++ lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK;
++ bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED;
++ break;
++ case SND_SOC_DAIFMT_IB_NF:
++ /* Invert bit clock */
++ bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED;
++ break;
++ case SND_SOC_DAIFMT_NB_IF:
++ /* Invert frame clock */
++ lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK;
++ break;
++ case SND_SOC_DAIFMT_NB_NF:
++ /* No inversion */
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG,
++ SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK |
++ SUN8I_I2S_FMT0_BCLK_POLARITY_MASK,
++ lrclk_pol | bclk_pol);
++
+ /* DAI clock master masks */
+ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
+ case SND_SOC_DAIFMT_BP_FP:
+@@ -834,65 +835,37 @@ static int sun8i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s,
+ static int sun50i_h6_i2s_set_soc_fmt(const struct sun4i_i2s *i2s,
+ unsigned int fmt)
+ {
+- u32 mode, val;
++ u32 mode, lrclk_pol, bclk_pol, val;
+ u8 offset;
+
+- /*
+- * DAI clock polarity
+- *
+- * The setup for LRCK contradicts the datasheet, but under a
+- * scope it's clear that the LRCK polarity is reversed
+- * compared to the expected polarity on the bus.
+- */
+- switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+- case SND_SOC_DAIFMT_IB_IF:
+- /* Invert both clocks */
+- val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED;
+- break;
+- case SND_SOC_DAIFMT_IB_NF:
+- /* Invert bit clock */
+- val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED |
+- SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED;
+- break;
+- case SND_SOC_DAIFMT_NB_IF:
+- /* Invert frame clock */
+- val = 0;
+- break;
+- case SND_SOC_DAIFMT_NB_NF:
+- val = SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED;
+- break;
+- default:
+- return -EINVAL;
+- }
+-
+- regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG,
+- SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK |
+- SUN8I_I2S_FMT0_BCLK_POLARITY_MASK,
+- val);
+-
+ /* DAI Mode */
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_DSP_A:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_PCM;
+ offset = 1;
+ break;
+
+ case SND_SOC_DAIFMT_DSP_B:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_PCM;
+ offset = 0;
+ break;
+
+ case SND_SOC_DAIFMT_I2S:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_LOW;
+ mode = SUN8I_I2S_CTRL_MODE_LEFT;
+ offset = 1;
+ break;
+
+ case SND_SOC_DAIFMT_LEFT_J:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_LEFT;
+ offset = 0;
+ break;
+
+ case SND_SOC_DAIFMT_RIGHT_J:
++ lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH;
+ mode = SUN8I_I2S_CTRL_MODE_RIGHT;
+ offset = 0;
+ break;
+@@ -910,6 +883,36 @@ static int sun50i_h6_i2s_set_soc_fmt(const struct sun4i_i2s *i2s,
+ SUN50I_H6_I2S_TX_CHAN_SEL_OFFSET_MASK,
+ SUN50I_H6_I2S_TX_CHAN_SEL_OFFSET(offset));
+
++ /* DAI clock polarity */
++ bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_NORMAL;
++
++ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
++ case SND_SOC_DAIFMT_IB_IF:
++ /* Invert both clocks */
++ lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK;
++ bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED;
++ break;
++ case SND_SOC_DAIFMT_IB_NF:
++ /* Invert bit clock */
++ bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED;
++ break;
++ case SND_SOC_DAIFMT_NB_IF:
++ /* Invert frame clock */
++ lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK;
++ break;
++ case SND_SOC_DAIFMT_NB_NF:
++ /* No inversion */
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG,
++ SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK |
++ SUN8I_I2S_FMT0_BCLK_POLARITY_MASK,
++ lrclk_pol | bclk_pol);
++
++
+ /* DAI clock master masks */
+ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
+ case SND_SOC_DAIFMT_BP_FP:
+--
+2.43.0
+
--- /dev/null
+From 131e71a062286e080a06ce9520baeb40874369b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Aug 2024 14:43:42 +0000
+Subject: ASoC: tegra: Fix CBB error during probe()
+
+From: Mohan Kumar <mkumard@nvidia.com>
+
+[ Upstream commit 6781b962d97bc52715a8db8cc17278cc3c23ebe8 ]
+
+When Tegra audio drivers are built as part of the kernel image,
+TIMEOUT_ERR is observed from cbb-fabric. Following is seen on
+Jetson AGX Orin during boot:
+
+[ 8.012482] **************************************
+[ 8.017423] CPU:0, Error:cbb-fabric, Errmon:2
+[ 8.021922] Error Code : TIMEOUT_ERR
+[ 8.025966] Overflow : Multiple TIMEOUT_ERR
+[ 8.030644]
+[ 8.032175] Error Code : TIMEOUT_ERR
+[ 8.036217] MASTER_ID : CCPLEX
+[ 8.039722] Address : 0x290a0a8
+[ 8.043318] Cache : 0x1 -- Bufferable
+[ 8.047630] Protection : 0x2 -- Unprivileged, Non-Secure, Data Access
+[ 8.054628] Access_Type : Write
+
+[ 8.106130] WARNING: CPU: 0 PID: 124 at drivers/soc/tegra/cbb/tegra234-cbb.c:604 tegra234_cbb_isr+0x134/0x178
+
+[ 8.240602] Call trace:
+[ 8.243126] tegra234_cbb_isr+0x134/0x178
+[ 8.247261] __handle_irq_event_percpu+0x60/0x238
+[ 8.252132] handle_irq_event+0x54/0xb8
+
+These errors happen when MVC device, which is a child of AHUB
+device, tries to access its device registers. This happens as
+part of call tegra210_mvc_reset_vol_settings() in MVC device
+probe().
+
+The root cause of this problem is that the child MVC device gets
+probed before the AHUB clock gets enabled. The AHUB clock is
+enabled in runtime PM resume of parent AHUB device and due to
+the wrong sequence of pm_runtime_enable() in AHUB driver,
+runtime PM resume doesn't happen for AHUB device when MVC makes
+register access.
+
+Fix this by calling pm_runtime_enable() for parent AHUB device
+before of_platform_populate() in AHUB driver. This ensures that
+clock becomes available when MVC makes register access.
+
+Fixes: 16e1bcc2caf4 ("ASoC: tegra: Add Tegra210 based AHUB driver")
+Signed-off-by: Mohan Kumar <mkumard@nvidia.com>
+Signed-off-by: Ritu Chaudhary <rituc@nvidia.com>
+Signed-off-by: Sameer Pujar <spujar@nvidia.com>
+Link: https://patch.msgid.link/20240823144342.4123814-3-spujar@nvidia.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/tegra/tegra210_ahub.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c
+index b38d205b69cc..dfdcb4580cd7 100644
+--- a/sound/soc/tegra/tegra210_ahub.c
++++ b/sound/soc/tegra/tegra210_ahub.c
+@@ -2,7 +2,7 @@
+ //
+ // tegra210_ahub.c - Tegra210 AHUB driver
+ //
+-// Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
++// Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
+
+ #include <linux/clk.h>
+ #include <linux/device.h>
+@@ -1401,11 +1401,13 @@ static int tegra_ahub_probe(struct platform_device *pdev)
+ return err;
+ }
+
++ pm_runtime_enable(&pdev->dev);
++
+ err = of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+- if (err)
++ if (err) {
++ pm_runtime_disable(&pdev->dev);
+ return err;
+-
+- pm_runtime_enable(&pdev->dev);
++ }
+
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 61ba93a7348c113b630f8ce19950f88899a8a8d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Jan 2023 11:48:16 +0100
+Subject: can: mcp251xfd: clarify the meaning of timestamp
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+[ Upstream commit e793c724b48ca8cae9693bc3be528e85284c126a ]
+
+The mcp251xfd chip is configured to provide a timestamp with each
+received and transmitted CAN frame. The timestamp is derived from the
+internal free-running timer, which can also be read from the TBC
+register via SPI. The timer is 32 bits wide and is clocked by the
+external oscillator (typically 20 or 40 MHz).
+
+To avoid confusion, we call this timestamp "timestamp_raw" or "ts_raw"
+for short.
+
+Using the timecounter framework, the "ts_raw" is converted to 64 bit
+nanoseconds since the epoch. This is what we call "timestamp".
+
+This is a preparation for the next patches which use the "timestamp"
+to work around a bug where so far only the "ts_raw" is used.
+
+Tested-by: Stefan Althöfer <Stefan.Althoefer@janztec.com>
+Tested-by: Thomas Kopp <thomas.kopp@microchip.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 28 +++++++++----------
+ drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 2 +-
+ drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c | 2 +-
+ .../can/spi/mcp251xfd/mcp251xfd-timestamp.c | 22 ++++-----------
+ drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 27 ++++++++++++++----
+ 5 files changed, 43 insertions(+), 38 deletions(-)
+
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+index 1665f78abb5c..a9bafa96e2f9 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+@@ -2,7 +2,7 @@
+ //
+ // mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+ //
+-// Copyright (c) 2019, 2020, 2021 Pengutronix,
++// Copyright (c) 2019, 2020, 2021, 2023 Pengutronix,
+ // Marc Kleine-Budde <kernel@pengutronix.de>
+ //
+ // Based on:
+@@ -867,18 +867,18 @@ static int mcp251xfd_get_berr_counter(const struct net_device *ndev,
+
+ static struct sk_buff *
+ mcp251xfd_alloc_can_err_skb(struct mcp251xfd_priv *priv,
+- struct can_frame **cf, u32 *timestamp)
++ struct can_frame **cf, u32 *ts_raw)
+ {
+ struct sk_buff *skb;
+ int err;
+
+- err = mcp251xfd_get_timestamp(priv, timestamp);
++ err = mcp251xfd_get_timestamp_raw(priv, ts_raw);
+ if (err)
+ return NULL;
+
+ skb = alloc_can_err_skb(priv->ndev, cf);
+ if (skb)
+- mcp251xfd_skb_set_timestamp(priv, skb, *timestamp);
++ mcp251xfd_skb_set_timestamp_raw(priv, skb, *ts_raw);
+
+ return skb;
+ }
+@@ -889,7 +889,7 @@ static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv)
+ struct mcp251xfd_rx_ring *ring;
+ struct sk_buff *skb;
+ struct can_frame *cf;
+- u32 timestamp, rxovif;
++ u32 ts_raw, rxovif;
+ int err, i;
+
+ stats->rx_over_errors++;
+@@ -924,14 +924,14 @@ static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv)
+ return err;
+ }
+
+- skb = mcp251xfd_alloc_can_err_skb(priv, &cf, ×tamp);
++ skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &ts_raw);
+ if (!skb)
+ return 0;
+
+ cf->can_id |= CAN_ERR_CRTL;
+ cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+
+- err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp);
++ err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw);
+ if (err)
+ stats->rx_fifo_errors++;
+
+@@ -948,12 +948,12 @@ static int mcp251xfd_handle_txatif(struct mcp251xfd_priv *priv)
+ static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv)
+ {
+ struct net_device_stats *stats = &priv->ndev->stats;
+- u32 bdiag1, timestamp;
++ u32 bdiag1, ts_raw;
+ struct sk_buff *skb;
+ struct can_frame *cf = NULL;
+ int err;
+
+- err = mcp251xfd_get_timestamp(priv, ×tamp);
++ err = mcp251xfd_get_timestamp_raw(priv, &ts_raw);
+ if (err)
+ return err;
+
+@@ -1035,8 +1035,8 @@ static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv)
+ if (!cf)
+ return 0;
+
+- mcp251xfd_skb_set_timestamp(priv, skb, timestamp);
+- err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp);
++ mcp251xfd_skb_set_timestamp_raw(priv, skb, ts_raw);
++ err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw);
+ if (err)
+ stats->rx_fifo_errors++;
+
+@@ -1049,7 +1049,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv)
+ struct sk_buff *skb;
+ struct can_frame *cf = NULL;
+ enum can_state new_state, rx_state, tx_state;
+- u32 trec, timestamp;
++ u32 trec, ts_raw;
+ int err;
+
+ err = regmap_read(priv->map_reg, MCP251XFD_REG_TREC, &trec);
+@@ -1079,7 +1079,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv)
+ /* The skb allocation might fail, but can_change_state()
+ * handles cf == NULL.
+ */
+- skb = mcp251xfd_alloc_can_err_skb(priv, &cf, ×tamp);
++ skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &ts_raw);
+ can_change_state(priv->ndev, cf, tx_state, rx_state);
+
+ if (new_state == CAN_STATE_BUS_OFF) {
+@@ -1110,7 +1110,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv)
+ cf->data[7] = bec.rxerr;
+ }
+
+- err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp);
++ err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw);
+ if (err)
+ stats->rx_fifo_errors++;
+
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+index 5d0fb1c454cd..a79e6c661ecc 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+@@ -160,7 +160,7 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
+ if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR))
+ memcpy(cfd->data, hw_rx_obj->data, cfd->len);
+
+- mcp251xfd_skb_set_timestamp(priv, skb, hw_rx_obj->ts);
++ mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_rx_obj->ts);
+ }
+
+ static int
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
+index 902eb767426d..8f39730f3122 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
+@@ -97,7 +97,7 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
+ tef_tail = mcp251xfd_get_tef_tail(priv);
+ skb = priv->can.echo_skb[tef_tail];
+ if (skb)
+- mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts);
++ mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_tef_obj->ts);
+ stats->tx_bytes +=
+ can_rx_offload_get_echo_skb(&priv->offload,
+ tef_tail, hw_tef_obj->ts,
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
+index 712e09186987..1db99aabe85c 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
+@@ -2,7 +2,7 @@
+ //
+ // mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+ //
+-// Copyright (c) 2021 Pengutronix,
++// Copyright (c) 2021, 2023 Pengutronix,
+ // Marc Kleine-Budde <kernel@pengutronix.de>
+ //
+
+@@ -11,20 +11,20 @@
+
+ #include "mcp251xfd.h"
+
+-static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc)
++static u64 mcp251xfd_timestamp_raw_read(const struct cyclecounter *cc)
+ {
+ const struct mcp251xfd_priv *priv;
+- u32 timestamp = 0;
++ u32 ts_raw = 0;
+ int err;
+
+ priv = container_of(cc, struct mcp251xfd_priv, cc);
+- err = mcp251xfd_get_timestamp(priv, ×tamp);
++ err = mcp251xfd_get_timestamp_raw(priv, &ts_raw);
+ if (err)
+ netdev_err(priv->ndev,
+ "Error %d while reading timestamp. HW timestamps may be inaccurate.",
+ err);
+
+- return timestamp;
++ return ts_raw;
+ }
+
+ static void mcp251xfd_timestamp_work(struct work_struct *work)
+@@ -39,21 +39,11 @@ static void mcp251xfd_timestamp_work(struct work_struct *work)
+ MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ);
+ }
+
+-void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
+- struct sk_buff *skb, u32 timestamp)
+-{
+- struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
+- u64 ns;
+-
+- ns = timecounter_cyc2time(&priv->tc, timestamp);
+- hwtstamps->hwtstamp = ns_to_ktime(ns);
+-}
+-
+ void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv)
+ {
+ struct cyclecounter *cc = &priv->cc;
+
+- cc->read = mcp251xfd_timestamp_read;
++ cc->read = mcp251xfd_timestamp_raw_read;
+ cc->mask = CYCLECOUNTER_MASK(32);
+ cc->shift = 1;
+ cc->mult = clocksource_hz2mult(priv->can.clock.freq, cc->shift);
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+index ca5f4e670ec1..7713c9264fb5 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+@@ -2,7 +2,7 @@
+ *
+ * mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+ *
+- * Copyright (c) 2019, 2020, 2021 Pengutronix,
++ * Copyright (c) 2019, 2020, 2021, 2023 Pengutronix,
+ * Marc Kleine-Budde <kernel@pengutronix.de>
+ * Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org>
+ */
+@@ -794,10 +794,27 @@ mcp251xfd_spi_cmd_write(const struct mcp251xfd_priv *priv,
+ return data;
+ }
+
+-static inline int mcp251xfd_get_timestamp(const struct mcp251xfd_priv *priv,
+- u32 *timestamp)
++static inline int mcp251xfd_get_timestamp_raw(const struct mcp251xfd_priv *priv,
++ u32 *ts_raw)
+ {
+- return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, timestamp);
++ return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, ts_raw);
++}
++
++static inline void mcp251xfd_skb_set_timestamp(struct sk_buff *skb, u64 ns)
++{
++ struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
++
++ hwtstamps->hwtstamp = ns_to_ktime(ns);
++}
++
++static inline
++void mcp251xfd_skb_set_timestamp_raw(const struct mcp251xfd_priv *priv,
++ struct sk_buff *skb, u32 ts_raw)
++{
++ u64 ns;
++
++ ns = timecounter_cyc2time(&priv->tc, ts_raw);
++ mcp251xfd_skb_set_timestamp(skb, ns);
+ }
+
+ static inline u16 mcp251xfd_get_tef_obj_addr(u8 n)
+@@ -918,8 +935,6 @@ void mcp251xfd_ring_free(struct mcp251xfd_priv *priv);
+ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv);
+ int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv);
+ int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv);
+-void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
+- struct sk_buff *skb, u32 timestamp);
+ void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
+ void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
+
+--
+2.43.0
+
--- /dev/null
+From b4eff1df1ca6422651dcd898c5326b40560c48b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Jan 2023 20:25:48 +0100
+Subject: can: mcp251xfd: mcp251xfd_handle_rxif_ring_uinc(): factor out in
+ separate function
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+[ Upstream commit d49184b7b585f9da7ee546b744525f62117019f6 ]
+
+This is a preparation patch.
+
+Sending the UINC messages followed by incrementing the tail pointer
+will be called in more than one place in upcoming patches, so factor
+this out into a separate function.
+
+Also make mcp251xfd_handle_rxif_ring_uinc() safe to be called with a
+"len" of 0.
+
+Tested-by: Stefan Althöfer <Stefan.Althoefer@janztec.com>
+Tested-by: Thomas Kopp <thomas.kopp@microchip.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 48 +++++++++++++-------
+ 1 file changed, 32 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+index ced8d9c81f8c..5e2f39de88f3 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+@@ -197,6 +197,37 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
+ return err;
+ }
+
++static int
++mcp251xfd_handle_rxif_ring_uinc(const struct mcp251xfd_priv *priv,
++ struct mcp251xfd_rx_ring *ring,
++ u8 len)
++{
++ int offset;
++ int err;
++
++ if (!len)
++ return 0;
++
++ /* Increment the RX FIFO tail pointer 'len' times in a
++ * single SPI message.
++ *
++ * Note:
++ * Calculate offset, so that the SPI transfer ends on
++ * the last message of the uinc_xfer array, which has
++ * "cs_change == 0", to properly deactivate the chip
++ * select.
++ */
++ offset = ARRAY_SIZE(ring->uinc_xfer) - len;
++ err = spi_sync_transfer(priv->spi,
++ ring->uinc_xfer + offset, len);
++ if (err)
++ return err;
++
++ ring->tail += len;
++
++ return 0;
++}
++
+ static int
+ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
+ struct mcp251xfd_rx_ring *ring)
+@@ -210,8 +241,6 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
+ return err;
+
+ while ((len = mcp251xfd_get_rx_linear_len(ring))) {
+- int offset;
+-
+ rx_tail = mcp251xfd_get_rx_tail(ring);
+
+ err = mcp251xfd_rx_obj_read(priv, ring, hw_rx_obj,
+@@ -227,22 +256,9 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
+ return err;
+ }
+
+- /* Increment the RX FIFO tail pointer 'len' times in a
+- * single SPI message.
+- *
+- * Note:
+- * Calculate offset, so that the SPI transfer ends on
+- * the last message of the uinc_xfer array, which has
+- * "cs_change == 0", to properly deactivate the chip
+- * select.
+- */
+- offset = ARRAY_SIZE(ring->uinc_xfer) - len;
+- err = spi_sync_transfer(priv->spi,
+- ring->uinc_xfer + offset, len);
++ err = mcp251xfd_handle_rxif_ring_uinc(priv, ring, len);
+ if (err)
+ return err;
+-
+- ring->tail += len;
+ }
+
+ return 0;
+--
+2.43.0
+
--- /dev/null
+From 834af3f405ae1b13ffec28c63ac50192f398b664 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Jan 2023 11:53:50 +0100
+Subject: can: mcp251xfd: rx: add workaround for erratum DS80000789E 6 of
+ mcp2518fd
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+[ Upstream commit 24436be590c6fbb05f6161b0dfba7d9da60214aa ]
+
+This patch tries to work around erratum DS80000789E 6 of the
+mcp2518fd, the other variants of the chip family (mcp2517fd and
+mcp251863) are probably also affected.
+
+In the bad case, the driver reads a too large head index. In the
+original code, the driver always trusted the read value, which caused
+old, already processed CAN frames or new, incompletely written CAN
+frames to be (re-)processed.
+
+To work around this issue, keep a per FIFO timestamp [1] of the last
+valid received CAN frame and compare against the timestamp of every
+received CAN frame. If an old CAN frame is detected, abort the
+iteration and mark the number of valid CAN frames as processed in the
+chip by incrementing the FIFO's tail index.
+
+Further tests showed that this workaround can recognize old CAN
+frames, but a small time window remains in which partially written CAN
+frames [2] are not recognized but then processed. These CAN frames
+have the correct data and time stamps, but the DLC has not yet been
+updated.
+
+[1] As the raw timestamp overflows every 107 seconds (at the usual
+ clock rate of 40 MHz) convert it to nanoseconds with the
+ timecounter framework and use this to detect stale CAN frames.
+
+Link: https://lore.kernel.org/all/BL3PR11MB64844C1C95CA3BDADAE4D8CCFBC99@BL3PR11MB6484.namprd11.prod.outlook.com [2]
+Reported-by: Stefan Althöfer <Stefan.Althoefer@janztec.com>
+Closes: https://lore.kernel.org/all/FR0P281MB1966273C216630B120ABB6E197E89@FR0P281MB1966.DEUP281.PROD.OUTLOOK.COM
+Tested-by: Stefan Althöfer <Stefan.Althoefer@janztec.com>
+Tested-by: Thomas Kopp <thomas.kopp@microchip.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/can/spi/mcp251xfd/mcp251xfd-ring.c | 1 +
+ drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 32 +++++++++++++++++--
+ drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 3 ++
+ 3 files changed, 33 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
+index 5ed0cd62f4f8..0fde8154a649 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
+@@ -196,6 +196,7 @@ mcp251xfd_ring_init_rx(struct mcp251xfd_priv *priv, u16 *base, u8 *fifo_nr)
+ int i, j;
+
+ mcp251xfd_for_each_rx_ring(priv, rx_ring, i) {
++ rx_ring->last_valid = timecounter_read(&priv->tc);
+ rx_ring->head = 0;
+ rx_ring->tail = 0;
+ rx_ring->base = *base;
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+index a79e6c661ecc..fe897f3e4c12 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+@@ -159,8 +159,6 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
+
+ if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR))
+ memcpy(cfd->data, hw_rx_obj->data, cfd->len);
+-
+- mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_rx_obj->ts);
+ }
+
+ static int
+@@ -171,8 +169,26 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv,
+ struct net_device_stats *stats = &priv->ndev->stats;
+ struct sk_buff *skb;
+ struct canfd_frame *cfd;
++ u64 timestamp;
+ int err;
+
++ /* According to mcp2518fd erratum DS80000789E 6. the FIFOCI
++ * bits of a FIFOSTA register, here the RX FIFO head index
++ * might be corrupted and we might process past the RX FIFO's
++ * head into old CAN frames.
++ *
++ * Compare the timestamp of currently processed CAN frame with
++ * last valid frame received. Abort with -EBADMSG if an old
++ * CAN frame is detected.
++ */
++ timestamp = timecounter_cyc2time(&priv->tc, hw_rx_obj->ts);
++ if (timestamp <= ring->last_valid) {
++ stats->rx_fifo_errors++;
++
++ return -EBADMSG;
++ }
++ ring->last_valid = timestamp;
++
+ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF)
+ skb = alloc_canfd_skb(priv->ndev, &cfd);
+ else
+@@ -183,6 +199,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv,
+ return 0;
+ }
+
++ mcp251xfd_skb_set_timestamp(skb, timestamp);
+ mcp251xfd_hw_rx_obj_to_skb(priv, hw_rx_obj, skb);
+ err = can_rx_offload_queue_timestamp(&priv->offload, skb, hw_rx_obj->ts);
+ if (err)
+@@ -265,7 +282,16 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
+ err = mcp251xfd_handle_rxif_one(priv, ring,
+ (void *)hw_rx_obj +
+ i * ring->obj_size);
+- if (err)
++
++ /* -EBADMSG means we're affected by mcp2518fd
++ * erratum DS80000789E 6., i.e. the timestamp
++ * in the RX object is older than the last
++ * valid received CAN frame. Don't process any
++ * further and mark processed frames as good.
++ */
++ if (err == -EBADMSG)
++ return mcp251xfd_handle_rxif_ring_uinc(priv, ring, i);
++ else if (err)
+ return err;
+ }
+
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+index 7713c9264fb5..c07300443c6a 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+@@ -549,6 +549,9 @@ struct mcp251xfd_rx_ring {
+ unsigned int head;
+ unsigned int tail;
+
++ /* timestamp of the last valid received CAN frame */
++ u64 last_valid;
++
+ u16 base;
+ u8 nr;
+ u8 fifo_nr;
+--
+2.43.0
+
--- /dev/null
+From f8c4df6217b34529d6b2e64b719f16d211ba0c91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Jan 2023 21:07:03 +0100
+Subject: can: mcp251xfd: rx: prepare to workaround broken RX FIFO head index
+ erratum
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+[ Upstream commit 85505e585637a737e4713c1386c30e37c325b82e ]
+
+This is a preparatory patch to work around erratum DS80000789E 6 of
+the mcp2518fd, the other variants of the chip family (mcp2517fd and
+mcp251863) are probably also affected.
+
+When handling the RX interrupt, the driver iterates over all pending
+FIFOs (which are implemented as ring buffers in hardware) and reads
+the FIFO header index from the RX FIFO STA register of the chip.
+
+In the bad case, the driver reads a too large head index. In the
+original code, the driver always trusted the read value, which caused
+old CAN frames that were already processed, or new, incompletely
+written CAN frames to be (re-)processed.
+
+Instead of reading and trusting the head index, read the head index
+and calculate the number of CAN frames that were supposedly received -
+replace mcp251xfd_rx_ring_update() with mcp251xfd_get_rx_len().
+
+The mcp251xfd_handle_rxif_ring() function reads the received CAN
+frames from the chip, iterates over them and pushes them into the
+network stack. Prepare that the iteration can be stopped if an old CAN
+frame is detected. The actual code to detect old or incomplete frames
+and abort will be added in the next patch.
+
+Link: https://lore.kernel.org/all/BL3PR11MB64844C1C95CA3BDADAE4D8CCFBC99@BL3PR11MB6484.namprd11.prod.outlook.com
+Reported-by: Stefan Althöfer <Stefan.Althoefer@janztec.com>
+Closes: https://lore.kernel.org/all/FR0P281MB1966273C216630B120ABB6E197E89@FR0P281MB1966.DEUP281.PROD.OUTLOOK.COM
+Tested-by: Stefan Althöfer <Stefan.Althoefer@janztec.com>
+Tested-by: Thomas Kopp <thomas.kopp@microchip.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/can/spi/mcp251xfd/mcp251xfd-ring.c | 2 +
+ drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 89 +++++++++++--------
+ drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 12 +--
+ 3 files changed, 56 insertions(+), 47 deletions(-)
+
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
+index 915d505a304f..5ed0cd62f4f8 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
+@@ -513,6 +513,8 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv)
+ }
+
+ rx_ring->obj_num = rx_obj_num;
++ rx_ring->obj_num_shift_to_u8 = BITS_PER_TYPE(rx_ring->obj_num_shift_to_u8) -
++ ilog2(rx_obj_num);
+ rx_ring->obj_size = rx_obj_size;
+ priv->rx[i] = rx_ring;
+ }
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+index 5e2f39de88f3..5d0fb1c454cd 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+@@ -2,7 +2,7 @@
+ //
+ // mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+ //
+-// Copyright (c) 2019, 2020, 2021 Pengutronix,
++// Copyright (c) 2019, 2020, 2021, 2023 Pengutronix,
+ // Marc Kleine-Budde <kernel@pengutronix.de>
+ //
+ // Based on:
+@@ -16,23 +16,14 @@
+
+ #include "mcp251xfd.h"
+
+-static inline int
+-mcp251xfd_rx_head_get_from_chip(const struct mcp251xfd_priv *priv,
+- const struct mcp251xfd_rx_ring *ring,
+- u8 *rx_head, bool *fifo_empty)
++static inline bool mcp251xfd_rx_fifo_sta_empty(const u32 fifo_sta)
+ {
+- u32 fifo_sta;
+- int err;
+-
+- err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr),
+- &fifo_sta);
+- if (err)
+- return err;
+-
+- *rx_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
+- *fifo_empty = !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF);
++ return !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF);
++}
+
+- return 0;
++static inline bool mcp251xfd_rx_fifo_sta_full(const u32 fifo_sta)
++{
++ return fifo_sta & MCP251XFD_REG_FIFOSTA_TFERFFIF;
+ }
+
+ static inline int
+@@ -80,29 +71,49 @@ mcp251xfd_check_rx_tail(const struct mcp251xfd_priv *priv,
+ }
+
+ static int
+-mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv,
+- struct mcp251xfd_rx_ring *ring)
++mcp251xfd_get_rx_len(const struct mcp251xfd_priv *priv,
++ const struct mcp251xfd_rx_ring *ring,
++ u8 *len_p)
+ {
+- u32 new_head;
+- u8 chip_rx_head;
+- bool fifo_empty;
++ const u8 shift = ring->obj_num_shift_to_u8;
++ u8 chip_head, tail, len;
++ u32 fifo_sta;
+ int err;
+
+- err = mcp251xfd_rx_head_get_from_chip(priv, ring, &chip_rx_head,
+- &fifo_empty);
+- if (err || fifo_empty)
++ err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr),
++ &fifo_sta);
++ if (err)
++ return err;
++
++ if (mcp251xfd_rx_fifo_sta_empty(fifo_sta)) {
++ *len_p = 0;
++ return 0;
++ }
++
++ if (mcp251xfd_rx_fifo_sta_full(fifo_sta)) {
++ *len_p = ring->obj_num;
++ return 0;
++ }
++
++ chip_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
++
++ err = mcp251xfd_check_rx_tail(priv, ring);
++ if (err)
+ return err;
++ tail = mcp251xfd_get_rx_tail(ring);
+
+- /* chip_rx_head, is the next RX-Object filled by the HW.
+- * The new RX head must be >= the old head.
++ /* First shift to full u8. The subtraction works on signed
++ * values, that keeps the difference steady around the u8
++ * overflow. The right shift acts on len, which is an u8.
+ */
+- new_head = round_down(ring->head, ring->obj_num) + chip_rx_head;
+- if (new_head <= ring->head)
+- new_head += ring->obj_num;
++ BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(chip_head));
++ BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(tail));
++ BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(len));
+
+- ring->head = new_head;
++ len = (chip_head << shift) - (tail << shift);
++ *len_p = len >> shift;
+
+- return mcp251xfd_check_rx_tail(priv, ring);
++ return 0;
+ }
+
+ static void
+@@ -208,6 +219,8 @@ mcp251xfd_handle_rxif_ring_uinc(const struct mcp251xfd_priv *priv,
+ if (!len)
+ return 0;
+
++ ring->head += len;
++
+ /* Increment the RX FIFO tail pointer 'len' times in a
+ * single SPI message.
+ *
+@@ -233,22 +246,22 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
+ struct mcp251xfd_rx_ring *ring)
+ {
+ struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj = ring->obj;
+- u8 rx_tail, len;
++ u8 rx_tail, len, l;
+ int err, i;
+
+- err = mcp251xfd_rx_ring_update(priv, ring);
++ err = mcp251xfd_get_rx_len(priv, ring, &len);
+ if (err)
+ return err;
+
+- while ((len = mcp251xfd_get_rx_linear_len(ring))) {
++ while ((l = mcp251xfd_get_rx_linear_len(ring, len))) {
+ rx_tail = mcp251xfd_get_rx_tail(ring);
+
+ err = mcp251xfd_rx_obj_read(priv, ring, hw_rx_obj,
+- rx_tail, len);
++ rx_tail, l);
+ if (err)
+ return err;
+
+- for (i = 0; i < len; i++) {
++ for (i = 0; i < l; i++) {
+ err = mcp251xfd_handle_rxif_one(priv, ring,
+ (void *)hw_rx_obj +
+ i * ring->obj_size);
+@@ -256,9 +269,11 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
+ return err;
+ }
+
+- err = mcp251xfd_handle_rxif_ring_uinc(priv, ring, len);
++ err = mcp251xfd_handle_rxif_ring_uinc(priv, ring, l);
+ if (err)
+ return err;
++
++ len -= l;
+ }
+
+ return 0;
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+index 78d12dda08a0..ca5f4e670ec1 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+@@ -553,6 +553,7 @@ struct mcp251xfd_rx_ring {
+ u8 nr;
+ u8 fifo_nr;
+ u8 obj_num;
++ u8 obj_num_shift_to_u8;
+ u8 obj_size;
+
+ union mcp251xfd_write_reg_buf irq_enable_buf;
+@@ -889,18 +890,9 @@ static inline u8 mcp251xfd_get_rx_tail(const struct mcp251xfd_rx_ring *ring)
+ return ring->tail & (ring->obj_num - 1);
+ }
+
+-static inline u8 mcp251xfd_get_rx_len(const struct mcp251xfd_rx_ring *ring)
+-{
+- return ring->head - ring->tail;
+-}
+-
+ static inline u8
+-mcp251xfd_get_rx_linear_len(const struct mcp251xfd_rx_ring *ring)
++mcp251xfd_get_rx_linear_len(const struct mcp251xfd_rx_ring *ring, u8 len)
+ {
+- u8 len;
+-
+- len = mcp251xfd_get_rx_len(ring);
+-
+ return min_t(u8, len, ring->obj_num - mcp251xfd_get_rx_tail(ring));
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 60573b6057e1b4cd3e1fdcf435b1d32aacc4fb70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Feb 2024 14:00:27 -0500
+Subject: drm/amd: Add gfx12 swizzle mode defs
+
+From: Aurabindo Pillai <aurabindo.pillai@amd.com>
+
+[ Upstream commit 7ceb94e87bffff7c12b61eb29749e1d8ac976896 ]
+
+Add GFX12 swizzle mode definitions for use with DCN401
+
+Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/uapi/drm/drm_fourcc.h | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
+index 868d6909b718..52ce13488eaf 100644
+--- a/include/uapi/drm/drm_fourcc.h
++++ b/include/uapi/drm/drm_fourcc.h
+@@ -1390,6 +1390,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
+ #define AMD_FMT_MOD_TILE_VER_GFX10 2
+ #define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3
+ #define AMD_FMT_MOD_TILE_VER_GFX11 4
++#define AMD_FMT_MOD_TILE_VER_GFX12 5
+
+ /*
+ * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical
+@@ -1400,6 +1401,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
+ /*
+ * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has
+ * GFX9 as canonical version.
++ *
++ * 64K_D_2D on GFX12 is identical to 64K_D on GFX11.
+ */
+ #define AMD_FMT_MOD_TILE_GFX9_64K_D 10
+ #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25
+@@ -1407,6 +1410,19 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
+ #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27
+ #define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31
+
++/* Gfx12 swizzle modes:
++ * 0 - LINEAR
++ * 1 - 256B_2D - 2D block dimensions
++ * 2 - 4KB_2D
++ * 3 - 64KB_2D
++ * 4 - 256KB_2D
++ * 5 - 4KB_3D - 3D block dimensions
++ * 6 - 64KB_3D
++ * 7 - 256KB_3D
++ */
++#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3
++#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4
++
+ #define AMD_FMT_MOD_DCC_BLOCK_64B 0
+ #define AMD_FMT_MOD_DCC_BLOCK_128B 1
+ #define AMD_FMT_MOD_DCC_BLOCK_256B 2
+--
+2.43.0
+
--- /dev/null
+From fb5f3897896bc68352f87422d9bd0ff4c299f6e1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 1 Jun 2024 19:53:01 -0400
+Subject: drm/amdgpu: handle gfx12 in amdgpu_display_verify_sizes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marek Olšák <marek.olsak@amd.com>
+
+[ Upstream commit 8dd1426e2c80e32ac1995007330c8f95ffa28ebb ]
+
+It verified GFX9-11 swizzle modes on GFX12, which has undefined behavior.
+
+Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 27 ++++++++++++++++++++-
+ include/uapi/drm/drm_fourcc.h | 2 ++
+ 2 files changed, 28 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+index ac773b191071..cd0bccc95205 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+@@ -997,6 +997,30 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
+ block_width = 256 / format_info->cpp[i];
+ block_height = 1;
+ block_size_log2 = 8;
++ } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) {
++ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
++
++ switch (swizzle) {
++ case AMD_FMT_MOD_TILE_GFX12_256B_2D:
++ block_size_log2 = 8;
++ break;
++ case AMD_FMT_MOD_TILE_GFX12_4K_2D:
++ block_size_log2 = 12;
++ break;
++ case AMD_FMT_MOD_TILE_GFX12_64K_2D:
++ block_size_log2 = 16;
++ break;
++ case AMD_FMT_MOD_TILE_GFX12_256K_2D:
++ block_size_log2 = 18;
++ break;
++ default:
++ drm_dbg_kms(rfb->base.dev,
++ "Gfx12 swizzle mode with unknown block size: %d\n", swizzle);
++ return -EINVAL;
++ }
++
++ get_block_dimensions(block_size_log2, format_info->cpp[i],
++ &block_width, &block_height);
+ } else {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+@@ -1032,7 +1056,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
+ return ret;
+ }
+
+- if (AMD_FMT_MOD_GET(DCC, modifier)) {
++ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 &&
++ AMD_FMT_MOD_GET(DCC, modifier)) {
+ if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
+ block_size_log2 = get_dcc_block_size(modifier, false, false);
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
+index 52ce13488eaf..6245928d76ee 100644
+--- a/include/uapi/drm/drm_fourcc.h
++++ b/include/uapi/drm/drm_fourcc.h
+@@ -1420,6 +1420,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
+ * 6 - 64KB_3D
+ * 7 - 256KB_3D
+ */
++#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1
++#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2
+ #define AMD_FMT_MOD_TILE_GFX12_64K_2D 3
+ #define AMD_FMT_MOD_TILE_GFX12_256K_2D 4
+
+--
+2.43.0
+
--- /dev/null
+From e78c72487f97dcca8e432e7434986f76df9babaa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Aug 2024 18:58:38 +0300
+Subject: drm/i915/fence: Mark debug_fence_free() with __maybe_unused
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit f99999536128b14b5d765a9982763b5134efdd79 ]
+
+When debug_fence_free() is unused
+(CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS=n), it prevents kernel builds
+with clang, `make W=1` and CONFIG_WERROR=y:
+
+.../i915_sw_fence.c:118:20: error: unused function 'debug_fence_free' [-Werror,-Wunused-function]
+ 118 | static inline void debug_fence_free(struct i915_sw_fence *fence)
+ | ^~~~~~~~~~~~~~~~
+
+Fix this by marking debug_fence_free() with __maybe_unused.
+
+See also commit 6863f5643dd7 ("kbuild: allow Clang to find unused static
+inline functions for W=1 build").
+
+Fixes: fc1584059d6c ("drm/i915: Integrate i915_sw_fence with debugobjects")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240829155950.1141978-3-andriy.shevchenko@linux.intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+(cherry picked from commit 8be4dce5ea6f2368cc25edc71989c4690fa66964)
+Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/i915_sw_fence.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
+index c2ac1900d73e..e664f8e461e6 100644
+--- a/drivers/gpu/drm/i915/i915_sw_fence.c
++++ b/drivers/gpu/drm/i915/i915_sw_fence.c
+@@ -77,7 +77,7 @@ static inline void debug_fence_destroy(struct i915_sw_fence *fence)
+ debug_object_destroy(fence, &i915_sw_fence_debug_descr);
+ }
+
+-static inline void debug_fence_free(struct i915_sw_fence *fence)
++static inline __maybe_unused void debug_fence_free(struct i915_sw_fence *fence)
+ {
+ debug_object_free(fence, &i915_sw_fence_debug_descr);
+ smp_wmb(); /* flush the change in state before reallocation */
+@@ -115,7 +115,7 @@ static inline void debug_fence_destroy(struct i915_sw_fence *fence)
+ {
+ }
+
+-static inline void debug_fence_free(struct i915_sw_fence *fence)
++static inline __maybe_unused void debug_fence_free(struct i915_sw_fence *fence)
+ {
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 7412089d7059c68fd40a1a028a628e556802ca4d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Aug 2024 18:58:37 +0300
+Subject: drm/i915/fence: Mark debug_fence_init_onstack() with __maybe_unused
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit fcd9e8afd546f6ced378d078345a89bf346d065e ]
+
+When debug_fence_init_onstack() is unused (CONFIG_DRM_I915_SELFTEST=n),
+it prevents kernel builds with clang, `make W=1` and CONFIG_WERROR=y:
+
+.../i915_sw_fence.c:97:20: error: unused function 'debug_fence_init_onstack' [-Werror,-Wunused-function]
+ 97 | static inline void debug_fence_init_onstack(struct i915_sw_fence *fence)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+
+Fix this by marking debug_fence_init_onstack() with __maybe_unused.
+
+See also commit 6863f5643dd7 ("kbuild: allow Clang to find unused static
+inline functions for W=1 build").
+
+Fixes: 214707fc2ce0 ("drm/i915/selftests: Wrap a timer into a i915_sw_fence")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240829155950.1141978-2-andriy.shevchenko@linux.intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+(cherry picked from commit 5bf472058ffb43baf6a4cdfe1d7f58c4c194c688)
+Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/i915_sw_fence.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
+index 6fc0d1b89690..c2ac1900d73e 100644
+--- a/drivers/gpu/drm/i915/i915_sw_fence.c
++++ b/drivers/gpu/drm/i915/i915_sw_fence.c
+@@ -51,7 +51,7 @@ static inline void debug_fence_init(struct i915_sw_fence *fence)
+ debug_object_init(fence, &i915_sw_fence_debug_descr);
+ }
+
+-static inline void debug_fence_init_onstack(struct i915_sw_fence *fence)
++static inline __maybe_unused void debug_fence_init_onstack(struct i915_sw_fence *fence)
+ {
+ debug_object_init_on_stack(fence, &i915_sw_fence_debug_descr);
+ }
+@@ -94,7 +94,7 @@ static inline void debug_fence_init(struct i915_sw_fence *fence)
+ {
+ }
+
+-static inline void debug_fence_init_onstack(struct i915_sw_fence *fence)
++static inline __maybe_unused void debug_fence_init_onstack(struct i915_sw_fence *fence)
+ {
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 6904fa62b1c53e0a4096d346969abccb68f54bf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Sep 2024 11:58:48 +0000
+Subject: gpio: modepin: Enable module autoloading
+
+From: Liao Chen <liaochen4@huawei.com>
+
+[ Upstream commit a5135526426df5319d5f4bcd15ae57c45a97714b ]
+
+Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based
+on the alias from of_device_id table.
+
+Fixes: 7687a5b0ee93 ("gpio: modepin: Add driver support for modepin GPIO controller")
+Signed-off-by: Liao Chen <liaochen4@huawei.com>
+Reviewed-by: Michal Simek <michal.simek@amd.com>
+Link: https://lore.kernel.org/r/20240902115848.904227-1-liaochen4@huawei.com
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-zynqmp-modepin.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpio/gpio-zynqmp-modepin.c b/drivers/gpio/gpio-zynqmp-modepin.c
+index a0d69387c153..2f3c9ebfa78d 100644
+--- a/drivers/gpio/gpio-zynqmp-modepin.c
++++ b/drivers/gpio/gpio-zynqmp-modepin.c
+@@ -146,6 +146,7 @@ static const struct of_device_id modepin_platform_id[] = {
+ { .compatible = "xlnx,zynqmp-gpio-modepin", },
+ { }
+ };
++MODULE_DEVICE_TABLE(of, modepin_platform_id);
+
+ static struct platform_driver modepin_platform_driver = {
+ .driver = {
+--
+2.43.0
+
--- /dev/null
+From 59a9ac06aa3dd0514661ee40af1028b86c9eabba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Aug 2024 17:08:32 +0200
+Subject: gpio: rockchip: fix OF node leak in probe()
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+[ Upstream commit adad2e460e505a556f5ea6f0dc16fe95e62d5d76 ]
+
+Driver code is leaking OF node reference from of_get_parent() in
+probe().
+
+Fixes: 936ee2675eee ("gpio/rockchip: add driver for rockchip gpio")
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Reviewed-by: Heiko Stuebner <heiko@sntech.de>
+Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
+Link: https://lore.kernel.org/r/20240826150832.65657-1-krzysztof.kozlowski@linaro.org
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-rockchip.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
+index 200e43a6f4b4..3c1e303aaca8 100644
+--- a/drivers/gpio/gpio-rockchip.c
++++ b/drivers/gpio/gpio-rockchip.c
+@@ -713,6 +713,7 @@ static int rockchip_gpio_probe(struct platform_device *pdev)
+ return -ENODEV;
+
+ pctldev = of_pinctrl_get(pctlnp);
++ of_node_put(pctlnp);
+ if (!pctldev)
+ return -EPROBE_DEFER;
+
+--
+2.43.0
+
--- /dev/null
+From a7280d277baaf6320c67bab84fe7ffddbf47462c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Aug 2024 16:28:26 +0200
+Subject: nvmet-tcp: fix kernel crash if commands allocation fails
+
+From: Maurizio Lombardi <mlombard@redhat.com>
+
+[ Upstream commit 5572a55a6f830ee3f3a994b6b962a5c327d28cb3 ]
+
+If the commands allocation fails in nvmet_tcp_alloc_cmds()
+the kernel crashes in nvmet_tcp_release_queue_work() because of
+a NULL pointer dereference.
+
+ nvmet: failed to install queue 0 cntlid 1 ret 6
+ Unable to handle kernel NULL pointer dereference at
+ virtual address 0000000000000008
+
+Fix the bug by setting queue->nr_cmds to zero in case
+nvmet_tcp_alloc_cmd() fails.
+
+Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver")
+Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/tcp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
+index 76b9eb438268..81574500a57c 100644
+--- a/drivers/nvme/target/tcp.c
++++ b/drivers/nvme/target/tcp.c
+@@ -1816,8 +1816,10 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
+ }
+
+ queue->nr_cmds = sq->size * 2;
+- if (nvmet_tcp_alloc_cmds(queue))
++ if (nvmet_tcp_alloc_cmds(queue)) {
++ queue->nr_cmds = 0;
+ return NVME_SC_INTERNAL;
++ }
+ return 0;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 9ee77a9c6805fc54b98fdb3a8b3152fe6b8a6257 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Aug 2024 14:42:38 +0200
+Subject: powerpc/64e: Define mmu_pte_psize static
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit d92b5cc29c792f1d3f0aaa3b29dddfe816c03e88 ]
+
+mmu_pte_psize is only used in the tlb_64e.c, define it static.
+
+Fixes: 25d21ad6e799 ("powerpc: Add TLB management code for 64-bit Book3E")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202408011256.1O99IB0s-lkp@intel.com/
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/beb30d280eaa5d857c38a0834b147dffd6b28aa9.1724157750.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/nohash/tlb_64e.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c
+index 1dcda261554c..b6af3ec4d001 100644
+--- a/arch/powerpc/mm/nohash/tlb_64e.c
++++ b/arch/powerpc/mm/nohash/tlb_64e.c
+@@ -33,7 +33,7 @@
+ * though this will probably be made common with other nohash
+ * implementations at some point
+ */
+-int mmu_pte_psize; /* Page size used for PTE pages */
++static int mmu_pte_psize; /* Page size used for PTE pages */
+ int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
+ int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
+ unsigned long linear_map_top; /* Top of linear mapping */
+--
+2.43.0
+
--- /dev/null
+From c94cb293819d6be7a6ef37979f2e8b431e88e30e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 15:51:13 +0200
+Subject: powerpc/64e: remove unused IBM HTW code
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+[ Upstream commit 88715b6e5d529f4ef3830ad2a893e4624c6af0b8 ]
+
+Patch series "Reimplement huge pages without hugepd on powerpc (8xx, e500,
+book3s/64)", v7.
+
+Unlike most architectures, powerpc 8xx HW requires a two-level pagetable
+topology for all page sizes. So a leaf PMD-contig approach is not
+feasible as such.
+
+Possible sizes on 8xx are 4k, 16k, 512k and 8M.
+
+First level (PGD/PMD) covers 4M per entry. For 8M pages, two PMD entries
+must point to a single entry level-2 page table. Until now that was done
+using hugepd. This series changes it to use standard page tables where
+the entry is replicated 1024 times on each of the two pagetables refered
+by the two associated PMD entries for that 8M page.
+
+For e500 and book3s/64 there are less constraints because it is not tied
+to the HW assisted tablewalk like on 8xx, so it is easier to use leaf PMDs
+(and PUDs).
+
+On e500 the supported page sizes are 4M, 16M, 64M, 256M and 1G. All at
+PMD level on e500/32 (mpc85xx) and mix of PMD and PUD for e500/64. We
+encode page size with 4 available bits in PTE entries. On e300/32 PGD
+entries size is increases to 64 bits in order to allow leaf-PMD entries
+because PTE are 64 bits on e500.
+
+On book3s/64 only the hash-4k mode is concerned. It supports 16M pages as
+cont-PMD and 16G pages as cont-PUD. In other modes (radix-4k, radix-6k
+and hash-64k) the sizes match with PMD and PUD sizes so that's just leaf
+entries. The hash processing make things a bit more complex. To ease
+things, __hash_page_huge() is modified to bail out when DIRTY or ACCESSED
+bits are missing, leaving it to mm core to fix it.
+
+This patch (of 23):
+
+The nohash HTW_IBM (Hardware Table Walk) code is unused since support for
+A2 was removed in commit fb5a515704d7 ("powerpc: Remove platforms/ wsp and
+associated pieces") (2014).
+
+The remaining supported CPUs use either no HTW (data_tlb_miss_bolted), or
+the e6500 HTW (data_tlb_miss_e6500).
+
+Link: https://lkml.kernel.org/r/cover.1719928057.git.christophe.leroy@csgroup.eu
+Link: https://lkml.kernel.org/r/820dd1385ecc931f07b0d7a0fa827b1613917ab6.1719928057.git.christophe.leroy@csgroup.eu
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Nicholas Piggin <npiggin@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: d92b5cc29c79 ("powerpc/64e: Define mmu_pte_psize static")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/nohash/mmu-e500.h | 3 +-
+ arch/powerpc/mm/nohash/tlb.c | 57 +-----
+ arch/powerpc/mm/nohash/tlb_low_64e.S | 195 ---------------------
+ 3 files changed, 2 insertions(+), 253 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/nohash/mmu-e500.h b/arch/powerpc/include/asm/nohash/mmu-e500.h
+index e43a418d3ccd..9b5ba73d33d6 100644
+--- a/arch/powerpc/include/asm/nohash/mmu-e500.h
++++ b/arch/powerpc/include/asm/nohash/mmu-e500.h
+@@ -303,8 +303,7 @@ extern unsigned long linear_map_top;
+ extern int book3e_htw_mode;
+
+ #define PPC_HTW_NONE 0
+-#define PPC_HTW_IBM 1
+-#define PPC_HTW_E6500 2
++#define PPC_HTW_E6500 1
+
+ /*
+ * 64-bit booke platforms don't load the tlb in the tlb miss handler code.
+diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
+index 2c15c86c7015..19df1e13fe0e 100644
+--- a/arch/powerpc/mm/nohash/tlb.c
++++ b/arch/powerpc/mm/nohash/tlb.c
+@@ -403,9 +403,8 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+ static void __init setup_page_sizes(void)
+ {
+ unsigned int tlb0cfg;
+- unsigned int tlb0ps;
+ unsigned int eptcfg;
+- int i, psize;
++ int psize;
+
+ #ifdef CONFIG_PPC_E500
+ unsigned int mmucfg = mfspr(SPRN_MMUCFG);
+@@ -474,50 +473,6 @@ static void __init setup_page_sizes(void)
+ goto out;
+ }
+ #endif
+-
+- tlb0cfg = mfspr(SPRN_TLB0CFG);
+- tlb0ps = mfspr(SPRN_TLB0PS);
+- eptcfg = mfspr(SPRN_EPTCFG);
+-
+- /* Look for supported direct sizes */
+- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+- struct mmu_psize_def *def = &mmu_psize_defs[psize];
+-
+- if (tlb0ps & (1U << (def->shift - 10)))
+- def->flags |= MMU_PAGE_SIZE_DIRECT;
+- }
+-
+- /* Indirect page sizes supported ? */
+- if ((tlb0cfg & TLBnCFG_IND) == 0 ||
+- (tlb0cfg & TLBnCFG_PT) == 0)
+- goto out;
+-
+- book3e_htw_mode = PPC_HTW_IBM;
+-
+- /* Now, we only deal with one IND page size for each
+- * direct size. Hopefully all implementations today are
+- * unambiguous, but we might want to be careful in the
+- * future.
+- */
+- for (i = 0; i < 3; i++) {
+- unsigned int ps, sps;
+-
+- sps = eptcfg & 0x1f;
+- eptcfg >>= 5;
+- ps = eptcfg & 0x1f;
+- eptcfg >>= 5;
+- if (!ps || !sps)
+- continue;
+- for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+- struct mmu_psize_def *def = &mmu_psize_defs[psize];
+-
+- if (ps == (def->shift - 10))
+- def->flags |= MMU_PAGE_SIZE_INDIRECT;
+- if (sps == (def->shift - 10))
+- def->ind = ps + 10;
+- }
+- }
+-
+ out:
+ /* Cleanup array and print summary */
+ pr_info("MMU: Supported page sizes\n");
+@@ -546,10 +501,6 @@ static void __init setup_mmu_htw(void)
+ */
+
+ switch (book3e_htw_mode) {
+- case PPC_HTW_IBM:
+- patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
+- patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
+- break;
+ #ifdef CONFIG_PPC_E500
+ case PPC_HTW_E6500:
+ extlb_level_exc = EX_TLB_SIZE;
+@@ -580,12 +531,6 @@ static void early_init_this_mmu(void)
+ mmu_pte_psize = MMU_PAGE_2M;
+ break;
+
+- case PPC_HTW_IBM:
+- mas4 |= MAS4_INDD;
+- mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
+- mmu_pte_psize = MMU_PAGE_1M;
+- break;
+-
+ case PPC_HTW_NONE:
+ mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
+ mmu_pte_psize = mmu_virtual_psize;
+diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
+index 76cf456d7976..d831a111eaba 100644
+--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
++++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
+@@ -893,201 +893,6 @@ virt_page_table_tlb_miss_whacko_fault:
+ TLB_MISS_EPILOG_ERROR
+ b exc_data_storage_book3e
+
+-
+-/**************************************************************
+- * *
+- * TLB miss handling for Book3E with hw page table support *
+- * *
+- **************************************************************/
+-
+-
+-/* Data TLB miss */
+- START_EXCEPTION(data_tlb_miss_htw)
+- TLB_MISS_PROLOG
+-
+- /* Now we handle the fault proper. We only save DEAR in normal
+- * fault case since that's the only interesting values here.
+- * We could probably also optimize by not saving SRR0/1 in the
+- * linear mapping case but I'll leave that for later
+- */
+- mfspr r14,SPRN_ESR
+- mfspr r16,SPRN_DEAR /* get faulting address */
+- srdi r11,r16,44 /* get region */
+- xoris r11,r11,0xc
+- cmpldi cr0,r11,0 /* linear mapping ? */
+- beq tlb_load_linear /* yes -> go to linear map load */
+- cmpldi cr1,r11,1 /* vmalloc mapping ? */
+-
+- /* We do the user/kernel test for the PID here along with the RW test
+- */
+- srdi. r11,r16,60 /* Check for user region */
+- ld r15,PACAPGD(r13) /* Load user pgdir */
+- beq htw_tlb_miss
+-
+- /* XXX replace the RMW cycles with immediate loads + writes */
+-1: mfspr r10,SPRN_MAS1
+- rlwinm r10,r10,0,16,1 /* Clear TID */
+- mtspr SPRN_MAS1,r10
+- ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
+- beq+ cr1,htw_tlb_miss
+-
+- /* We got a crappy address, just fault with whatever DEAR and ESR
+- * are here
+- */
+- TLB_MISS_EPILOG_ERROR
+- b exc_data_storage_book3e
+-
+-/* Instruction TLB miss */
+- START_EXCEPTION(instruction_tlb_miss_htw)
+- TLB_MISS_PROLOG
+-
+- /* If we take a recursive fault, the second level handler may need
+- * to know whether we are handling a data or instruction fault in
+- * order to get to the right store fault handler. We provide that
+- * info by keeping a crazy value for ESR in r14
+- */
+- li r14,-1 /* store to exception frame is done later */
+-
+- /* Now we handle the fault proper. We only save DEAR in the non
+- * linear mapping case since we know the linear mapping case will
+- * not re-enter. We could indeed optimize and also not save SRR0/1
+- * in the linear mapping case but I'll leave that for later
+- *
+- * Faulting address is SRR0 which is already in r16
+- */
+- srdi r11,r16,44 /* get region */
+- xoris r11,r11,0xc
+- cmpldi cr0,r11,0 /* linear mapping ? */
+- beq tlb_load_linear /* yes -> go to linear map load */
+- cmpldi cr1,r11,1 /* vmalloc mapping ? */
+-
+- /* We do the user/kernel test for the PID here along with the RW test
+- */
+- srdi. r11,r16,60 /* Check for user region */
+- ld r15,PACAPGD(r13) /* Load user pgdir */
+- beq htw_tlb_miss
+-
+- /* XXX replace the RMW cycles with immediate loads + writes */
+-1: mfspr r10,SPRN_MAS1
+- rlwinm r10,r10,0,16,1 /* Clear TID */
+- mtspr SPRN_MAS1,r10
+- ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
+- beq+ htw_tlb_miss
+-
+- /* We got a crappy address, just fault */
+- TLB_MISS_EPILOG_ERROR
+- b exc_instruction_storage_book3e
+-
+-
+-/*
+- * This is the guts of the second-level TLB miss handler for direct
+- * misses. We are entered with:
+- *
+- * r16 = virtual page table faulting address
+- * r15 = PGD pointer
+- * r14 = ESR
+- * r13 = PACA
+- * r12 = TLB exception frame in PACA
+- * r11 = crap (free to use)
+- * r10 = crap (free to use)
+- *
+- * It can be re-entered by the linear mapping miss handler. However, to
+- * avoid too much complication, it will save/restore things for us
+- */
+-htw_tlb_miss:
+-#ifdef CONFIG_PPC_KUAP
+- mfspr r10,SPRN_MAS1
+- rlwinm. r10,r10,0,0x3fff0000
+- beq- htw_tlb_miss_fault /* KUAP fault */
+-#endif
+- /* Search if we already have a TLB entry for that virtual address, and
+- * if we do, bail out.
+- *
+- * MAS1:IND should be already set based on MAS4
+- */
+- PPC_TLBSRX_DOT(0,R16)
+- beq htw_tlb_miss_done
+-
+- /* Now, we need to walk the page tables. First check if we are in
+- * range.
+- */
+- rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+- bne- htw_tlb_miss_fault
+-
+- /* Get the PGD pointer */
+- cmpldi cr0,r15,0
+- beq- htw_tlb_miss_fault
+-
+- /* Get to PGD entry */
+- rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
+- clrrdi r10,r11,3
+- ldx r15,r10,r15
+- cmpdi cr0,r15,0
+- bge htw_tlb_miss_fault
+-
+- /* Get to PUD entry */
+- rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
+- clrrdi r10,r11,3
+- ldx r15,r10,r15
+- cmpdi cr0,r15,0
+- bge htw_tlb_miss_fault
+-
+- /* Get to PMD entry */
+- rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
+- clrrdi r10,r11,3
+- ldx r15,r10,r15
+- cmpdi cr0,r15,0
+- bge htw_tlb_miss_fault
+-
+- /* Ok, we're all right, we can now create an indirect entry for
+- * a 1M or 256M page.
+- *
+- * The last trick is now that because we use "half" pages for
+- * the HTW (1M IND is 2K and 256M IND is 32K) we need to account
+- * for an added LSB bit to the RPN. For 64K pages, there is no
+- * problem as we already use 32K arrays (half PTE pages), but for
+- * 4K page we need to extract a bit from the virtual address and
+- * insert it into the "PA52" bit of the RPN.
+- */
+- rlwimi r15,r16,32-9,20,20
+- /* Now we build the MAS:
+- *
+- * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
+- * MAS 1 : Almost fully setup
+- * - PID already updated by caller if necessary
+- * - TSIZE for now is base ind page size always
+- * MAS 2 : Use defaults
+- * MAS 3+7 : Needs to be done
+- */
+- ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
+-
+- srdi r16,r10,32
+- mtspr SPRN_MAS3,r10
+- mtspr SPRN_MAS7,r16
+-
+- tlbwe
+-
+-htw_tlb_miss_done:
+- /* We don't bother with restoring DEAR or ESR since we know we are
+- * level 0 and just going back to userland. They are only needed
+- * if you are going to take an access fault
+- */
+- TLB_MISS_EPILOG_SUCCESS
+- rfi
+-
+-htw_tlb_miss_fault:
+- /* We need to check if it was an instruction miss. We know this
+- * though because r14 would contain -1
+- */
+- cmpdi cr0,r14,-1
+- beq 1f
+- mtspr SPRN_DEAR,r16
+- mtspr SPRN_ESR,r14
+- TLB_MISS_EPILOG_ERROR
+- b exc_data_storage_book3e
+-1: TLB_MISS_EPILOG_ERROR
+- b exc_instruction_storage_book3e
+-
+ /*
+ * This is the guts of "any" level TLB miss handler for kernel linear
+ * mapping misses. We are entered with:
+--
+2.43.0
+
--- /dev/null
+From c77513154e13befe92af4505c7c0ce7d50a623c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 15:51:14 +0200
+Subject: powerpc/64e: split out nohash Book3E 64-bit code
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+[ Upstream commit a898530eea3d0ba08c17a60865995a3bb468d1bc ]
+
+A reasonable chunk of nohash/tlb.c is 64-bit only code, split it out into
+a separate file.
+
+Link: https://lkml.kernel.org/r/cb2b118f9d8a86f82d01bfb9ad309d1d304480a1.1719928057.git.christophe.leroy@csgroup.eu
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Nicholas Piggin <npiggin@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: d92b5cc29c79 ("powerpc/64e: Define mmu_pte_psize static")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/nohash/Makefile | 2 +-
+ arch/powerpc/mm/nohash/tlb.c | 343 +----------------------------
+ arch/powerpc/mm/nohash/tlb_64e.c | 361 +++++++++++++++++++++++++++++++
+ 3 files changed, 363 insertions(+), 343 deletions(-)
+ create mode 100644 arch/powerpc/mm/nohash/tlb_64e.c
+
+diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
+index f3894e79d5f7..24b445a5fcac 100644
+--- a/arch/powerpc/mm/nohash/Makefile
++++ b/arch/powerpc/mm/nohash/Makefile
+@@ -3,7 +3,7 @@
+ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+
+ obj-y += mmu_context.o tlb.o tlb_low.o kup.o
+-obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
++obj-$(CONFIG_PPC_BOOK3E_64) += tlb_64e.o tlb_low_64e.o book3e_pgtable.o
+ obj-$(CONFIG_40x) += 40x.o
+ obj-$(CONFIG_44x) += 44x.o
+ obj-$(CONFIG_PPC_8xx) += 8xx.o
+diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
+index 19df1e13fe0e..c0b643d30fcc 100644
+--- a/arch/powerpc/mm/nohash/tlb.c
++++ b/arch/powerpc/mm/nohash/tlb.c
+@@ -110,28 +110,6 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+ };
+ #endif
+
+-/* The variables below are currently only used on 64-bit Book3E
+- * though this will probably be made common with other nohash
+- * implementations at some point
+- */
+-#ifdef CONFIG_PPC64
+-
+-int mmu_pte_psize; /* Page size used for PTE pages */
+-int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
+-int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
+-unsigned long linear_map_top; /* Top of linear mapping */
+-
+-
+-/*
+- * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
+- * exceptions. This is used for bolted and e6500 TLB miss handlers which
+- * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
+- * this is set to zero.
+- */
+-int extlb_level_exc;
+-
+-#endif /* CONFIG_PPC64 */
+-
+ #ifdef CONFIG_PPC_E500
+ /* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
+ DEFINE_PER_CPU(int, next_tlbcam_idx);
+@@ -361,326 +339,7 @@ void tlb_flush(struct mmu_gather *tlb)
+ flush_tlb_mm(tlb->mm);
+ }
+
+-/*
+- * Below are functions specific to the 64-bit variant of Book3E though that
+- * may change in the future
+- */
+-
+-#ifdef CONFIG_PPC64
+-
+-/*
+- * Handling of virtual linear page tables or indirect TLB entries
+- * flushing when PTE pages are freed
+- */
+-void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+-{
+- int tsize = mmu_psize_defs[mmu_pte_psize].enc;
+-
+- if (book3e_htw_mode != PPC_HTW_NONE) {
+- unsigned long start = address & PMD_MASK;
+- unsigned long end = address + PMD_SIZE;
+- unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
+-
+- /* This isn't the most optimal, ideally we would factor out the
+- * while preempt & CPU mask mucking around, or even the IPI but
+- * it will do for now
+- */
+- while (start < end) {
+- __flush_tlb_page(tlb->mm, start, tsize, 1);
+- start += size;
+- }
+- } else {
+- unsigned long rmask = 0xf000000000000000ul;
+- unsigned long rid = (address & rmask) | 0x1000000000000000ul;
+- unsigned long vpte = address & ~rmask;
+-
+- vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
+- vpte |= rid;
+- __flush_tlb_page(tlb->mm, vpte, tsize, 0);
+- }
+-}
+-
+-static void __init setup_page_sizes(void)
+-{
+- unsigned int tlb0cfg;
+- unsigned int eptcfg;
+- int psize;
+-
+-#ifdef CONFIG_PPC_E500
+- unsigned int mmucfg = mfspr(SPRN_MMUCFG);
+- int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
+-
+- if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+- unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
+- unsigned int min_pg, max_pg;
+-
+- min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
+- max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
+-
+- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+- struct mmu_psize_def *def;
+- unsigned int shift;
+-
+- def = &mmu_psize_defs[psize];
+- shift = def->shift;
+-
+- if (shift == 0 || shift & 1)
+- continue;
+-
+- /* adjust to be in terms of 4^shift Kb */
+- shift = (shift - 10) >> 1;
+-
+- if ((shift >= min_pg) && (shift <= max_pg))
+- def->flags |= MMU_PAGE_SIZE_DIRECT;
+- }
+-
+- goto out;
+- }
+-
+- if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
+- u32 tlb1cfg, tlb1ps;
+-
+- tlb0cfg = mfspr(SPRN_TLB0CFG);
+- tlb1cfg = mfspr(SPRN_TLB1CFG);
+- tlb1ps = mfspr(SPRN_TLB1PS);
+- eptcfg = mfspr(SPRN_EPTCFG);
+-
+- if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
+- book3e_htw_mode = PPC_HTW_E6500;
+-
+- /*
+- * We expect 4K subpage size and unrestricted indirect size.
+- * The lack of a restriction on indirect size is a Freescale
+- * extension, indicated by PSn = 0 but SPSn != 0.
+- */
+- if (eptcfg != 2)
+- book3e_htw_mode = PPC_HTW_NONE;
+-
+- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+- struct mmu_psize_def *def = &mmu_psize_defs[psize];
+-
+- if (!def->shift)
+- continue;
+-
+- if (tlb1ps & (1U << (def->shift - 10))) {
+- def->flags |= MMU_PAGE_SIZE_DIRECT;
+-
+- if (book3e_htw_mode && psize == MMU_PAGE_2M)
+- def->flags |= MMU_PAGE_SIZE_INDIRECT;
+- }
+- }
+-
+- goto out;
+- }
+-#endif
+-out:
+- /* Cleanup array and print summary */
+- pr_info("MMU: Supported page sizes\n");
+- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+- struct mmu_psize_def *def = &mmu_psize_defs[psize];
+- const char *__page_type_names[] = {
+- "unsupported",
+- "direct",
+- "indirect",
+- "direct & indirect"
+- };
+- if (def->flags == 0) {
+- def->shift = 0;
+- continue;
+- }
+- pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
+- __page_type_names[def->flags & 0x3]);
+- }
+-}
+-
+-static void __init setup_mmu_htw(void)
+-{
+- /*
+- * If we want to use HW tablewalk, enable it by patching the TLB miss
+- * handlers to branch to the one dedicated to it.
+- */
+-
+- switch (book3e_htw_mode) {
+-#ifdef CONFIG_PPC_E500
+- case PPC_HTW_E6500:
+- extlb_level_exc = EX_TLB_SIZE;
+- patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
+- patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
+- break;
+-#endif
+- }
+- pr_info("MMU: Book3E HW tablewalk %s\n",
+- book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
+-}
+-
+-/*
+- * Early initialization of the MMU TLB code
+- */
+-static void early_init_this_mmu(void)
+-{
+- unsigned int mas4;
+-
+- /* Set MAS4 based on page table setting */
+-
+- mas4 = 0x4 << MAS4_WIMGED_SHIFT;
+- switch (book3e_htw_mode) {
+- case PPC_HTW_E6500:
+- mas4 |= MAS4_INDD;
+- mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
+- mas4 |= MAS4_TLBSELD(1);
+- mmu_pte_psize = MMU_PAGE_2M;
+- break;
+-
+- case PPC_HTW_NONE:
+- mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
+- mmu_pte_psize = mmu_virtual_psize;
+- break;
+- }
+- mtspr(SPRN_MAS4, mas4);
+-
+-#ifdef CONFIG_PPC_E500
+- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+- unsigned int num_cams;
+- bool map = true;
+-
+- /* use a quarter of the TLBCAM for bolted linear map */
+- num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+-
+- /*
+- * Only do the mapping once per core, or else the
+- * transient mapping would cause problems.
+- */
+-#ifdef CONFIG_SMP
+- if (hweight32(get_tensr()) > 1)
+- map = false;
+-#endif
+-
+- if (map)
+- linear_map_top = map_mem_in_cams(linear_map_top,
+- num_cams, false, true);
+- }
+-#endif
+-
+- /* A sync won't hurt us after mucking around with
+- * the MMU configuration
+- */
+- mb();
+-}
+-
+-static void __init early_init_mmu_global(void)
+-{
+- /* XXX This should be decided at runtime based on supported
+- * page sizes in the TLB, but for now let's assume 16M is
+- * always there and a good fit (which it probably is)
+- *
+- * Freescale booke only supports 4K pages in TLB0, so use that.
+- */
+- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+- mmu_vmemmap_psize = MMU_PAGE_4K;
+- else
+- mmu_vmemmap_psize = MMU_PAGE_16M;
+-
+- /* XXX This code only checks for TLB 0 capabilities and doesn't
+- * check what page size combos are supported by the HW. It
+- * also doesn't handle the case where a separate array holds
+- * the IND entries from the array loaded by the PT.
+- */
+- /* Look for supported page sizes */
+- setup_page_sizes();
+-
+- /* Look for HW tablewalk support */
+- setup_mmu_htw();
+-
+-#ifdef CONFIG_PPC_E500
+- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+- if (book3e_htw_mode == PPC_HTW_NONE) {
+- extlb_level_exc = EX_TLB_SIZE;
+- patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
+- patch_exception(0x1e0,
+- exc_instruction_tlb_miss_bolted_book3e);
+- }
+- }
+-#endif
+-
+- /* Set the global containing the top of the linear mapping
+- * for use by the TLB miss code
+- */
+- linear_map_top = memblock_end_of_DRAM();
+-
+- ioremap_bot = IOREMAP_BASE;
+-}
+-
+-static void __init early_mmu_set_memory_limit(void)
+-{
+-#ifdef CONFIG_PPC_E500
+- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+- /*
+- * Limit memory so we dont have linear faults.
+- * Unlike memblock_set_current_limit, which limits
+- * memory available during early boot, this permanently
+- * reduces the memory available to Linux. We need to
+- * do this because highmem is not supported on 64-bit.
+- */
+- memblock_enforce_memory_limit(linear_map_top);
+- }
+-#endif
+-
+- memblock_set_current_limit(linear_map_top);
+-}
+-
+-/* boot cpu only */
+-void __init early_init_mmu(void)
+-{
+- early_init_mmu_global();
+- early_init_this_mmu();
+- early_mmu_set_memory_limit();
+-}
+-
+-void early_init_mmu_secondary(void)
+-{
+- early_init_this_mmu();
+-}
+-
+-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+- phys_addr_t first_memblock_size)
+-{
+- /* On non-FSL Embedded 64-bit, we adjust the RMA size to match
+- * the bolted TLB entry. We know for now that only 1G
+- * entries are supported though that may eventually
+- * change.
+- *
+- * on FSL Embedded 64-bit, usually all RAM is bolted, but with
+- * unusual memory sizes it's possible for some RAM to not be mapped
+- * (such RAM is not used at all by Linux, since we don't support
+- * highmem on 64-bit). We limit ppc64_rma_size to what would be
+- * mappable if this memblock is the only one. Additional memblocks
+- * can only increase, not decrease, the amount that ends up getting
+- * mapped. We still limit max to 1G even if we'll eventually map
+- * more. This is due to what the early init code is set up to do.
+- *
+- * We crop it to the size of the first MEMBLOCK to
+- * avoid going over total available memory just in case...
+- */
+-#ifdef CONFIG_PPC_E500
+- if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+- unsigned long linear_sz;
+- unsigned int num_cams;
+-
+- /* use a quarter of the TLBCAM for bolted linear map */
+- num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+-
+- linear_sz = map_mem_in_cams(first_memblock_size, num_cams,
+- true, true);
+-
+- ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
+- } else
+-#endif
+- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+-
+- /* Finally limit subsequent allocations */
+- memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
+-}
+-#else /* ! CONFIG_PPC64 */
++#ifndef CONFIG_PPC64
+ void __init early_init_mmu(void)
+ {
+ #ifdef CONFIG_PPC_47x
+diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c
+new file mode 100644
+index 000000000000..1dcda261554c
+--- /dev/null
++++ b/arch/powerpc/mm/nohash/tlb_64e.c
+@@ -0,0 +1,361 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
++ * IBM Corp.
++ *
++ * Derived from arch/ppc/mm/init.c:
++ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
++ *
++ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
++ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
++ * Copyright (C) 1996 Paul Mackerras
++ *
++ * Derived from "arch/i386/mm/init.c"
++ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
++ */
++
++#include <linux/kernel.h>
++#include <linux/export.h>
++#include <linux/mm.h>
++#include <linux/init.h>
++#include <linux/pagemap.h>
++#include <linux/memblock.h>
++
++#include <asm/pgalloc.h>
++#include <asm/tlbflush.h>
++#include <asm/tlb.h>
++#include <asm/code-patching.h>
++#include <asm/cputhreads.h>
++
++#include <mm/mmu_decl.h>
++
++/* The variables below are currently only used on 64-bit Book3E
++ * though this will probably be made common with other nohash
++ * implementations at some point
++ */
++int mmu_pte_psize; /* Page size used for PTE pages */
++int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
++int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
++unsigned long linear_map_top; /* Top of linear mapping */
++
++
++/*
++ * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
++ * exceptions. This is used for bolted and e6500 TLB miss handlers which
++ * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
++ * this is set to zero.
++ */
++int extlb_level_exc;
++
++/*
++ * Handling of virtual linear page tables or indirect TLB entries
++ * flushing when PTE pages are freed
++ */
++void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
++{
++ int tsize = mmu_psize_defs[mmu_pte_psize].enc;
++
++ if (book3e_htw_mode != PPC_HTW_NONE) {
++ unsigned long start = address & PMD_MASK;
++ unsigned long end = address + PMD_SIZE;
++ unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
++
++ /* This isn't the most optimal, ideally we would factor out the
++ * while preempt & CPU mask mucking around, or even the IPI but
++ * it will do for now
++ */
++ while (start < end) {
++ __flush_tlb_page(tlb->mm, start, tsize, 1);
++ start += size;
++ }
++ } else {
++ unsigned long rmask = 0xf000000000000000ul;
++ unsigned long rid = (address & rmask) | 0x1000000000000000ul;
++ unsigned long vpte = address & ~rmask;
++
++ vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
++ vpte |= rid;
++ __flush_tlb_page(tlb->mm, vpte, tsize, 0);
++ }
++}
++
++static void __init setup_page_sizes(void)
++{
++ unsigned int tlb0cfg;
++ unsigned int eptcfg;
++ int psize;
++
++#ifdef CONFIG_PPC_E500
++ unsigned int mmucfg = mfspr(SPRN_MMUCFG);
++ int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
++
++ if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
++ unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
++ unsigned int min_pg, max_pg;
++
++ min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
++ max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
++
++ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
++ struct mmu_psize_def *def;
++ unsigned int shift;
++
++ def = &mmu_psize_defs[psize];
++ shift = def->shift;
++
++ if (shift == 0 || shift & 1)
++ continue;
++
++ /* adjust to be in terms of 4^shift Kb */
++ shift = (shift - 10) >> 1;
++
++ if ((shift >= min_pg) && (shift <= max_pg))
++ def->flags |= MMU_PAGE_SIZE_DIRECT;
++ }
++
++ goto out;
++ }
++
++ if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
++ u32 tlb1cfg, tlb1ps;
++
++ tlb0cfg = mfspr(SPRN_TLB0CFG);
++ tlb1cfg = mfspr(SPRN_TLB1CFG);
++ tlb1ps = mfspr(SPRN_TLB1PS);
++ eptcfg = mfspr(SPRN_EPTCFG);
++
++ if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
++ book3e_htw_mode = PPC_HTW_E6500;
++
++ /*
++ * We expect 4K subpage size and unrestricted indirect size.
++ * The lack of a restriction on indirect size is a Freescale
++ * extension, indicated by PSn = 0 but SPSn != 0.
++ */
++ if (eptcfg != 2)
++ book3e_htw_mode = PPC_HTW_NONE;
++
++ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
++ struct mmu_psize_def *def = &mmu_psize_defs[psize];
++
++ if (!def->shift)
++ continue;
++
++ if (tlb1ps & (1U << (def->shift - 10))) {
++ def->flags |= MMU_PAGE_SIZE_DIRECT;
++
++ if (book3e_htw_mode && psize == MMU_PAGE_2M)
++ def->flags |= MMU_PAGE_SIZE_INDIRECT;
++ }
++ }
++
++ goto out;
++ }
++#endif
++out:
++ /* Cleanup array and print summary */
++ pr_info("MMU: Supported page sizes\n");
++ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
++ struct mmu_psize_def *def = &mmu_psize_defs[psize];
++ const char *__page_type_names[] = {
++ "unsupported",
++ "direct",
++ "indirect",
++ "direct & indirect"
++ };
++ if (def->flags == 0) {
++ def->shift = 0;
++ continue;
++ }
++ pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
++ __page_type_names[def->flags & 0x3]);
++ }
++}
++
++static void __init setup_mmu_htw(void)
++{
++ /*
++ * If we want to use HW tablewalk, enable it by patching the TLB miss
++ * handlers to branch to the one dedicated to it.
++ */
++
++ switch (book3e_htw_mode) {
++#ifdef CONFIG_PPC_E500
++ case PPC_HTW_E6500:
++ extlb_level_exc = EX_TLB_SIZE;
++ patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
++ patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
++ break;
++#endif
++ }
++ pr_info("MMU: Book3E HW tablewalk %s\n",
++ book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
++}
++
++/*
++ * Early initialization of the MMU TLB code
++ */
++static void early_init_this_mmu(void)
++{
++ unsigned int mas4;
++
++ /* Set MAS4 based on page table setting */
++
++ mas4 = 0x4 << MAS4_WIMGED_SHIFT;
++ switch (book3e_htw_mode) {
++ case PPC_HTW_E6500:
++ mas4 |= MAS4_INDD;
++ mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
++ mas4 |= MAS4_TLBSELD(1);
++ mmu_pte_psize = MMU_PAGE_2M;
++ break;
++
++ case PPC_HTW_NONE:
++ mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
++ mmu_pte_psize = mmu_virtual_psize;
++ break;
++ }
++ mtspr(SPRN_MAS4, mas4);
++
++#ifdef CONFIG_PPC_E500
++ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
++ unsigned int num_cams;
++ bool map = true;
++
++ /* use a quarter of the TLBCAM for bolted linear map */
++ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
++
++ /*
++ * Only do the mapping once per core, or else the
++ * transient mapping would cause problems.
++ */
++#ifdef CONFIG_SMP
++ if (hweight32(get_tensr()) > 1)
++ map = false;
++#endif
++
++ if (map)
++ linear_map_top = map_mem_in_cams(linear_map_top,
++ num_cams, false, true);
++ }
++#endif
++
++ /* A sync won't hurt us after mucking around with
++ * the MMU configuration
++ */
++ mb();
++}
++
++static void __init early_init_mmu_global(void)
++{
++ /* XXX This should be decided at runtime based on supported
++ * page sizes in the TLB, but for now let's assume 16M is
++ * always there and a good fit (which it probably is)
++ *
++ * Freescale booke only supports 4K pages in TLB0, so use that.
++ */
++ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
++ mmu_vmemmap_psize = MMU_PAGE_4K;
++ else
++ mmu_vmemmap_psize = MMU_PAGE_16M;
++
++ /* XXX This code only checks for TLB 0 capabilities and doesn't
++ * check what page size combos are supported by the HW. It
++ * also doesn't handle the case where a separate array holds
++ * the IND entries from the array loaded by the PT.
++ */
++ /* Look for supported page sizes */
++ setup_page_sizes();
++
++ /* Look for HW tablewalk support */
++ setup_mmu_htw();
++
++#ifdef CONFIG_PPC_E500
++ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
++ if (book3e_htw_mode == PPC_HTW_NONE) {
++ extlb_level_exc = EX_TLB_SIZE;
++ patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
++ patch_exception(0x1e0,
++ exc_instruction_tlb_miss_bolted_book3e);
++ }
++ }
++#endif
++
++ /* Set the global containing the top of the linear mapping
++ * for use by the TLB miss code
++ */
++ linear_map_top = memblock_end_of_DRAM();
++
++ ioremap_bot = IOREMAP_BASE;
++}
++
++static void __init early_mmu_set_memory_limit(void)
++{
++#ifdef CONFIG_PPC_E500
++ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
++ /*
++ * Limit memory so we dont have linear faults.
++ * Unlike memblock_set_current_limit, which limits
++ * memory available during early boot, this permanently
++ * reduces the memory available to Linux. We need to
++ * do this because highmem is not supported on 64-bit.
++ */
++ memblock_enforce_memory_limit(linear_map_top);
++ }
++#endif
++
++ memblock_set_current_limit(linear_map_top);
++}
++
++/* boot cpu only */
++void __init early_init_mmu(void)
++{
++ early_init_mmu_global();
++ early_init_this_mmu();
++ early_mmu_set_memory_limit();
++}
++
++void early_init_mmu_secondary(void)
++{
++ early_init_this_mmu();
++}
++
++void setup_initial_memory_limit(phys_addr_t first_memblock_base,
++ phys_addr_t first_memblock_size)
++{
++ /* On non-FSL Embedded 64-bit, we adjust the RMA size to match
++ * the bolted TLB entry. We know for now that only 1G
++ * entries are supported though that may eventually
++ * change.
++ *
++ * on FSL Embedded 64-bit, usually all RAM is bolted, but with
++ * unusual memory sizes it's possible for some RAM to not be mapped
++ * (such RAM is not used at all by Linux, since we don't support
++ * highmem on 64-bit). We limit ppc64_rma_size to what would be
++ * mappable if this memblock is the only one. Additional memblocks
++ * can only increase, not decrease, the amount that ends up getting
++ * mapped. We still limit max to 1G even if we'll eventually map
++ * more. This is due to what the early init code is set up to do.
++ *
++ * We crop it to the size of the first MEMBLOCK to
++ * avoid going over total available memory just in case...
++ */
++#ifdef CONFIG_PPC_E500
++ if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
++ unsigned long linear_sz;
++ unsigned int num_cams;
++
++ /* use a quarter of the TLBCAM for bolted linear map */
++ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
++
++ linear_sz = map_mem_in_cams(first_memblock_size, num_cams,
++ true, true);
++
++ ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
++ } else
++#endif
++ ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
++
++ /* Finally limit subsequent allocations */
++ memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
++}
+--
+2.43.0
+
mm-rename-pmd_read_atomic.patch
userfaultfd-fix-checks-for-huge-pmds.patch
net-mana-fix-error-handling-in-mana_create_txq-rxq-s.patch
+workqueue-wq_watchdog_touch-is-always-called-with-va.patch
+workqueue-improve-scalability-of-workqueue-watchdog-.patch
+acpi-processor-return-an-error-if-acpi_processor_get.patch
+acpi-processor-fix-memory-leaks-in-error-paths-of-pr.patch
+arm64-acpi-move-get_cpu_for_acpi_id-to-a-header.patch
+arm64-acpi-harden-get_cpu_for_acpi_id-against-missin.patch
+can-mcp251xfd-mcp251xfd_handle_rxif_ring_uinc-factor.patch
+can-mcp251xfd-rx-prepare-to-workaround-broken-rx-fif.patch
+can-mcp251xfd-clarify-the-meaning-of-timestamp.patch
+can-mcp251xfd-rx-add-workaround-for-erratum-ds800007.patch
+drm-amd-add-gfx12-swizzle-mode-defs.patch
+drm-amdgpu-handle-gfx12-in-amdgpu_display_verify_siz.patch
+powerpc-64e-remove-unused-ibm-htw-code.patch
+powerpc-64e-split-out-nohash-book3e-64-bit-code.patch
+powerpc-64e-define-mmu_pte_psize-static.patch
+asoc-tegra-fix-cbb-error-during-probe.patch
+nvmet-tcp-fix-kernel-crash-if-commands-allocation-fa.patch
+asoc-sof-topology-clear-sof-link-platform-name-upon-.patch
+asoc-sunxi-sun4i-i2s-fix-lrclk-polarity-in-i2s-mode.patch
+drm-i915-fence-mark-debug_fence_init_onstack-with-__.patch
+drm-i915-fence-mark-debug_fence_free-with-__maybe_un.patch
+gpio-rockchip-fix-of-node-leak-in-probe.patch
+gpio-modepin-enable-module-autoloading.patch
+ublk_drv-fix-null-pointer-dereference-in-ublk_ctrl_s.patch
--- /dev/null
+From 983297b71fd637e48d2b1447a70e102dacc879f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Sep 2024 11:13:48 +0800
+Subject: ublk_drv: fix NULL pointer dereference in ublk_ctrl_start_recovery()
+
+From: Li Nan <linan122@huawei.com>
+
+[ Upstream commit e58f5142f88320a5b1449f96a146f2f24615c5c7 ]
+
+When two UBLK_CMD_START_USER_RECOVERY commands are submitted, the
+first one sets 'ubq->ubq_daemon' to NULL, and the second one triggers
+WARN in ublk_queue_reinit() and subsequently a NULL pointer dereference
+issue.
+
+Fix it by adding the check in ublk_ctrl_start_recovery() and return
+immediately in case of zero 'ub->nr_queues_ready'.
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000028
+ RIP: 0010:ublk_ctrl_start_recovery.constprop.0+0x82/0x180
+ Call Trace:
+ <TASK>
+ ? __die+0x20/0x70
+ ? page_fault_oops+0x75/0x170
+ ? exc_page_fault+0x64/0x140
+ ? asm_exc_page_fault+0x22/0x30
+ ? ublk_ctrl_start_recovery.constprop.0+0x82/0x180
+ ublk_ctrl_uring_cmd+0x4f7/0x6c0
+ ? pick_next_task_idle+0x26/0x40
+ io_uring_cmd+0x9a/0x1b0
+ io_issue_sqe+0x193/0x3f0
+ io_wq_submit_work+0x9b/0x390
+ io_worker_handle_work+0x165/0x360
+ io_wq_worker+0xcb/0x2f0
+ ? finish_task_switch.isra.0+0x203/0x290
+ ? finish_task_switch.isra.0+0x203/0x290
+ ? __pfx_io_wq_worker+0x10/0x10
+ ret_from_fork+0x2d/0x50
+ ? __pfx_io_wq_worker+0x10/0x10
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+Fixes: c732a852b419 ("ublk_drv: add START_USER_RECOVERY and END_USER_RECOVERY support")
+Reported-and-tested-by: Changhui Zhong <czhong@redhat.com>
+Closes: https://lore.kernel.org/all/CAGVVp+UvLiS+bhNXV-h2icwX1dyybbYHeQUuH7RYqUvMQf6N3w@mail.gmail.com
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Li Nan <linan122@huawei.com>
+Link: https://lore.kernel.org/r/20240904031348.4139545-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index 3fa74051f31b..bfd643856f64 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1915,6 +1915,8 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
+ mutex_lock(&ub->mutex);
+ if (!ublk_can_use_recovery(ub))
+ goto out_unlock;
++ if (!ub->nr_queues_ready)
++ goto out_unlock;
+ /*
+ * START_RECOVERY is only allowd after:
+ *
+--
+2.43.0
+
--- /dev/null
+From a62fa8104cfb91440d75980c0f8d75a76be7c668 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jun 2024 21:42:45 +1000
+Subject: workqueue: Improve scalability of workqueue watchdog touch
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+[ Upstream commit 98f887f820c993e05a12e8aa816c80b8661d4c87 ]
+
+On a ~2000 CPU powerpc system, hard lockups have been observed in the
+workqueue code when stop_machine runs (in this case due to CPU hotplug).
+This is due to lots of CPUs spinning in multi_cpu_stop, calling
+touch_nmi_watchdog() which ends up calling wq_watchdog_touch().
+wq_watchdog_touch() writes to the global variable wq_watchdog_touched,
+and that can find itself in the same cacheline as other important
+workqueue data, which slows down operations to the point of lockups.
+
+In the case of the following abridged trace, worker_pool_idr was in
+the hot line, causing the lockups to always appear at idr_find.
+
+ watchdog: CPU 1125 self-detected hard LOCKUP @ idr_find
+ Call Trace:
+ get_work_pool
+ __queue_work
+ call_timer_fn
+ run_timer_softirq
+ __do_softirq
+ do_softirq_own_stack
+ irq_exit
+ timer_interrupt
+ decrementer_common_virt
+ * interrupt: 900 (timer) at multi_cpu_stop
+ multi_cpu_stop
+ cpu_stopper_thread
+ smpboot_thread_fn
+ kthread
+
+Fix this by having wq_watchdog_touch() only write to the line if the
+last time a touch was recorded exceeds 1/4 of the watchdog threshold.
+
+Reported-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/workqueue.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index 4da8a5e702f8..93303148a434 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -5891,12 +5891,18 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
+
+ notrace void wq_watchdog_touch(int cpu)
+ {
++ unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
++ unsigned long touch_ts = READ_ONCE(wq_watchdog_touched);
++ unsigned long now = jiffies;
++
+ if (cpu >= 0)
+- per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
++ per_cpu(wq_watchdog_touched_cpu, cpu) = now;
+ else
+ WARN_ONCE(1, "%s should be called with valid CPU", __func__);
+
+- wq_watchdog_touched = jiffies;
++ /* Don't unnecessarily store to global cacheline */
++ if (time_after(now, touch_ts + thresh / 4))
++ WRITE_ONCE(wq_watchdog_touched, jiffies);
+ }
+
+ static void wq_watchdog_set_thresh(unsigned long thresh)
+--
+2.43.0
+
--- /dev/null
+From 9f487de526aa28171b4a511012aa54cb40280e34 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jun 2024 21:42:44 +1000
+Subject: workqueue: wq_watchdog_touch is always called with valid CPU
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+[ Upstream commit 18e24deb1cc92f2068ce7434a94233741fbd7771 ]
+
+Warn in the case it is called with cpu == -1. This does not appear
+to happen anywhere.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/workqueue.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index f3b6ac232e21..4da8a5e702f8 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -5893,6 +5893,8 @@ notrace void wq_watchdog_touch(int cpu)
+ {
+ if (cpu >= 0)
+ per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
++ else
++ WARN_ONCE(1, "%s should be called with valid CPU", __func__);
+
+ wq_watchdog_touched = jiffies;
+ }
+--
+2.43.0
+