From: Greg Kroah-Hartman Date: Mon, 17 Jun 2024 18:06:31 +0000 (+0200) Subject: 6.9-stable patches X-Git-Tag: v6.1.95~100 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d830a6a05a61f5232a3bcab083bba28eef4db3d9;p=thirdparty%2Fkernel%2Fstable-queue.git 6.9-stable patches added patches: drivers-core-synchronize-really_probe-and-dev_uevent.patch iio-adc-ad9467-fix-scan-type-sign.patch iio-dac-ad5592r-fix-temperature-channel-scaling-value.patch iio-imu-bmi323-fix-trigger-notification-in-case-of-error.patch iio-imu-inv_icm42600-delete-unneeded-update-watermark-call.patch iio-invensense-fix-odr-switching-to-same-value.patch iio-pressure-bmp280-fix-bmp580-temperature-reading.patch iio-temperature-mlx90635-fix-err_ptr-dereference-in-mlx90635_probe.patch ksmbd-fix-missing-use-of-get_write-in-in-smb2_set_ea.patch ksmbd-move-leading-slash-check-to-smb2_get_name.patch leds-class-revert-if-no-default-trigger-is-given-make-hw_control-trigger-the-default-trigger.patch misc-microchip-pci1xxxx-fix-double-free-in-the-error-handling-of-gp_aux_bus_probe.patch parisc-try-to-fix-random-segmentation-faults-in-package-builds.patch ras-amd-atl-fix-mi300-bank-hash.patch ras-amd-atl-use-system-settings-for-mi300-dram-to-normalized-address-translation.patch tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch x86-boot-don-t-add-the-efi-stub-to-targets-again.patch --- diff --git a/queue-6.9/drivers-core-synchronize-really_probe-and-dev_uevent.patch b/queue-6.9/drivers-core-synchronize-really_probe-and-dev_uevent.patch new file mode 100644 index 00000000000..796b9cbef41 --- /dev/null +++ b/queue-6.9/drivers-core-synchronize-really_probe-and-dev_uevent.patch @@ -0,0 +1,104 @@ +From c0a40097f0bc81deafc15f9195d1fb54595cd6d0 Mon Sep 17 00:00:00 2001 +From: Dirk Behme +Date: Mon, 13 May 2024 07:06:34 +0200 +Subject: drivers: core: synchronize really_probe() and dev_uevent() + +From: Dirk Behme + +commit c0a40097f0bc81deafc15f9195d1fb54595cd6d0 upstream. + +Synchronize the dev->driver usage in really_probe() and dev_uevent(). +These can run in different threads, what can result in the following +race condition for dev->driver uninitialization: + +Thread #1: +========== + +really_probe() { +... +probe_failed: +... +device_unbind_cleanup(dev) { + ... + dev->driver = NULL; // <= Failed probe sets dev->driver to NULL + ... + } +... +} + +Thread #2: +========== + +dev_uevent() { +... +if (dev->driver) + // If dev->driver is NULLed from really_probe() from here on, + // after above check, the system crashes + add_uevent_var(env, "DRIVER=%s", dev->driver->name); +... +} + +really_probe() holds the lock, already. So nothing needs to be done +there. dev_uevent() is called with lock held, often, too. But not +always. What implies that we can't add any locking in dev_uevent() +itself. So fix this race by adding the lock to the non-protected +path. 
This is the path where above race is observed: + + dev_uevent+0x235/0x380 + uevent_show+0x10c/0x1f0 <= Add lock here + dev_attr_show+0x3a/0xa0 + sysfs_kf_seq_show+0x17c/0x250 + kernfs_seq_show+0x7c/0x90 + seq_read_iter+0x2d7/0x940 + kernfs_fop_read_iter+0xc6/0x310 + vfs_read+0x5bc/0x6b0 + ksys_read+0xeb/0x1b0 + __x64_sys_read+0x42/0x50 + x64_sys_call+0x27ad/0x2d30 + do_syscall_64+0xcd/0x1d0 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +Similar cases are reported by syzkaller in + +https://syzkaller.appspot.com/bug?extid=ffa8143439596313a85a + +But these are regarding the *initialization* of dev->driver + +dev->driver = drv; + +As this switches dev->driver to non-NULL these reports can be considered +to be false-positives (which should be "fixed" by this commit, as well, +though). + +The same issue was reported and tried to be fixed back in 2015 in + +https://lore.kernel.org/lkml/1421259054-2574-1-git-send-email-a.sangwan@samsung.com/ + +already. + +Fixes: 239378f16aa1 ("Driver core: add uevent vars for devices of a class") +Cc: stable +Cc: syzbot+ffa8143439596313a85a@syzkaller.appspotmail.com +Cc: Ashish Sangwan +Cc: Namjae Jeon +Signed-off-by: Dirk Behme +Link: https://lore.kernel.org/r/20240513050634.3964461-1-dirk.behme@de.bosch.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/core.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -2738,8 +2738,11 @@ static ssize_t uevent_show(struct device + if (!env) + return -ENOMEM; + ++ /* Synchronize with really_probe() */ ++ device_lock(dev); + /* let the kset specific function add its keys */ + retval = kset->uevent_ops->uevent(&dev->kobj, env); ++ device_unlock(dev); + if (retval) + goto out; + diff --git a/queue-6.9/iio-adc-ad9467-fix-scan-type-sign.patch b/queue-6.9/iio-adc-ad9467-fix-scan-type-sign.patch new file mode 100644 index 00000000000..aaec782888c --- /dev/null +++ b/queue-6.9/iio-adc-ad9467-fix-scan-type-sign.patch @@ -0,0 +1,41 @@ +From 8a01ef749b0a632f0e1f4ead0f08b3310d99fcb1 Mon Sep 17 00:00:00 2001 +From: David Lechner +Date: Fri, 3 May 2024 14:45:05 -0500 +Subject: iio: adc: ad9467: fix scan type sign + +From: David Lechner + +commit 8a01ef749b0a632f0e1f4ead0f08b3310d99fcb1 upstream. + +According to the IIO documentation, the sign in the scan type should be +lower case. The ad9467 driver was incorrectly using upper case. + +Fix by changing to lower case. 
+ +Fixes: 4606d0f4b05f ("iio: adc: ad9467: add support for AD9434 high-speed ADC") +Fixes: ad6797120238 ("iio: adc: ad9467: add support AD9467 ADC") +Signed-off-by: David Lechner +Link: https://lore.kernel.org/r/20240503-ad9467-fix-scan-type-sign-v1-1-c7a1a066ebb9@baylibre.com +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/adc/ad9467.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/iio/adc/ad9467.c ++++ b/drivers/iio/adc/ad9467.c +@@ -225,11 +225,11 @@ static void __ad9467_get_scale(struct ad + } + + static const struct iio_chan_spec ad9434_channels[] = { +- AD9467_CHAN(0, 0, 12, 'S'), ++ AD9467_CHAN(0, 0, 12, 's'), + }; + + static const struct iio_chan_spec ad9467_channels[] = { +- AD9467_CHAN(0, 0, 16, 'S'), ++ AD9467_CHAN(0, 0, 16, 's'), + }; + + static const struct ad9467_chip_info ad9467_chip_tbl = { diff --git a/queue-6.9/iio-dac-ad5592r-fix-temperature-channel-scaling-value.patch b/queue-6.9/iio-dac-ad5592r-fix-temperature-channel-scaling-value.patch new file mode 100644 index 00000000000..612b63f09ea --- /dev/null +++ b/queue-6.9/iio-dac-ad5592r-fix-temperature-channel-scaling-value.patch @@ -0,0 +1,66 @@ +From 279428df888319bf68f2686934897301a250bb84 Mon Sep 17 00:00:00 2001 +From: Marc Ferland +Date: Wed, 1 May 2024 11:05:54 -0400 +Subject: iio: dac: ad5592r: fix temperature channel scaling value + +From: Marc Ferland + +commit 279428df888319bf68f2686934897301a250bb84 upstream. + +The scale value for the temperature channel is (assuming Vref=2.5 and +the datasheet): + + 376.7897513 + +When calculating both val and val2 for the temperature scale we +use (3767897513/25) and multiply it by Vref (here I assume 2500mV) to +obtain: + + 2500 * (3767897513/25) ==> 376789751300 + +Finally we divide with remainder by 10^9 to get: + + val = 376 + val2 = 789751300 + +However, we return IIO_VAL_INT_PLUS_MICRO (should have been NANO) as +the scale type. So when converting the raw temperature value to the +'processed' temperature value we will get (assuming raw=810, +offset=-753): + + processed = (raw + offset) * scale_val + = (810 + -753) * 376 + = 21432 + + processed += div((raw + offset) * scale_val2, 10^6) + += div((810 + -753) * 789751300, 10^6) + += 45015 + ==> 66447 + ==> 66.4 Celcius + +instead of the expected 21.5 Celsius. + +Fix this issue by changing IIO_VAL_INT_PLUS_MICRO to +IIO_VAL_INT_PLUS_NANO. 
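+
+For comparison, a quick check of the same example values with the
+corrected IIO_VAL_INT_PLUS_NANO scale type (val2 is now divided by
+10^9 rather than 10^6) gives the expected result:
+
+    processed = (raw + offset) * scale_val
+              = (810 + -753) * 376
+              = 21432
+
+    processed += div((raw + offset) * scale_val2, 10^9)
+              += div((810 + -753) * 789751300, 10^9)
+              += 45
+              ==> 21477
+              ==> 21.5 Celsius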
+ +Fixes: 56ca9db862bf ("iio: dac: Add support for the AD5592R/AD5593R ADCs/DACs") +Signed-off-by: Marc Ferland +Link: https://lore.kernel.org/r/20240501150554.1871390-1-marc.ferland@sonatest.com +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/dac/ad5592r-base.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/iio/dac/ad5592r-base.c ++++ b/drivers/iio/dac/ad5592r-base.c +@@ -415,7 +415,7 @@ static int ad5592r_read_raw(struct iio_d + s64 tmp = *val * (3767897513LL / 25LL); + *val = div_s64_rem(tmp, 1000000000LL, val2); + +- return IIO_VAL_INT_PLUS_MICRO; ++ return IIO_VAL_INT_PLUS_NANO; + } + + mutex_lock(&st->lock); diff --git a/queue-6.9/iio-imu-bmi323-fix-trigger-notification-in-case-of-error.patch b/queue-6.9/iio-imu-bmi323-fix-trigger-notification-in-case-of-error.patch new file mode 100644 index 00000000000..3fda3937911 --- /dev/null +++ b/queue-6.9/iio-imu-bmi323-fix-trigger-notification-in-case-of-error.patch @@ -0,0 +1,52 @@ +From bedb2ccb566de5ca0c336ca3fd3588cea6d50414 Mon Sep 17 00:00:00 2001 +From: Vasileios Amoiridis +Date: Wed, 8 May 2024 17:54:07 +0200 +Subject: iio: imu: bmi323: Fix trigger notification in case of error + +From: Vasileios Amoiridis + +commit bedb2ccb566de5ca0c336ca3fd3588cea6d50414 upstream. + +In case of error in the bmi323_trigger_handler() function, the +function exits without calling the iio_trigger_notify_done() +which is responsible for informing the attached trigger that +the process is done and in case there is a .reenable(), to +call it. + +Fixes: 8a636db3aa57 ("iio: imu: Add driver for BMI323 IMU") +Signed-off-by: Vasileios Amoiridis +Link: https://lore.kernel.org/r/20240508155407.139805-1-vassilisamir@gmail.com +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/bmi323/bmi323_core.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/iio/imu/bmi323/bmi323_core.c ++++ b/drivers/iio/imu/bmi323/bmi323_core.c +@@ -1391,7 +1391,7 @@ static irqreturn_t bmi323_trigger_handle + &data->buffer.channels, + ARRAY_SIZE(data->buffer.channels)); + if (ret) +- return IRQ_NONE; ++ goto out; + } else { + for_each_set_bit(bit, indio_dev->active_scan_mask, + BMI323_CHAN_MAX) { +@@ -1400,13 +1400,14 @@ static irqreturn_t bmi323_trigger_handle + &data->buffer.channels[index++], + BMI323_BYTES_PER_SAMPLE); + if (ret) +- return IRQ_NONE; ++ goto out; + } + } + + iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer, + iio_get_time_ns(indio_dev)); + ++out: + iio_trigger_notify_done(indio_dev->trig); + + return IRQ_HANDLED; diff --git a/queue-6.9/iio-imu-inv_icm42600-delete-unneeded-update-watermark-call.patch b/queue-6.9/iio-imu-inv_icm42600-delete-unneeded-update-watermark-call.patch new file mode 100644 index 00000000000..9136d04a758 --- /dev/null +++ b/queue-6.9/iio-imu-inv_icm42600-delete-unneeded-update-watermark-call.patch @@ -0,0 +1,49 @@ +From 245f3b149e6cc3ac6ee612cdb7042263bfc9e73c Mon Sep 17 00:00:00 2001 +From: Jean-Baptiste Maneyrol +Date: Mon, 27 May 2024 21:00:08 +0000 +Subject: iio: imu: inv_icm42600: delete unneeded update watermark call + +From: Jean-Baptiste Maneyrol + +commit 245f3b149e6cc3ac6ee612cdb7042263bfc9e73c upstream. + +Update watermark will be done inside the hwfifo_set_watermark callback +just after the update_scan_mode. It is useless to do it here. 
+ +Fixes: 7f85e42a6c54 ("iio: imu: inv_icm42600: add buffer support in iio devices") +Cc: stable@vger.kernel.org +Signed-off-by: Jean-Baptiste Maneyrol +Link: https://lore.kernel.org/r/20240527210008.612932-1-inv.git-commit@tdk.com +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c | 4 ---- + drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c | 4 ---- + 2 files changed, 8 deletions(-) + +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +@@ -129,10 +129,6 @@ static int inv_icm42600_accel_update_sca + /* update data FIFO write */ + inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); + ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); +- if (ret) +- goto out_unlock; +- +- ret = inv_icm42600_buffer_update_watermark(st); + + out_unlock: + mutex_unlock(&st->lock); +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +@@ -129,10 +129,6 @@ static int inv_icm42600_gyro_update_scan + /* update data FIFO write */ + inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); + ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); +- if (ret) +- goto out_unlock; +- +- ret = inv_icm42600_buffer_update_watermark(st); + + out_unlock: + mutex_unlock(&st->lock); diff --git a/queue-6.9/iio-invensense-fix-odr-switching-to-same-value.patch b/queue-6.9/iio-invensense-fix-odr-switching-to-same-value.patch new file mode 100644 index 00000000000..cf49075df17 --- /dev/null +++ b/queue-6.9/iio-invensense-fix-odr-switching-to-same-value.patch @@ -0,0 +1,46 @@ +From 95444b9eeb8c5c0330563931d70c61ca3b101548 Mon Sep 17 00:00:00 2001 +From: Jean-Baptiste Maneyrol +Date: Fri, 24 May 2024 12:48:51 +0000 +Subject: iio: invensense: fix odr switching to same value + +From: Jean-Baptiste Maneyrol + +commit 95444b9eeb8c5c0330563931d70c61ca3b101548 upstream. + +ODR switching happens in 2 steps, update to store the new value and then +apply when the ODR change flag is received in the data. When switching to +the same ODR value, the ODR change flag is never happening, and frequency +switching is blocked waiting for the never coming apply. + +Fix the issue by preventing update to happen when switching to same ODR +value. 
+ +Fixes: 0ecc363ccea7 ("iio: make invensense timestamp module generic") +Cc: stable@vger.kernel.org +Signed-off-by: Jean-Baptiste Maneyrol +Link: https://lore.kernel.org/r/20240524124851.567485-1-inv.git-commit@tdk.com +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/common/inv_sensors/inv_sensors_timestamp.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c ++++ b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c +@@ -60,11 +60,15 @@ EXPORT_SYMBOL_NS_GPL(inv_sensors_timesta + int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts, + uint32_t period, bool fifo) + { ++ uint32_t mult; ++ + /* when FIFO is on, prevent odr change if one is already pending */ + if (fifo && ts->new_mult != 0) + return -EAGAIN; + +- ts->new_mult = period / ts->chip.clock_period; ++ mult = period / ts->chip.clock_period; ++ if (mult != ts->mult) ++ ts->new_mult = mult; + + return 0; + } diff --git a/queue-6.9/iio-pressure-bmp280-fix-bmp580-temperature-reading.patch b/queue-6.9/iio-pressure-bmp280-fix-bmp580-temperature-reading.patch new file mode 100644 index 00000000000..83bcb8ca884 --- /dev/null +++ b/queue-6.9/iio-pressure-bmp280-fix-bmp580-temperature-reading.patch @@ -0,0 +1,43 @@ +From 0f0f6306617cb4b6231fc9d4ec68ab9a56dba7c0 Mon Sep 17 00:00:00 2001 +From: Adam Rizkalla +Date: Thu, 25 Apr 2024 01:22:49 -0500 +Subject: iio: pressure: bmp280: Fix BMP580 temperature reading + +From: Adam Rizkalla + +commit 0f0f6306617cb4b6231fc9d4ec68ab9a56dba7c0 upstream. + +Fix overflow issue when storing BMP580 temperature reading and +properly preserve sign of 24-bit data. + +Signed-off-by: Adam Rizkalla +Tested-By: Vasileios Amoiridis +Acked-by: Angel Iglesias +Link: https://lore.kernel.org/r/Zin2udkXRD0+GrML@adam-asahi.lan +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/pressure/bmp280-core.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/iio/pressure/bmp280-core.c ++++ b/drivers/iio/pressure/bmp280-core.c +@@ -1394,12 +1394,12 @@ static int bmp580_read_temp(struct bmp28 + + /* + * Temperature is returned in Celsius degrees in fractional +- * form down 2^16. We rescale by x1000 to return milli Celsius +- * to respect IIO ABI. ++ * form down 2^16. We rescale by x1000 to return millidegrees ++ * Celsius to respect IIO ABI. + */ +- *val = raw_temp * 1000; +- *val2 = 16; +- return IIO_VAL_FRACTIONAL_LOG2; ++ raw_temp = sign_extend32(raw_temp, 23); ++ *val = ((s64)raw_temp * 1000) / (1 << 16); ++ return IIO_VAL_INT; + } + + static int bmp580_read_press(struct bmp280_data *data, int *val, int *val2) diff --git a/queue-6.9/iio-temperature-mlx90635-fix-err_ptr-dereference-in-mlx90635_probe.patch b/queue-6.9/iio-temperature-mlx90635-fix-err_ptr-dereference-in-mlx90635_probe.patch new file mode 100644 index 00000000000..b4aaa31fcb8 --- /dev/null +++ b/queue-6.9/iio-temperature-mlx90635-fix-err_ptr-dereference-in-mlx90635_probe.patch @@ -0,0 +1,39 @@ +From a23c14b062d8800a2192077d83273bbfe6c7552d Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Mon, 13 May 2024 13:34:27 -0700 +Subject: iio: temperature: mlx90635: Fix ERR_PTR dereference in mlx90635_probe() + +From: Harshit Mogalapalli + +commit a23c14b062d8800a2192077d83273bbfe6c7552d upstream. + +When devm_regmap_init_i2c() fails, regmap_ee could be error pointer, +instead of checking for IS_ERR(regmap_ee), regmap is checked which looks +like a copy paste error. 
+ +Fixes: a1d1ba5e1c28 ("iio: temperature: mlx90635 MLX90635 IR Temperature sensor") +Reviewed-by: Crt Mori +Signed-off-by: Harshit Mogalapalli +Link: https://lore.kernel.org/r/20240513203427.3208696-1-harshit.m.mogalapalli@oracle.com +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/temperature/mlx90635.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/iio/temperature/mlx90635.c ++++ b/drivers/iio/temperature/mlx90635.c +@@ -947,9 +947,9 @@ static int mlx90635_probe(struct i2c_cli + "failed to allocate regmap\n"); + + regmap_ee = devm_regmap_init_i2c(client, &mlx90635_regmap_ee); +- if (IS_ERR(regmap)) +- return dev_err_probe(&client->dev, PTR_ERR(regmap), +- "failed to allocate regmap\n"); ++ if (IS_ERR(regmap_ee)) ++ return dev_err_probe(&client->dev, PTR_ERR(regmap_ee), ++ "failed to allocate EEPROM regmap\n"); + + mlx90635 = iio_priv(indio_dev); + i2c_set_clientdata(client, indio_dev); diff --git a/queue-6.9/ksmbd-fix-missing-use-of-get_write-in-in-smb2_set_ea.patch b/queue-6.9/ksmbd-fix-missing-use-of-get_write-in-in-smb2_set_ea.patch new file mode 100644 index 00000000000..34112297e51 --- /dev/null +++ b/queue-6.9/ksmbd-fix-missing-use-of-get_write-in-in-smb2_set_ea.patch @@ -0,0 +1,116 @@ +From 2bfc4214c69c62da13a9da8e3c3db5539da2ccd3 Mon Sep 17 00:00:00 2001 +From: Namjae Jeon +Date: Tue, 11 Jun 2024 23:27:27 +0900 +Subject: ksmbd: fix missing use of get_write in in smb2_set_ea() + +From: Namjae Jeon + +commit 2bfc4214c69c62da13a9da8e3c3db5539da2ccd3 upstream. + +Fix an issue where get_write is not used in smb2_set_ea(). + +Fixes: 6fc0a265e1b9 ("ksmbd: fix potential circular locking issue in smb2_set_ea()") +Cc: stable@vger.kernel.org +Reported-by: Wang Zhaolong +Signed-off-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/smb2pdu.c | 7 ++++--- + fs/smb/server/vfs.c | 17 +++++++++++------ + fs/smb/server/vfs.h | 3 ++- + fs/smb/server/vfs_cache.c | 3 ++- + 4 files changed, 19 insertions(+), 11 deletions(-) + +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -2367,7 +2367,8 @@ static int smb2_set_ea(struct smb2_ea_in + if (rc > 0) { + rc = ksmbd_vfs_remove_xattr(idmap, + path, +- attr_name); ++ attr_name, ++ get_write); + + if (rc < 0) { + ksmbd_debug(SMB, +@@ -2382,7 +2383,7 @@ static int smb2_set_ea(struct smb2_ea_in + } else { + rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, + le16_to_cpu(eabuf->EaValueLength), +- 0, true); ++ 0, get_write); + if (rc < 0) { + ksmbd_debug(SMB, + "ksmbd_vfs_setxattr is failed(%d)\n", +@@ -2474,7 +2475,7 @@ static int smb2_remove_smb_xattrs(const + !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, + STREAM_PREFIX_LEN)) { + err = ksmbd_vfs_remove_xattr(idmap, path, +- name); ++ name, true); + if (err) + ksmbd_debug(SMB, "remove xattr failed : %s\n", + name); +--- a/fs/smb/server/vfs.c ++++ b/fs/smb/server/vfs.c +@@ -1058,16 +1058,21 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_fi + } + + int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, +- const struct path *path, char *attr_name) ++ const struct path *path, char *attr_name, ++ bool get_write) + { + int err; + +- err = mnt_want_write(path->mnt); +- if (err) +- return err; ++ if (get_write == true) { ++ err = mnt_want_write(path->mnt); ++ if (err) ++ return err; ++ } + + err = vfs_removexattr(idmap, path->dentry, attr_name); +- mnt_drop_write(path->mnt); ++ ++ if (get_write == true) ++ mnt_drop_write(path->mnt); + + return err; + } +@@ -1380,7 +1385,7 @@ 
int ksmbd_vfs_remove_sd_xattrs(struct mn + ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); + + if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { +- err = ksmbd_vfs_remove_xattr(idmap, path, name); ++ err = ksmbd_vfs_remove_xattr(idmap, path, name, true); + if (err) + ksmbd_debug(SMB, "remove xattr failed : %s\n", name); + } +--- a/fs/smb/server/vfs.h ++++ b/fs/smb/server/vfs.h +@@ -114,7 +114,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap + int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, + size_t *xattr_stream_name_size, int s_type); + int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, +- const struct path *path, char *attr_name); ++ const struct path *path, char *attr_name, ++ bool get_write); + int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, + unsigned int flags, struct path *parent_path, + struct path *path, bool caseless); +--- a/fs/smb/server/vfs_cache.c ++++ b/fs/smb/server/vfs_cache.c +@@ -254,7 +254,8 @@ static void __ksmbd_inode_close(struct k + ci->m_flags &= ~S_DEL_ON_CLS_STREAM; + err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), + &filp->f_path, +- fp->stream.name); ++ fp->stream.name, ++ true); + if (err) + pr_err("remove xattr failed : %s\n", + fp->stream.name); diff --git a/queue-6.9/ksmbd-move-leading-slash-check-to-smb2_get_name.patch b/queue-6.9/ksmbd-move-leading-slash-check-to-smb2_get_name.patch new file mode 100644 index 00000000000..e7de4777eda --- /dev/null +++ b/queue-6.9/ksmbd-move-leading-slash-check-to-smb2_get_name.patch @@ -0,0 +1,61 @@ +From 1cdeca6a7264021e20157de0baf7880ff0ced822 Mon Sep 17 00:00:00 2001 +From: Namjae Jeon +Date: Mon, 10 Jun 2024 23:06:19 +0900 +Subject: ksmbd: move leading slash check to smb2_get_name() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Namjae Jeon + +commit 1cdeca6a7264021e20157de0baf7880ff0ced822 upstream. + +If the directory name in the root of the share starts with +character like 镜(0x955c) or Ṝ(0x1e5c), it (and anything inside) +cannot be accessed. The leading slash check must be checked after +converting unicode to nls string. 
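+
+A rough illustration of why the raw-buffer check misfires (assuming
+the usual UTF-16LE encoding of SMB2 names on the wire): the removed
+code tested only the first byte of req->Buffer, and the low byte of
+such code points happens to be 0x5c, i.e. '\':
+
+    U+955C (镜)     ->  5c 95   first byte looks like '\'
+    U+1E5C (Ṝ)      ->  5c 1e   first byte looks like '\'
+    U+005C ('\')    ->  5c 00   an actual leading slash
+
+Only after conversion to the local nls string can a genuine leading
+slash be distinguished from these characters.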
+ +Cc: stable@vger.kernel.org +Signed-off-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/smb2pdu.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -630,6 +630,12 @@ smb2_get_name(const char *src, const int + return name; + } + ++ if (*name == '\\') { ++ pr_err("not allow directory name included leading slash\n"); ++ kfree(name); ++ return ERR_PTR(-EINVAL); ++ } ++ + ksmbd_conv_path_to_unix(name); + ksmbd_strip_last_slash(name); + return name; +@@ -2842,20 +2848,11 @@ int smb2_open(struct ksmbd_work *work) + } + + if (req->NameLength) { +- if ((req->CreateOptions & FILE_DIRECTORY_FILE_LE) && +- *(char *)req->Buffer == '\\') { +- pr_err("not allow directory name included leading slash\n"); +- rc = -EINVAL; +- goto err_out2; +- } +- + name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset), + le16_to_cpu(req->NameLength), + work->conn->local_nls); + if (IS_ERR(name)) { + rc = PTR_ERR(name); +- if (rc != -ENOMEM) +- rc = -ENOENT; + name = NULL; + goto err_out2; + } diff --git a/queue-6.9/leds-class-revert-if-no-default-trigger-is-given-make-hw_control-trigger-the-default-trigger.patch b/queue-6.9/leds-class-revert-if-no-default-trigger-is-given-make-hw_control-trigger-the-default-trigger.patch new file mode 100644 index 00000000000..2dcdc8927e2 --- /dev/null +++ b/queue-6.9/leds-class-revert-if-no-default-trigger-is-given-make-hw_control-trigger-the-default-trigger.patch @@ -0,0 +1,63 @@ +From fcf2a9970ef587d8f358560c381ee6115a9108aa Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Fri, 7 Jun 2024 12:18:47 +0200 +Subject: leds: class: Revert: "If no default trigger is given, make hw_control trigger the default trigger" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hans de Goede + +commit fcf2a9970ef587d8f358560c381ee6115a9108aa upstream. + +Commit 66601a29bb23 ("leds: class: If no default trigger is given, make +hw_control trigger the default trigger") causes ledtrig-netdev to get +set as default trigger on various network LEDs. + +This causes users to hit a pre-existing AB-BA deadlock issue in +ledtrig-netdev between the LED-trigger locks and the rtnl mutex, +resulting in hung tasks in kernels >= 6.9. + +Solving the deadlock is non trivial, so for now revert the change to +set the hw_control trigger as default trigger, so that ledtrig-netdev +no longer gets activated automatically for various network LEDs. + +The netdev trigger is not needed because the network LEDs are usually under +hw-control and the netdev trigger tries to leave things that way so setting +it as the active trigger for the LED class device is a no-op. 
+ +Fixes: 66601a29bb23 ("leds: class: If no default trigger is given, make hw_control trigger the default trigger") +Reported-by: Genes Lists +Closes: https://lore.kernel.org/all/9d189ec329cfe68ed68699f314e191a10d4b5eda.camel@sapience.com/ +Reported-by: Johannes Wüller +Closes: https://lore.kernel.org/lkml/e441605c-eaf2-4c2d-872b-d8e541f4cf60@gmail.com/ +Cc: stable@vger.kernel.org +Signed-off-by: Hans de Goede +Reviewed-by: Andrew Lunn +Acked-by: Lee Jones +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + drivers/leds/led-class.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c +index 24fcff682b24..ba1be15cfd8e 100644 +--- a/drivers/leds/led-class.c ++++ b/drivers/leds/led-class.c +@@ -552,12 +552,6 @@ int led_classdev_register_ext(struct device *parent, + led_init_core(led_cdev); + + #ifdef CONFIG_LEDS_TRIGGERS +- /* +- * If no default trigger was given and hw_control_trigger is set, +- * make it the default trigger. +- */ +- if (!led_cdev->default_trigger && led_cdev->hw_control_trigger) +- led_cdev->default_trigger = led_cdev->hw_control_trigger; + led_trigger_set_default(led_cdev); + #endif + +-- +2.45.2 + diff --git a/queue-6.9/misc-microchip-pci1xxxx-fix-double-free-in-the-error-handling-of-gp_aux_bus_probe.patch b/queue-6.9/misc-microchip-pci1xxxx-fix-double-free-in-the-error-handling-of-gp_aux_bus_probe.patch new file mode 100644 index 00000000000..51ec9daf9e5 --- /dev/null +++ b/queue-6.9/misc-microchip-pci1xxxx-fix-double-free-in-the-error-handling-of-gp_aux_bus_probe.patch @@ -0,0 +1,51 @@ +From 086c6cbcc563c81d55257f9b27e14faf1d0963d3 Mon Sep 17 00:00:00 2001 +From: Yongzhi Liu +Date: Thu, 23 May 2024 20:14:33 +0800 +Subject: misc: microchip: pci1xxxx: fix double free in the error handling of gp_aux_bus_probe() + +From: Yongzhi Liu + +commit 086c6cbcc563c81d55257f9b27e14faf1d0963d3 upstream. + +When auxiliary_device_add() returns error and then calls +auxiliary_device_uninit(), callback function +gp_auxiliary_device_release() calls ida_free() and +kfree(aux_device_wrapper) to free memory. We should't +call them again in the error handling path. + +Fix this by skipping the redundant cleanup functions. 
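+
+The general pattern (a minimal sketch with hypothetical names, not
+the driver code itself): once auxiliary_device_init() has succeeded,
+the release callback owns the ida entry and the allocation, so later
+error paths may only call auxiliary_device_uninit() and must not free
+them a second time:
+
+	ret = auxiliary_device_init(&wrapper->aux_dev);
+	if (ret)
+		goto free_ida;	/* release not armed yet, free by hand */
+
+	ret = auxiliary_device_add(&wrapper->aux_dev);
+	if (ret) {
+		/* release callback frees the ida entry and wrapper */
+		auxiliary_device_uninit(&wrapper->aux_dev);
+		return ret;
+	}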
+ +Fixes: 393fc2f5948f ("misc: microchip: pci1xxxx: load auxiliary bus driver for the PIO function in the multi-function endpoint of pci1xxxx device.") +Signed-off-by: Yongzhi Liu +Link: https://lore.kernel.org/r/20240523121434.21855-3-hyperlyzcs@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c ++++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c +@@ -111,6 +111,7 @@ static int gp_aux_bus_probe(struct pci_d + + err_aux_dev_add_1: + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev); ++ goto err_aux_dev_add_0; + + err_aux_dev_init_1: + ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[1]->aux_dev.id); +@@ -120,6 +121,7 @@ err_ida_alloc_1: + + err_aux_dev_add_0: + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev); ++ goto err_ret; + + err_aux_dev_init_0: + ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[0]->aux_dev.id); +@@ -127,6 +129,7 @@ err_aux_dev_init_0: + err_ida_alloc_0: + kfree(aux_bus->aux_device_wrapper[0]); + ++err_ret: + return retval; + } + diff --git a/queue-6.9/parisc-try-to-fix-random-segmentation-faults-in-package-builds.patch b/queue-6.9/parisc-try-to-fix-random-segmentation-faults-in-package-builds.patch new file mode 100644 index 00000000000..56af83dcd59 --- /dev/null +++ b/queue-6.9/parisc-try-to-fix-random-segmentation-faults-in-package-builds.patch @@ -0,0 +1,811 @@ +From 72d95924ee35c8cd16ef52f912483ee938a34d49 Mon Sep 17 00:00:00 2001 +From: John David Anglin +Date: Mon, 10 Jun 2024 18:47:07 +0000 +Subject: parisc: Try to fix random segmentation faults in package builds + +From: John David Anglin + +commit 72d95924ee35c8cd16ef52f912483ee938a34d49 upstream. + +PA-RISC systems with PA8800 and PA8900 processors have had problems +with random segmentation faults for many years. Systems with earlier +processors are much more stable. + +Systems with PA8800 and PA8900 processors have a large L2 cache which +needs per page flushing for decent performance when a large range is +flushed. The combined cache in these systems is also more sensitive to +non-equivalent aliases than the caches in earlier systems. + +The majority of random segmentation faults that I have looked at +appear to be memory corruption in memory allocated using mmap and +malloc. + +My first attempt at fixing the random faults didn't work. On +reviewing the cache code, I realized that there were two issues +which the existing code didn't handle correctly. Both relate +to cache move-in. Another issue is that the present bit in PTEs +is racy. + +1) PA-RISC caches have a mind of their own and they can speculatively +load data and instructions for a page as long as there is a entry in +the TLB for the page which allows move-in. TLBs are local to each +CPU. Thus, the TLB entry for a page must be purged before flushing +the page. This is particularly important on SMP systems. + +In some of the flush routines, the flush routine would be called +and then the TLB entry would be purged. This was because the flush +routine needed the TLB entry to do the flush. + +2) My initial approach to trying the fix the random faults was to +try and use flush_cache_page_if_present for all flush operations. +This actually made things worse and led to a couple of hardware +lockups. It finally dawned on me that some lines weren't being +flushed because the pte check code was racy. This resulted in +random inequivalent mappings to physical pages. 
+ +The __flush_cache_page tmpalias flush sets up its own TLB entry +and it doesn't need the existing TLB entry. As long as we can find +the pte pointer for the vm page, we can get the pfn and physical +address of the page. We can also purge the TLB entry for the page +before doing the flush. Further, __flush_cache_page uses a special +TLB entry that inhibits cache move-in. + +When switching page mappings, we need to ensure that lines are +removed from the cache. It is not sufficient to just flush the +lines to memory as they may come back. + +This made it clear that we needed to implement all the required +flush operations using tmpalias routines. This includes flushes +for user and kernel pages. + +After modifying the code to use tmpalias flushes, it became clear +that the random segmentation faults were not fully resolved. The +frequency of faults was worse on systems with a 64 MB L2 (PA8900) +and systems with more CPUs (rp4440). + +The warning that I added to flush_cache_page_if_present to detect +pages that couldn't be flushed triggered frequently on some systems. + +Helge and I looked at the pages that couldn't be flushed and found +that the PTE was either cleared or for a swap page. Ignoring pages +that were swapped out seemed okay but pages with cleared PTEs seemed +problematic. + +I looked at routines related to pte_clear and noticed ptep_clear_flush. +The default implementation just flushes the TLB entry. However, it was +obvious that on parisc we need to flush the cache page as well. If +we don't flush the cache page, stale lines will be left in the cache +and cause random corruption. Once a PTE is cleared, there is no way +to find the physical address associated with the PTE and flush the +associated page at a later time. + +I implemented an updated change with a parisc specific version of +ptep_clear_flush. It fixed the random data corruption on Helge's rp4440 +and rp3440, as well as on my c8000. + +At this point, I realized that I could restore the code where we only +flush in flush_cache_page_if_present if the page has been accessed. +However, for this, we also need to flush the cache when the accessed +bit is cleared in ptep_clear_flush_young to keep things synchronized. +The default implementation only flushes the TLB entry. + +Other changes in this version are: + +1) Implement parisc specific version of ptep_get. It's identical to +default but needed in arch/parisc/include/asm/pgtable.h. +2) Revise parisc implementation of ptep_test_and_clear_young to use +ptep_get (READ_ONCE). +3) Drop parisc implementation of ptep_get_and_clear. We can use default. +4) Revise flush_kernel_vmap_range and invalidate_kernel_vmap_range to +use full data cache flush. +5) Move flush_cache_vmap and flush_cache_vunmap to cache.c. Handle +VM_IOREMAP case in flush_cache_vmap. + +At this time, I don't know whether it is better to always flush when +the PTE present bit is set or when both the accessed and present bits +are set. The later saves flushing pages that haven't been accessed, +but we need to flush in ptep_clear_flush_young. It also needs a page +table lookup to find the PTE pointer. The lpa instruction only needs +a page table lookup when the PTE entry isn't in the TLB. + +We don't atomically handle setting and clearing the _PAGE_ACCESSED bit. +If we miss an update, we may miss a flush and the cache may get corrupted. +Whether the current code is effectively atomic depends on process control. 
+ +When CONFIG_FLUSH_PAGE_ACCESSED is set to zero, the page will eventually +be flushed when the PTE is cleared or in flush_cache_page_if_present. The +_PAGE_ACCESSED bit is not used, so the problem is avoided. + +The flush method can be selected using the CONFIG_FLUSH_PAGE_ACCESSED +define in cache.c. The default is 0. I didn't see a large difference +in performance. + +Signed-off-by: John David Anglin +Cc: # v6.6+ +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/cacheflush.h | 15 - + arch/parisc/include/asm/pgtable.h | 27 +- + arch/parisc/kernel/cache.c | 411 +++++++++++++++++++++-------------- + 3 files changed, 274 insertions(+), 179 deletions(-) + +--- a/arch/parisc/include/asm/cacheflush.h ++++ b/arch/parisc/include/asm/cacheflush.h +@@ -31,18 +31,17 @@ void flush_cache_all_local(void); + void flush_cache_all(void); + void flush_cache_mm(struct mm_struct *mm); + +-void flush_kernel_dcache_page_addr(const void *addr); +- + #define flush_kernel_dcache_range(start,size) \ + flush_kernel_dcache_range_asm((start), (start)+(size)); + ++/* The only way to flush a vmap range is to flush whole cache */ + #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 + void flush_kernel_vmap_range(void *vaddr, int size); + void invalidate_kernel_vmap_range(void *vaddr, int size); + +-#define flush_cache_vmap(start, end) flush_cache_all() ++void flush_cache_vmap(unsigned long start, unsigned long end); + #define flush_cache_vmap_early(start, end) do { } while (0) +-#define flush_cache_vunmap(start, end) flush_cache_all() ++void flush_cache_vunmap(unsigned long start, unsigned long end); + + void flush_dcache_folio(struct folio *folio); + #define flush_dcache_folio flush_dcache_folio +@@ -77,17 +76,11 @@ void flush_cache_page(struct vm_area_str + void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); + +-/* defined in pacache.S exported in cache.c used by flush_anon_page */ +-void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); +- + #define ARCH_HAS_FLUSH_ANON_PAGE + void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr); + + #define ARCH_HAS_FLUSH_ON_KUNMAP +-static inline void kunmap_flush_on_unmap(const void *addr) +-{ +- flush_kernel_dcache_page_addr(addr); +-} ++void kunmap_flush_on_unmap(const void *addr); + + #endif /* _PARISC_CACHEFLUSH_H */ + +--- a/arch/parisc/include/asm/pgtable.h ++++ b/arch/parisc/include/asm/pgtable.h +@@ -448,14 +448,17 @@ static inline pte_t pte_swp_clear_exclus + return pte; + } + ++static inline pte_t ptep_get(pte_t *ptep) ++{ ++ return READ_ONCE(*ptep); ++} ++#define ptep_get ptep_get ++ + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) + { + pte_t pte; + +- if (!pte_young(*ptep)) +- return 0; +- +- pte = *ptep; ++ pte = ptep_get(ptep); + if (!pte_young(pte)) { + return 0; + } +@@ -463,17 +466,10 @@ static inline int ptep_test_and_clear_yo + return 1; + } + +-struct mm_struct; +-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +-{ +- pte_t old_pte; +- +- old_pte = *ptep; +- set_pte(ptep, __pte(0)); +- +- return old_pte; +-} ++int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); ++pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); + ++struct mm_struct; + static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + set_pte(ptep, 
pte_wrprotect(*ptep)); +@@ -511,7 +507,8 @@ static inline void ptep_set_wrprotect(st + #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR ++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH ++#define __HAVE_ARCH_PTEP_CLEAR_FLUSH + #define __HAVE_ARCH_PTEP_SET_WRPROTECT + #define __HAVE_ARCH_PTE_SAME + +--- a/arch/parisc/kernel/cache.c ++++ b/arch/parisc/kernel/cache.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -31,20 +32,31 @@ + #include + #include + ++#define PTR_PAGE_ALIGN_DOWN(addr) PTR_ALIGN_DOWN(addr, PAGE_SIZE) ++ ++/* ++ * When nonzero, use _PAGE_ACCESSED bit to try to reduce the number ++ * of page flushes done flush_cache_page_if_present. There are some ++ * pros and cons in using this option. It may increase the risk of ++ * random segmentation faults. ++ */ ++#define CONFIG_FLUSH_PAGE_ACCESSED 0 ++ + int split_tlb __ro_after_init; + int dcache_stride __ro_after_init; + int icache_stride __ro_after_init; + EXPORT_SYMBOL(dcache_stride); + ++/* Internal implementation in arch/parisc/kernel/pacache.S */ + void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); + EXPORT_SYMBOL(flush_dcache_page_asm); + void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); + void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); +- +-/* Internal implementation in arch/parisc/kernel/pacache.S */ + void flush_data_cache_local(void *); /* flushes local data-cache only */ + void flush_instruction_cache_local(void); /* flushes local code-cache only */ + ++static void flush_kernel_dcache_page_addr(const void *addr); ++ + /* On some machines (i.e., ones with the Merced bus), there can be + * only a single PxTLB broadcast at a time; this must be guaranteed + * by software. We need a spinlock around all TLB flushes to ensure +@@ -321,6 +333,18 @@ __flush_cache_page(struct vm_area_struct + { + if (!static_branch_likely(&parisc_has_cache)) + return; ++ ++ /* ++ * The TLB is the engine of coherence on parisc. The CPU is ++ * entitled to speculate any page with a TLB mapping, so here ++ * we kill the mapping then flush the page along a special flush ++ * only alias mapping. This guarantees that the page is no-longer ++ * in the cache for any process and nor may it be speculatively ++ * read in (until the user or kernel specifically accesses it, ++ * of course). 
++ */ ++ flush_tlb_page(vma, vmaddr); ++ + preempt_disable(); + flush_dcache_page_asm(physaddr, vmaddr); + if (vma->vm_flags & VM_EXEC) +@@ -328,46 +352,44 @@ __flush_cache_page(struct vm_area_struct + preempt_enable(); + } + +-static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr) ++static void flush_kernel_dcache_page_addr(const void *addr) + { +- unsigned long flags, space, pgd, prot; +-#ifdef CONFIG_TLB_PTLOCK +- unsigned long pgd_lock; +-#endif ++ unsigned long vaddr = (unsigned long)addr; ++ unsigned long flags; + +- vmaddr &= PAGE_MASK; ++ /* Purge TLB entry to remove translation on all CPUs */ ++ purge_tlb_start(flags); ++ pdtlb(SR_KERNEL, addr); ++ purge_tlb_end(flags); + ++ /* Use tmpalias flush to prevent data cache move-in */ + preempt_disable(); ++ flush_dcache_page_asm(__pa(vaddr), vaddr); ++ preempt_enable(); ++} + +- /* Set context for flush */ +- local_irq_save(flags); +- prot = mfctl(8); +- space = mfsp(SR_USER); +- pgd = mfctl(25); +-#ifdef CONFIG_TLB_PTLOCK +- pgd_lock = mfctl(28); +-#endif +- switch_mm_irqs_off(NULL, vma->vm_mm, NULL); +- local_irq_restore(flags); +- +- flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE); +- if (vma->vm_flags & VM_EXEC) +- flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE); +- flush_tlb_page(vma, vmaddr); ++static void flush_kernel_icache_page_addr(const void *addr) ++{ ++ unsigned long vaddr = (unsigned long)addr; ++ unsigned long flags; + +- /* Restore previous context */ +- local_irq_save(flags); +-#ifdef CONFIG_TLB_PTLOCK +- mtctl(pgd_lock, 28); +-#endif +- mtctl(pgd, 25); +- mtsp(space, SR_USER); +- mtctl(prot, 8); +- local_irq_restore(flags); ++ /* Purge TLB entry to remove translation on all CPUs */ ++ purge_tlb_start(flags); ++ pdtlb(SR_KERNEL, addr); ++ purge_tlb_end(flags); + ++ /* Use tmpalias flush to prevent instruction cache move-in */ ++ preempt_disable(); ++ flush_icache_page_asm(__pa(vaddr), vaddr); + preempt_enable(); + } + ++void kunmap_flush_on_unmap(const void *addr) ++{ ++ flush_kernel_dcache_page_addr(addr); ++} ++EXPORT_SYMBOL(kunmap_flush_on_unmap); ++ + void flush_icache_pages(struct vm_area_struct *vma, struct page *page, + unsigned int nr) + { +@@ -375,13 +397,16 @@ void flush_icache_pages(struct vm_area_s + + for (;;) { + flush_kernel_dcache_page_addr(kaddr); +- flush_kernel_icache_page(kaddr); ++ flush_kernel_icache_page_addr(kaddr); + if (--nr == 0) + break; + kaddr += PAGE_SIZE; + } + } + ++/* ++ * Walk page directory for MM to find PTEP pointer for address ADDR. ++ */ + static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr) + { + pte_t *ptep = NULL; +@@ -410,6 +435,41 @@ static inline bool pte_needs_flush(pte_t + == (_PAGE_PRESENT | _PAGE_ACCESSED); + } + ++/* ++ * Return user physical address. Returns 0 if page is not present. 
++ */ ++static inline unsigned long get_upa(struct mm_struct *mm, unsigned long addr) ++{ ++ unsigned long flags, space, pgd, prot, pa; ++#ifdef CONFIG_TLB_PTLOCK ++ unsigned long pgd_lock; ++#endif ++ ++ /* Save context */ ++ local_irq_save(flags); ++ prot = mfctl(8); ++ space = mfsp(SR_USER); ++ pgd = mfctl(25); ++#ifdef CONFIG_TLB_PTLOCK ++ pgd_lock = mfctl(28); ++#endif ++ ++ /* Set context for lpa_user */ ++ switch_mm_irqs_off(NULL, mm, NULL); ++ pa = lpa_user(addr); ++ ++ /* Restore previous context */ ++#ifdef CONFIG_TLB_PTLOCK ++ mtctl(pgd_lock, 28); ++#endif ++ mtctl(pgd, 25); ++ mtsp(space, SR_USER); ++ mtctl(prot, 8); ++ local_irq_restore(flags); ++ ++ return pa; ++} ++ + void flush_dcache_folio(struct folio *folio) + { + struct address_space *mapping = folio_flush_mapping(folio); +@@ -458,50 +518,23 @@ void flush_dcache_folio(struct folio *fo + if (addr + nr * PAGE_SIZE > vma->vm_end) + nr = (vma->vm_end - addr) / PAGE_SIZE; + +- if (parisc_requires_coherency()) { +- for (i = 0; i < nr; i++) { +- pte_t *ptep = get_ptep(vma->vm_mm, +- addr + i * PAGE_SIZE); +- if (!ptep) +- continue; +- if (pte_needs_flush(*ptep)) +- flush_user_cache_page(vma, +- addr + i * PAGE_SIZE); +- /* Optimise accesses to the same table? */ +- pte_unmap(ptep); +- } +- } else { ++ if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) ++ != (addr & (SHM_COLOUR - 1))) { ++ for (i = 0; i < nr; i++) ++ __flush_cache_page(vma, ++ addr + i * PAGE_SIZE, ++ (pfn + i) * PAGE_SIZE); + /* +- * The TLB is the engine of coherence on parisc: +- * The CPU is entitled to speculate any page +- * with a TLB mapping, so here we kill the +- * mapping then flush the page along a special +- * flush only alias mapping. This guarantees that +- * the page is no-longer in the cache for any +- * process and nor may it be speculatively read +- * in (until the user or kernel specifically +- * accesses it, of course) ++ * Software is allowed to have any number ++ * of private mappings to a page. + */ +- for (i = 0; i < nr; i++) +- flush_tlb_page(vma, addr + i * PAGE_SIZE); +- if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) +- != (addr & (SHM_COLOUR - 1))) { +- for (i = 0; i < nr; i++) +- __flush_cache_page(vma, +- addr + i * PAGE_SIZE, +- (pfn + i) * PAGE_SIZE); +- /* +- * Software is allowed to have any number +- * of private mappings to a page. 
+- */ +- if (!(vma->vm_flags & VM_SHARED)) +- continue; +- if (old_addr) +- pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", +- old_addr, addr, vma->vm_file); +- if (nr == folio_nr_pages(folio)) +- old_addr = addr; +- } ++ if (!(vma->vm_flags & VM_SHARED)) ++ continue; ++ if (old_addr) ++ pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", ++ old_addr, addr, vma->vm_file); ++ if (nr == folio_nr_pages(folio)) ++ old_addr = addr; + } + WARN_ON(++count == 4096); + } +@@ -591,35 +624,28 @@ extern void purge_kernel_dcache_page_asm + extern void clear_user_page_asm(void *, unsigned long); + extern void copy_user_page_asm(void *, void *, unsigned long); + +-void flush_kernel_dcache_page_addr(const void *addr) +-{ +- unsigned long flags; +- +- flush_kernel_dcache_page_asm(addr); +- purge_tlb_start(flags); +- pdtlb(SR_KERNEL, addr); +- purge_tlb_end(flags); +-} +-EXPORT_SYMBOL(flush_kernel_dcache_page_addr); +- + static void flush_cache_page_if_present(struct vm_area_struct *vma, +- unsigned long vmaddr, unsigned long pfn) ++ unsigned long vmaddr) + { ++#if CONFIG_FLUSH_PAGE_ACCESSED + bool needs_flush = false; +- pte_t *ptep; ++ pte_t *ptep, pte; + +- /* +- * The pte check is racy and sometimes the flush will trigger +- * a non-access TLB miss. Hopefully, the page has already been +- * flushed. +- */ + ptep = get_ptep(vma->vm_mm, vmaddr); + if (ptep) { +- needs_flush = pte_needs_flush(*ptep); ++ pte = ptep_get(ptep); ++ needs_flush = pte_needs_flush(pte); + pte_unmap(ptep); + } + if (needs_flush) +- flush_cache_page(vma, vmaddr, pfn); ++ __flush_cache_page(vma, vmaddr, PFN_PHYS(pte_pfn(pte))); ++#else ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long physaddr = get_upa(mm, vmaddr); ++ ++ if (physaddr) ++ __flush_cache_page(vma, vmaddr, PAGE_ALIGN_DOWN(physaddr)); ++#endif + } + + void copy_user_highpage(struct page *to, struct page *from, +@@ -629,7 +655,7 @@ void copy_user_highpage(struct page *to, + + kfrom = kmap_local_page(from); + kto = kmap_local_page(to); +- flush_cache_page_if_present(vma, vaddr, page_to_pfn(from)); ++ __flush_cache_page(vma, vaddr, PFN_PHYS(page_to_pfn(from))); + copy_page_asm(kto, kfrom); + kunmap_local(kto); + kunmap_local(kfrom); +@@ -638,16 +664,17 @@ void copy_user_highpage(struct page *to, + void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len) + { +- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); ++ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); + memcpy(dst, src, len); +- flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); ++ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(dst)); + } + + void copy_from_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len) + { +- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); ++ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); + memcpy(dst, src, len); ++ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(src)); + } + + /* __flush_tlb_range() +@@ -681,32 +708,10 @@ int __flush_tlb_range(unsigned long sid, + + static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) + { +- unsigned long addr, pfn; +- pte_t *ptep; ++ unsigned long addr; + +- for (addr = start; addr < end; addr += PAGE_SIZE) { +- bool needs_flush = false; +- /* +- * The vma can contain pages that aren't present. 
Although +- * the pte search is expensive, we need the pte to find the +- * page pfn and to check whether the page should be flushed. +- */ +- ptep = get_ptep(vma->vm_mm, addr); +- if (ptep) { +- needs_flush = pte_needs_flush(*ptep); +- pfn = pte_pfn(*ptep); +- pte_unmap(ptep); +- } +- if (needs_flush) { +- if (parisc_requires_coherency()) { +- flush_user_cache_page(vma, addr); +- } else { +- if (WARN_ON(!pfn_valid(pfn))) +- return; +- __flush_cache_page(vma, addr, PFN_PHYS(pfn)); +- } +- } +- } ++ for (addr = start; addr < end; addr += PAGE_SIZE) ++ flush_cache_page_if_present(vma, addr); + } + + static inline unsigned long mm_total_size(struct mm_struct *mm) +@@ -757,21 +762,19 @@ void flush_cache_range(struct vm_area_st + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) + return; + flush_tlb_range(vma, start, end); +- flush_cache_all(); ++ if (vma->vm_flags & VM_EXEC) ++ flush_cache_all(); ++ else ++ flush_data_cache(); + return; + } + +- flush_cache_pages(vma, start, end); ++ flush_cache_pages(vma, start & PAGE_MASK, end); + } + + void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) + { +- if (WARN_ON(!pfn_valid(pfn))) +- return; +- if (parisc_requires_coherency()) +- flush_user_cache_page(vma, vmaddr); +- else +- __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); ++ __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + } + + void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) +@@ -779,34 +782,133 @@ void flush_anon_page(struct vm_area_stru + if (!PageAnon(page)) + return; + +- if (parisc_requires_coherency()) { +- if (vma->vm_flags & VM_SHARED) +- flush_data_cache(); +- else +- flush_user_cache_page(vma, vmaddr); ++ __flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page))); ++} ++ ++int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, ++ pte_t *ptep) ++{ ++ pte_t pte = ptep_get(ptep); ++ ++ if (!pte_young(pte)) ++ return 0; ++ set_pte(ptep, pte_mkold(pte)); ++#if CONFIG_FLUSH_PAGE_ACCESSED ++ __flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte))); ++#endif ++ return 1; ++} ++ ++/* ++ * After a PTE is cleared, we have no way to flush the cache for ++ * the physical page. On PA8800 and PA8900 processors, these lines ++ * can cause random cache corruption. Thus, we must flush the cache ++ * as well as the TLB when clearing a PTE that's valid. ++ */ ++pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, ++ pte_t *ptep) ++{ ++ struct mm_struct *mm = (vma)->vm_mm; ++ pte_t pte = ptep_get_and_clear(mm, addr, ptep); ++ unsigned long pfn = pte_pfn(pte); ++ ++ if (pfn_valid(pfn)) ++ __flush_cache_page(vma, addr, PFN_PHYS(pfn)); ++ else if (pte_accessible(mm, pte)) ++ flush_tlb_page(vma, addr); ++ ++ return pte; ++} ++ ++/* ++ * The physical address for pages in the ioremap case can be obtained ++ * from the vm_struct struct. I wasn't able to successfully handle the ++ * vmalloc and vmap cases. We have an array of struct page pointers in ++ * the uninitialized vmalloc case but the flush failed using page_to_pfn. 
++ */ ++void flush_cache_vmap(unsigned long start, unsigned long end) ++{ ++ unsigned long addr, physaddr; ++ struct vm_struct *vm; ++ ++ /* Prevent cache move-in */ ++ flush_tlb_kernel_range(start, end); ++ ++ if (end - start >= parisc_cache_flush_threshold) { ++ flush_cache_all(); + return; + } + +- flush_tlb_page(vma, vmaddr); +- preempt_disable(); +- flush_dcache_page_asm(page_to_phys(page), vmaddr); +- preempt_enable(); ++ if (WARN_ON_ONCE(!is_vmalloc_addr((void *)start))) { ++ flush_cache_all(); ++ return; ++ } ++ ++ vm = find_vm_area((void *)start); ++ if (WARN_ON_ONCE(!vm)) { ++ flush_cache_all(); ++ return; ++ } ++ ++ /* The physical addresses of IOREMAP regions are contiguous */ ++ if (vm->flags & VM_IOREMAP) { ++ physaddr = vm->phys_addr; ++ for (addr = start; addr < end; addr += PAGE_SIZE) { ++ preempt_disable(); ++ flush_dcache_page_asm(physaddr, start); ++ flush_icache_page_asm(physaddr, start); ++ preempt_enable(); ++ physaddr += PAGE_SIZE; ++ } ++ return; ++ } ++ ++ flush_cache_all(); + } ++EXPORT_SYMBOL(flush_cache_vmap); + ++/* ++ * The vm_struct has been retired and the page table is set up. The ++ * last page in the range is a guard page. Its physical address can't ++ * be determined using lpa, so there is no way to flush the range ++ * using flush_dcache_page_asm. ++ */ ++void flush_cache_vunmap(unsigned long start, unsigned long end) ++{ ++ /* Prevent cache move-in */ ++ flush_tlb_kernel_range(start, end); ++ flush_data_cache(); ++} ++EXPORT_SYMBOL(flush_cache_vunmap); ++ ++/* ++ * On systems with PA8800/PA8900 processors, there is no way to flush ++ * a vmap range other than using the architected loop to flush the ++ * entire cache. The page directory is not set up, so we can't use ++ * fdc, etc. FDCE/FICE don't work to flush a portion of the cache. ++ * L2 is physically indexed but FDCE/FICE instructions in virtual ++ * mode output their virtual address on the core bus, not their ++ * real address. As a result, the L2 cache index formed from the ++ * virtual address will most likely not be the same as the L2 index ++ * formed from the real address. 
++ */ + void flush_kernel_vmap_range(void *vaddr, int size) + { + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + +- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +- (unsigned long)size >= parisc_cache_flush_threshold) { +- flush_tlb_kernel_range(start, end); +- flush_data_cache(); ++ flush_tlb_kernel_range(start, end); ++ ++ if (!static_branch_likely(&parisc_has_dcache)) ++ return; ++ ++ /* If interrupts are disabled, we can only do local flush */ ++ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { ++ flush_data_cache_local(NULL); + return; + } + +- flush_kernel_dcache_range_asm(start, end); +- flush_tlb_kernel_range(start, end); ++ flush_data_cache(); + } + EXPORT_SYMBOL(flush_kernel_vmap_range); + +@@ -818,15 +920,18 @@ void invalidate_kernel_vmap_range(void * + /* Ensure DMA is complete */ + asm_syncdma(); + +- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +- (unsigned long)size >= parisc_cache_flush_threshold) { +- flush_tlb_kernel_range(start, end); +- flush_data_cache(); ++ flush_tlb_kernel_range(start, end); ++ ++ if (!static_branch_likely(&parisc_has_dcache)) ++ return; ++ ++ /* If interrupts are disabled, we can only do local flush */ ++ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { ++ flush_data_cache_local(NULL); + return; + } + +- purge_kernel_dcache_range_asm(start, end); +- flush_tlb_kernel_range(start, end); ++ flush_data_cache(); + } + EXPORT_SYMBOL(invalidate_kernel_vmap_range); + diff --git a/queue-6.9/ras-amd-atl-fix-mi300-bank-hash.patch b/queue-6.9/ras-amd-atl-fix-mi300-bank-hash.patch new file mode 100644 index 00000000000..509c367805e --- /dev/null +++ b/queue-6.9/ras-amd-atl-fix-mi300-bank-hash.patch @@ -0,0 +1,48 @@ +From fe8a08973a0dea9757394c5adbdc3c0a03b0b432 Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam +Date: Fri, 7 Jun 2024 16:32:59 -0500 +Subject: RAS/AMD/ATL: Fix MI300 bank hash + +From: Yazen Ghannam + +commit fe8a08973a0dea9757394c5adbdc3c0a03b0b432 upstream. + +Apply the SID bits to the correct offset in the Bank value. Do this in +the temporary value so they don't need to be masked off later. + +Fixes: 87a612375307 ("RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support") +Signed-off-by: Yazen Ghannam +Signed-off-by: Borislav Petkov (AMD) +Cc: +Link: https://lore.kernel.org/r/20240607-mi300-dram-xl-fix-v1-1-2f11547a178c@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ras/amd/atl/umc.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c +index 59b6169093f7..5cb92330dc67 100644 +--- a/drivers/ras/amd/atl/umc.c ++++ b/drivers/ras/amd/atl/umc.c +@@ -189,16 +189,11 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) + + /* Calculate hash for PC bit. */ + if (addr_hash.pc.xor_enable) { +- /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */ +- bank |= sid << 5; +- + temp = bitwise_xor_bits(col & addr_hash.pc.col_xor); + temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor); +- temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor); ++ /* Bits SID[1:0] act as Bank[5:4] for PC hash, so apply them here. */ ++ temp ^= bitwise_xor_bits((bank | sid << NUM_BANK_BITS) & addr_hash.bank_xor); + pc ^= temp; +- +- /* Drop SID bits for the sake of debug printing later. 
*/ +- bank &= 0x1F; + } + + /* Reconstruct the normalized address starting with NA[4:0] = 0 */ +-- +2.45.2 + diff --git a/queue-6.9/ras-amd-atl-use-system-settings-for-mi300-dram-to-normalized-address-translation.patch b/queue-6.9/ras-amd-atl-use-system-settings-for-mi300-dram-to-normalized-address-translation.patch new file mode 100644 index 00000000000..27787e640fc --- /dev/null +++ b/queue-6.9/ras-amd-atl-use-system-settings-for-mi300-dram-to-normalized-address-translation.patch @@ -0,0 +1,271 @@ +From ba437905b4fbf0ee1686c175069239a1cc292558 Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam +Date: Fri, 7 Jun 2024 16:33:00 -0500 +Subject: RAS/AMD/ATL: Use system settings for MI300 DRAM to normalized address translation + +From: Yazen Ghannam + +commit ba437905b4fbf0ee1686c175069239a1cc292558 upstream. + +The currently used normalized address format is not applicable to all +MI300 systems. This leads to incorrect results during address +translation. + +Drop the fixed layout and construct the normalized address from system +settings. + +Fixes: 87a612375307 ("RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support") +Signed-off-by: Yazen Ghannam +Signed-off-by: Borislav Petkov (AMD) +Cc: +Link: https://lore.kernel.org/r/20240607-mi300-dram-xl-fix-v1-2-2f11547a178c@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ras/amd/atl/internal.h | 2 + drivers/ras/amd/atl/system.c | 2 + drivers/ras/amd/atl/umc.c | 157 ++++++++++++++++++++++++++++++----------- + 3 files changed, 117 insertions(+), 44 deletions(-) + +--- a/drivers/ras/amd/atl/internal.h ++++ b/drivers/ras/amd/atl/internal.h +@@ -224,7 +224,7 @@ int df_indirect_read_broadcast(u16 node, + + int get_df_system_info(void); + int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); +-int get_addr_hash_mi300(void); ++int get_umc_info_mi300(void); + + int get_address_map(struct addr_ctx *ctx); + +--- a/drivers/ras/amd/atl/system.c ++++ b/drivers/ras/amd/atl/system.c +@@ -127,7 +127,7 @@ static int df4_determine_df_rev(u32 reg) + if (reg == DF_FUNC0_ID_MI300) { + df_cfg.flags.heterogeneous = 1; + +- if (get_addr_hash_mi300()) ++ if (get_umc_info_mi300()) + return -EINVAL; + } + +--- a/drivers/ras/amd/atl/umc.c ++++ b/drivers/ras/amd/atl/umc.c +@@ -68,6 +68,8 @@ struct xor_bits { + }; + + #define NUM_BANK_BITS 4 ++#define NUM_COL_BITS 5 ++#define NUM_SID_BITS 2 + + static struct { + /* UMC::CH::AddrHashBank */ +@@ -80,7 +82,22 @@ static struct { + u8 bank_xor; + } addr_hash; + ++static struct { ++ u8 bank[NUM_BANK_BITS]; ++ u8 col[NUM_COL_BITS]; ++ u8 sid[NUM_SID_BITS]; ++ u8 num_row_lo; ++ u8 num_row_hi; ++ u8 row_lo; ++ u8 row_hi; ++ u8 pc; ++} bit_shifts; ++ + #define MI300_UMC_CH_BASE 0x90000 ++#define MI300_ADDR_CFG (MI300_UMC_CH_BASE + 0x30) ++#define MI300_ADDR_SEL (MI300_UMC_CH_BASE + 0x40) ++#define MI300_COL_SEL_LO (MI300_UMC_CH_BASE + 0x50) ++#define MI300_ADDR_SEL_2 (MI300_UMC_CH_BASE + 0xA4) + #define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8) + #define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0) + #define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4) +@@ -90,17 +107,42 @@ static struct { + #define ADDR_HASH_ROW_XOR GENMASK(31, 14) + #define ADDR_HASH_BANK_XOR GENMASK(5, 0) + ++#define ADDR_CFG_NUM_ROW_LO GENMASK(11, 8) ++#define ADDR_CFG_NUM_ROW_HI GENMASK(15, 12) ++ ++#define ADDR_SEL_BANK0 GENMASK(3, 0) ++#define ADDR_SEL_BANK1 GENMASK(7, 4) ++#define ADDR_SEL_BANK2 GENMASK(11, 8) ++#define ADDR_SEL_BANK3 GENMASK(15, 12) ++#define ADDR_SEL_BANK4 GENMASK(20, 16) ++#define ADDR_SEL_ROW_LO 
GENMASK(27, 24) ++#define ADDR_SEL_ROW_HI GENMASK(31, 28) ++ ++#define COL_SEL_LO_COL0 GENMASK(3, 0) ++#define COL_SEL_LO_COL1 GENMASK(7, 4) ++#define COL_SEL_LO_COL2 GENMASK(11, 8) ++#define COL_SEL_LO_COL3 GENMASK(15, 12) ++#define COL_SEL_LO_COL4 GENMASK(19, 16) ++ ++#define ADDR_SEL_2_BANK5 GENMASK(4, 0) ++#define ADDR_SEL_2_CHAN GENMASK(15, 12) ++ + /* + * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used +- * for hashing. Do this during module init, since the values will not +- * change during run time. ++ * for hashing. ++ * ++ * Also, read UMC::CH::Addr{Cfg,Sel,Sel2} and UMC::CH:ColSelLo registers to ++ * get the values needed to reconstruct the normalized address. Apply additional ++ * offsets to the raw register values, as needed. ++ * ++ * Do this during module init, since the values will not change during run time. + * + * These registers are instantiated for each UMC across each AMD Node. + * However, they should be identically programmed due to the fixed hardware + * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a + * single global structure for simplicity. + */ +-int get_addr_hash_mi300(void) ++int get_umc_info_mi300(void) + { + u32 temp; + int ret; +@@ -130,6 +172,44 @@ int get_addr_hash_mi300(void) + + addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp); + ++ ret = amd_smn_read(0, MI300_ADDR_CFG, &temp); ++ if (ret) ++ return ret; ++ ++ bit_shifts.num_row_hi = FIELD_GET(ADDR_CFG_NUM_ROW_HI, temp); ++ bit_shifts.num_row_lo = 10 + FIELD_GET(ADDR_CFG_NUM_ROW_LO, temp); ++ ++ ret = amd_smn_read(0, MI300_ADDR_SEL, &temp); ++ if (ret) ++ return ret; ++ ++ bit_shifts.bank[0] = 5 + FIELD_GET(ADDR_SEL_BANK0, temp); ++ bit_shifts.bank[1] = 5 + FIELD_GET(ADDR_SEL_BANK1, temp); ++ bit_shifts.bank[2] = 5 + FIELD_GET(ADDR_SEL_BANK2, temp); ++ bit_shifts.bank[3] = 5 + FIELD_GET(ADDR_SEL_BANK3, temp); ++ /* Use BankBit4 for the SID0 position. */ ++ bit_shifts.sid[0] = 5 + FIELD_GET(ADDR_SEL_BANK4, temp); ++ bit_shifts.row_lo = 12 + FIELD_GET(ADDR_SEL_ROW_LO, temp); ++ bit_shifts.row_hi = 24 + FIELD_GET(ADDR_SEL_ROW_HI, temp); ++ ++ ret = amd_smn_read(0, MI300_COL_SEL_LO, &temp); ++ if (ret) ++ return ret; ++ ++ bit_shifts.col[0] = 2 + FIELD_GET(COL_SEL_LO_COL0, temp); ++ bit_shifts.col[1] = 2 + FIELD_GET(COL_SEL_LO_COL1, temp); ++ bit_shifts.col[2] = 2 + FIELD_GET(COL_SEL_LO_COL2, temp); ++ bit_shifts.col[3] = 2 + FIELD_GET(COL_SEL_LO_COL3, temp); ++ bit_shifts.col[4] = 2 + FIELD_GET(COL_SEL_LO_COL4, temp); ++ ++ ret = amd_smn_read(0, MI300_ADDR_SEL_2, &temp); ++ if (ret) ++ return ret; ++ ++ /* Use BankBit5 for the SID1 position. */ ++ bit_shifts.sid[1] = 5 + FIELD_GET(ADDR_SEL_2_BANK5, temp); ++ bit_shifts.pc = 5 + FIELD_GET(ADDR_SEL_2_CHAN, temp); ++ + return 0; + } + +@@ -146,9 +226,6 @@ int get_addr_hash_mi300(void) + * The MCA address format is as follows: + * MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]} + * +- * The normalized address format is fixed in hardware and is as follows: +- * NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]} +- * + * Additionally, the PC and Bank bits may be hashed. This must be accounted for before + * reconstructing the normalized address. 
+ */ +@@ -158,18 +235,10 @@ int get_addr_hash_mi300(void) + #define MI300_UMC_MCA_PC BIT(25) + #define MI300_UMC_MCA_SID GENMASK(27, 26) + +-#define MI300_NA_COL_1_0 GENMASK(6, 5) +-#define MI300_NA_PC BIT(7) +-#define MI300_NA_COL_3_2 GENMASK(9, 8) +-#define MI300_NA_BANK_3_2 GENMASK(11, 10) +-#define MI300_NA_BANK_1_0 GENMASK(13, 12) +-#define MI300_NA_COL_4 BIT(14) +-#define MI300_NA_ROW GENMASK(28, 15) +-#define MI300_NA_SID GENMASK(30, 29) +- + static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) + { +- u16 i, col, row, bank, pc, sid, temp; ++ u16 i, col, row, bank, pc, sid; ++ u32 temp; + + col = FIELD_GET(MI300_UMC_MCA_COL, addr); + bank = FIELD_GET(MI300_UMC_MCA_BANK, addr); +@@ -199,34 +268,38 @@ static unsigned long convert_dram_to_nor + /* Reconstruct the normalized address starting with NA[4:0] = 0 */ + addr = 0; + +- /* NA[6:5] = Column[1:0] */ +- temp = col & 0x3; +- addr |= FIELD_PREP(MI300_NA_COL_1_0, temp); +- +- /* NA[7] = PC */ +- addr |= FIELD_PREP(MI300_NA_PC, pc); +- +- /* NA[9:8] = Column[3:2] */ +- temp = (col >> 2) & 0x3; +- addr |= FIELD_PREP(MI300_NA_COL_3_2, temp); +- +- /* NA[11:10] = Bank[3:2] */ +- temp = (bank >> 2) & 0x3; +- addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp); +- +- /* NA[13:12] = Bank[1:0] */ +- temp = bank & 0x3; +- addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp); +- +- /* NA[14] = Column[4] */ +- temp = (col >> 4) & 0x1; +- addr |= FIELD_PREP(MI300_NA_COL_4, temp); ++ /* Column bits */ ++ for (i = 0; i < NUM_COL_BITS; i++) { ++ temp = (col >> i) & 0x1; ++ addr |= temp << bit_shifts.col[i]; ++ } + +- /* NA[28:15] = Row[13:0] */ +- addr |= FIELD_PREP(MI300_NA_ROW, row); ++ /* Bank bits */ ++ for (i = 0; i < NUM_BANK_BITS; i++) { ++ temp = (bank >> i) & 0x1; ++ addr |= temp << bit_shifts.bank[i]; ++ } ++ ++ /* Row lo bits */ ++ for (i = 0; i < bit_shifts.num_row_lo; i++) { ++ temp = (row >> i) & 0x1; ++ addr |= temp << (i + bit_shifts.row_lo); ++ } + +- /* NA[30:29] = SID[1:0] */ +- addr |= FIELD_PREP(MI300_NA_SID, sid); ++ /* Row hi bits */ ++ for (i = 0; i < bit_shifts.num_row_hi; i++) { ++ temp = (row >> (i + bit_shifts.num_row_lo)) & 0x1; ++ addr |= temp << (i + bit_shifts.row_hi); ++ } ++ ++ /* PC bit */ ++ addr |= pc << bit_shifts.pc; ++ ++ /* SID bits */ ++ for (i = 0; i < NUM_SID_BITS; i++) { ++ temp = (sid >> i) & 0x1; ++ addr |= temp << bit_shifts.sid[i]; ++ } + + pr_debug("Addr=0x%016lx", addr); + pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid); diff --git a/queue-6.9/series b/queue-6.9/series index 3f04df7d0c3..a22e8022553 100644 --- a/queue-6.9/series +++ b/queue-6.9/series @@ -191,3 +191,20 @@ ionic-fix-use-after-netif_napi_del.patch bnxt_en-cap-the-size-of-hwrm_port_phy_qcfg-forwarded.patch af_unix-read-with-msg_peek-loops-if-the-first-unread.patch bnxt_en-adjust-logging-of-firmware-messages-in-case-.patch +misc-microchip-pci1xxxx-fix-double-free-in-the-error-handling-of-gp_aux_bus_probe.patch +ksmbd-move-leading-slash-check-to-smb2_get_name.patch +ksmbd-fix-missing-use-of-get_write-in-in-smb2_set_ea.patch +tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch +leds-class-revert-if-no-default-trigger-is-given-make-hw_control-trigger-the-default-trigger.patch +x86-boot-don-t-add-the-efi-stub-to-targets-again.patch +iio-adc-ad9467-fix-scan-type-sign.patch +iio-dac-ad5592r-fix-temperature-channel-scaling-value.patch +iio-imu-bmi323-fix-trigger-notification-in-case-of-error.patch +iio-invensense-fix-odr-switching-to-same-value.patch 
+iio-pressure-bmp280-fix-bmp580-temperature-reading.patch +iio-temperature-mlx90635-fix-err_ptr-dereference-in-mlx90635_probe.patch +iio-imu-inv_icm42600-delete-unneeded-update-watermark-call.patch +drivers-core-synchronize-really_probe-and-dev_uevent.patch +parisc-try-to-fix-random-segmentation-faults-in-package-builds.patch +ras-amd-atl-fix-mi300-bank-hash.patch +ras-amd-atl-use-system-settings-for-mi300-dram-to-normalized-address-translation.patch diff --git a/queue-6.9/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch b/queue-6.9/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch new file mode 100644 index 00000000000..fc16422f756 --- /dev/null +++ b/queue-6.9/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch @@ -0,0 +1,94 @@ +From 07c54cc5988f19c9642fd463c2dbdac7fc52f777 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Tue, 28 May 2024 14:20:19 +0200 +Subject: tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device() + +From: Oleg Nesterov + +commit 07c54cc5988f19c9642fd463c2dbdac7fc52f777 upstream. + +After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash +when the boot CPU is nohz_full") the kernel no longer crashes, but there is +another problem. + +In this case tick_setup_device() calls tick_take_do_timer_from_boot() to +update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled) +in smp_call_function_single(). + +Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new +comment explains why this is safe (thanks Thomas!). + +Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full") +Signed-off-by: Oleg Nesterov +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com +Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/tick-common.c | 42 ++++++++++++++---------------------------- + 1 file changed, 14 insertions(+), 28 deletions(-) + +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_ev + } + } + +-#ifdef CONFIG_NO_HZ_FULL +-static void giveup_do_timer(void *info) +-{ +- int cpu = *(unsigned int *)info; +- +- WARN_ON(tick_do_timer_cpu != smp_processor_id()); +- +- tick_do_timer_cpu = cpu; +-} +- +-static void tick_take_do_timer_from_boot(void) +-{ +- int cpu = smp_processor_id(); +- int from = tick_do_timer_boot_cpu; +- +- if (from >= 0 && from != cpu) +- smp_call_function_single(from, giveup_do_timer, &cpu, 1); +-} +-#endif +- + /* + * Setup the tick device + */ +@@ -221,19 +201,25 @@ static void tick_setup_device(struct tic + tick_next_period = ktime_get(); + #ifdef CONFIG_NO_HZ_FULL + /* +- * The boot CPU may be nohz_full, in which case set +- * tick_do_timer_boot_cpu so the first housekeeping +- * secondary that comes up will take do_timer from +- * us. ++ * The boot CPU may be nohz_full, in which case the ++ * first housekeeping secondary will take do_timer() ++ * from it. 
+ */ + if (tick_nohz_full_cpu(cpu)) + tick_do_timer_boot_cpu = cpu; + +- } else if (tick_do_timer_boot_cpu != -1 && +- !tick_nohz_full_cpu(cpu)) { +- tick_take_do_timer_from_boot(); ++ } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { + tick_do_timer_boot_cpu = -1; +- WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu); ++ /* ++ * The boot CPU will stay in periodic (NOHZ disabled) ++ * mode until clocksource_done_booting() called after ++ * smp_init() selects a high resolution clocksource and ++ * timekeeping_notify() kicks the NOHZ stuff alive. ++ * ++ * So this WRITE_ONCE can only race with the READ_ONCE ++ * check in tick_periodic() but this race is harmless. ++ */ ++ WRITE_ONCE(tick_do_timer_cpu, cpu); + #endif + } + diff --git a/queue-6.9/x86-boot-don-t-add-the-efi-stub-to-targets-again.patch b/queue-6.9/x86-boot-don-t-add-the-efi-stub-to-targets-again.patch new file mode 100644 index 00000000000..d54451b4301 --- /dev/null +++ b/queue-6.9/x86-boot-don-t-add-the-efi-stub-to-targets-again.patch @@ -0,0 +1,47 @@ +From b2747f108b8034271fd5289bd8f3a7003e0775a3 Mon Sep 17 00:00:00 2001 +From: Benjamin Segall +Date: Wed, 12 Jun 2024 12:44:44 -0700 +Subject: x86/boot: Don't add the EFI stub to targets, again + +From: Benjamin Segall + +commit b2747f108b8034271fd5289bd8f3a7003e0775a3 upstream. + +This is a re-commit of + + da05b143a308 ("x86/boot: Don't add the EFI stub to targets") + +after the tagged patch incorrectly reverted it. + +vmlinux-objs-y is added to targets, with an assumption that they are all +relative to $(obj); adding a $(objtree)/drivers/... path causes the +build to incorrectly create a useless +arch/x86/boot/compressed/drivers/... directory tree. + +Fix this just by using a different make variable for the EFI stub. + +Fixes: cb8bda8ad443 ("x86/boot/compressed: Rename efi_thunk_64.S to efi-mixed.S") +Signed-off-by: Ben Segall +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Ard Biesheuvel +Cc: stable@vger.kernel.org # v6.1+ +Link: https://lore.kernel.org/r/xm267ceukksz.fsf@bsegall.svl.corp.google.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/boot/compressed/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/boot/compressed/Makefile ++++ b/arch/x86/boot/compressed/Makefile +@@ -116,9 +116,9 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) + + vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o + vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o +-vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a ++vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a + +-$(obj)/vmlinux: $(vmlinux-objs-y) FORCE ++$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE + $(call if_changed,ld) + + OBJCOPYFLAGS_vmlinux.bin := -R .comment -S