6.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 1 Aug 2023 05:57:53 +0000 (07:57 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 1 Aug 2023 05:57:53 +0000 (07:57 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 1 Aug 2023 05:57:53 +0000 (07:57 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 1 Aug 2023 05:57:53 +0000 (07:57 +0200)
diff --git a/queue-6.4/io_uring-gate-iowait-schedule-on-having-pending-requests.patch b/queue-6.4/io_uring-gate-iowait-schedule-on-having-pending-requests.patch

new file mode 100644 (file)

index 0000000..f441ec4
--- /dev/null
+++ b/queue-6.4/io_uring-gate-iowait-schedule-on-having-pending-requests.patch
@@ -0,0 +1,82 @@
+From 7b72d661f1f2f950ab8c12de7e2bc48bdac8ed69 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 24 Jul 2023 11:28:17 -0600
+Subject: io_uring: gate iowait schedule on having pending requests
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 7b72d661f1f2f950ab8c12de7e2bc48bdac8ed69 upstream.
+
+A previous commit made all cqring waits marked as iowait, as a way to
+improve performance for short schedules with pending IO. However, for
+use cases that have a special reaper thread that does nothing but
+wait on events on the ring, this causes a cosmetic issue where we
+now have one core marked as being "busy" with 100% iowait.
+
+While this isn't a grave issue, it is confusing to users. Rather than
+always mark us as being in iowait, gate setting of current->in_iowait
+to 1 by whether or not the waiting task has pending requests.
+
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/io-uring/CAMEGJJ2RxopfNQ7GNLhr7X9=bHXKo+G5OOe0LUq=+UgLXsv1Xg@mail.gmail.com/
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217699
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217700
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Reported-by: Phil Elwell <phil@raspberrypi.com>
+Tested-by: Andres Freund <andres@anarazel.de>
+Fixes: 8a796565cec3 ("io_uring: Use io_schedule* in cqring wait")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |   23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2579,11 +2579,20 @@ int io_run_task_work_sig(struct io_ring_
+       return 0;
+ }
+ 
++static bool current_pending_io(void)
++{
++      struct io_uring_task *tctx = current->io_uring;
++
++      if (!tctx)
++              return false;
++      return percpu_counter_read_positive(&tctx->inflight);
++}
++
+ /* when returns >0, the caller should retry */
+ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+                                         struct io_wait_queue *iowq)
+ {
+-      int token, ret;
++      int io_wait, ret;
+ 
+       if (unlikely(READ_ONCE(ctx->check_cq)))
+               return 1;
+@@ -2597,17 +2606,19 @@ static inline int io_cqring_wait_schedul
+               return 0;
+ 
+       /*
+-       * Use io_schedule_prepare/finish, so cpufreq can take into account
+-       * that the task is waiting for IO - turns out to be important for low
+-       * QD IO.
++       * Mark us as being in io_wait if we have pending requests, so cpufreq
++       * can take into account that the task is waiting for IO - turns out
++       * to be important for low QD IO.
+        */
+-      token = io_schedule_prepare();
++      io_wait = current->in_iowait;
++      if (current_pending_io())
++              current->in_iowait = 1;
+       ret = 0;
+       if (iowq->timeout == KTIME_MAX)
+               schedule();
+       else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
+               ret = -ETIME;
+-      io_schedule_finish(token);
++      current->in_iowait = io_wait;
+       return ret;
+ }
+ 
diff --git a/queue-6.4/iommufd-set-end-correctly-when-doing-batch-carry.patch b/queue-6.4/iommufd-set-end-correctly-when-doing-batch-carry.patch

new file mode 100644 (file)

index 0000000..1f4d157
--- /dev/null
+++ b/queue-6.4/iommufd-set-end-correctly-when-doing-batch-carry.patch
@@ -0,0 +1,92 @@
+From b7c822fa6b7701b17e139f1c562fc24135880ed4 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 25 Jul 2023 16:05:50 -0300
+Subject: iommufd: Set end correctly when doing batch carry
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit b7c822fa6b7701b17e139f1c562fc24135880ed4 upstream.
+
+Even though the test suite covers this it somehow became obscured that
+this wasn't working.
+
+The test iommufd_ioas.mock_domain.access_domain_destory would blow up
+rarely.
+
+end should be set to 1 because this just pushed an item, the carry, to the
+pfns list.
+
+Sometimes the test would blow up with:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000000
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 0 P4D 0
+  Oops: 0000 [#1] SMP
+  CPU: 5 PID: 584 Comm: iommufd Not tainted 6.5.0-rc1-dirty #1236
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+  RIP: 0010:batch_unpin+0xa2/0x100 [iommufd]
+  Code: 17 48 81 fe ff ff 07 00 77 70 48 8b 15 b7 be 97 e2 48 85 d2 74 14 48 8b 14 fa 48 85 d2 74 0b 40 0f b6 f6 48 c1 e6 04 48 01 f2 <48> 8b 3a 48 c1 e0 06 89 ca 48 89 de 48 83 e7 f0 48 01 c7 e8 96 dc
+  RSP: 0018:ffffc90001677a58 EFLAGS: 00010246
+  RAX: 00007f7e2646f000 RBX: 0000000000000000 RCX: 0000000000000001
+  RDX: 0000000000000000 RSI: 00000000fefc4c8d RDI: 0000000000fefc4c
+  RBP: ffffc90001677a80 R08: 0000000000000048 R09: 0000000000000200
+  R10: 0000000000030b98 R11: ffffffff81f3bb40 R12: 0000000000000001
+  R13: ffff888101f75800 R14: ffffc90001677ad0 R15: 00000000000001fe
+  FS:  00007f9323679740(0000) GS:ffff8881ba540000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000000000 CR3: 0000000105ede003 CR4: 00000000003706a0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   <TASK>
+   ? show_regs+0x5c/0x70
+   ? __die+0x1f/0x60
+   ? page_fault_oops+0x15d/0x440
+   ? lock_release+0xbc/0x240
+   ? exc_page_fault+0x4a4/0x970
+   ? asm_exc_page_fault+0x27/0x30
+   ? batch_unpin+0xa2/0x100 [iommufd]
+   ? batch_unpin+0xba/0x100 [iommufd]
+   __iopt_area_unfill_domain+0x198/0x430 [iommufd]
+   ? __mutex_lock+0x8c/0xb80
+   ? __mutex_lock+0x6aa/0xb80
+   ? xa_erase+0x28/0x30
+   ? iopt_table_remove_domain+0x162/0x320 [iommufd]
+   ? lock_release+0xbc/0x240
+   iopt_area_unfill_domain+0xd/0x10 [iommufd]
+   iopt_table_remove_domain+0x195/0x320 [iommufd]
+   iommufd_hw_pagetable_destroy+0xb3/0x110 [iommufd]
+   iommufd_object_destroy_user+0x8e/0xf0 [iommufd]
+   iommufd_device_detach+0xc5/0x140 [iommufd]
+   iommufd_selftest_destroy+0x1f/0x70 [iommufd]
+   iommufd_object_destroy_user+0x8e/0xf0 [iommufd]
+   iommufd_destroy+0x3a/0x50 [iommufd]
+   iommufd_fops_ioctl+0xfb/0x170 [iommufd]
+   __x64_sys_ioctl+0x40d/0x9a0
+   do_syscall_64+0x3c/0x80
+   entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Link: https://lore.kernel.org/r/3-v1-85aacb2af554+bc-iommufd_syz3_jgg@nvidia.com
+Cc: <stable@vger.kernel.org>
+Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages")
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Tested-by: Nicolin Chen <nicolinc@nvidia.com>
+Reported-by: Nicolin Chen <nicolinc@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/iommufd/pages.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iommu/iommufd/pages.c
++++ b/drivers/iommu/iommufd/pages.c
+@@ -297,7 +297,7 @@ static void batch_clear_carry(struct pfn
+       batch->pfns[0] = batch->pfns[batch->end - 1] +
+                        (batch->npfns[batch->end - 1] - keep_pfns);
+       batch->npfns[0] = keep_pfns;
+-      batch->end = 0;
++      batch->end = 1;
+ }
+ 
+ static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns)
diff --git a/queue-6.4/net-dsa-qca8k-enable-use_single_write-for-qca8xxx.patch b/queue-6.4/net-dsa-qca8k-enable-use_single_write-for-qca8xxx.patch

new file mode 100644 (file)

index 0000000..e22a38f
--- /dev/null
+++ b/queue-6.4/net-dsa-qca8k-enable-use_single_write-for-qca8xxx.patch
@@ -0,0 +1,88 @@
+From 2c39dd025da489cf87d26469d9f5ff19715324a0 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Mon, 24 Jul 2023 05:25:28 +0200
+Subject: net: dsa: qca8k: enable use_single_write for qca8xxx
+
+From: Christian Marangi <ansuelsmth@gmail.com>
+
+commit 2c39dd025da489cf87d26469d9f5ff19715324a0 upstream.
+
+The qca8xxx switch supports 2 ways to write reg values, a slow way using
+mdio and a fast way by sending specially crafted mgmt packet to
+read/write reg.
+
+The fast way can support up to 32 bytes of data as eth packet are used
+to send/receive.
+
+This correctly works for almost the entire regmap of the switch but with
+the use of some kernel selftests for dsa drivers it was found a funny
+and interesting hw defect/limitation.
+
+For some specific reg, bulk write won't work and will result in writing
+only part of the requested regs resulting in half data written. This was
+especially hard to track and discover due to the total strangeness of
+the problem and also by the specific regs where this occurs.
+
+This occurs in the specific regs of the ATU table, where multiple entry
+needs to be written to compose the entire entry.
+It was discovered that with a bulk write of 12 bytes on
+QCA8K_REG_ATU_DATA0 only QCA8K_REG_ATU_DATA0 and QCA8K_REG_ATU_DATA2
+were written, but QCA8K_REG_ATU_DATA1 was always zero.
+Tcpdump was used to make sure the specially crafted packet was correct
+and this was confirmed.
+
+The problem was hard to track as the lack of QCA8K_REG_ATU_DATA1
+resulted in an entry somehow possible as the first bytes of the mac
+address are set in QCA8K_REG_ATU_DATA0 and the entry type is set in
+QCA8K_REG_ATU_DATA2.
+
+Funnily enough writing QCA8K_REG_ATU_DATA1 results in the same problem
+with QCA8K_REG_ATU_DATA2 empty and QCA8K_REG_ATU_DATA1 and
+QCA8K_REG_ATU_FUNC correctly written.
+A speculation on the problem might be that there are some kind of
+indirection internally when accessing these regs and they can't be
+accessed all together, due to the fact that it's really a table mapped
+somewhere in the switch SRAM.
+
+Even more funny is the fact that every other reg was tested with all
+kind of combination and they are not affected by this problem. Read
+operation was also tested and always worked so it's not affected by this
+problem.
+
+The problem is not present if we limit writing a single reg at times.
+
+To handle this hardware defect, enable use_single_write so that bulk
+api can correctly split the write in multiple different operation
+effectively reverting to a non-bulk write.
+
+Cc: Mark Brown <broonie@kernel.org>
+Fixes: c766e077d927 ("net: dsa: qca8k: convert to regmap read/write API")
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/qca/qca8k-8xxx.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
+index 09b80644c11b..efe9380d4a15 100644
+--- a/drivers/net/dsa/qca/qca8k-8xxx.c
++++ b/drivers/net/dsa/qca/qca8k-8xxx.c
+@@ -576,8 +576,11 @@ static struct regmap_config qca8k_regmap_config = {
+       .rd_table = &qca8k_readable_table,
+       .disable_locking = true, /* Locking is handled by qca8k read/write */
+       .cache_type = REGCACHE_NONE, /* Explicitly disable CACHE */
+-      .max_raw_read = 32, /* mgmt eth can read/write up to 8 registers at time */
+-      .max_raw_write = 32,
++      .max_raw_read = 32, /* mgmt eth can read up to 8 registers at time */
++      /* ATU regs suffer from a bug where some data are not correctly
++       * written. Disable bulk write to correctly write ATU entry.
++       */
++      .use_single_write = true,
+ };
+ 
+ static int
+-- 
+2.41.0
+
diff --git a/queue-6.4/net-dsa-qca8k-fix-broken-search_and_del.patch b/queue-6.4/net-dsa-qca8k-fix-broken-search_and_del.patch

new file mode 100644 (file)

index 0000000..86218fd
--- /dev/null
+++ b/queue-6.4/net-dsa-qca8k-fix-broken-search_and_del.patch
@@ -0,0 +1,44 @@
+From ae70dcb9d9ecaf7d9836d3e1b5bef654d7ef5680 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Mon, 24 Jul 2023 05:25:30 +0200
+Subject: net: dsa: qca8k: fix broken search_and_del
+
+From: Christian Marangi <ansuelsmth@gmail.com>
+
+commit ae70dcb9d9ecaf7d9836d3e1b5bef654d7ef5680 upstream.
+
+On deleting an MDB entry for a port, fdb_search_and_del is used.
+An FDB entry can't be modified so it needs to be deleted and readded
+again with the new portmap (and the port deleted as requested)
+
+We use the SEARCH operator to search the entry to edit by vid and mac
+address and then we check the aging if we actually found an entry.
+
+Currently the code suffers from a bug where the searched fdb entry is
+never read again with the found values (if found) resulting in the code
+always returning -EINVAL as aging was always 0.
+
+Fix this by correctly reading the fdb entry after it was searched.
+
+Fixes: ba8f870dfa63 ("net: dsa: qca8k: add support for mdb_add/del")
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/qca/qca8k-common.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/dsa/qca/qca8k-common.c
++++ b/drivers/net/dsa/qca/qca8k-common.c
+@@ -293,6 +293,10 @@ static int qca8k_fdb_search_and_del(stru
+       if (ret < 0)
+               goto exit;
+ 
++      ret = qca8k_fdb_read(priv, &fdb);
++      if (ret < 0)
++              goto exit;
++
+       /* Rule doesn't exist. Why delete? */
+       if (!fdb.aging) {
+               ret = -EINVAL;
diff --git a/queue-6.4/net-dsa-qca8k-fix-mdb-add-del-case-with-0-vid.patch b/queue-6.4/net-dsa-qca8k-fix-mdb-add-del-case-with-0-vid.patch

new file mode 100644 (file)

index 0000000..574fbd5
--- /dev/null
+++ b/queue-6.4/net-dsa-qca8k-fix-mdb-add-del-case-with-0-vid.patch
@@ -0,0 +1,47 @@
+From dfd739f182b00b02bd7470ed94d112684cc04fa2 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Mon, 24 Jul 2023 05:25:31 +0200
+Subject: net: dsa: qca8k: fix mdb add/del case with 0 VID
+
+From: Christian Marangi <ansuelsmth@gmail.com>
+
+commit dfd739f182b00b02bd7470ed94d112684cc04fa2 upstream.
+
+The qca8k switch doesn't support using 0 as VID and require a default
+VID to be always set. MDB add/del function doesn't currently handle
+this and are currently setting the default VID.
+
+Fix this by correctly handling this corner case and internally use the
+default VID for VID 0 case.
+
+Fixes: ba8f870dfa63 ("net: dsa: qca8k: add support for mdb_add/del")
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/qca/qca8k-common.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/dsa/qca/qca8k-common.c
++++ b/drivers/net/dsa/qca/qca8k-common.c
+@@ -816,6 +816,9 @@ int qca8k_port_mdb_add(struct dsa_switch
+       const u8 *addr = mdb->addr;
+       u16 vid = mdb->vid;
+ 
++      if (!vid)
++              vid = QCA8K_PORT_VID_DEF;
++
+       return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid,
+                                          QCA8K_ATU_STATUS_STATIC);
+ }
+@@ -828,6 +831,9 @@ int qca8k_port_mdb_del(struct dsa_switch
+       const u8 *addr = mdb->addr;
+       u16 vid = mdb->vid;
+ 
++      if (!vid)
++              vid = QCA8K_PORT_VID_DEF;
++
+       return qca8k_fdb_search_and_del(priv, BIT(port), addr, vid);
+ }
+ 
diff --git a/queue-6.4/net-dsa-qca8k-fix-search_and_insert-wrong-handling-of-new-rule.patch b/queue-6.4/net-dsa-qca8k-fix-search_and_insert-wrong-handling-of-new-rule.patch

new file mode 100644 (file)

index 0000000..5c08fe0
--- /dev/null
+++ b/queue-6.4/net-dsa-qca8k-fix-search_and_insert-wrong-handling-of-new-rule.patch
@@ -0,0 +1,73 @@
+From 80248d4160894d7e40b04111bdbaa4ff93fc4bd7 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Mon, 24 Jul 2023 05:25:29 +0200
+Subject: net: dsa: qca8k: fix search_and_insert wrong handling of new rule
+
+From: Christian Marangi <ansuelsmth@gmail.com>
+
+commit 80248d4160894d7e40b04111bdbaa4ff93fc4bd7 upstream.
+
+On inserting a mdb entry, fdb_search_and_insert is used to add a port to
+the qca8k target entry in the FDB db.
+
+A FDB entry can't be modified so it needs to be removed and insert again
+with the new values.
+
+To detect if an entry already exist, the SEARCH operation is used and we
+check the aging of the entry. If the entry is not 0, the entry exist and
+we proceed to delete it.
+
+Current code have 2 main problem:
+- The condition to check if the FDB entry exist is wrong and should be
+  the opposite.
+- When a FDB entry doesn't exist, aging was never actually set to the
+  STATIC value resulting in allocating an invalid entry.
+
+Fix both problem by adding aging support to the function, calling the
+function with STATIC as aging by default and finally by correct the
+condition to check if the entry actually exist.
+
+Fixes: ba8f870dfa63 ("net: dsa: qca8k: add support for mdb_add/del")
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/qca/qca8k-common.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/dsa/qca/qca8k-common.c
++++ b/drivers/net/dsa/qca/qca8k-common.c
+@@ -244,7 +244,7 @@ void qca8k_fdb_flush(struct qca8k_priv *
+ }
+ 
+ static int qca8k_fdb_search_and_insert(struct qca8k_priv *priv, u8 port_mask,
+-                                     const u8 *mac, u16 vid)
++                                     const u8 *mac, u16 vid, u8 aging)
+ {
+       struct qca8k_fdb fdb = { 0 };
+       int ret;
+@@ -261,10 +261,12 @@ static int qca8k_fdb_search_and_insert(s
+               goto exit;
+ 
+       /* Rule exist. Delete first */
+-      if (!fdb.aging) {
++      if (fdb.aging) {
+               ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1);
+               if (ret)
+                       goto exit;
++      } else {
++              fdb.aging = aging;
+       }
+ 
+       /* Add port to fdb portmask */
+@@ -810,7 +812,8 @@ int qca8k_port_mdb_add(struct dsa_switch
+       const u8 *addr = mdb->addr;
+       u16 vid = mdb->vid;
+ 
+-      return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid);
++      return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid,
++                                         QCA8K_ATU_STATUS_STATIC);
+ }
+ 
+ int qca8k_port_mdb_del(struct dsa_switch *ds, int port,
diff --git a/queue-6.4/net-ipa-only-reset-hashed-tables-when-supported.patch b/queue-6.4/net-ipa-only-reset-hashed-tables-when-supported.patch

new file mode 100644 (file)

index 0000000..20f7c0b
--- /dev/null
+++ b/queue-6.4/net-ipa-only-reset-hashed-tables-when-supported.patch
@@ -0,0 +1,101 @@
+From e11ec2b868af2b351c6c1e2e50eb711cc5423a10 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Mon, 24 Jul 2023 17:40:55 -0500
+Subject: net: ipa: only reset hashed tables when supported
+
+From: Alex Elder <elder@linaro.org>
+
+commit e11ec2b868af2b351c6c1e2e50eb711cc5423a10 upstream.
+
+Last year, the code that manages GSI channel transactions switched
+from using spinlock-protected linked lists to using indexes into the
+ring buffer used for a channel.  Recently, Google reported seeing
+transaction reference count underflows occasionally during shutdown.
+
+Doug Anderson found a way to reproduce the issue reliably, and
+bisected the issue to the commit that eliminated the linked lists
+and the lock.  The root cause was ultimately determined to be
+related to unused transactions being committed as part of the modem
+shutdown cleanup activity.  Unused transactions are not normally
+expected (except in error cases).
+
+The modem uses some ranges of IPA-resident memory, and whenever it
+shuts down we zero those ranges.  In ipa_filter_reset_table() a
+transaction is allocated to zero modem filter table entries.  If
+hashing is not supported, hashed table memory should not be zeroed.
+But currently nothing prevents that, and the result is an unused
+transaction.  Something similar occurs when we zero routing table
+entries for the modem.
+
+By preventing any attempt to clear hashed tables when hashing is not
+supported, the reference count underflow is avoided in this case.
+
+Note that there likely remains an issue with properly freeing unused
+transactions (if they occur due to errors).  This patch addresses
+only the underflows that Google originally reported.
+
+Cc: <stable@vger.kernel.org> # 6.1.x
+Fixes: d338ae28d8a8 ("net: ipa: kill all other transaction lists")
+Tested-by: Douglas Anderson <dianders@chromium.org>
+Signed-off-by: Alex Elder <elder@linaro.org>
+Link: https://lore.kernel.org/r/20230724224055.1688854-1-elder@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_table.c |   20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ipa/ipa_table.c
++++ b/drivers/net/ipa/ipa_table.c
+@@ -273,16 +273,15 @@ static int ipa_filter_reset(struct ipa *
+       if (ret)
+               return ret;
+ 
+-      ret = ipa_filter_reset_table(ipa, true, false, modem);
+-      if (ret)
++      ret = ipa_filter_reset_table(ipa, false, true, modem);
++      if (ret || !ipa_table_hash_support(ipa))
+               return ret;
+ 
+-      ret = ipa_filter_reset_table(ipa, false, true, modem);
++      ret = ipa_filter_reset_table(ipa, true, false, modem);
+       if (ret)
+               return ret;
+-      ret = ipa_filter_reset_table(ipa, true, true, modem);
+ 
+-      return ret;
++      return ipa_filter_reset_table(ipa, true, true, modem);
+ }
+ 
+ /* The AP routes and modem routes are each contiguous within the
+@@ -291,12 +290,13 @@ static int ipa_filter_reset(struct ipa *
+  * */
+ static int ipa_route_reset(struct ipa *ipa, bool modem)
+ {
++      bool hash_support = ipa_table_hash_support(ipa);
+       u32 modem_route_count = ipa->modem_route_count;
+       struct gsi_trans *trans;
+       u16 first;
+       u16 count;
+ 
+-      trans = ipa_cmd_trans_alloc(ipa, 4);
++      trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2);
+       if (!trans) {
+               dev_err(&ipa->pdev->dev,
+                       "no transaction for %s route reset\n",
+@@ -313,10 +313,12 @@ static int ipa_route_reset(struct ipa *i
+       }
+ 
+       ipa_table_reset_add(trans, false, false, false, first, count);
+-      ipa_table_reset_add(trans, false, true, false, first, count);
+-
+       ipa_table_reset_add(trans, false, false, true, first, count);
+-      ipa_table_reset_add(trans, false, true, true, first, count);
++
++      if (hash_support) {
++              ipa_table_reset_add(trans, false, true, false, first, count);
++              ipa_table_reset_add(trans, false, true, true, first, count);
++      }
+ 
+       gsi_trans_commit_wait(trans);
+ 
diff --git a/queue-6.4/proc-vmcore-fix-signedness-bug-in-read_from_oldmem.patch b/queue-6.4/proc-vmcore-fix-signedness-bug-in-read_from_oldmem.patch

new file mode 100644 (file)

index 0000000..96e8685
--- /dev/null
+++ b/queue-6.4/proc-vmcore-fix-signedness-bug-in-read_from_oldmem.patch
@@ -0,0 +1,45 @@
+From 641db40f3afe7998011bfabc726dba3e698f8196 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@linaro.org>
+Date: Tue, 25 Jul 2023 20:03:16 +0300
+Subject: proc/vmcore: fix signedness bug in read_from_oldmem()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+commit 641db40f3afe7998011bfabc726dba3e698f8196 upstream.
+
+The bug is the error handling:
+
+       if (tmp < nr_bytes) {
+
+"tmp" can hold negative error codes but because "nr_bytes" is type size_t
+the negative error codes are treated as very high positive values
+(success).  Fix this by changing "nr_bytes" to type ssize_t.  The
+"nr_bytes" variable is used to store values between 1 and PAGE_SIZE and
+they can fit in ssize_t without any issue.
+
+Link: https://lkml.kernel.org/r/b55f7eed-1c65-4adc-95d1-6c7c65a54a6e@moroto.mountain
+Fixes: 5d8de293c224 ("vmcore: convert copy_oldmem_page() to take an iov_iter")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Acked-by: Baoquan He <bhe@redhat.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Vivek Goyal <vgoyal@redhat.com>
+Cc: Alexey Dobriyan <adobriyan@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/proc/vmcore.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/proc/vmcore.c
++++ b/fs/proc/vmcore.c
+@@ -132,7 +132,7 @@ ssize_t read_from_oldmem(struct iov_iter
+                        u64 *ppos, bool encrypted)
+ {
+       unsigned long pfn, offset;
+-      size_t nr_bytes;
++      ssize_t nr_bytes;
+       ssize_t read = 0, tmp;
+       int idx;
+ 
diff --git a/queue-6.4/series b/queue-6.4/series

index ca17b05e9ec8bb5d584c83b971569a765b98da91..638524666f2a719b84106e98f740e2c50075f05a 100644 (file)
--- a/queue-6.4/series
+++ b/queue-6.4/series
@@ -196,3 +196,13 @@ tpm_tis-explicitly-check-for-error-code.patch
  irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch
  irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch
  locking-rtmutex-fix-task-pi_waiters-integrity.patch
+proc-vmcore-fix-signedness-bug-in-read_from_oldmem.patch
+xen-speed-up-grant-table-reclaim.patch
+virtio-net-fix-race-between-set-queues-and-probe.patch
+net-ipa-only-reset-hashed-tables-when-supported.patch
+net-dsa-qca8k-enable-use_single_write-for-qca8xxx.patch
+net-dsa-qca8k-fix-search_and_insert-wrong-handling-of-new-rule.patch
+net-dsa-qca8k-fix-broken-search_and_del.patch
+net-dsa-qca8k-fix-mdb-add-del-case-with-0-vid.patch
+io_uring-gate-iowait-schedule-on-having-pending-requests.patch
+iommufd-set-end-correctly-when-doing-batch-carry.patch
diff --git a/queue-6.4/virtio-net-fix-race-between-set-queues-and-probe.patch b/queue-6.4/virtio-net-fix-race-between-set-queues-and-probe.patch

new file mode 100644 (file)

index 0000000..102e932
--- /dev/null
+++ b/queue-6.4/virtio-net-fix-race-between-set-queues-and-probe.patch
@@ -0,0 +1,47 @@
+From 25266128fe16d5632d43ada34c847d7b8daba539 Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 25 Jul 2023 03:20:49 -0400
+Subject: virtio-net: fix race between set queues and probe
+
+From: Jason Wang <jasowang@redhat.com>
+
+commit 25266128fe16d5632d43ada34c847d7b8daba539 upstream.
+
+A race was found where set_channels could be called after registering
+but before virtnet_set_queues() in virtnet_probe(). Fixing this by
+moving the virtnet_set_queues() before netdevice registering. While at
+it, use _virtnet_set_queues() to avoid holding rtnl as the device is
+not even registered at that time.
+
+Cc: stable@vger.kernel.org
+Fixes: a220871be66f ("virtio-net: correctly enable multiqueue")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20230725072049.617289-1-jasowang@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -4110,6 +4110,8 @@ static int virtnet_probe(struct virtio_d
+       if (vi->has_rss || vi->has_rss_hash_report)
+               virtnet_init_default_rss(vi);
+ 
++      _virtnet_set_queues(vi, vi->curr_queue_pairs);
++
+       /* serialize netdev register + virtio_device_ready() with ndo_open() */
+       rtnl_lock();
+ 
+@@ -4148,8 +4150,6 @@ static int virtnet_probe(struct virtio_d
+               goto free_unregister_netdev;
+       }
+ 
+-      virtnet_set_queues(vi, vi->curr_queue_pairs);
+-
+       /* Assume link up if device can't report link status,
+          otherwise get link status from config. */
+       netif_carrier_off(dev);
diff --git a/queue-6.4/xen-speed-up-grant-table-reclaim.patch b/queue-6.4/xen-speed-up-grant-table-reclaim.patch

new file mode 100644 (file)

index 0000000..356343a
--- /dev/null
+++ b/queue-6.4/xen-speed-up-grant-table-reclaim.patch
@@ -0,0 +1,143 @@
+From c04e9894846c663f3278a414f34416e6e45bbe68 Mon Sep 17 00:00:00 2001
+From: Demi Marie Obenour <demi@invisiblethingslab.com>
+Date: Wed, 26 Jul 2023 12:52:41 -0400
+Subject: xen: speed up grant-table reclaim
+
+From: Demi Marie Obenour <demi@invisiblethingslab.com>
+
+commit c04e9894846c663f3278a414f34416e6e45bbe68 upstream.
+
+When a grant entry is still in use by the remote domain, Linux must put
+it on a deferred list.  Normally, this list is very short, because
+the PV network and block protocols expect the backend to unmap the grant
+first.  However, Qubes OS's GUI protocol is subject to the constraints
+of the X Window System, and as such winds up with the frontend unmapping
+the window first.  As a result, the list can grow very large, resulting
+in a massive memory leak and eventual VM freeze.
+
+To partially solve this problem, make the number of entries that the VM
+will attempt to free at each iteration tunable.  The default is still
+10, but it can be overridden via a module parameter.
+
+This is Cc: stable because (when combined with appropriate userspace
+changes) it fixes a severe performance and stability problem for Qubes
+OS users.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20230726165354.1252-1-demi@invisiblethingslab.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/ABI/testing/sysfs-module |   11 +++++++++
+ drivers/xen/grant-table.c              |   40 +++++++++++++++++++++++----------
+ 2 files changed, 40 insertions(+), 11 deletions(-)
+
+--- a/Documentation/ABI/testing/sysfs-module
++++ b/Documentation/ABI/testing/sysfs-module
+@@ -60,3 +60,14 @@ Description:        Module taint flags:
+                       C   staging driver module
+                       E   unsigned module
+                       ==  =====================
++
++What:         /sys/module/grant_table/parameters/free_per_iteration
++Date:         July 2023
++KernelVersion:        6.5 but backported to all supported stable branches
++Contact:      Xen developer discussion <xen-devel@lists.xenproject.org>
++Description:  Read and write number of grant entries to attempt to free per iteration.
++
++              Note: Future versions of Xen and Linux may provide a better
++              interface for controlling the rate of deferred grant reclaim
++              or may not need it at all.
++Users:                Qubes OS (https://www.qubes-os.org)
+--- a/drivers/xen/grant-table.c
++++ b/drivers/xen/grant-table.c
+@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
+ static void gnttab_handle_deferred(struct timer_list *);
+ static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);
+ 
++static atomic64_t deferred_count;
++static atomic64_t leaked_count;
++static unsigned int free_per_iteration = 10;
++module_param(free_per_iteration, uint, 0600);
++
+ static void gnttab_handle_deferred(struct timer_list *unused)
+ {
+-      unsigned int nr = 10;
++      unsigned int nr = READ_ONCE(free_per_iteration);
++      const bool ignore_limit = nr == 0;
+       struct deferred_entry *first = NULL;
+       unsigned long flags;
++      size_t freed = 0;
+ 
+       spin_lock_irqsave(&gnttab_list_lock, flags);
+-      while (nr--) {
++      while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
+               struct deferred_entry *entry
+                       = list_first_entry(&deferred_list,
+                                          struct deferred_entry, list);
+@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struc
+               list_del(&entry->list);
+               spin_unlock_irqrestore(&gnttab_list_lock, flags);
+               if (_gnttab_end_foreign_access_ref(entry->ref)) {
++                      uint64_t ret = atomic64_dec_return(&deferred_count);
++
+                       put_free_entry(entry->ref);
+-                      pr_debug("freeing g.e. %#x (pfn %#lx)\n",
+-                               entry->ref, page_to_pfn(entry->page));
++                      pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
++                               entry->ref, page_to_pfn(entry->page),
++                               (unsigned long long)ret);
+                       put_page(entry->page);
++                      freed++;
+                       kfree(entry);
+                       entry = NULL;
+               } else {
+@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struc
+               spin_lock_irqsave(&gnttab_list_lock, flags);
+               if (entry)
+                       list_add_tail(&entry->list, &deferred_list);
+-              else if (list_empty(&deferred_list))
+-                      break;
+       }
+-      if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
++      if (list_empty(&deferred_list))
++              WARN_ON(atomic64_read(&deferred_count));
++      else if (!timer_pending(&deferred_timer)) {
+               deferred_timer.expires = jiffies + HZ;
+               add_timer(&deferred_timer);
+       }
+       spin_unlock_irqrestore(&gnttab_list_lock, flags);
++      pr_debug("Freed %zu references", freed);
+ }
+ 
+ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
+ {
+       struct deferred_entry *entry;
+       gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
+-      const char *what = KERN_WARNING "leaking";
++      uint64_t leaked, deferred;
+ 
+       entry = kmalloc(sizeof(*entry), gfp);
+       if (!page) {
+@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_re
+                       add_timer(&deferred_timer);
+               }
+               spin_unlock_irqrestore(&gnttab_list_lock, flags);
+-              what = KERN_DEBUG "deferring";
++              deferred = atomic64_inc_return(&deferred_count);
++              leaked = atomic64_read(&leaked_count);
++              pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
++                       ref, page ? page_to_pfn(page) : -1, deferred, leaked);
++      } else {
++              deferred = atomic64_read(&deferred_count);
++              leaked = atomic64_inc_return(&leaked_count);
++              pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
++                      ref, page ? page_to_pfn(page) : -1, deferred, leaked);
+       }
+-      printk("%s g.e. %#x (pfn %#lx)\n",
+-             what, ref, page ? page_to_pfn(page) : -1);
+ }
+ 
+ int gnttab_try_end_foreign_access(grant_ref_t ref)
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 1 Aug 2023 05:57:53 +0000 (07:57 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 1 Aug 2023 05:57:53 +0000 (07:57 +0200)
queue-6.4/io_uring-gate-iowait-schedule-on-having-pending-requests.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/iommufd-set-end-correctly-when-doing-batch-carry.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-dsa-qca8k-enable-use_single_write-for-qca8xxx.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-dsa-qca8k-fix-broken-search_and_del.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-dsa-qca8k-fix-mdb-add-del-case-with-0-vid.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-dsa-qca8k-fix-search_and_insert-wrong-handling-of-new-rule.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-ipa-only-reset-hashed-tables-when-supported.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/proc-vmcore-fix-signedness-bug-in-read_from_oldmem.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/series		patch \| blob \| blame \| history
queue-6.4/virtio-net-fix-race-between-set-queues-and-probe.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/xen-speed-up-grant-table-reclaim.patch	[new file with mode: 0644]	patch \| blob