git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.10
author    Sasha Levin <sashal@kernel.org>
          Wed, 2 Oct 2024 05:17:33 +0000 (01:17 -0400)
committer Sasha Levin <sashal@kernel.org>
          Wed, 2 Oct 2024 05:17:33 +0000 (01:17 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
25 files changed:
queue-6.10/debugfs-convert-to-new-uid-gid-option-parsing-helper.patch [new file with mode: 0644]
queue-6.10/debugfs-show-actual-source-in-proc-mounts.patch [new file with mode: 0644]
queue-6.10/fs_parse-add-uid-gid-option-option-parsing-helpers.patch [new file with mode: 0644]
queue-6.10/idpf-fix-netdev-tx-queue-stop-wake.patch [new file with mode: 0644]
queue-6.10/idpf-merge-singleq-and-splitq-net_device_ops.patch [new file with mode: 0644]
queue-6.10/idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch [new file with mode: 0644]
queue-6.10/idpf-stop-using-macros-for-accessing-queue-descripto.patch [new file with mode: 0644]
queue-6.10/kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch [new file with mode: 0644]
queue-6.10/kvm-x86-make-x2apic-id-100-readonly.patch [new file with mode: 0644]
queue-6.10/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch [new file with mode: 0644]
queue-6.10/lsm-infrastructure-management-of-the-sock-security.patch [new file with mode: 0644]
queue-6.10/serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch [new file with mode: 0644]
queue-6.10/serial-qcom-geni-fix-console-corruption.patch [new file with mode: 0644]
queue-6.10/serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch [new file with mode: 0644]
queue-6.10/series
queue-6.10/soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch [new file with mode: 0644]
queue-6.10/soc-versatile-realview-fix-memory-leak-during-device.patch [new file with mode: 0644]
queue-6.10/soc-versatile-realview-fix-soc_dev-leak-during-devic.patch [new file with mode: 0644]
queue-6.10/usb-xhci-fix-loss-of-data-on-cadence-xhc.patch [new file with mode: 0644]
queue-6.10/x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch [new file with mode: 0644]
queue-6.10/x86-mm-make-x86_platform.guest.enc_status_change_-re.patch [new file with mode: 0644]
queue-6.10/x86-tdx-account-shared-memory.patch [new file with mode: 0644]
queue-6.10/x86-tdx-convert-shared-memory-back-to-private-on-kex.patch [new file with mode: 0644]
queue-6.10/x86-tdx-fix-in-kernel-mmio-check.patch [new file with mode: 0644]
queue-6.10/xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch [new file with mode: 0644]

diff --git a/queue-6.10/debugfs-convert-to-new-uid-gid-option-parsing-helper.patch b/queue-6.10/debugfs-convert-to-new-uid-gid-option-parsing-helper.patch
new file mode 100644 (file)
index 0000000..c80ce05
--- /dev/null
@@ -0,0 +1,67 @@
+From 52fe235e918c637b013c3382dc951842c8318ce3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jun 2024 19:29:46 -0500
+Subject: debugfs: Convert to new uid/gid option parsing helpers
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+[ Upstream commit 49abee5991e18f14ec822ef53acd173ae58ff594 ]
+
+Convert to new uid/gid option parsing helpers
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Link: https://lore.kernel.org/r/b2f44ee0-3cee-49eb-a416-f26a9306eb56@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 3a987b88a425 ("debugfs show actual source in /proc/mounts")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/debugfs/inode.c | 16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
+index 8fd928899a59e..91521576f5003 100644
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -92,9 +92,9 @@ enum {
+ };
+ static const struct fs_parameter_spec debugfs_param_specs[] = {
+-      fsparam_u32     ("gid",         Opt_gid),
++      fsparam_gid     ("gid",         Opt_gid),
+       fsparam_u32oct  ("mode",        Opt_mode),
+-      fsparam_u32     ("uid",         Opt_uid),
++      fsparam_uid     ("uid",         Opt_uid),
+       {}
+ };
+@@ -102,8 +102,6 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
+ {
+       struct debugfs_fs_info *opts = fc->s_fs_info;
+       struct fs_parse_result result;
+-      kuid_t uid;
+-      kgid_t gid;
+       int opt;
+       opt = fs_parse(fc, debugfs_param_specs, param, &result);
+@@ -120,16 +118,10 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
+       switch (opt) {
+       case Opt_uid:
+-              uid = make_kuid(current_user_ns(), result.uint_32);
+-              if (!uid_valid(uid))
+-                      return invalf(fc, "Unknown uid");
+-              opts->uid = uid;
++              opts->uid = result.uid;
+               break;
+       case Opt_gid:
+-              gid = make_kgid(current_user_ns(), result.uint_32);
+-              if (!gid_valid(gid))
+-                      return invalf(fc, "Unknown gid");
+-              opts->gid = gid;
++              opts->gid = result.gid;
+               break;
+       case Opt_mode:
+               opts->mode = result.uint_32 & S_IALLUGO;
+-- 
+2.43.0
+
diff --git a/queue-6.10/debugfs-show-actual-source-in-proc-mounts.patch b/queue-6.10/debugfs-show-actual-source-in-proc-mounts.patch
new file mode 100644 (file)
index 0000000..9a63736
--- /dev/null
@@ -0,0 +1,61 @@
+From f0b5b89d0a0c152c4e411edfbc647e8ad98b179e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 10 Aug 2024 13:25:27 -0600
+Subject: debugfs show actual source in /proc/mounts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marc Aurèle La France <tsi@tuyoix.net>
+
+[ Upstream commit 3a987b88a42593875f6345188ca33731c7df728c ]
+
+After its conversion to the new mount API, debugfs displays "none" in
+/proc/mounts instead of the actual source.  Fix this by recognising its
+"source" mount option.
+
+Signed-off-by: Marc Aurèle La France <tsi@tuyoix.net>
+Link: https://lore.kernel.org/r/e439fae2-01da-234b-75b9-2a7951671e27@tuyoix.net
+Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API")
+Cc: stable@vger.kernel.org # 6.10.x: 49abee5991e1: debugfs: Convert to new uid/gid option parsing helpers
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/debugfs/inode.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
+index 91521576f5003..66d9b3b4c5881 100644
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -89,12 +89,14 @@ enum {
+       Opt_uid,
+       Opt_gid,
+       Opt_mode,
++      Opt_source,
+ };
+ static const struct fs_parameter_spec debugfs_param_specs[] = {
+       fsparam_gid     ("gid",         Opt_gid),
+       fsparam_u32oct  ("mode",        Opt_mode),
+       fsparam_uid     ("uid",         Opt_uid),
++      fsparam_string  ("source",      Opt_source),
+       {}
+ };
+@@ -126,6 +128,12 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
+       case Opt_mode:
+               opts->mode = result.uint_32 & S_IALLUGO;
+               break;
++      case Opt_source:
++              if (fc->source)
++                      return invalfc(fc, "Multiple sources specified");
++              fc->source = param->string;
++              param->string = NULL;
++              break;
+       /*
+        * We might like to report bad mount options here;
+        * but traditionally debugfs has ignored all mount options
+-- 
+2.43.0
+
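A note on the fix above: debugfs deliberately swallows unknown mount options instead of returning -ENOPARAM, so the VFS's generic handling of "source" (which only runs when parse_param returns -ENOPARAM) never gets a chance to record the device name, fc->source stays NULL, and /proc/mounts falls back to "none". The hunk therefore claims "source" explicitly and hands the string over to the fs_context. Distilled to the bare pattern, as a fragment of a parse_param switch (reference sketch only, not part of the patch):

	case Opt_source:
		/* only one source per mount context */
		if (fc->source)
			return invalfc(fc, "Multiple sources specified");
		/* transfer ownership: fc->source becomes the mount's device
		 * name (the first column in /proc/mounts); clearing
		 * param->string keeps fs_parameter cleanup from freeing it
		 */
		fc->source = param->string;
		param->string = NULL;
		break;
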
diff --git a/queue-6.10/fs_parse-add-uid-gid-option-option-parsing-helpers.patch b/queue-6.10/fs_parse-add-uid-gid-option-option-parsing-helpers.patch
new file mode 100644 (file)
index 0000000..bb53d9f
--- /dev/null
@@ -0,0 +1,143 @@
+From f1c7aae89ddbb223fac562b507d8dd8463f268df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jun 2024 19:26:24 -0500
+Subject: fs_parse: add uid & gid option option parsing helpers
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+[ Upstream commit 9f111059e725f7ca79a136bfc734da3c8c1838b4 ]
+
+Multiple filesystems take uid and gid as options, and the code to
+create the ID from an integer and validate it is standard boilerplate
+that can be moved into common helper functions, so do that for
+consistency and less cut&paste.
+
+This also helps avoid the buggy pattern noted by Seth Jenkins at
+https://lore.kernel.org/lkml/CALxfFW4BXhEwxR0Q5LSkg-8Vb4r2MONKCcUCVioehXQKr35eHg@mail.gmail.com/
+because uid/gid parsing will fail before any assignment in most
+filesystems.
+
+Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
+Link: https://lore.kernel.org/r/de859d0a-feb9-473d-a5e2-c195a3d47abb@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 3a987b88a425 ("debugfs show actual source in /proc/mounts")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/filesystems/mount_api.rst |  9 +++++--
+ fs/fs_parser.c                          | 34 +++++++++++++++++++++++++
+ include/linux/fs_parser.h               |  6 ++++-
+ 3 files changed, 46 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
+index 9aaf6ef75eb53..317934c9e8fca 100644
+--- a/Documentation/filesystems/mount_api.rst
++++ b/Documentation/filesystems/mount_api.rst
+@@ -645,6 +645,8 @@ The members are as follows:
+       fs_param_is_blockdev    Blockdev path           * Needs lookup
+       fs_param_is_path        Path                    * Needs lookup
+       fs_param_is_fd          File descriptor         result->int_32
++      fs_param_is_uid         User ID (u32)           result->uid
++      fs_param_is_gid         Group ID (u32)          result->gid
+       ======================= ======================= =====================
+      Note that if the value is of fs_param_is_bool type, fs_parse() will try
+@@ -678,6 +680,8 @@ The members are as follows:
+       fsparam_bdev()          fs_param_is_blockdev
+       fsparam_path()          fs_param_is_path
+       fsparam_fd()            fs_param_is_fd
++      fsparam_uid()           fs_param_is_uid
++      fsparam_gid()           fs_param_is_gid
+       ======================= ===============================================
+      all of which take two arguments, name string and option number - for
+@@ -784,8 +788,9 @@ process the parameters it is given.
+      option number (which it returns).
+      If successful, and if the parameter type indicates the result is a
+-     boolean, integer or enum type, the value is converted by this function and
+-     the result stored in result->{boolean,int_32,uint_32,uint_64}.
++     boolean, integer, enum, uid, or gid type, the value is converted by this
++     function and the result stored in
++     result->{boolean,int_32,uint_32,uint_64,uid,gid}.
+      If a match isn't initially made, the key is prefixed with "no" and no
+      value is present then an attempt will be made to look up the key with the
+diff --git a/fs/fs_parser.c b/fs/fs_parser.c
+index a4d6ca0b8971e..24727ec34e5aa 100644
+--- a/fs/fs_parser.c
++++ b/fs/fs_parser.c
+@@ -308,6 +308,40 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p,
+ }
+ EXPORT_SYMBOL(fs_param_is_fd);
++int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p,
++                  struct fs_parameter *param, struct fs_parse_result *result)
++{
++      kuid_t uid;
++
++      if (fs_param_is_u32(log, p, param, result) != 0)
++              return fs_param_bad_value(log, param);
++
++      uid = make_kuid(current_user_ns(), result->uint_32);
++      if (!uid_valid(uid))
++              return inval_plog(log, "Invalid uid '%s'", param->string);
++
++      result->uid = uid;
++      return 0;
++}
++EXPORT_SYMBOL(fs_param_is_uid);
++
++int fs_param_is_gid(struct p_log *log, const struct fs_parameter_spec *p,
++                  struct fs_parameter *param, struct fs_parse_result *result)
++{
++      kgid_t gid;
++
++      if (fs_param_is_u32(log, p, param, result) != 0)
++              return fs_param_bad_value(log, param);
++
++      gid = make_kgid(current_user_ns(), result->uint_32);
++      if (!gid_valid(gid))
++              return inval_plog(log, "Invalid gid '%s'", param->string);
++
++      result->gid = gid;
++      return 0;
++}
++EXPORT_SYMBOL(fs_param_is_gid);
++
+ int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p,
+                 struct fs_parameter *param, struct fs_parse_result *result)
+ {
+diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
+index d3350979115f0..6cf713a7e6c6f 100644
+--- a/include/linux/fs_parser.h
++++ b/include/linux/fs_parser.h
+@@ -28,7 +28,7 @@ typedef int fs_param_type(struct p_log *,
+  */
+ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
+       fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev,
+-      fs_param_is_path, fs_param_is_fd;
++      fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid;
+ /*
+  * Specification of the type of value a parameter wants.
+@@ -57,6 +57,8 @@ struct fs_parse_result {
+               int             int_32;         /* For spec_s32/spec_enum */
+               unsigned int    uint_32;        /* For spec_u32{,_octal,_hex}/spec_enum */
+               u64             uint_64;        /* For spec_u64 */
++              kuid_t          uid;
++              kgid_t          gid;
+       };
+ };
+@@ -131,6 +133,8 @@ static inline bool fs_validate_description(const char *name,
+ #define fsparam_bdev(NAME, OPT)       __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
+ #define fsparam_path(NAME, OPT)       __fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
+ #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
++#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL)
++#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL)
+ /* String parameter that allows empty argument */
+ #define fsparam_string_empty(NAME, OPT) \
+-- 
+2.43.0
+
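Taken together with the debugfs conversion earlier in this series, the helpers reduce per-filesystem uid/gid handling to a table entry plus a direct assignment, with the make_kuid()/make_kgid() validation done centrally. A minimal sketch of how a mount-API filesystem would consume them (the examplefs_* names and struct are hypothetical; only the fsparam_uid/fsparam_gid macros and the result.uid/result.gid fields come from the patch):

/* assumes <linux/fs_context.h> and <linux/fs_parser.h> */
enum { Opt_uid, Opt_gid };

static const struct fs_parameter_spec examplefs_param_specs[] = {
	fsparam_uid("uid", Opt_uid),	/* fs_param_is_uid fills result.uid */
	fsparam_gid("gid", Opt_gid),	/* fs_param_is_gid fills result.gid */
	{}
};

static int examplefs_parse_param(struct fs_context *fc,
				 struct fs_parameter *param)
{
	struct examplefs_fs_info *opts = fc->s_fs_info;	/* hypothetical */
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, examplefs_param_specs, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_uid:
		/* already validated against current_user_ns() by the helper */
		opts->uid = result.uid;
		break;
	case Opt_gid:
		opts->gid = result.gid;
		break;
	}

	return 0;
}
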
diff --git a/queue-6.10/idpf-fix-netdev-tx-queue-stop-wake.patch b/queue-6.10/idpf-fix-netdev-tx-queue-stop-wake.patch
new file mode 100644 (file)
index 0000000..6549f57
--- /dev/null
@@ -0,0 +1,145 @@
+From c453341c47dccfbc87bf7c67ed0be13c8e5de572 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Sep 2024 17:47:47 +0200
+Subject: idpf: fix netdev Tx queue stop/wake
+
+From: Michal Kubiak <michal.kubiak@intel.com>
+
+[ Upstream commit e4b398dd82f5d5867bc5f442c43abc8fba30ed2c ]
+
+netif_txq_maybe_stop() returns -1, 0, or 1, while
+idpf_tx_maybe_stop_common() says it returns 0 or -EBUSY. As a result,
+there sometimes are Tx queue timeout warnings despite that the queue
+is empty or there is at least enough space to restart it.
+Make idpf_tx_maybe_stop_common() inline and returning true or false,
+handling the return of netif_txq_maybe_stop() properly. Use a correct
+goto in idpf_tx_maybe_stop_splitq() to avoid stopping the queue or
+incrementing the stops counter twice.
+
+Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit")
+Fixes: a5ab9ee0df0b ("idpf: add singleq start_xmit and napi poll")
+Cc: stable@vger.kernel.org # 6.7+
+Signed-off-by: Michal Kubiak <michal.kubiak@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/intel/idpf/idpf_singleq_txrx.c   |  4 +++
+ drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 35 +++++--------------
+ drivers/net/ethernet/intel/idpf/idpf_txrx.h   |  9 ++++-
+ 3 files changed, 21 insertions(+), 27 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+index 8630db24f63a7..5e5fa2d0aa4d1 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+@@ -369,6 +369,10 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
+                                     IDPF_TX_DESCS_FOR_CTX)) {
+               idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
++              u64_stats_update_begin(&tx_q->stats_sync);
++              u64_stats_inc(&tx_q->q_stats.q_busy);
++              u64_stats_update_end(&tx_q->stats_sync);
++
+               return NETDEV_TX_BUSY;
+       }
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+index 7b06ca7b9732a..9b7e67d0f38be 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+@@ -2149,29 +2149,6 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
+       desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag);
+ }
+-/**
+- * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions
+- * @tx_q: the queue to be checked
+- * @size: number of descriptors we want to assure is available
+- *
+- * Returns 0 if stop is not needed
+- */
+-int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size)
+-{
+-      struct netdev_queue *nq;
+-
+-      if (likely(IDPF_DESC_UNUSED(tx_q) >= size))
+-              return 0;
+-
+-      u64_stats_update_begin(&tx_q->stats_sync);
+-      u64_stats_inc(&tx_q->q_stats.q_busy);
+-      u64_stats_update_end(&tx_q->stats_sync);
+-
+-      nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+-
+-      return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size);
+-}
+-
+ /**
+  * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions
+  * @tx_q: the queue to be checked
+@@ -2183,7 +2160,7 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q,
+                                    unsigned int descs_needed)
+ {
+       if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
+-              goto splitq_stop;
++              goto out;
+       /* If there are too many outstanding completions expected on the
+        * completion queue, stop the TX queue to give the device some time to
+@@ -2202,10 +2179,12 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q,
+       return 0;
+ splitq_stop:
++      netif_stop_subqueue(tx_q->netdev, tx_q->idx);
++
++out:
+       u64_stats_update_begin(&tx_q->stats_sync);
+       u64_stats_inc(&tx_q->q_stats.q_busy);
+       u64_stats_update_end(&tx_q->stats_sync);
+-      netif_stop_subqueue(tx_q->netdev, tx_q->idx);
+       return -EBUSY;
+ }
+@@ -2228,7 +2207,11 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
+       nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+       tx_q->next_to_use = val;
+-      idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED);
++      if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) {
++              u64_stats_update_begin(&tx_q->stats_sync);
++              u64_stats_inc(&tx_q->q_stats.q_busy);
++              u64_stats_update_end(&tx_q->stats_sync);
++      }
+       /* Force memory writes to complete before letting h/w
+        * know there are new descriptors to fetch.  (Only
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+index 5b3f19200255a..214a24e684634 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+@@ -1148,7 +1148,6 @@ void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb,
+                          struct idpf_tx_buf *first, u16 ring_idx);
+ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq,
+                                        struct sk_buff *skb);
+-int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size);
+ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue);
+ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
+                                 struct idpf_tx_queue *tx_q);
+@@ -1157,4 +1156,12 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
+                                     u16 cleaned_count);
+ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
++static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
++                                           u32 needed)
++{
++      return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
++                                        IDPF_DESC_UNUSED(tx_q),
++                                        needed, needed);
++}
++
+ #endif /* !_IDPF_TXRX_H_ */
+-- 
+2.43.0
+
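For reference, the return convention that tripped this up is documented in include/net/netdev_queues.h: netif_txq_maybe_stop() and its subqueue variant return 1 when the queue was left running, 0 when it was stopped, and -1 when it was stopped but re-enabled by the recheck. The new inline helper folds that tri-state into the boolean the idpf callers actually need; an annotated copy of the idpf_txrx.h hunk above:

/* Only a hard "stopped" result (0) reads as true; 1 (left running) and
 * -1 (stopped, then re-enabled on recheck) read as false, so callers no
 * longer return NETDEV_TX_BUSY or bump q_busy for a queue that is usable.
 */
static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
					     u32 needed)
{
	return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
					  IDPF_DESC_UNUSED(tx_q),
					  needed, needed);
}
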
diff --git a/queue-6.10/idpf-merge-singleq-and-splitq-net_device_ops.patch b/queue-6.10/idpf-merge-singleq-and-splitq-net_device_ops.patch
new file mode 100644 (file)
index 0000000..125e46b
--- /dev/null
@@ -0,0 +1,211 @@
+From ba7332fc1b466a126d4a986d059fb6f7c0dff12d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2024 15:53:41 +0200
+Subject: idpf: merge singleq and splitq &net_device_ops
+
+From: Alexander Lobakin <aleksander.lobakin@intel.com>
+
+[ Upstream commit 14f662b43bf8c765114f73d184af2702b2280436 ]
+
+It makes no sense to have a second &net_device_ops struct (800 bytes of
+rodata) with only one difference in .ndo_start_xmit, which can easily
+be just one `if`. This `if` is a drop in the ocean and you won't see
+any difference.
+Define unified idpf_xmit_start(). The preparation for sending is the
+same, just call either idpf_tx_splitq_frame() or idpf_tx_singleq_frame()
+depending on the active model to actually map and send the skb.
+
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: e4b398dd82f5 ("idpf: fix netdev Tx queue stop/wake")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/idpf/idpf_lib.c    | 26 +++-------------
+ .../ethernet/intel/idpf/idpf_singleq_txrx.c   | 31 ++-----------------
+ drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 17 ++++++----
+ drivers/net/ethernet/intel/idpf/idpf_txrx.h   |  9 ++----
+ 4 files changed, 20 insertions(+), 63 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
+index 1ab679a719c77..5e336f64bc25e 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
+@@ -4,8 +4,7 @@
+ #include "idpf.h"
+ #include "idpf_virtchnl.h"
+-static const struct net_device_ops idpf_netdev_ops_splitq;
+-static const struct net_device_ops idpf_netdev_ops_singleq;
++static const struct net_device_ops idpf_netdev_ops;
+ /**
+  * idpf_init_vector_stack - Fill the MSIX vector stack with vector index
+@@ -765,10 +764,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
+       }
+       /* assign netdev_ops */
+-      if (idpf_is_queue_model_split(vport->txq_model))
+-              netdev->netdev_ops = &idpf_netdev_ops_splitq;
+-      else
+-              netdev->netdev_ops = &idpf_netdev_ops_singleq;
++      netdev->netdev_ops = &idpf_netdev_ops;
+       /* setup watchdog timeout value to be 5 second */
+       netdev->watchdog_timeo = 5 * HZ;
+@@ -2353,24 +2349,10 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
+       mem->pa = 0;
+ }
+-static const struct net_device_ops idpf_netdev_ops_splitq = {
+-      .ndo_open = idpf_open,
+-      .ndo_stop = idpf_stop,
+-      .ndo_start_xmit = idpf_tx_splitq_start,
+-      .ndo_features_check = idpf_features_check,
+-      .ndo_set_rx_mode = idpf_set_rx_mode,
+-      .ndo_validate_addr = eth_validate_addr,
+-      .ndo_set_mac_address = idpf_set_mac,
+-      .ndo_change_mtu = idpf_change_mtu,
+-      .ndo_get_stats64 = idpf_get_stats64,
+-      .ndo_set_features = idpf_set_features,
+-      .ndo_tx_timeout = idpf_tx_timeout,
+-};
+-
+-static const struct net_device_ops idpf_netdev_ops_singleq = {
++static const struct net_device_ops idpf_netdev_ops = {
+       .ndo_open = idpf_open,
+       .ndo_stop = idpf_stop,
+-      .ndo_start_xmit = idpf_tx_singleq_start,
++      .ndo_start_xmit = idpf_tx_start,
+       .ndo_features_check = idpf_features_check,
+       .ndo_set_rx_mode = idpf_set_rx_mode,
+       .ndo_validate_addr = eth_validate_addr,
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+index 9864a3992f0c3..8630db24f63a7 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+@@ -351,8 +351,8 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
+  *
+  * Returns NETDEV_TX_OK if sent, else an error code
+  */
+-static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
+-                                       struct idpf_tx_queue *tx_q)
++netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
++                                struct idpf_tx_queue *tx_q)
+ {
+       struct idpf_tx_offload_params offload = { };
+       struct idpf_tx_buf *first;
+@@ -408,33 +408,6 @@ static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
+       return idpf_tx_drop_skb(tx_q, skb);
+ }
+-/**
+- * idpf_tx_singleq_start - Selects the right Tx queue to send buffer
+- * @skb: send buffer
+- * @netdev: network interface device structure
+- *
+- * Returns NETDEV_TX_OK if sent, else an error code
+- */
+-netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+-                                struct net_device *netdev)
+-{
+-      struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+-      struct idpf_tx_queue *tx_q;
+-
+-      tx_q = vport->txqs[skb_get_queue_mapping(skb)];
+-
+-      /* hardware can't handle really short frames, hardware padding works
+-       * beyond this point
+-       */
+-      if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) {
+-              idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+-
+-              return NETDEV_TX_OK;
+-      }
+-
+-      return idpf_tx_singleq_frame(skb, tx_q);
+-}
+-
+ /**
+  * idpf_tx_singleq_clean - Reclaim resources from queue
+  * @tx_q: Tx queue to clean
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+index cdb01c54213f9..7b06ca7b9732a 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+@@ -4,6 +4,9 @@
+ #include "idpf.h"
+ #include "idpf_virtchnl.h"
++static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
++                             unsigned int count);
++
+ /**
+  * idpf_buf_lifo_push - push a buffer pointer onto stack
+  * @stack: pointer to stack struct
+@@ -2702,8 +2705,8 @@ static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
+  * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
+  * header, 1 for segment payload, and then 7 for the fragments.
+  */
+-bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+-                      unsigned int count)
++static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
++                             unsigned int count)
+ {
+       if (likely(count < max_bufs))
+               return false;
+@@ -2849,14 +2852,13 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
+ }
+ /**
+- * idpf_tx_splitq_start - Selects the right Tx queue to send buffer
++ * idpf_tx_start - Selects the right Tx queue to send buffer
+  * @skb: send buffer
+  * @netdev: network interface device structure
+  *
+  * Returns NETDEV_TX_OK if sent, else an error code
+  */
+-netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+-                               struct net_device *netdev)
++netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev)
+ {
+       struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+       struct idpf_tx_queue *tx_q;
+@@ -2878,7 +2880,10 @@ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+               return NETDEV_TX_OK;
+       }
+-      return idpf_tx_splitq_frame(skb, tx_q);
++      if (idpf_is_queue_model_split(vport->txq_model))
++              return idpf_tx_splitq_frame(skb, tx_q);
++      else
++              return idpf_tx_singleq_frame(skb, tx_q);
+ }
+ /**
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+index 704aec5c383b6..5b3f19200255a 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+@@ -1148,14 +1148,11 @@ void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb,
+                          struct idpf_tx_buf *first, u16 ring_idx);
+ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq,
+                                        struct sk_buff *skb);
+-bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+-                      unsigned int count);
+ int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size);
+ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue);
+-netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+-                               struct net_device *netdev);
+-netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+-                                struct net_device *netdev);
++netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
++                                struct idpf_tx_queue *tx_q);
++netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev);
+ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
+                                     u16 cleaned_count);
+ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
+-- 
+2.43.0
+
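The merged ndo_start_xmit keeps the shared prep work (queue lookup, short-frame padding) in one place and defers the split/single decision to a single branch at the end. A condensed view of the resulting control flow, assembled from the hunks above (not a verbatim copy of the driver function):

static netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev)
{
	struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
	struct idpf_tx_queue *tx_q = vport->txqs[skb_get_queue_mapping(skb)];

	/* hardware can't handle really short frames; pad and complete early */
	if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
		return NETDEV_TX_OK;
	}

	/* the only per-model difference left: how the frame is mapped/sent */
	return idpf_is_queue_model_split(vport->txq_model) ?
	       idpf_tx_splitq_frame(skb, tx_q) :
	       idpf_tx_singleq_frame(skb, tx_q);
}
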
diff --git a/queue-6.10/idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch b/queue-6.10/idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch
new file mode 100644 (file)
index 0000000..6748d8c
--- /dev/null
@@ -0,0 +1,3988 @@
+From 006b3857a893f911630279c3590f415040801b6b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2024 15:53:38 +0200
+Subject: idpf: split &idpf_queue into 4 strictly-typed queue structures
+
+From: Alexander Lobakin <aleksander.lobakin@intel.com>
+
+[ Upstream commit e4891e4687c8dd136d80d6c1b857a02931ed6fc8 ]
+
+Currently, sizeof(struct idpf_queue) is 32 Kb.
+This is due to the 12-bit hashtable declaration at the end of the queue.
+This HT is needed only for Tx queues when the flow scheduling mode is
+enabled. But &idpf_queue is unified for all of the queue types,
+provoking excessive memory usage.
+The unified structure in general makes the code less effective via
+suboptimal fields placement. You can't avoid that unless you make unions
+each 2 fields. Even then, different field alignment etc., doesn't allow
+you to optimize things to the limit.
+Split &idpf_queue into 4 structures corresponding to the queue types:
+RQ (Rx queue), SQ (Tx queue), FQ (buffer queue), and CQ (completion
+queue). Place only needed fields there and shortcuts handy for hotpath.
+Allocate the abovementioned hashtable dynamically and only when needed,
+keeping &idpf_tx_queue relatively short (192 bytes, same as Rx). This HT
+is used only for OOO completions, which aren't really hotpath anyway.
+Note that this change must be done atomically, otherwise it's really
+easy to get lost and miss something.
+
+Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: e4b398dd82f5 ("idpf: fix netdev Tx queue stop/wake")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/idpf/idpf.h        |   3 +-
+ .../net/ethernet/intel/idpf/idpf_ethtool.c    | 125 +--
+ drivers/net/ethernet/intel/idpf/idpf_lib.c    |  46 +-
+ .../ethernet/intel/idpf/idpf_singleq_txrx.c   | 144 +--
+ drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 915 +++++++++++-------
+ drivers/net/ethernet/intel/idpf/idpf_txrx.h   | 440 ++++++---
+ .../net/ethernet/intel/idpf/idpf_virtchnl.c   |  73 +-
+ 7 files changed, 1018 insertions(+), 728 deletions(-)
+
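Before the per-file diffs, the size arithmetic behind the commit message: a 12-bit DECLARE_HASHTABLE() is an array of 2^12 hlist_heads, i.e. 4096 * 8 bytes = 32 KiB on 64-bit, embedded in every queue whether or not it uses flow scheduling. A sketch of the embedded-vs-dynamic trade the patch makes (hypothetical structs, not the driver's actual layout):

#include <linux/hashtable.h>
#include <linux/slab.h>

struct fat_queue {				/* before: every queue pays 32 KiB */
	/* ... hotpath fields ... */
	DECLARE_HASHTABLE(sched_buf_hash, 12);	/* struct hlist_head[4096] */
};

struct lean_tx_queue {				/* after: pointer only */
	/* ... hotpath fields ... */
	struct hlist_head *sched_buf_hash;	/* allocated only when needed */
};

static int lean_tx_queue_init_hash(struct lean_tx_queue *q)
{
	/* kcalloc() zeroes the array, which is a valid empty hlist state */
	q->sched_buf_hash = kcalloc(BIT(12), sizeof(*q->sched_buf_hash),
				    GFP_KERNEL);
	return q->sched_buf_hash ? 0 : -ENOMEM;
}
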
+diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
+index 0b26dd9b8a512..f9e43d171f171 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf.h
++++ b/drivers/net/ethernet/intel/idpf/idpf.h
+@@ -17,7 +17,6 @@ struct idpf_vport_max_q;
+ #include <linux/sctp.h>
+ #include <linux/ethtool_netlink.h>
+ #include <net/gro.h>
+-#include <linux/dim.h>
+ #include "virtchnl2.h"
+ #include "idpf_txrx.h"
+@@ -301,7 +300,7 @@ struct idpf_vport {
+       u16 num_txq_grp;
+       struct idpf_txq_group *txq_grps;
+       u32 txq_model;
+-      struct idpf_queue **txqs;
++      struct idpf_tx_queue **txqs;
+       bool crc_enable;
+       u16 num_rxq;
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+index 1885ba618981d..e933fed16c7ea 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+@@ -437,22 +437,24 @@ struct idpf_stats {
+       .stat_offset = offsetof(_type, _stat) \
+ }
+-/* Helper macro for defining some statistics related to queues */
+-#define IDPF_QUEUE_STAT(_name, _stat) \
+-      IDPF_STAT(struct idpf_queue, _name, _stat)
++/* Helper macros for defining some statistics related to queues */
++#define IDPF_RX_QUEUE_STAT(_name, _stat) \
++      IDPF_STAT(struct idpf_rx_queue, _name, _stat)
++#define IDPF_TX_QUEUE_STAT(_name, _stat) \
++      IDPF_STAT(struct idpf_tx_queue, _name, _stat)
+ /* Stats associated with a Tx queue */
+ static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = {
+-      IDPF_QUEUE_STAT("pkts", q_stats.tx.packets),
+-      IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes),
+-      IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts),
++      IDPF_TX_QUEUE_STAT("pkts", q_stats.packets),
++      IDPF_TX_QUEUE_STAT("bytes", q_stats.bytes),
++      IDPF_TX_QUEUE_STAT("lso_pkts", q_stats.lso_pkts),
+ };
+ /* Stats associated with an Rx queue */
+ static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = {
+-      IDPF_QUEUE_STAT("pkts", q_stats.rx.packets),
+-      IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes),
+-      IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts),
++      IDPF_RX_QUEUE_STAT("pkts", q_stats.packets),
++      IDPF_RX_QUEUE_STAT("bytes", q_stats.bytes),
++      IDPF_RX_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rsc_pkts),
+ };
+ #define IDPF_TX_QUEUE_STATS_LEN               ARRAY_SIZE(idpf_gstrings_tx_queue_stats)
+@@ -633,7 +635,7 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset)
+  * Copies the stat data defined by the pointer and stat structure pair into
+  * the memory supplied as data. If the pointer is null, data will be zero'd.
+  */
+-static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
++static void idpf_add_one_ethtool_stat(u64 *data, const void *pstat,
+                                     const struct idpf_stats *stat)
+ {
+       char *p;
+@@ -671,6 +673,7 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
+  * idpf_add_queue_stats - copy queue statistics into supplied buffer
+  * @data: ethtool stats buffer
+  * @q: the queue to copy
++ * @type: type of the queue
+  *
+  * Queue statistics must be copied while protected by u64_stats_fetch_begin,
+  * so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats
+@@ -681,19 +684,23 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
+  *
+  * This function expects to be called while under rcu_read_lock().
+  */
+-static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q)
++static void idpf_add_queue_stats(u64 **data, const void *q,
++                               enum virtchnl2_queue_type type)
+ {
++      const struct u64_stats_sync *stats_sync;
+       const struct idpf_stats *stats;
+       unsigned int start;
+       unsigned int size;
+       unsigned int i;
+-      if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
++      if (type == VIRTCHNL2_QUEUE_TYPE_RX) {
+               size = IDPF_RX_QUEUE_STATS_LEN;
+               stats = idpf_gstrings_rx_queue_stats;
++              stats_sync = &((const struct idpf_rx_queue *)q)->stats_sync;
+       } else {
+               size = IDPF_TX_QUEUE_STATS_LEN;
+               stats = idpf_gstrings_tx_queue_stats;
++              stats_sync = &((const struct idpf_tx_queue *)q)->stats_sync;
+       }
+       /* To avoid invalid statistics values, ensure that we keep retrying
+@@ -701,10 +708,10 @@ static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q)
+        * u64_stats_fetch_retry.
+        */
+       do {
+-              start = u64_stats_fetch_begin(&q->stats_sync);
++              start = u64_stats_fetch_begin(stats_sync);
+               for (i = 0; i < size; i++)
+                       idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]);
+-      } while (u64_stats_fetch_retry(&q->stats_sync, start));
++      } while (u64_stats_fetch_retry(stats_sync, start));
+       /* Once we successfully copy the stats in, update the data pointer */
+       *data += size;
+@@ -793,7 +800,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
+               for (j = 0; j < num_rxq; j++) {
+                       u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs;
+                       struct idpf_rx_queue_stats *stats;
+-                      struct idpf_queue *rxq;
++                      struct idpf_rx_queue *rxq;
+                       unsigned int start;
+                       if (idpf_is_queue_model_split(vport->rxq_model))
+@@ -807,7 +814,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
+                       do {
+                               start = u64_stats_fetch_begin(&rxq->stats_sync);
+-                              stats = &rxq->q_stats.rx;
++                              stats = &rxq->q_stats;
+                               hw_csum_err = u64_stats_read(&stats->hw_csum_err);
+                               hsplit = u64_stats_read(&stats->hsplit_pkts);
+                               hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf);
+@@ -828,7 +835,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
+               for (j = 0; j < txq_grp->num_txq; j++) {
+                       u64 linearize, qbusy, skb_drops, dma_map_errs;
+-                      struct idpf_queue *txq = txq_grp->txqs[j];
++                      struct idpf_tx_queue *txq = txq_grp->txqs[j];
+                       struct idpf_tx_queue_stats *stats;
+                       unsigned int start;
+@@ -838,7 +845,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
+                       do {
+                               start = u64_stats_fetch_begin(&txq->stats_sync);
+-                              stats = &txq->q_stats.tx;
++                              stats = &txq->q_stats;
+                               linearize = u64_stats_read(&stats->linearize);
+                               qbusy = u64_stats_read(&stats->q_busy);
+                               skb_drops = u64_stats_read(&stats->skb_drops);
+@@ -896,12 +903,12 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
+               qtype = VIRTCHNL2_QUEUE_TYPE_TX;
+               for (j = 0; j < txq_grp->num_txq; j++, total++) {
+-                      struct idpf_queue *txq = txq_grp->txqs[j];
++                      struct idpf_tx_queue *txq = txq_grp->txqs[j];
+                       if (!txq)
+                               idpf_add_empty_queue_stats(&data, qtype);
+                       else
+-                              idpf_add_queue_stats(&data, txq);
++                              idpf_add_queue_stats(&data, txq, qtype);
+               }
+       }
+@@ -929,7 +936,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
+                       num_rxq = rxq_grp->singleq.num_rxq;
+               for (j = 0; j < num_rxq; j++, total++) {
+-                      struct idpf_queue *rxq;
++                      struct idpf_rx_queue *rxq;
+                       if (is_splitq)
+                               rxq = &rxq_grp->splitq.rxq_sets[j]->rxq;
+@@ -938,7 +945,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
+                       if (!rxq)
+                               idpf_add_empty_queue_stats(&data, qtype);
+                       else
+-                              idpf_add_queue_stats(&data, rxq);
++                              idpf_add_queue_stats(&data, rxq, qtype);
+                       /* In splitq mode, don't get page pool stats here since
+                        * the pools are attached to the buffer queues
+@@ -953,7 +960,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
+       for (i = 0; i < vport->num_rxq_grp; i++) {
+               for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+-                      struct idpf_queue *rxbufq =
++                      struct idpf_buf_queue *rxbufq =
+                               &vport->rxq_grps[i].splitq.bufq_sets[j].bufq;
+                       page_pool_get_stats(rxbufq->pp, &pp_stats);
+@@ -971,60 +978,64 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
+ }
+ /**
+- * idpf_find_rxq - find rxq from q index
++ * idpf_find_rxq_vec - find rxq vector from q index
+  * @vport: virtual port associated to queue
+  * @q_num: q index used to find queue
+  *
+- * returns pointer to rx queue
++ * returns pointer to rx vector
+  */
+-static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num)
++static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport,
++                                             int q_num)
+ {
+       int q_grp, q_idx;
+       if (!idpf_is_queue_model_split(vport->rxq_model))
+-              return vport->rxq_grps->singleq.rxqs[q_num];
++              return vport->rxq_grps->singleq.rxqs[q_num]->q_vector;
+       q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+       q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+-      return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq;
++      return vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq.q_vector;
+ }
+ /**
+- * idpf_find_txq - find txq from q index
++ * idpf_find_txq_vec - find txq vector from q index
+  * @vport: virtual port associated to queue
+  * @q_num: q index used to find queue
+  *
+- * returns pointer to tx queue
++ * returns pointer to tx vector
+  */
+-static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num)
++static struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport,
++                                             int q_num)
+ {
+       int q_grp;
+       if (!idpf_is_queue_model_split(vport->txq_model))
+-              return vport->txqs[q_num];
++              return vport->txqs[q_num]->q_vector;
+       q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP;
+-      return vport->txq_grps[q_grp].complq;
++      return vport->txq_grps[q_grp].complq->q_vector;
+ }
+ /**
+  * __idpf_get_q_coalesce - get ITR values for specific queue
+  * @ec: ethtool structure to fill with driver's coalesce settings
+- * @q: quuee of Rx or Tx
++ * @q_vector: queue vector corresponding to this queue
++ * @type: queue type
+  */
+ static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec,
+-                                struct idpf_queue *q)
++                                const struct idpf_q_vector *q_vector,
++                                enum virtchnl2_queue_type type)
+ {
+-      if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
++      if (type == VIRTCHNL2_QUEUE_TYPE_RX) {
+               ec->use_adaptive_rx_coalesce =
+-                              IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode);
+-              ec->rx_coalesce_usecs = q->q_vector->rx_itr_value;
++                              IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode);
++              ec->rx_coalesce_usecs = q_vector->rx_itr_value;
+       } else {
+               ec->use_adaptive_tx_coalesce =
+-                              IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode);
+-              ec->tx_coalesce_usecs = q->q_vector->tx_itr_value;
++                              IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode);
++              ec->tx_coalesce_usecs = q_vector->tx_itr_value;
+       }
+ }
+@@ -1040,8 +1051,8 @@ static int idpf_get_q_coalesce(struct net_device *netdev,
+                              struct ethtool_coalesce *ec,
+                              u32 q_num)
+ {
+-      struct idpf_netdev_priv *np = netdev_priv(netdev);
+-      struct idpf_vport *vport;
++      const struct idpf_netdev_priv *np = netdev_priv(netdev);
++      const struct idpf_vport *vport;
+       int err = 0;
+       idpf_vport_ctrl_lock(netdev);
+@@ -1056,10 +1067,12 @@ static int idpf_get_q_coalesce(struct net_device *netdev,
+       }
+       if (q_num < vport->num_rxq)
+-              __idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num));
++              __idpf_get_q_coalesce(ec, idpf_find_rxq_vec(vport, q_num),
++                                    VIRTCHNL2_QUEUE_TYPE_RX);
+       if (q_num < vport->num_txq)
+-              __idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num));
++              __idpf_get_q_coalesce(ec, idpf_find_txq_vec(vport, q_num),
++                                    VIRTCHNL2_QUEUE_TYPE_TX);
+ unlock_mutex:
+       idpf_vport_ctrl_unlock(netdev);
+@@ -1103,16 +1116,15 @@ static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+ /**
+  * __idpf_set_q_coalesce - set ITR values for specific queue
+  * @ec: ethtool structure from user to update ITR settings
+- * @q: queue for which itr values has to be set
++ * @qv: queue vector for which itr values has to be set
+  * @is_rxq: is queue type rx
+  *
+  * Returns 0 on success, negative otherwise.
+  */
+-static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
+-                               struct idpf_queue *q, bool is_rxq)
++static int __idpf_set_q_coalesce(const struct ethtool_coalesce *ec,
++                               struct idpf_q_vector *qv, bool is_rxq)
+ {
+       u32 use_adaptive_coalesce, coalesce_usecs;
+-      struct idpf_q_vector *qv = q->q_vector;
+       bool is_dim_ena = false;
+       u16 itr_val;
+@@ -1128,7 +1140,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
+               itr_val = qv->tx_itr_value;
+       }
+       if (coalesce_usecs != itr_val && use_adaptive_coalesce) {
+-              netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n");
++              netdev_err(qv->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n");
+               return -EINVAL;
+       }
+@@ -1137,7 +1149,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
+               return 0;
+       if (coalesce_usecs > IDPF_ITR_MAX) {
+-              netdev_err(q->vport->netdev,
++              netdev_err(qv->vport->netdev,
+                          "Invalid value, %d-usecs range is 0-%d\n",
+                          coalesce_usecs, IDPF_ITR_MAX);
+@@ -1146,7 +1158,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
+       if (coalesce_usecs % 2) {
+               coalesce_usecs--;
+-              netdev_info(q->vport->netdev,
++              netdev_info(qv->vport->netdev,
+                           "HW only supports even ITR values, ITR rounded to %d\n",
+                           coalesce_usecs);
+       }
+@@ -1185,15 +1197,16 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
+  *
+  * Return 0 on success, and negative on failure
+  */
+-static int idpf_set_q_coalesce(struct idpf_vport *vport,
+-                             struct ethtool_coalesce *ec,
++static int idpf_set_q_coalesce(const struct idpf_vport *vport,
++                             const struct ethtool_coalesce *ec,
+                              int q_num, bool is_rxq)
+ {
+-      struct idpf_queue *q;
++      struct idpf_q_vector *qv;
+-      q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num);
++      qv = is_rxq ? idpf_find_rxq_vec(vport, q_num) :
++                    idpf_find_txq_vec(vport, q_num);
+-      if (q && __idpf_set_q_coalesce(ec, q, is_rxq))
++      if (qv && __idpf_set_q_coalesce(ec, qv, is_rxq))
+               return -EINVAL;
+       return 0;
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
+index 3ac9d7ab83f20..1ab679a719c77 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
+@@ -1318,14 +1318,14 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport)
+               if (idpf_is_queue_model_split(vport->rxq_model)) {
+                       for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+-                              struct idpf_queue *q =
++                              const struct idpf_buf_queue *q =
+                                       &grp->splitq.bufq_sets[j].bufq;
+                               writel(q->next_to_alloc, q->tail);
+                       }
+               } else {
+                       for (j = 0; j < grp->singleq.num_rxq; j++) {
+-                              struct idpf_queue *q =
++                              const struct idpf_rx_queue *q =
+                                       grp->singleq.rxqs[j];
+                               writel(q->next_to_alloc, q->tail);
+@@ -1852,7 +1852,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
+       enum idpf_vport_state current_state = np->state;
+       struct idpf_adapter *adapter = vport->adapter;
+       struct idpf_vport *new_vport;
+-      int err, i;
++      int err;
+       /* If the system is low on memory, we can end up in bad state if we
+        * free all the memory for queue resources and try to allocate them
+@@ -1923,46 +1923,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
+        */
+       memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps));
+-      /* Since idpf_vport_queues_alloc was called with new_port, the queue
+-       * back pointers are currently pointing to the local new_vport. Reset
+-       * the backpointers to the original vport here
+-       */
+-      for (i = 0; i < vport->num_txq_grp; i++) {
+-              struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+-              int j;
+-
+-              tx_qgrp->vport = vport;
+-              for (j = 0; j < tx_qgrp->num_txq; j++)
+-                      tx_qgrp->txqs[j]->vport = vport;
+-
+-              if (idpf_is_queue_model_split(vport->txq_model))
+-                      tx_qgrp->complq->vport = vport;
+-      }
+-
+-      for (i = 0; i < vport->num_rxq_grp; i++) {
+-              struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+-              struct idpf_queue *q;
+-              u16 num_rxq;
+-              int j;
+-
+-              rx_qgrp->vport = vport;
+-              for (j = 0; j < vport->num_bufqs_per_qgrp; j++)
+-                      rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport;
+-
+-              if (idpf_is_queue_model_split(vport->rxq_model))
+-                      num_rxq = rx_qgrp->splitq.num_rxq_sets;
+-              else
+-                      num_rxq = rx_qgrp->singleq.num_rxq;
+-
+-              for (j = 0; j < num_rxq; j++) {
+-                      if (idpf_is_queue_model_split(vport->rxq_model))
+-                              q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+-                      else
+-                              q = rx_qgrp->singleq.rxqs[j];
+-                      q->vport = vport;
+-              }
+-      }
+-
+       if (reset_cause == IDPF_SR_Q_CHANGE)
+               idpf_vport_alloc_vec_indexes(vport);
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+index b17d88e150006..9864a3992f0c3 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+@@ -186,7 +186,7 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb,
+  * and gets a physical address for each memory location and programs
+  * it and the length into the transmit base mode descriptor.
+  */
+-static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
++static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
+                               struct idpf_tx_buf *first,
+                               struct idpf_tx_offload_params *offloads)
+ {
+@@ -210,7 +210,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+       dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
+       /* write each descriptor with CRC bit */
+-      if (tx_q->vport->crc_enable)
++      if (idpf_queue_has(CRC_EN, tx_q))
+               td_cmd |= IDPF_TX_DESC_CMD_ICRC;
+       for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+@@ -285,7 +285,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+       /* set next_to_watch value indicating a packet is present */
+       first->next_to_watch = tx_desc;
+-      nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
++      nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+       netdev_tx_sent_queue(nq, first->bytecount);
+       idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
+@@ -299,7 +299,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+  * ring entry to reflect that this index is a context descriptor
+  */
+ static struct idpf_base_tx_ctx_desc *
+-idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
++idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
+ {
+       struct idpf_base_tx_ctx_desc *ctx_desc;
+       int ntu = txq->next_to_use;
+@@ -320,7 +320,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
+  * @txq: queue to send buffer on
+  * @offload: offload parameter structure
+  **/
+-static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
++static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
+                                          struct idpf_tx_offload_params *offload)
+ {
+       struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
+@@ -333,7 +333,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
+               qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);
+               u64_stats_update_begin(&txq->stats_sync);
+-              u64_stats_inc(&txq->q_stats.tx.lso_pkts);
++              u64_stats_inc(&txq->q_stats.lso_pkts);
+               u64_stats_update_end(&txq->stats_sync);
+       }
+@@ -352,7 +352,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
+  * Returns NETDEV_TX_OK if sent, else an error code
+  */
+ static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
+-                                       struct idpf_queue *tx_q)
++                                       struct idpf_tx_queue *tx_q)
+ {
+       struct idpf_tx_offload_params offload = { };
+       struct idpf_tx_buf *first;
+@@ -419,7 +419,7 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+                                 struct net_device *netdev)
+ {
+       struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+-      struct idpf_queue *tx_q;
++      struct idpf_tx_queue *tx_q;
+       tx_q = vport->txqs[skb_get_queue_mapping(skb)];
+@@ -442,16 +442,15 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+  * @cleaned: returns number of packets cleaned
+  *
+  */
+-static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
++static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
+                                 int *cleaned)
+ {
+-      unsigned int budget = tx_q->vport->compln_clean_budget;
+       unsigned int total_bytes = 0, total_pkts = 0;
+       struct idpf_base_tx_desc *tx_desc;
++      u32 budget = tx_q->clean_budget;
+       s16 ntc = tx_q->next_to_clean;
+       struct idpf_netdev_priv *np;
+       struct idpf_tx_buf *tx_buf;
+-      struct idpf_vport *vport;
+       struct netdev_queue *nq;
+       bool dont_wake;
+@@ -550,16 +549,15 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
+       *cleaned += total_pkts;
+       u64_stats_update_begin(&tx_q->stats_sync);
+-      u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts);
+-      u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes);
++      u64_stats_add(&tx_q->q_stats.packets, total_pkts);
++      u64_stats_add(&tx_q->q_stats.bytes, total_bytes);
+       u64_stats_update_end(&tx_q->stats_sync);
+-      vport = tx_q->vport;
+-      np = netdev_priv(vport->netdev);
+-      nq = netdev_get_tx_queue(vport->netdev, tx_q->idx);
++      np = netdev_priv(tx_q->netdev);
++      nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+       dont_wake = np->state != __IDPF_VPORT_UP ||
+-                  !netif_carrier_ok(vport->netdev);
++                  !netif_carrier_ok(tx_q->netdev);
+       __netif_txq_completed_wake(nq, total_pkts, total_bytes,
+                                  IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
+                                  dont_wake);
+@@ -584,7 +582,7 @@ static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
+       budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
+       for (i = 0; i < num_txq; i++) {
+-              struct idpf_queue *q;
++              struct idpf_tx_queue *q;
+               q = q_vec->tx[i];
+               clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
+@@ -614,14 +612,9 @@ static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
+ /**
+  * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
+- * @rxq: Rx ring being processed
+  * @rx_desc: Rx descriptor for current buffer
+- * @skb: Current socket buffer containing buffer in progress
+- * @ntc: next to clean
+  */
+-static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
+-                                     union virtchnl2_rx_desc *rx_desc,
+-                                     struct sk_buff *skb, u16 ntc)
++static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
+ {
+       /* if we are the last buffer then there is nothing else to do */
+       if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
+@@ -639,7 +632,7 @@ static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
+  *
+  * skb->protocol must be set before this function is called
+  */
+-static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
++static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb,
+                                struct idpf_rx_csum_decoded *csum_bits,
+                                u16 ptype)
+ {
+@@ -647,14 +640,14 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+       bool ipv4, ipv6;
+       /* check if Rx checksum is enabled */
+-      if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM)))
++      if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM)))
+               return;
+       /* check if HW has decoded the packet and checksum */
+       if (unlikely(!(csum_bits->l3l4p)))
+               return;
+-      decoded = rxq->vport->rx_ptype_lkup[ptype];
++      decoded = rxq->rx_ptype_lkup[ptype];
+       if (unlikely(!(decoded.known && decoded.outer_ip)))
+               return;
+@@ -707,7 +700,7 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+ checksum_fail:
+       u64_stats_update_begin(&rxq->stats_sync);
+-      u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
++      u64_stats_inc(&rxq->q_stats.hw_csum_err);
+       u64_stats_update_end(&rxq->stats_sync);
+ }
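/*
 * A minimal sketch of the u64_stats_sync write/read pairing that the
 * counters touched above (q_stats.packets, q_stats.bytes, hw_csum_err)
 * rely on; struct and function names here are illustrative, not the
 * idpf definitions, and u64_stats_init() on the syncp at queue setup
 * is assumed.
 */
#include <linux/u64_stats_sync.h>

struct demo_q_stats {
	struct u64_stats_sync	syncp;
	u64_stats_t		packets;
	u64_stats_t		bytes;
};

/* hot path: one begin/end section per cleaned batch, as in the hunks above */
static void demo_stats_update(struct demo_q_stats *s, u64 pkts, u64 bytes)
{
	u64_stats_update_begin(&s->syncp);
	u64_stats_add(&s->packets, pkts);
	u64_stats_add(&s->bytes, bytes);
	u64_stats_update_end(&s->syncp);
}

/* slow path (e.g. .ndo_get_stats64): retries if an update raced on 32-bit */
static void demo_stats_read(struct demo_q_stats *s, u64 *pkts, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		*pkts = u64_stats_read(&s->packets);
		*bytes = u64_stats_read(&s->bytes);
	} while (u64_stats_fetch_retry(&s->syncp, start));
}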
+@@ -721,9 +714,9 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+  * descriptor writeback format.
+  **/
+-static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
++static void idpf_rx_singleq_base_csum(struct idpf_rx_queue *rx_q,
+                                     struct sk_buff *skb,
+-                                    union virtchnl2_rx_desc *rx_desc,
++                                    const union virtchnl2_rx_desc *rx_desc,
+                                     u16 ptype)
+ {
+       struct idpf_rx_csum_decoded csum_bits;
+@@ -761,9 +754,9 @@ static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
+  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+  * descriptor writeback format.
+  **/
+-static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
++static void idpf_rx_singleq_flex_csum(struct idpf_rx_queue *rx_q,
+                                     struct sk_buff *skb,
+-                                    union virtchnl2_rx_desc *rx_desc,
++                                    const union virtchnl2_rx_desc *rx_desc,
+                                     u16 ptype)
+ {
+       struct idpf_rx_csum_decoded csum_bits;
+@@ -801,14 +794,14 @@ static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
+  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+  * descriptor writeback format.
+  **/
+-static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
++static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
+                                     struct sk_buff *skb,
+-                                    union virtchnl2_rx_desc *rx_desc,
++                                    const union virtchnl2_rx_desc *rx_desc,
+                                     struct idpf_rx_ptype_decoded *decoded)
+ {
+       u64 mask, qw1;
+-      if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
++      if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH)))
+               return;
+       mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
+@@ -831,12 +824,12 @@ static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
+  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+  * descriptor writeback format.
+  **/
+-static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
++static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
+                                     struct sk_buff *skb,
+-                                    union virtchnl2_rx_desc *rx_desc,
++                                    const union virtchnl2_rx_desc *rx_desc,
+                                     struct idpf_rx_ptype_decoded *decoded)
+ {
+-      if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
++      if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH)))
+               return;
+       if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
+@@ -857,16 +850,16 @@ static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
+  * order to populate the hash, checksum, VLAN, protocol, and
+  * other fields within the skb.
+  */
+-static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
+-                                             struct sk_buff *skb,
+-                                             union virtchnl2_rx_desc *rx_desc,
+-                                             u16 ptype)
++static void
++idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
++                                 struct sk_buff *skb,
++                                 const union virtchnl2_rx_desc *rx_desc,
++                                 u16 ptype)
+ {
+-      struct idpf_rx_ptype_decoded decoded =
+-                                      rx_q->vport->rx_ptype_lkup[ptype];
++      struct idpf_rx_ptype_decoded decoded = rx_q->rx_ptype_lkup[ptype];
+       /* modifies the skb - consumes the enet header */
+-      skb->protocol = eth_type_trans(skb, rx_q->vport->netdev);
++      skb->protocol = eth_type_trans(skb, rx_q->netdev);
+       /* Check if we're using base mode descriptor IDs */
+       if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
+@@ -878,6 +871,22 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
+       }
+ }
++/**
++ * idpf_rx_buf_hw_update - Store the new tail and head values
++ * @rxq: queue to bump
++ * @val: new head index
++ */
++static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
++{
++      rxq->next_to_use = val;
++
++      if (unlikely(!rxq->tail))
++              return;
++
++      /* writel has an implicit memory barrier */
++      writel(val, rxq->tail);
++}
++
+ /**
+  * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
+  * @rx_q: queue for which the hw buffers are allocated
+@@ -885,7 +894,7 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
+  *
+  * Returns false if all allocations were successful, true if any fail
+  */
+-bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
++bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
+                                     u16 cleaned_count)
+ {
+       struct virtchnl2_singleq_rx_buf_desc *desc;
+@@ -896,7 +905,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+               return false;
+       desc = &rx_q->single_buf[nta];
+-      buf = &rx_q->rx_buf.buf[nta];
++      buf = &rx_q->rx_buf[nta];
+       do {
+               dma_addr_t addr;
+@@ -916,7 +925,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+               nta++;
+               if (unlikely(nta == rx_q->desc_count)) {
+                       desc = &rx_q->single_buf[0];
+-                      buf = rx_q->rx_buf.buf;
++                      buf = rx_q->rx_buf;
+                       nta = 0;
+               }
+@@ -933,7 +942,6 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+ /**
+  * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
+- * @rx_q: Rx descriptor queue
+  * @rx_desc: the descriptor to process
+  * @fields: storage for extracted values
+  *
+@@ -943,9 +951,9 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+  * descriptor writeback format.
+  */
+-static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
+-                                              union virtchnl2_rx_desc *rx_desc,
+-                                              struct idpf_rx_extracted *fields)
++static void
++idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
++                                  struct idpf_rx_extracted *fields)
+ {
+       u64 qword;
+@@ -957,7 +965,6 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
+ /**
+  * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
+- * @rx_q: Rx descriptor queue
+  * @rx_desc: the descriptor to process
+  * @fields: storage for extracted values
+  *
+@@ -967,9 +974,9 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
+  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+  * descriptor writeback format.
+  */
+-static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
+-                                              union virtchnl2_rx_desc *rx_desc,
+-                                              struct idpf_rx_extracted *fields)
++static void
++idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
++                                  struct idpf_rx_extracted *fields)
+ {
+       fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
+                                le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
+@@ -984,14 +991,15 @@ static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
+  * @fields: storage for extracted values
+  *
+  */
+-static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
+-                                         union virtchnl2_rx_desc *rx_desc,
+-                                         struct idpf_rx_extracted *fields)
++static void
++idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
++                             const union virtchnl2_rx_desc *rx_desc,
++                             struct idpf_rx_extracted *fields)
+ {
+       if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
+-              idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields);
++              idpf_rx_singleq_extract_base_fields(rx_desc, fields);
+       else
+-              idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields);
++              idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
+ }
+ /**
+@@ -1001,7 +1009,7 @@ static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
+  *
+  * Returns true if there's any budget left (e.g. the clean is finished)
+  */
+-static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
++static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
+ {
+       unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+       struct sk_buff *skb = rx_q->skb;
+@@ -1036,7 +1044,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
+               idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
+-              rx_buf = &rx_q->rx_buf.buf[ntc];
++              rx_buf = &rx_q->rx_buf[ntc];
+               if (!fields.size) {
+                       idpf_rx_put_page(rx_buf);
+                       goto skip_data;
+@@ -1058,7 +1066,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
+               cleaned_count++;
+               /* skip if it is non EOP desc */
+-              if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc))
++              if (idpf_rx_singleq_is_non_eop(rx_desc))
+                       continue;
+ #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
+@@ -1084,7 +1092,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
+                                                  rx_desc, fields.rx_ptype);
+               /* send completed skb up the stack */
+-              napi_gro_receive(&rx_q->q_vector->napi, skb);
++              napi_gro_receive(rx_q->pp->p.napi, skb);
+               skb = NULL;
+               /* update budget accounting */
+@@ -1099,8 +1107,8 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
+               failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
+       u64_stats_update_begin(&rx_q->stats_sync);
+-      u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts);
+-      u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes);
++      u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
++      u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
+       u64_stats_update_end(&rx_q->stats_sync);
+       /* guarantee a trip back through this routine if there was a failure */
+@@ -1127,7 +1135,7 @@ static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
+        */
+       budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
+       for (i = 0; i < num_rxq; i++) {
+-              struct idpf_queue *rxq = q_vec->rx[i];
++              struct idpf_rx_queue *rxq = q_vec->rx[i];
+               int pkts_cleaned_per_q;
+               pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);
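/*
 * For orientation: the hunks in idpf_singleq_txrx.c above replace the
 * catch-all struct idpf_queue with strictly typed Rx/Tx queues that
 * carry what the hot path needs directly (netdev, ptype table, page
 * pool) instead of chasing q->vport.  An abridged sketch of the Rx
 * side, limited to fields visible in these hunks; the real struct
 * idpf_rx_queue in idpf_txrx.h has more members:
 */
struct idpf_rx_queue_sketch {
	union virtchnl2_rx_desc *desc_ring;	/* descriptor ring */
	void __iomem *tail;			/* HW tail doorbell */
	struct idpf_rx_buf *rx_buf;		/* per-descriptor buffer state */
	struct page_pool *pp;			/* backing pages */
	struct net_device *netdev;		/* features, GRO, stats */
	struct idpf_rx_ptype_decoded *rx_ptype_lkup; /* ptype decode table */
	struct sk_buff *skb;			/* packet being assembled */
	u32 desc_count;
	u32 next_to_use;
	u32 next_to_clean;
	u32 rxdids;				/* base vs. flex descriptor format */
	struct u64_stats_sync stats_sync;
	struct {
		u64_stats_t packets;
		u64_stats_t bytes;
		u64_stats_t hw_csum_err;
	} q_stats;
};

/* old access pattern: rxq->vport->netdev->features
 * new access pattern: rxq->netdev->features         */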
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+index 948b485da539c..cdb01c54213f9 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+@@ -60,7 +60,8 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+  * @tx_q: the queue that owns the buffer
+  * @tx_buf: the buffer to free
+  */
+-static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf)
++static void idpf_tx_buf_rel(struct idpf_tx_queue *tx_q,
++                          struct idpf_tx_buf *tx_buf)
+ {
+       if (tx_buf->skb) {
+               if (dma_unmap_len(tx_buf, len))
+@@ -86,8 +87,9 @@ static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf)
+  * idpf_tx_buf_rel_all - Free any empty Tx buffers
+  * @txq: queue to be cleaned
+  */
+-static void idpf_tx_buf_rel_all(struct idpf_queue *txq)
++static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq)
+ {
++      struct idpf_buf_lifo *buf_stack;
+       u16 i;
+       /* Buffers already cleared, nothing to do */
+@@ -101,38 +103,57 @@ static void idpf_tx_buf_rel_all(struct idpf_queue *txq)
+       kfree(txq->tx_buf);
+       txq->tx_buf = NULL;
+-      if (!txq->buf_stack.bufs)
++      if (!idpf_queue_has(FLOW_SCH_EN, txq))
+               return;
+-      for (i = 0; i < txq->buf_stack.size; i++)
+-              kfree(txq->buf_stack.bufs[i]);
++      buf_stack = &txq->stash->buf_stack;
++      if (!buf_stack->bufs)
++              return;
++
++      for (i = 0; i < buf_stack->size; i++)
++              kfree(buf_stack->bufs[i]);
+-      kfree(txq->buf_stack.bufs);
+-      txq->buf_stack.bufs = NULL;
++      kfree(buf_stack->bufs);
++      buf_stack->bufs = NULL;
+ }
+ /**
+  * idpf_tx_desc_rel - Free Tx resources per queue
+  * @txq: Tx descriptor ring for a specific queue
+- * @bufq: buffer q or completion q
+  *
+  * Free all transmit software resources
+  */
+-static void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq)
++static void idpf_tx_desc_rel(struct idpf_tx_queue *txq)
+ {
+-      if (bufq)
+-              idpf_tx_buf_rel_all(txq);
++      idpf_tx_buf_rel_all(txq);
+       if (!txq->desc_ring)
+               return;
+       dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma);
+       txq->desc_ring = NULL;
+-      txq->next_to_alloc = 0;
+       txq->next_to_use = 0;
+       txq->next_to_clean = 0;
+ }
++/**
++ * idpf_compl_desc_rel - Free completion resources per queue
++ * @complq: completion queue
++ *
++ * Free all completion software resources.
++ */
++static void idpf_compl_desc_rel(struct idpf_compl_queue *complq)
++{
++      if (!complq->comp)
++              return;
++
++      dma_free_coherent(complq->netdev->dev.parent, complq->size,
++                        complq->comp, complq->dma);
++      complq->comp = NULL;
++      complq->next_to_use = 0;
++      complq->next_to_clean = 0;
++}
++
+ /**
+  * idpf_tx_desc_rel_all - Free Tx Resources for All Queues
+  * @vport: virtual port structure
+@@ -150,10 +171,10 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport)
+               struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+               for (j = 0; j < txq_grp->num_txq; j++)
+-                      idpf_tx_desc_rel(txq_grp->txqs[j], true);
++                      idpf_tx_desc_rel(txq_grp->txqs[j]);
+               if (idpf_is_queue_model_split(vport->txq_model))
+-                      idpf_tx_desc_rel(txq_grp->complq, false);
++                      idpf_compl_desc_rel(txq_grp->complq);
+       }
+ }
+@@ -163,8 +184,9 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport)
+  *
+  * Returns 0 on success, negative on failure
+  */
+-static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q)
++static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q)
+ {
++      struct idpf_buf_lifo *buf_stack;
+       int buf_size;
+       int i;
+@@ -180,22 +202,26 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q)
+       for (i = 0; i < tx_q->desc_count; i++)
+               tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
++      if (!idpf_queue_has(FLOW_SCH_EN, tx_q))
++              return 0;
++
++      buf_stack = &tx_q->stash->buf_stack;
++
+       /* Initialize tx buf stack for out-of-order completions if
+        * flow scheduling offload is enabled
+        */
+-      tx_q->buf_stack.bufs =
+-              kcalloc(tx_q->desc_count, sizeof(struct idpf_tx_stash *),
+-                      GFP_KERNEL);
+-      if (!tx_q->buf_stack.bufs)
++      buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs),
++                                GFP_KERNEL);
++      if (!buf_stack->bufs)
+               return -ENOMEM;
+-      tx_q->buf_stack.size = tx_q->desc_count;
+-      tx_q->buf_stack.top = tx_q->desc_count;
++      buf_stack->size = tx_q->desc_count;
++      buf_stack->top = tx_q->desc_count;
+       for (i = 0; i < tx_q->desc_count; i++) {
+-              tx_q->buf_stack.bufs[i] = kzalloc(sizeof(*tx_q->buf_stack.bufs[i]),
+-                                                GFP_KERNEL);
+-              if (!tx_q->buf_stack.bufs[i])
++              buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]),
++                                           GFP_KERNEL);
++              if (!buf_stack->bufs[i])
+                       return -ENOMEM;
+       }
+@@ -204,28 +230,22 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q)
+ /**
+  * idpf_tx_desc_alloc - Allocate the Tx descriptors
++ * @vport: vport to allocate resources for
+  * @tx_q: the tx ring to set up
+- * @bufq: buffer or completion queue
+  *
+  * Returns 0 on success, negative on failure
+  */
+-static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq)
++static int idpf_tx_desc_alloc(const struct idpf_vport *vport,
++                            struct idpf_tx_queue *tx_q)
+ {
+       struct device *dev = tx_q->dev;
+-      u32 desc_sz;
+       int err;
+-      if (bufq) {
+-              err = idpf_tx_buf_alloc_all(tx_q);
+-              if (err)
+-                      goto err_alloc;
+-
+-              desc_sz = sizeof(struct idpf_base_tx_desc);
+-      } else {
+-              desc_sz = sizeof(struct idpf_splitq_tx_compl_desc);
+-      }
++      err = idpf_tx_buf_alloc_all(tx_q);
++      if (err)
++              goto err_alloc;
+-      tx_q->size = tx_q->desc_count * desc_sz;
++      tx_q->size = tx_q->desc_count * sizeof(*tx_q->base_tx);
+       /* Allocate descriptors also round up to nearest 4K */
+       tx_q->size = ALIGN(tx_q->size, 4096);
+@@ -238,19 +258,43 @@ static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq)
+               goto err_alloc;
+       }
+-      tx_q->next_to_alloc = 0;
+       tx_q->next_to_use = 0;
+       tx_q->next_to_clean = 0;
+-      set_bit(__IDPF_Q_GEN_CHK, tx_q->flags);
++      idpf_queue_set(GEN_CHK, tx_q);
+       return 0;
+ err_alloc:
+-      idpf_tx_desc_rel(tx_q, bufq);
++      idpf_tx_desc_rel(tx_q);
+       return err;
+ }
++/**
++ * idpf_compl_desc_alloc - allocate completion descriptors
++ * @vport: vport to allocate resources for
++ * @complq: completion queue to set up
++ *
++ * Return: 0 on success, -errno on failure.
++ */
++static int idpf_compl_desc_alloc(const struct idpf_vport *vport,
++                               struct idpf_compl_queue *complq)
++{
++      complq->size = array_size(complq->desc_count, sizeof(*complq->comp));
++
++      complq->comp = dma_alloc_coherent(complq->netdev->dev.parent,
++                                        complq->size, &complq->dma,
++                                        GFP_KERNEL);
++      if (!complq->comp)
++              return -ENOMEM;
++
++      complq->next_to_use = 0;
++      complq->next_to_clean = 0;
++      idpf_queue_set(GEN_CHK, complq);
++
++      return 0;
++}
++
+ /**
+  * idpf_tx_desc_alloc_all - allocate all queues Tx resources
+  * @vport: virtual port private structure
+@@ -259,7 +303,6 @@ static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq)
+  */
+ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
+ {
+-      struct device *dev = &vport->adapter->pdev->dev;
+       int err = 0;
+       int i, j;
+@@ -268,13 +311,14 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
+        */
+       for (i = 0; i < vport->num_txq_grp; i++) {
+               for (j = 0; j < vport->txq_grps[i].num_txq; j++) {
+-                      struct idpf_queue *txq = vport->txq_grps[i].txqs[j];
++                      struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j];
+                       u8 gen_bits = 0;
+                       u16 bufidx_mask;
+-                      err = idpf_tx_desc_alloc(txq, true);
++                      err = idpf_tx_desc_alloc(vport, txq);
+                       if (err) {
+-                              dev_err(dev, "Allocation for Tx Queue %u failed\n",
++                              pci_err(vport->adapter->pdev,
++                                      "Allocation for Tx Queue %u failed\n",
+                                       i);
+                               goto err_out;
+                       }
+@@ -312,9 +356,10 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
+                       continue;
+               /* Setup completion queues */
+-              err = idpf_tx_desc_alloc(vport->txq_grps[i].complq, false);
++              err = idpf_compl_desc_alloc(vport, vport->txq_grps[i].complq);
+               if (err) {
+-                      dev_err(dev, "Allocation for Tx Completion Queue %u failed\n",
++                      pci_err(vport->adapter->pdev,
++                              "Allocation for Tx Completion Queue %u failed\n",
+                               i);
+                       goto err_out;
+               }
+@@ -329,15 +374,14 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
+ /**
+  * idpf_rx_page_rel - Release an rx buffer page
+- * @rxq: the queue that owns the buffer
+  * @rx_buf: the buffer to free
+  */
+-static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf)
++static void idpf_rx_page_rel(struct idpf_rx_buf *rx_buf)
+ {
+       if (unlikely(!rx_buf->page))
+               return;
+-      page_pool_put_full_page(rxq->pp, rx_buf->page, false);
++      page_pool_put_full_page(rx_buf->page->pp, rx_buf->page, false);
+       rx_buf->page = NULL;
+       rx_buf->page_offset = 0;
+@@ -345,54 +389,72 @@ static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf)
+ /**
+  * idpf_rx_hdr_buf_rel_all - Release header buffer memory
+- * @rxq: queue to use
++ * @bufq: queue to use
++ * @dev: device to free DMA memory
+  */
+-static void idpf_rx_hdr_buf_rel_all(struct idpf_queue *rxq)
++static void idpf_rx_hdr_buf_rel_all(struct idpf_buf_queue *bufq,
++                                  struct device *dev)
+ {
+-      struct idpf_adapter *adapter = rxq->vport->adapter;
+-
+-      dma_free_coherent(&adapter->pdev->dev,
+-                        rxq->desc_count * IDPF_HDR_BUF_SIZE,
+-                        rxq->rx_buf.hdr_buf_va,
+-                        rxq->rx_buf.hdr_buf_pa);
+-      rxq->rx_buf.hdr_buf_va = NULL;
++      dma_free_coherent(dev, bufq->desc_count * IDPF_HDR_BUF_SIZE,
++                        bufq->rx_buf.hdr_buf_va, bufq->rx_buf.hdr_buf_pa);
++      bufq->rx_buf.hdr_buf_va = NULL;
+ }
+ /**
+- * idpf_rx_buf_rel_all - Free all Rx buffer resources for a queue
+- * @rxq: queue to be cleaned
++ * idpf_rx_buf_rel_bufq - Free all Rx buffer resources for a buffer queue
++ * @bufq: queue to be cleaned
++ * @dev: device to free DMA memory
+  */
+-static void idpf_rx_buf_rel_all(struct idpf_queue *rxq)
++static void idpf_rx_buf_rel_bufq(struct idpf_buf_queue *bufq,
++                               struct device *dev)
+ {
+-      u16 i;
+-
+       /* queue already cleared, nothing to do */
+-      if (!rxq->rx_buf.buf)
++      if (!bufq->rx_buf.buf)
+               return;
+       /* Free all the bufs allocated and given to hw on Rx queue */
+-      for (i = 0; i < rxq->desc_count; i++)
+-              idpf_rx_page_rel(rxq, &rxq->rx_buf.buf[i]);
++      for (u32 i = 0; i < bufq->desc_count; i++)
++              idpf_rx_page_rel(&bufq->rx_buf.buf[i]);
++
++      if (idpf_queue_has(HSPLIT_EN, bufq))
++              idpf_rx_hdr_buf_rel_all(bufq, dev);
++
++      page_pool_destroy(bufq->pp);
++      bufq->pp = NULL;
++
++      kfree(bufq->rx_buf.buf);
++      bufq->rx_buf.buf = NULL;
++}
+-      if (rxq->rx_hsplit_en)
+-              idpf_rx_hdr_buf_rel_all(rxq);
++/**
++ * idpf_rx_buf_rel_all - Free all Rx buffer resources for a receive queue
++ * @rxq: queue to be cleaned
++ */
++static void idpf_rx_buf_rel_all(struct idpf_rx_queue *rxq)
++{
++      if (!rxq->rx_buf)
++              return;
++
++      for (u32 i = 0; i < rxq->desc_count; i++)
++              idpf_rx_page_rel(&rxq->rx_buf[i]);
+       page_pool_destroy(rxq->pp);
+       rxq->pp = NULL;
+-      kfree(rxq->rx_buf.buf);
+-      rxq->rx_buf.buf = NULL;
++      kfree(rxq->rx_buf);
++      rxq->rx_buf = NULL;
+ }
+ /**
+  * idpf_rx_desc_rel - Free a specific Rx q resources
+  * @rxq: queue to clean the resources from
+- * @bufq: buffer q or completion q
+- * @q_model: single or split q model
++ * @dev: device to free DMA memory
++ * @model: single or split queue model
+  *
+  * Free a specific rx queue resources
+  */
+-static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model)
++static void idpf_rx_desc_rel(struct idpf_rx_queue *rxq, struct device *dev,
++                           u32 model)
+ {
+       if (!rxq)
+               return;
+@@ -402,7 +464,7 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model)
+               rxq->skb = NULL;
+       }
+-      if (bufq || !idpf_is_queue_model_split(q_model))
++      if (!idpf_is_queue_model_split(model))
+               idpf_rx_buf_rel_all(rxq);
+       rxq->next_to_alloc = 0;
+@@ -411,10 +473,34 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model)
+       if (!rxq->desc_ring)
+               return;
+-      dmam_free_coherent(rxq->dev, rxq->size, rxq->desc_ring, rxq->dma);
++      dmam_free_coherent(dev, rxq->size, rxq->desc_ring, rxq->dma);
+       rxq->desc_ring = NULL;
+ }
++/**
++ * idpf_rx_desc_rel_bufq - free buffer queue resources
++ * @bufq: buffer queue to clean the resources from
++ * @dev: device to free DMA memory
++ */
++static void idpf_rx_desc_rel_bufq(struct idpf_buf_queue *bufq,
++                                struct device *dev)
++{
++      if (!bufq)
++              return;
++
++      idpf_rx_buf_rel_bufq(bufq, dev);
++
++      bufq->next_to_alloc = 0;
++      bufq->next_to_clean = 0;
++      bufq->next_to_use = 0;
++
++      if (!bufq->split_buf)
++              return;
++
++      dma_free_coherent(dev, bufq->size, bufq->split_buf, bufq->dma);
++      bufq->split_buf = NULL;
++}
++
+ /**
+  * idpf_rx_desc_rel_all - Free Rx Resources for All Queues
+  * @vport: virtual port structure
+@@ -423,6 +509,7 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model)
+  */
+ static void idpf_rx_desc_rel_all(struct idpf_vport *vport)
+ {
++      struct device *dev = &vport->adapter->pdev->dev;
+       struct idpf_rxq_group *rx_qgrp;
+       u16 num_rxq;
+       int i, j;
+@@ -435,15 +522,15 @@ static void idpf_rx_desc_rel_all(struct idpf_vport *vport)
+               if (!idpf_is_queue_model_split(vport->rxq_model)) {
+                       for (j = 0; j < rx_qgrp->singleq.num_rxq; j++)
+-                              idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j],
+-                                               false, vport->rxq_model);
++                              idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j], dev,
++                                               VIRTCHNL2_QUEUE_MODEL_SINGLE);
+                       continue;
+               }
+               num_rxq = rx_qgrp->splitq.num_rxq_sets;
+               for (j = 0; j < num_rxq; j++)
+                       idpf_rx_desc_rel(&rx_qgrp->splitq.rxq_sets[j]->rxq,
+-                                       false, vport->rxq_model);
++                                       dev, VIRTCHNL2_QUEUE_MODEL_SPLIT);
+               if (!rx_qgrp->splitq.bufq_sets)
+                       continue;
+@@ -452,44 +539,40 @@ static void idpf_rx_desc_rel_all(struct idpf_vport *vport)
+                       struct idpf_bufq_set *bufq_set =
+                               &rx_qgrp->splitq.bufq_sets[j];
+-                      idpf_rx_desc_rel(&bufq_set->bufq, true,
+-                                       vport->rxq_model);
++                      idpf_rx_desc_rel_bufq(&bufq_set->bufq, dev);
+               }
+       }
+ }
+ /**
+  * idpf_rx_buf_hw_update - Store the new tail and head values
+- * @rxq: queue to bump
++ * @bufq: queue to bump
+  * @val: new head index
+  */
+-void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val)
++static void idpf_rx_buf_hw_update(struct idpf_buf_queue *bufq, u32 val)
+ {
+-      rxq->next_to_use = val;
++      bufq->next_to_use = val;
+-      if (unlikely(!rxq->tail))
++      if (unlikely(!bufq->tail))
+               return;
+       /* writel has an implicit memory barrier */
+-      writel(val, rxq->tail);
++      writel(val, bufq->tail);
+ }
+ /**
+  * idpf_rx_hdr_buf_alloc_all - Allocate memory for header buffers
+- * @rxq: ring to use
++ * @bufq: ring to use
+  *
+  * Returns 0 on success, negative on failure.
+  */
+-static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq)
++static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq)
+ {
+-      struct idpf_adapter *adapter = rxq->vport->adapter;
+-
+-      rxq->rx_buf.hdr_buf_va =
+-              dma_alloc_coherent(&adapter->pdev->dev,
+-                                 IDPF_HDR_BUF_SIZE * rxq->desc_count,
+-                                 &rxq->rx_buf.hdr_buf_pa,
+-                                 GFP_KERNEL);
+-      if (!rxq->rx_buf.hdr_buf_va)
++      bufq->rx_buf.hdr_buf_va =
++              dma_alloc_coherent(bufq->q_vector->vport->netdev->dev.parent,
++                                 IDPF_HDR_BUF_SIZE * bufq->desc_count,
++                                 &bufq->rx_buf.hdr_buf_pa, GFP_KERNEL);
++      if (!bufq->rx_buf.hdr_buf_va)
+               return -ENOMEM;
+       return 0;
+@@ -502,19 +585,20 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq)
+  */
+ static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id)
+ {
+-      u16 nta = refillq->next_to_alloc;
++      u32 nta = refillq->next_to_use;
+       /* store the buffer ID and the SW maintained GEN bit to the refillq */
+       refillq->ring[nta] =
+               FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) |
+               FIELD_PREP(IDPF_RX_BI_GEN_M,
+-                         test_bit(__IDPF_Q_GEN_CHK, refillq->flags));
++                         idpf_queue_has(GEN_CHK, refillq));
+       if (unlikely(++nta == refillq->desc_count)) {
+               nta = 0;
+-              change_bit(__IDPF_Q_GEN_CHK, refillq->flags);
++              idpf_queue_change(GEN_CHK, refillq);
+       }
+-      refillq->next_to_alloc = nta;
++
++      refillq->next_to_use = nta;
+ }
+ /**
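/*
 * The refill-queue hunk above relies on a software "generation" bit:
 * the producer stamps every entry with its current GEN value and flips
 * GEN each time it wraps, so the consumer can tell a freshly written
 * entry from a stale one left over from the previous lap without any
 * extra head/tail exchange.  A minimal, self-contained sketch of the
 * idea; masks and names here are illustrative, not the idpf ones.
 */
#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/compiler.h>
#include <linux/types.h>

#define DEMO_RING_GEN_M		BIT(15)
#define DEMO_RING_BUFID_M	GENMASK(14, 0)

struct demo_refillq {
	u16	*ring;
	u32	desc_count;
	u32	next_to_use;	/* producer index */
	u32	next_to_clean;	/* consumer index */
	bool	prod_gen;	/* producer's current generation */
	bool	cons_gen;	/* generation the consumer expects */
};

static void demo_refillq_post(struct demo_refillq *q, u16 buf_id)
{
	u32 nta = q->next_to_use;

	q->ring[nta] = FIELD_PREP(DEMO_RING_BUFID_M, buf_id) |
		       FIELD_PREP(DEMO_RING_GEN_M, q->prod_gen);

	if (unlikely(++nta == q->desc_count)) {
		nta = 0;
		q->prod_gen = !q->prod_gen;	/* new lap, new generation */
	}

	q->next_to_use = nta;
}

/* returns true and fills @buf_id only if a fresh entry is available */
static bool demo_refillq_get(struct demo_refillq *q, u16 *buf_id)
{
	u16 entry = q->ring[q->next_to_clean];

	if (FIELD_GET(DEMO_RING_GEN_M, entry) != q->cons_gen)
		return false;		/* stale entry from the previous lap */

	*buf_id = FIELD_GET(DEMO_RING_BUFID_M, entry);

	if (unlikely(++q->next_to_clean == q->desc_count)) {
		q->next_to_clean = 0;
		q->cons_gen = !q->cons_gen;
	}

	return true;
}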
+@@ -524,7 +608,7 @@ static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id)
+  *
+  * Returns false if buffer could not be allocated, true otherwise.
+  */
+-static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id)
++static bool idpf_rx_post_buf_desc(struct idpf_buf_queue *bufq, u16 buf_id)
+ {
+       struct virtchnl2_splitq_rx_buf_desc *splitq_rx_desc = NULL;
+       u16 nta = bufq->next_to_alloc;
+@@ -534,11 +618,10 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id)
+       splitq_rx_desc = &bufq->split_buf[nta];
+       buf = &bufq->rx_buf.buf[buf_id];
+-      if (bufq->rx_hsplit_en) {
++      if (idpf_queue_has(HSPLIT_EN, bufq))
+               splitq_rx_desc->hdr_addr =
+                       cpu_to_le64(bufq->rx_buf.hdr_buf_pa +
+                                   (u32)buf_id * IDPF_HDR_BUF_SIZE);
+-      }
+       addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size);
+       if (unlikely(addr == DMA_MAPPING_ERROR))
+@@ -562,7 +645,8 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id)
+  *
+  * Returns true if @working_set bufs were posted successfully, false otherwise.
+  */
+-static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set)
++static bool idpf_rx_post_init_bufs(struct idpf_buf_queue *bufq,
++                                 u16 working_set)
+ {
+       int i;
+@@ -571,26 +655,28 @@ static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set)
+                       return false;
+       }
+-      idpf_rx_buf_hw_update(bufq,
+-                            bufq->next_to_alloc & ~(bufq->rx_buf_stride - 1));
++      idpf_rx_buf_hw_update(bufq, ALIGN_DOWN(bufq->next_to_alloc,
++                                             IDPF_RX_BUF_STRIDE));
+       return true;
+ }
+ /**
+  * idpf_rx_create_page_pool - Create a page pool
+- * @rxbufq: RX queue to create page pool for
++ * @napi: NAPI of the associated queue vector
++ * @count: queue descriptor count
+  *
+  * Returns &page_pool on success, casted -errno on failure
+  */
+-static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq)
++static struct page_pool *idpf_rx_create_page_pool(struct napi_struct *napi,
++                                                u32 count)
+ {
+       struct page_pool_params pp = {
+               .flags          = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+               .order          = 0,
+-              .pool_size      = rxbufq->desc_count,
++              .pool_size      = count,
+               .nid            = NUMA_NO_NODE,
+-              .dev            = rxbufq->vport->netdev->dev.parent,
++              .dev            = napi->dev->dev.parent,
+               .max_len        = PAGE_SIZE,
+               .dma_dir        = DMA_FROM_DEVICE,
+               .offset         = 0,
+@@ -599,15 +685,58 @@ static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq)
+       return page_pool_create(&pp);
+ }
++/**
++ * idpf_rx_buf_alloc_singleq - Allocate memory for all buffer resources
++ * @rxq: queue for which the buffers are allocated
++ *
++ * Return: 0 on success, -ENOMEM on failure.
++ */
++static int idpf_rx_buf_alloc_singleq(struct idpf_rx_queue *rxq)
++{
++      rxq->rx_buf = kcalloc(rxq->desc_count, sizeof(*rxq->rx_buf),
++                            GFP_KERNEL);
++      if (!rxq->rx_buf)
++              return -ENOMEM;
++
++      if (idpf_rx_singleq_buf_hw_alloc_all(rxq, rxq->desc_count - 1))
++              goto err;
++
++      return 0;
++
++err:
++      idpf_rx_buf_rel_all(rxq);
++
++      return -ENOMEM;
++}
++
++/**
++ * idpf_rx_bufs_init_singleq - Initialize page pool and allocate Rx bufs
++ * @rxq: buffer queue to create page pool for
++ *
++ * Return: 0 on success, -errno on failure.
++ */
++static int idpf_rx_bufs_init_singleq(struct idpf_rx_queue *rxq)
++{
++      struct page_pool *pool;
++
++      pool = idpf_rx_create_page_pool(&rxq->q_vector->napi, rxq->desc_count);
++      if (IS_ERR(pool))
++              return PTR_ERR(pool);
++
++      rxq->pp = pool;
++
++      return idpf_rx_buf_alloc_singleq(rxq);
++}
++
+ /**
+  * idpf_rx_buf_alloc_all - Allocate memory for all buffer resources
+- * @rxbufq: queue for which the buffers are allocated; equivalent to
+- * rxq when operating in singleq mode
++ * @rxbufq: queue for which the buffers are allocated
+  *
+  * Returns 0 on success, negative on failure
+  */
+-static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq)
++static int idpf_rx_buf_alloc_all(struct idpf_buf_queue *rxbufq)
+ {
++      struct device *dev = rxbufq->q_vector->vport->netdev->dev.parent;
+       int err = 0;
+       /* Allocate book keeping buffers */
+@@ -618,48 +747,41 @@ static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq)
+               goto rx_buf_alloc_all_out;
+       }
+-      if (rxbufq->rx_hsplit_en) {
++      if (idpf_queue_has(HSPLIT_EN, rxbufq)) {
+               err = idpf_rx_hdr_buf_alloc_all(rxbufq);
+               if (err)
+                       goto rx_buf_alloc_all_out;
+       }
+       /* Allocate buffers to be given to HW.   */
+-      if (idpf_is_queue_model_split(rxbufq->vport->rxq_model)) {
+-              int working_set = IDPF_RX_BUFQ_WORKING_SET(rxbufq);
+-
+-              if (!idpf_rx_post_init_bufs(rxbufq, working_set))
+-                      err = -ENOMEM;
+-      } else {
+-              if (idpf_rx_singleq_buf_hw_alloc_all(rxbufq,
+-                                                   rxbufq->desc_count - 1))
+-                      err = -ENOMEM;
+-      }
++      if (!idpf_rx_post_init_bufs(rxbufq, IDPF_RX_BUFQ_WORKING_SET(rxbufq)))
++              err = -ENOMEM;
+ rx_buf_alloc_all_out:
+       if (err)
+-              idpf_rx_buf_rel_all(rxbufq);
++              idpf_rx_buf_rel_bufq(rxbufq, dev);
+       return err;
+ }
+ /**
+  * idpf_rx_bufs_init - Initialize page pool, allocate rx bufs, and post to HW
+- * @rxbufq: RX queue to create page pool for
++ * @bufq: buffer queue to create page pool for
+  *
+  * Returns 0 on success, negative on failure
+  */
+-static int idpf_rx_bufs_init(struct idpf_queue *rxbufq)
++static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq)
+ {
+       struct page_pool *pool;
+-      pool = idpf_rx_create_page_pool(rxbufq);
++      pool = idpf_rx_create_page_pool(&bufq->q_vector->napi,
++                                      bufq->desc_count);
+       if (IS_ERR(pool))
+               return PTR_ERR(pool);
+-      rxbufq->pp = pool;
++      bufq->pp = pool;
+-      return idpf_rx_buf_alloc_all(rxbufq);
++      return idpf_rx_buf_alloc_all(bufq);
+ }
+ /**
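/*
 * The buffer queues above draw their pages from a page_pool created with
 * PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, so DMA mapping and recycling
 * are handled by the pool.  A minimal sketch of that lifecycle, using
 * only the parameters visible in the hunk above; the header path is the
 * one used by recent kernels, older trees had <net/page_pool.h>.
 */
#include <net/page_pool/helpers.h>

static struct page_pool *demo_pool_create(struct device *dev, u32 ring_len)
{
	struct page_pool_params pp = {
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.order		= 0,
		.pool_size	= ring_len,
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
		.max_len	= PAGE_SIZE,
		.dma_dir	= DMA_FROM_DEVICE,
		.offset		= 0,
	};

	return page_pool_create(&pp);		/* ERR_PTR() on failure */
}

static void demo_pool_cycle_one(struct page_pool *pool)
{
	struct page *page;
	dma_addr_t dma;

	page = page_pool_dev_alloc_pages(pool);	/* comes back DMA-mapped */
	if (!page)
		return;

	dma = page_pool_get_dma_addr(page);	/* address for the Rx descriptor */
	(void)dma;

	/* return it to the pool once HW and the stack are done with it */
	page_pool_put_full_page(pool, page, false);
}

static void demo_pool_teardown(struct page_pool *pool)
{
	page_pool_destroy(pool);		/* after all pages are back */
}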
+@@ -671,7 +793,6 @@ static int idpf_rx_bufs_init(struct idpf_queue *rxbufq)
+ int idpf_rx_bufs_init_all(struct idpf_vport *vport)
+ {
+       struct idpf_rxq_group *rx_qgrp;
+-      struct idpf_queue *q;
+       int i, j, err;
+       for (i = 0; i < vport->num_rxq_grp; i++) {
+@@ -682,8 +803,10 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport)
+                       int num_rxq = rx_qgrp->singleq.num_rxq;
+                       for (j = 0; j < num_rxq; j++) {
++                              struct idpf_rx_queue *q;
++
+                               q = rx_qgrp->singleq.rxqs[j];
+-                              err = idpf_rx_bufs_init(q);
++                              err = idpf_rx_bufs_init_singleq(q);
+                               if (err)
+                                       return err;
+                       }
+@@ -693,6 +816,8 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport)
+               /* Otherwise, allocate bufs for the buffer queues */
+               for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
++                      struct idpf_buf_queue *q;
++
+                       q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+                       err = idpf_rx_bufs_init(q);
+                       if (err)
+@@ -705,22 +830,17 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport)
+ /**
+  * idpf_rx_desc_alloc - Allocate queue Rx resources
++ * @vport: vport to allocate resources for
+  * @rxq: Rx queue for which the resources are setup
+- * @bufq: buffer or completion queue
+- * @q_model: single or split queue model
+  *
+  * Returns 0 on success, negative on failure
+  */
+-static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model)
++static int idpf_rx_desc_alloc(const struct idpf_vport *vport,
++                            struct idpf_rx_queue *rxq)
+ {
+-      struct device *dev = rxq->dev;
++      struct device *dev = &vport->adapter->pdev->dev;
+-      if (bufq)
+-              rxq->size = rxq->desc_count *
+-                      sizeof(struct virtchnl2_splitq_rx_buf_desc);
+-      else
+-              rxq->size = rxq->desc_count *
+-                      sizeof(union virtchnl2_rx_desc);
++      rxq->size = rxq->desc_count * sizeof(union virtchnl2_rx_desc);
+       /* Allocate descriptors and also round up to nearest 4K */
+       rxq->size = ALIGN(rxq->size, 4096);
+@@ -735,7 +855,35 @@ static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model)
+       rxq->next_to_alloc = 0;
+       rxq->next_to_clean = 0;
+       rxq->next_to_use = 0;
+-      set_bit(__IDPF_Q_GEN_CHK, rxq->flags);
++      idpf_queue_set(GEN_CHK, rxq);
++
++      return 0;
++}
++
++/**
++ * idpf_bufq_desc_alloc - Allocate buffer queue descriptor ring
++ * @vport: vport to allocate resources for
++ * @bufq: buffer queue for which the resources are set up
++ *
++ * Return: 0 on success, -ENOMEM on failure.
++ */
++static int idpf_bufq_desc_alloc(const struct idpf_vport *vport,
++                              struct idpf_buf_queue *bufq)
++{
++      struct device *dev = &vport->adapter->pdev->dev;
++
++      bufq->size = array_size(bufq->desc_count, sizeof(*bufq->split_buf));
++
++      bufq->split_buf = dma_alloc_coherent(dev, bufq->size, &bufq->dma,
++                                           GFP_KERNEL);
++      if (!bufq->split_buf)
++              return -ENOMEM;
++
++      bufq->next_to_alloc = 0;
++      bufq->next_to_clean = 0;
++      bufq->next_to_use = 0;
++
++      idpf_queue_set(GEN_CHK, bufq);
+       return 0;
+ }
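/*
 * idpf_queue_set()/_has()/_clear()/_change()/_assign() used throughout
 * these hunks replace open-coded set_bit(__IDPF_Q_*, q->flags) calls.
 * One plausible shape for such helpers, a token-pasting wrapper around
 * the non-atomic bitmap ops, is sketched below; the exact definitions
 * belong to idpf_txrx.h and may differ in detail.
 */
#include <linux/bitops.h>
#include <linux/types.h>

enum demo_queue_flags {
	__DEMO_Q_GEN_CHK,
	__DEMO_Q_RFL_GEN_CHK,
	__DEMO_Q_FLOW_SCH_EN,
	__DEMO_Q_HSPLIT_EN,
	__DEMO_Q_FLAGS_NBITS,
};

struct demo_queue {
	DECLARE_BITMAP(flags, __DEMO_Q_FLAGS_NBITS);
	/* ... */
};

#define demo_queue_set(f, q)	__set_bit(__DEMO_Q_##f, (q)->flags)
#define demo_queue_clear(f, q)	__clear_bit(__DEMO_Q_##f, (q)->flags)
#define demo_queue_change(f, q)	__change_bit(__DEMO_Q_##f, (q)->flags)
#define demo_queue_has(f, q)	test_bit(__DEMO_Q_##f, (q)->flags)
#define demo_queue_assign(f, q, v) __assign_bit(__DEMO_Q_##f, (q)->flags, v)

/* usage mirrors the hunks above:
 *	demo_queue_set(GEN_CHK, q);
 *	if (demo_queue_has(HSPLIT_EN, q))
 *		...;
 *	demo_queue_assign(FLOW_SCH_EN, q, flow_sch_en);
 */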
+@@ -748,9 +896,7 @@ static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model)
+  */
+ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport)
+ {
+-      struct device *dev = &vport->adapter->pdev->dev;
+       struct idpf_rxq_group *rx_qgrp;
+-      struct idpf_queue *q;
+       int i, j, err;
+       u16 num_rxq;
+@@ -762,13 +908,17 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport)
+                       num_rxq = rx_qgrp->singleq.num_rxq;
+               for (j = 0; j < num_rxq; j++) {
++                      struct idpf_rx_queue *q;
++
+                       if (idpf_is_queue_model_split(vport->rxq_model))
+                               q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+                       else
+                               q = rx_qgrp->singleq.rxqs[j];
+-                      err = idpf_rx_desc_alloc(q, false, vport->rxq_model);
++
++                      err = idpf_rx_desc_alloc(vport, q);
+                       if (err) {
+-                              dev_err(dev, "Memory allocation for Rx Queue %u failed\n",
++                              pci_err(vport->adapter->pdev,
++                                      "Memory allocation for Rx Queue %u failed\n",
+                                       i);
+                               goto err_out;
+                       }
+@@ -778,10 +928,14 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport)
+                       continue;
+               for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
++                      struct idpf_buf_queue *q;
++
+                       q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+-                      err = idpf_rx_desc_alloc(q, true, vport->rxq_model);
++
++                      err = idpf_bufq_desc_alloc(vport, q);
+                       if (err) {
+-                              dev_err(dev, "Memory allocation for Rx Buffer Queue %u failed\n",
++                              pci_err(vport->adapter->pdev,
++                                      "Memory allocation for Rx Buffer Queue %u failed\n",
+                                       i);
+                               goto err_out;
+                       }
+@@ -802,11 +956,16 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport)
+  */
+ static void idpf_txq_group_rel(struct idpf_vport *vport)
+ {
++      bool split, flow_sch_en;
+       int i, j;
+       if (!vport->txq_grps)
+               return;
++      split = idpf_is_queue_model_split(vport->txq_model);
++      flow_sch_en = !idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS,
++                                     VIRTCHNL2_CAP_SPLITQ_QSCHED);
++
+       for (i = 0; i < vport->num_txq_grp; i++) {
+               struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+@@ -814,8 +973,15 @@ static void idpf_txq_group_rel(struct idpf_vport *vport)
+                       kfree(txq_grp->txqs[j]);
+                       txq_grp->txqs[j] = NULL;
+               }
++
++              if (!split)
++                      continue;
++
+               kfree(txq_grp->complq);
+               txq_grp->complq = NULL;
++
++              if (flow_sch_en)
++                      kfree(txq_grp->stashes);
+       }
+       kfree(vport->txq_grps);
+       vport->txq_grps = NULL;
+@@ -919,7 +1085,7 @@ static int idpf_vport_init_fast_path_txqs(struct idpf_vport *vport)
+ {
+       int i, j, k = 0;
+-      vport->txqs = kcalloc(vport->num_txq, sizeof(struct idpf_queue *),
++      vport->txqs = kcalloc(vport->num_txq, sizeof(*vport->txqs),
+                             GFP_KERNEL);
+       if (!vport->txqs)
+@@ -1137,7 +1303,8 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport,
+  * @q: rx queue for which descids are set
+  *
+  */
+-static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q)
++static void idpf_rxq_set_descids(const struct idpf_vport *vport,
++                               struct idpf_rx_queue *q)
+ {
+       if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+               q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+@@ -1158,20 +1325,22 @@ static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q)
+  */
+ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq)
+ {
+-      bool flow_sch_en;
+-      int err, i;
++      bool split, flow_sch_en;
++      int i;
+       vport->txq_grps = kcalloc(vport->num_txq_grp,
+                                 sizeof(*vport->txq_grps), GFP_KERNEL);
+       if (!vport->txq_grps)
+               return -ENOMEM;
++      split = idpf_is_queue_model_split(vport->txq_model);
+       flow_sch_en = !idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS,
+                                      VIRTCHNL2_CAP_SPLITQ_QSCHED);
+       for (i = 0; i < vport->num_txq_grp; i++) {
+               struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+               struct idpf_adapter *adapter = vport->adapter;
++              struct idpf_txq_stash *stashes;
+               int j;
+               tx_qgrp->vport = vport;
+@@ -1180,45 +1349,62 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq)
+               for (j = 0; j < tx_qgrp->num_txq; j++) {
+                       tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]),
+                                                  GFP_KERNEL);
+-                      if (!tx_qgrp->txqs[j]) {
+-                              err = -ENOMEM;
++                      if (!tx_qgrp->txqs[j])
+                               goto err_alloc;
+-                      }
++              }
++
++              if (split && flow_sch_en) {
++                      stashes = kcalloc(num_txq, sizeof(*stashes),
++                                        GFP_KERNEL);
++                      if (!stashes)
++                              goto err_alloc;
++
++                      tx_qgrp->stashes = stashes;
+               }
+               for (j = 0; j < tx_qgrp->num_txq; j++) {
+-                      struct idpf_queue *q = tx_qgrp->txqs[j];
++                      struct idpf_tx_queue *q = tx_qgrp->txqs[j];
+                       q->dev = &adapter->pdev->dev;
+                       q->desc_count = vport->txq_desc_count;
+                       q->tx_max_bufs = idpf_get_max_tx_bufs(adapter);
+                       q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter);
+-                      q->vport = vport;
++                      q->netdev = vport->netdev;
+                       q->txq_grp = tx_qgrp;
+-                      hash_init(q->sched_buf_hash);
+-                      if (flow_sch_en)
+-                              set_bit(__IDPF_Q_FLOW_SCH_EN, q->flags);
++                      if (!split) {
++                              q->clean_budget = vport->compln_clean_budget;
++                              idpf_queue_assign(CRC_EN, q,
++                                                vport->crc_enable);
++                      }
++
++                      if (!flow_sch_en)
++                              continue;
++
++                      if (split) {
++                              q->stash = &stashes[j];
++                              hash_init(q->stash->sched_buf_hash);
++                      }
++
++                      idpf_queue_set(FLOW_SCH_EN, q);
+               }
+-              if (!idpf_is_queue_model_split(vport->txq_model))
++              if (!split)
+                       continue;
+               tx_qgrp->complq = kcalloc(IDPF_COMPLQ_PER_GROUP,
+                                         sizeof(*tx_qgrp->complq),
+                                         GFP_KERNEL);
+-              if (!tx_qgrp->complq) {
+-                      err = -ENOMEM;
++              if (!tx_qgrp->complq)
+                       goto err_alloc;
+-              }
+-              tx_qgrp->complq->dev = &adapter->pdev->dev;
+               tx_qgrp->complq->desc_count = vport->complq_desc_count;
+-              tx_qgrp->complq->vport = vport;
+               tx_qgrp->complq->txq_grp = tx_qgrp;
++              tx_qgrp->complq->netdev = vport->netdev;
++              tx_qgrp->complq->clean_budget = vport->compln_clean_budget;
+               if (flow_sch_en)
+-                      __set_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags);
++                      idpf_queue_set(FLOW_SCH_EN, tx_qgrp->complq);
+       }
+       return 0;
+@@ -1226,7 +1412,7 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq)
+ err_alloc:
+       idpf_txq_group_rel(vport);
+-      return err;
++      return -ENOMEM;
+ }
+ /**
+@@ -1238,8 +1424,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq)
+  */
+ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq)
+ {
+-      struct idpf_adapter *adapter = vport->adapter;
+-      struct idpf_queue *q;
+       int i, k, err = 0;
+       bool hs;
+@@ -1292,21 +1476,15 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq)
+                       struct idpf_bufq_set *bufq_set =
+                               &rx_qgrp->splitq.bufq_sets[j];
+                       int swq_size = sizeof(struct idpf_sw_queue);
++                      struct idpf_buf_queue *q;
+                       q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+-                      q->dev = &adapter->pdev->dev;
+                       q->desc_count = vport->bufq_desc_count[j];
+-                      q->vport = vport;
+-                      q->rxq_grp = rx_qgrp;
+-                      q->idx = j;
+                       q->rx_buf_size = vport->bufq_size[j];
+                       q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK;
+-                      q->rx_buf_stride = IDPF_RX_BUF_STRIDE;
+-                      if (hs) {
+-                              q->rx_hsplit_en = true;
+-                              q->rx_hbuf_size = IDPF_HDR_BUF_SIZE;
+-                      }
++                      idpf_queue_assign(HSPLIT_EN, q, hs);
++                      q->rx_hbuf_size = hs ? IDPF_HDR_BUF_SIZE : 0;
+                       bufq_set->num_refillqs = num_rxq;
+                       bufq_set->refillqs = kcalloc(num_rxq, swq_size,
+@@ -1319,13 +1497,12 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq)
+                               struct idpf_sw_queue *refillq =
+                                       &bufq_set->refillqs[k];
+-                              refillq->dev = &vport->adapter->pdev->dev;
+                               refillq->desc_count =
+                                       vport->bufq_desc_count[j];
+-                              set_bit(__IDPF_Q_GEN_CHK, refillq->flags);
+-                              set_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags);
++                              idpf_queue_set(GEN_CHK, refillq);
++                              idpf_queue_set(RFL_GEN_CHK, refillq);
+                               refillq->ring = kcalloc(refillq->desc_count,
+-                                                      sizeof(u16),
++                                                      sizeof(*refillq->ring),
+                                                       GFP_KERNEL);
+                               if (!refillq->ring) {
+                                       err = -ENOMEM;
+@@ -1336,27 +1513,27 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq)
+ skip_splitq_rx_init:
+               for (j = 0; j < num_rxq; j++) {
++                      struct idpf_rx_queue *q;
++
+                       if (!idpf_is_queue_model_split(vport->rxq_model)) {
+                               q = rx_qgrp->singleq.rxqs[j];
+                               goto setup_rxq;
+                       }
+                       q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+-                      rx_qgrp->splitq.rxq_sets[j]->refillq0 =
++                      rx_qgrp->splitq.rxq_sets[j]->refillq[0] =
+                             &rx_qgrp->splitq.bufq_sets[0].refillqs[j];
+                       if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP)
+-                              rx_qgrp->splitq.rxq_sets[j]->refillq1 =
++                              rx_qgrp->splitq.rxq_sets[j]->refillq[1] =
+                                     &rx_qgrp->splitq.bufq_sets[1].refillqs[j];
+-                      if (hs) {
+-                              q->rx_hsplit_en = true;
+-                              q->rx_hbuf_size = IDPF_HDR_BUF_SIZE;
+-                      }
++                      idpf_queue_assign(HSPLIT_EN, q, hs);
++                      q->rx_hbuf_size = hs ? IDPF_HDR_BUF_SIZE : 0;
+ setup_rxq:
+-                      q->dev = &adapter->pdev->dev;
+                       q->desc_count = vport->rxq_desc_count;
+-                      q->vport = vport;
+-                      q->rxq_grp = rx_qgrp;
++                      q->rx_ptype_lkup = vport->rx_ptype_lkup;
++                      q->netdev = vport->netdev;
++                      q->bufq_sets = rx_qgrp->splitq.bufq_sets;
+                       q->idx = (i * num_rxq) + j;
+                       /* In splitq mode, RXQ buffer size should be
+                        * set to that of the first buffer queue
+@@ -1445,12 +1622,13 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport)
+  * idpf_tx_handle_sw_marker - Handle queue marker packet
+  * @tx_q: tx queue to handle software marker
+  */
+-static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q)
++static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q)
+ {
+-      struct idpf_vport *vport = tx_q->vport;
++      struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev);
++      struct idpf_vport *vport = priv->vport;
+       int i;
+-      clear_bit(__IDPF_Q_SW_MARKER, tx_q->flags);
++      idpf_queue_clear(SW_MARKER, tx_q);
+       /* Hardware must write marker packets to all queues associated with
+        * completion queues. So check if all queues received marker packets
+        */
+@@ -1458,7 +1636,7 @@ static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q)
+               /* If we're still waiting on any other TXQ marker completions,
+                * just return now since we cannot wake up the marker_wq yet.
+                */
+-              if (test_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags))
++              if (idpf_queue_has(SW_MARKER, vport->txqs[i]))
+                       return;
+       /* Drain complete */
+@@ -1474,7 +1652,7 @@ static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q)
+  * @cleaned: pointer to stats struct to track cleaned packets/bytes
+  * @napi_budget: Used to determine if we are in netpoll
+  */
+-static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q,
++static void idpf_tx_splitq_clean_hdr(struct idpf_tx_queue *tx_q,
+                                    struct idpf_tx_buf *tx_buf,
+                                    struct idpf_cleaned_stats *cleaned,
+                                    int napi_budget)
+@@ -1505,7 +1683,8 @@ static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q,
+  * @cleaned: pointer to stats struct to track cleaned packets/bytes
+  * @budget: Used to determine if we are in netpoll
+  */
+-static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag,
++static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq,
++                                     u16 compl_tag,
+                                      struct idpf_cleaned_stats *cleaned,
+                                      int budget)
+ {
+@@ -1513,7 +1692,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag,
+       struct hlist_node *tmp_buf;
+       /* Buffer completion */
+-      hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf,
++      hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf,
+                                   hlist, compl_tag) {
+               if (unlikely(stash->buf.compl_tag != (int)compl_tag))
+                       continue;
+@@ -1530,7 +1709,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag,
+               }
+               /* Push shadow buf back onto stack */
+-              idpf_buf_lifo_push(&txq->buf_stack, stash);
++              idpf_buf_lifo_push(&txq->stash->buf_stack, stash);
+               hash_del(&stash->hlist);
+       }
+@@ -1542,7 +1721,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag,
+  * @txq: Tx queue to clean
+  * @tx_buf: buffer to store
+  */
+-static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq,
++static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq,
+                                      struct idpf_tx_buf *tx_buf)
+ {
+       struct idpf_tx_stash *stash;
+@@ -1551,10 +1730,10 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq,
+                    !dma_unmap_len(tx_buf, len)))
+               return 0;
+-      stash = idpf_buf_lifo_pop(&txq->buf_stack);
++      stash = idpf_buf_lifo_pop(&txq->stash->buf_stack);
+       if (unlikely(!stash)) {
+               net_err_ratelimited("%s: No out-of-order TX buffers left!\n",
+-                                  txq->vport->netdev->name);
++                                  netdev_name(txq->netdev));
+               return -ENOMEM;
+       }
+@@ -1568,7 +1747,8 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq,
+       stash->buf.compl_tag = tx_buf->compl_tag;
+       /* Add buffer to buf_hash table to be freed later */
+-      hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag);
++      hash_add(txq->stash->sched_buf_hash, &stash->hlist,
++               stash->buf.compl_tag);
+       memset(tx_buf, 0, sizeof(struct idpf_tx_buf));
+@@ -1607,7 +1787,7 @@ do {                                                             \
+  * and the buffers will be cleaned separately. The stats are not updated from
+  * this function when using flow-based scheduling.
+  */
+-static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end,
++static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end,
+                                int napi_budget,
+                                struct idpf_cleaned_stats *cleaned,
+                                bool descs_only)
+@@ -1703,7 +1883,7 @@ do {                                                     \
+  * stashed. Returns the byte/segment count for the cleaned packet associated
+  * this completion tag.
+  */
+-static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag,
++static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag,
+                                  struct idpf_cleaned_stats *cleaned,
+                                  int budget)
+ {
+@@ -1772,14 +1952,14 @@ static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag,
+  *
+  * Returns bytes/packets cleaned
+  */
+-static void idpf_tx_handle_rs_completion(struct idpf_queue *txq,
++static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq,
+                                        struct idpf_splitq_tx_compl_desc *desc,
+                                        struct idpf_cleaned_stats *cleaned,
+                                        int budget)
+ {
+       u16 compl_tag;
+-      if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) {
++      if (!idpf_queue_has(FLOW_SCH_EN, txq)) {
+               u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head);
+               return idpf_tx_splitq_clean(txq, head, budget, cleaned, false);
+@@ -1802,24 +1982,23 @@ static void idpf_tx_handle_rs_completion(struct idpf_queue *txq,
+  *
+  * Returns true if there's any budget left (e.g. the clean is finished)
+  */
+-static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
++static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget,
+                                int *cleaned)
+ {
+       struct idpf_splitq_tx_compl_desc *tx_desc;
+-      struct idpf_vport *vport = complq->vport;
+       s16 ntc = complq->next_to_clean;
+       struct idpf_netdev_priv *np;
+       unsigned int complq_budget;
+       bool complq_ok = true;
+       int i;
+-      complq_budget = vport->compln_clean_budget;
++      complq_budget = complq->clean_budget;
+       tx_desc = &complq->comp[ntc];
+       ntc -= complq->desc_count;
+       do {
+               struct idpf_cleaned_stats cleaned_stats = { };
+-              struct idpf_queue *tx_q;
++              struct idpf_tx_queue *tx_q;
+               int rel_tx_qid;
+               u16 hw_head;
+               u8 ctype;       /* completion type */
+@@ -1828,7 +2007,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+               /* if the descriptor isn't done, no work yet to do */
+               gen = le16_get_bits(tx_desc->qid_comptype_gen,
+                                   IDPF_TXD_COMPLQ_GEN_M);
+-              if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen)
++              if (idpf_queue_has(GEN_CHK, complq) != gen)
+                       break;
+               /* Find necessary info of TX queue to clean buffers */
+@@ -1836,8 +2015,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+                                          IDPF_TXD_COMPLQ_QID_M);
+               if (rel_tx_qid >= complq->txq_grp->num_txq ||
+                   !complq->txq_grp->txqs[rel_tx_qid]) {
+-                      dev_err(&complq->vport->adapter->pdev->dev,
+-                              "TxQ not found\n");
++                      netdev_err(complq->netdev, "TxQ not found\n");
+                       goto fetch_next_desc;
+               }
+               tx_q = complq->txq_grp->txqs[rel_tx_qid];
+@@ -1860,15 +2038,14 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+                       idpf_tx_handle_sw_marker(tx_q);
+                       break;
+               default:
+-                      dev_err(&tx_q->vport->adapter->pdev->dev,
+-                              "Unknown TX completion type: %d\n",
+-                              ctype);
++                      netdev_err(tx_q->netdev,
++                                 "Unknown TX completion type: %d\n", ctype);
+                       goto fetch_next_desc;
+               }
+               u64_stats_update_begin(&tx_q->stats_sync);
+-              u64_stats_add(&tx_q->q_stats.tx.packets, cleaned_stats.packets);
+-              u64_stats_add(&tx_q->q_stats.tx.bytes, cleaned_stats.bytes);
++              u64_stats_add(&tx_q->q_stats.packets, cleaned_stats.packets);
++              u64_stats_add(&tx_q->q_stats.bytes, cleaned_stats.bytes);
+               tx_q->cleaned_pkts += cleaned_stats.packets;
+               tx_q->cleaned_bytes += cleaned_stats.bytes;
+               complq->num_completions++;
+@@ -1880,7 +2057,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+               if (unlikely(!ntc)) {
+                       ntc -= complq->desc_count;
+                       tx_desc = &complq->comp[0];
+-                      change_bit(__IDPF_Q_GEN_CHK, complq->flags);
++                      idpf_queue_change(GEN_CHK, complq);
+               }
+               prefetch(tx_desc);
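The completion-queue clean loop above tracks descriptor ownership with a generation bit rather than a head pointer: hardware writes the current generation into each completion descriptor, software compares it against its GEN_CHK flag and flips that flag every time next_to_clean wraps. A minimal standalone sketch of the same pattern, using invented demo_* types rather than the driver's structures:

/*
 * Generation-bit ownership check, sketched outside the driver. Only the
 * pattern mirrors the hunk above (compare the descriptor's gen bit with
 * the queue's expected value, flip the expectation on ring wrap); the
 * demo_* names and the bit position are made up for illustration.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct demo_compl_desc {
	uint16_t qid_comptype_gen;		/* bit 15: generation */
};

struct demo_complq {
	struct demo_compl_desc *ring;
	uint16_t desc_count;
	uint16_t next_to_clean;
	bool gen_chk;				/* software's expected gen */
};

/* Return the next descriptor hardware has written back, or NULL. */
static struct demo_compl_desc *demo_next_completion(struct demo_complq *q)
{
	struct demo_compl_desc *desc = &q->ring[q->next_to_clean];
	bool gen = desc->qid_comptype_gen >> 15;

	if (gen != q->gen_chk)			/* slot not written yet */
		return NULL;

	if (++q->next_to_clean == q->desc_count) {
		q->next_to_clean = 0;
		q->gen_chk = !q->gen_chk;	/* wrap: flip expectation */
	}

	return desc;
}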
+@@ -1896,9 +2073,9 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+                    IDPF_TX_COMPLQ_OVERFLOW_THRESH(complq)))
+               complq_ok = false;
+-      np = netdev_priv(complq->vport->netdev);
++      np = netdev_priv(complq->netdev);
+       for (i = 0; i < complq->txq_grp->num_txq; ++i) {
+-              struct idpf_queue *tx_q = complq->txq_grp->txqs[i];
++              struct idpf_tx_queue *tx_q = complq->txq_grp->txqs[i];
+               struct netdev_queue *nq;
+               bool dont_wake;
+@@ -1909,11 +2086,11 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+               *cleaned += tx_q->cleaned_pkts;
+               /* Update BQL */
+-              nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
++              nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+               dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) ||
+                           np->state != __IDPF_VPORT_UP ||
+-                          !netif_carrier_ok(tx_q->vport->netdev);
++                          !netif_carrier_ok(tx_q->netdev);
+               /* Check if the TXQ needs to and can be restarted */
+               __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes,
+                                          IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
+@@ -1976,7 +2153,7 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
+  *
+  * Returns 0 if stop is not needed
+  */
+-int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size)
++int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size)
+ {
+       struct netdev_queue *nq;
+@@ -1984,10 +2161,10 @@ int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size)
+               return 0;
+       u64_stats_update_begin(&tx_q->stats_sync);
+-      u64_stats_inc(&tx_q->q_stats.tx.q_busy);
++      u64_stats_inc(&tx_q->q_stats.q_busy);
+       u64_stats_update_end(&tx_q->stats_sync);
+-      nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
++      nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+       return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size);
+ }
+@@ -1999,7 +2176,7 @@ int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size)
+  *
+  * Returns 0 if stop is not needed
+  */
+-static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q,
++static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q,
+                                    unsigned int descs_needed)
+ {
+       if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
+@@ -2023,9 +2200,9 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q,
+ splitq_stop:
+       u64_stats_update_begin(&tx_q->stats_sync);
+-      u64_stats_inc(&tx_q->q_stats.tx.q_busy);
++      u64_stats_inc(&tx_q->q_stats.q_busy);
+       u64_stats_update_end(&tx_q->stats_sync);
+-      netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx);
++      netif_stop_subqueue(tx_q->netdev, tx_q->idx);
+       return -EBUSY;
+ }
+@@ -2040,12 +2217,12 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q,
+  * to do a register write to update our queue status. We know this can only
+  * mean tail here as HW should be owning head for TX.
+  */
+-void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
++void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
+                          bool xmit_more)
+ {
+       struct netdev_queue *nq;
+-      nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
++      nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+       tx_q->next_to_use = val;
+       idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED);
+@@ -2069,7 +2246,7 @@ void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
+  *
+  * Returns number of data descriptors needed for this skb.
+  */
+-unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
++unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq,
+                                        struct sk_buff *skb)
+ {
+       const struct skb_shared_info *shinfo;
+@@ -2102,7 +2279,7 @@ unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
+               count = idpf_size_to_txd_count(skb->len);
+               u64_stats_update_begin(&txq->stats_sync);
+-              u64_stats_inc(&txq->q_stats.tx.linearize);
++              u64_stats_inc(&txq->q_stats.linearize);
+               u64_stats_update_end(&txq->stats_sync);
+       }
+@@ -2116,11 +2293,11 @@ unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
+  * @first: original first buffer info buffer for packet
+  * @idx: starting point on ring to unwind
+  */
+-void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
++void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb,
+                          struct idpf_tx_buf *first, u16 idx)
+ {
+       u64_stats_update_begin(&txq->stats_sync);
+-      u64_stats_inc(&txq->q_stats.tx.dma_map_errs);
++      u64_stats_inc(&txq->q_stats.dma_map_errs);
+       u64_stats_update_end(&txq->stats_sync);
+       /* clear dma mappings for failed tx_buf map */
+@@ -2159,7 +2336,7 @@ void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
+  * @txq: the tx ring to wrap
+  * @ntu: ring index to bump
+  */
+-static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu)
++static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu)
+ {
+       ntu++;
+@@ -2181,7 +2358,7 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu)
+  * and gets a physical address for each memory location and programs
+  * it and the length into the transmit flex descriptor.
+  */
+-static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
++static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q,
+                              struct idpf_tx_splitq_params *params,
+                              struct idpf_tx_buf *first)
+ {
+@@ -2348,7 +2525,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
+       tx_q->txq_grp->num_completions_pending++;
+       /* record bytecount for BQL */
+-      nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
++      nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
+       netdev_tx_sent_queue(nq, first->bytecount);
+       idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
+@@ -2544,7 +2721,7 @@ bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+  * ring entry to reflect that this index is a context descriptor
+  */
+ static struct idpf_flex_tx_ctx_desc *
+-idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq)
++idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq)
+ {
+       struct idpf_flex_tx_ctx_desc *desc;
+       int i = txq->next_to_use;
+@@ -2564,10 +2741,10 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq)
+  * @tx_q: queue to send buffer on
+  * @skb: pointer to skb
+  */
+-netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb)
++netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb)
+ {
+       u64_stats_update_begin(&tx_q->stats_sync);
+-      u64_stats_inc(&tx_q->q_stats.tx.skb_drops);
++      u64_stats_inc(&tx_q->q_stats.skb_drops);
+       u64_stats_update_end(&tx_q->stats_sync);
+       idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+@@ -2585,7 +2762,7 @@ netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb)
+  * Returns NETDEV_TX_OK if sent, else an error code
+  */
+ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
+-                                      struct idpf_queue *tx_q)
++                                      struct idpf_tx_queue *tx_q)
+ {
+       struct idpf_tx_splitq_params tx_params = { };
+       struct idpf_tx_buf *first;
+@@ -2625,7 +2802,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
+               ctx_desc->tso.qw0.hdr_len = tx_params.offload.tso_hdr_len;
+               u64_stats_update_begin(&tx_q->stats_sync);
+-              u64_stats_inc(&tx_q->q_stats.tx.lso_pkts);
++              u64_stats_inc(&tx_q->q_stats.lso_pkts);
+               u64_stats_update_end(&tx_q->stats_sync);
+       }
+@@ -2642,7 +2819,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
+               first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+       }
+-      if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_q->flags)) {
++      if (idpf_queue_has(FLOW_SCH_EN, tx_q)) {
+               tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE;
+               tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP;
+               /* Set the RE bit to catch any packets that may have not been
+@@ -2682,7 +2859,7 @@ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+                                struct net_device *netdev)
+ {
+       struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+-      struct idpf_queue *tx_q;
++      struct idpf_tx_queue *tx_q;
+       if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) {
+               dev_kfree_skb_any(skb);
+@@ -2735,13 +2912,14 @@ enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *deco
+  * @rx_desc: Receive descriptor
+  * @decoded: Decoded Rx packet type related fields
+  */
+-static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb,
+-                       struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+-                       struct idpf_rx_ptype_decoded *decoded)
++static void
++idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb,
++           const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
++           struct idpf_rx_ptype_decoded *decoded)
+ {
+       u32 hash;
+-      if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXHASH)))
++      if (unlikely(!(rxq->netdev->features & NETIF_F_RXHASH)))
+               return;
+       hash = le16_to_cpu(rx_desc->hash1) |
+@@ -2760,14 +2938,14 @@ static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb,
+  *
+  * skb->protocol must be set before this function is called
+  */
+-static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb,
++static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb,
+                        struct idpf_rx_csum_decoded *csum_bits,
+                        struct idpf_rx_ptype_decoded *decoded)
+ {
+       bool ipv4, ipv6;
+       /* check if Rx checksum is enabled */
+-      if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXCSUM)))
++      if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM)))
+               return;
+       /* check if HW has decoded the packet and checksum */
+@@ -2814,7 +2992,7 @@ static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+ checksum_fail:
+       u64_stats_update_begin(&rxq->stats_sync);
+-      u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
++      u64_stats_inc(&rxq->q_stats.hw_csum_err);
+       u64_stats_update_end(&rxq->stats_sync);
+ }
+@@ -2824,8 +3002,9 @@ static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+  * @csum: structure to extract checksum fields
+  *
+  **/
+-static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+-                                           struct idpf_rx_csum_decoded *csum)
++static void
++idpf_rx_splitq_extract_csum_bits(const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
++                               struct idpf_rx_csum_decoded *csum)
+ {
+       u8 qword0, qword1;
+@@ -2860,8 +3039,8 @@ static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_n
+  * Populate the skb fields with the total number of RSC segments, RSC payload
+  * length and packet type.
+  */
+-static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb,
+-                     struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
++static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb,
++                     const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+                      struct idpf_rx_ptype_decoded *decoded)
+ {
+       u16 rsc_segments, rsc_seg_len;
+@@ -2914,7 +3093,7 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb,
+       tcp_gro_complete(skb);
+       u64_stats_update_begin(&rxq->stats_sync);
+-      u64_stats_inc(&rxq->q_stats.rx.rsc_pkts);
++      u64_stats_inc(&rxq->q_stats.rsc_pkts);
+       u64_stats_update_end(&rxq->stats_sync);
+       return 0;
+@@ -2930,9 +3109,9 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb,
+  * order to populate the hash, checksum, protocol, and
+  * other fields within the skb.
+  */
+-static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
+-                                    struct sk_buff *skb,
+-                                    struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
++static int
++idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
++                         const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
+ {
+       struct idpf_rx_csum_decoded csum_bits = { };
+       struct idpf_rx_ptype_decoded decoded;
+@@ -2940,19 +3119,13 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
+       rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0,
+                                VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M);
+-
+-      skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
+-
+-      decoded = rxq->vport->rx_ptype_lkup[rx_ptype];
+-      /* If we don't know the ptype we can't do anything else with it. Just
+-       * pass it up the stack as-is.
+-       */
+-      if (!decoded.known)
+-              return 0;
++      decoded = rxq->rx_ptype_lkup[rx_ptype];
+       /* process RSS/hash */
+       idpf_rx_hash(rxq, skb, rx_desc, &decoded);
++      skb->protocol = eth_type_trans(skb, rxq->netdev);
++
+       if (le16_get_bits(rx_desc->hdrlen_flags,
+                         VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M))
+               return idpf_rx_rsc(rxq, skb, rx_desc, &decoded);
+@@ -2992,7 +3165,7 @@ void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
+  * data from the current receive descriptor, taking care to set up the
+  * skb correctly.
+  */
+-struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
++struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq,
+                                     struct idpf_rx_buf *rx_buf,
+                                     unsigned int size)
+ {
+@@ -3005,7 +3178,7 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
+       /* prefetch first cache line of first page */
+       net_prefetch(va);
+       /* allocate a skb to store the frags */
+-      skb = napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE);
++      skb = napi_alloc_skb(rxq->napi, IDPF_RX_HDR_SIZE);
+       if (unlikely(!skb)) {
+               idpf_rx_put_page(rx_buf);
+@@ -3052,14 +3225,14 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
+  * the current receive descriptor, taking care to set up the skb correctly.
+  * This specifically uses a header buffer to start building the skb.
+  */
+-static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq,
+-                                               const void *va,
+-                                               unsigned int size)
++static struct sk_buff *
++idpf_rx_hdr_construct_skb(const struct idpf_rx_queue *rxq, const void *va,
++                        unsigned int size)
+ {
+       struct sk_buff *skb;
+       /* allocate a skb to store the frags */
+-      skb = napi_alloc_skb(&rxq->q_vector->napi, size);
++      skb = napi_alloc_skb(rxq->napi, size);
+       if (unlikely(!skb))
+               return NULL;
+@@ -3115,10 +3288,10 @@ static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_de
+  *
+  * Returns amount of work completed
+  */
+-static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
++static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget)
+ {
+       int total_rx_bytes = 0, total_rx_pkts = 0;
+-      struct idpf_queue *rx_bufq = NULL;
++      struct idpf_buf_queue *rx_bufq = NULL;
+       struct sk_buff *skb = rxq->skb;
+       u16 ntc = rxq->next_to_clean;
+@@ -3148,7 +3321,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+               gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id,
+                                      VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M);
+-              if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id)
++              if (idpf_queue_has(GEN_CHK, rxq) != gen_id)
+                       break;
+               rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M,
+@@ -3156,7 +3329,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+               if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) {
+                       IDPF_RX_BUMP_NTC(rxq, ntc);
+                       u64_stats_update_begin(&rxq->stats_sync);
+-                      u64_stats_inc(&rxq->q_stats.rx.bad_descs);
++                      u64_stats_inc(&rxq->q_stats.bad_descs);
+                       u64_stats_update_end(&rxq->stats_sync);
+                       continue;
+               }
+@@ -3174,7 +3347,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+                        * data/payload buffer.
+                        */
+                       u64_stats_update_begin(&rxq->stats_sync);
+-                      u64_stats_inc(&rxq->q_stats.rx.hsplit_buf_ovf);
++                      u64_stats_inc(&rxq->q_stats.hsplit_buf_ovf);
+                       u64_stats_update_end(&rxq->stats_sync);
+                       goto bypass_hsplit;
+               }
+@@ -3187,13 +3360,10 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+                                       VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M);
+               rxq_set = container_of(rxq, struct idpf_rxq_set, rxq);
+-              if (!bufq_id)
+-                      refillq = rxq_set->refillq0;
+-              else
+-                      refillq = rxq_set->refillq1;
++              refillq = rxq_set->refillq[bufq_id];
+               /* retrieve buffer from the rxq */
+-              rx_bufq = &rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq;
++              rx_bufq = &rxq->bufq_sets[bufq_id].bufq;
+               buf_id = le16_to_cpu(rx_desc->buf_id);
+@@ -3205,7 +3375,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+                       skb = idpf_rx_hdr_construct_skb(rxq, va, hdr_len);
+                       u64_stats_update_begin(&rxq->stats_sync);
+-                      u64_stats_inc(&rxq->q_stats.rx.hsplit_pkts);
++                      u64_stats_inc(&rxq->q_stats.hsplit_pkts);
+                       u64_stats_update_end(&rxq->stats_sync);
+               }
+@@ -3248,7 +3418,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+               }
+               /* send completed skb up the stack */
+-              napi_gro_receive(&rxq->q_vector->napi, skb);
++              napi_gro_receive(rxq->napi, skb);
+               skb = NULL;
+               /* update budget accounting */
+@@ -3259,8 +3429,8 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+       rxq->skb = skb;
+       u64_stats_update_begin(&rxq->stats_sync);
+-      u64_stats_add(&rxq->q_stats.rx.packets, total_rx_pkts);
+-      u64_stats_add(&rxq->q_stats.rx.bytes, total_rx_bytes);
++      u64_stats_add(&rxq->q_stats.packets, total_rx_pkts);
++      u64_stats_add(&rxq->q_stats.bytes, total_rx_bytes);
+       u64_stats_update_end(&rxq->stats_sync);
+       /* guarantee a trip back through this routine if there was a failure */
+@@ -3270,19 +3440,16 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+ /**
+  * idpf_rx_update_bufq_desc - Update buffer queue descriptor
+  * @bufq: Pointer to the buffer queue
+- * @refill_desc: SW Refill queue descriptor containing buffer ID
++ * @buf_id: buffer ID
+  * @buf_desc: Buffer queue descriptor
+  *
+  * Return 0 on success and negative on failure.
+  */
+-static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc,
++static int idpf_rx_update_bufq_desc(struct idpf_buf_queue *bufq, u32 buf_id,
+                                   struct virtchnl2_splitq_rx_buf_desc *buf_desc)
+ {
+       struct idpf_rx_buf *buf;
+       dma_addr_t addr;
+-      u16 buf_id;
+-
+-      buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc);
+       buf = &bufq->rx_buf.buf[buf_id];
+@@ -3293,7 +3460,7 @@ static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc,
+       buf_desc->pkt_addr = cpu_to_le64(addr);
+       buf_desc->qword0.buf_id = cpu_to_le16(buf_id);
+-      if (!bufq->rx_hsplit_en)
++      if (!idpf_queue_has(HSPLIT_EN, bufq))
+               return 0;
+       buf_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa +
+@@ -3309,33 +3476,32 @@ static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc,
+  *
+  * This function takes care of the buffer refill management
+  */
+-static void idpf_rx_clean_refillq(struct idpf_queue *bufq,
++static void idpf_rx_clean_refillq(struct idpf_buf_queue *bufq,
+                                 struct idpf_sw_queue *refillq)
+ {
+       struct virtchnl2_splitq_rx_buf_desc *buf_desc;
+       u16 bufq_nta = bufq->next_to_alloc;
+       u16 ntc = refillq->next_to_clean;
+       int cleaned = 0;
+-      u16 gen;
+       buf_desc = &bufq->split_buf[bufq_nta];
+       /* make sure we stop at ring wrap in the unlikely case ring is full */
+       while (likely(cleaned < refillq->desc_count)) {
+-              u16 refill_desc = refillq->ring[ntc];
++              u32 buf_id, refill_desc = refillq->ring[ntc];
+               bool failure;
+-              gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc);
+-              if (test_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags) != gen)
++              if (idpf_queue_has(RFL_GEN_CHK, refillq) !=
++                  !!(refill_desc & IDPF_RX_BI_GEN_M))
+                       break;
+-              failure = idpf_rx_update_bufq_desc(bufq, refill_desc,
+-                                                 buf_desc);
++              buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc);
++              failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc);
+               if (failure)
+                       break;
+               if (unlikely(++ntc == refillq->desc_count)) {
+-                      change_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags);
++                      idpf_queue_change(RFL_GEN_CHK, refillq);
+                       ntc = 0;
+               }
+@@ -3374,7 +3540,7 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq,
+  * this vector.  Returns true if clean is complete within budget, false
+  * otherwise.
+  */
+-static void idpf_rx_clean_refillq_all(struct idpf_queue *bufq)
++static void idpf_rx_clean_refillq_all(struct idpf_buf_queue *bufq)
+ {
+       struct idpf_bufq_set *bufq_set;
+       int i;
+@@ -3437,6 +3603,8 @@ void idpf_vport_intr_rel(struct idpf_vport *vport)
+       for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+               struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx];
++              kfree(q_vector->complq);
++              q_vector->complq = NULL;
+               kfree(q_vector->bufq);
+               q_vector->bufq = NULL;
+               kfree(q_vector->tx);
+@@ -3555,13 +3723,13 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector)
+               goto check_rx_itr;
+       for (i = 0, packets = 0, bytes = 0; i < q_vector->num_txq; i++) {
+-              struct idpf_queue *txq = q_vector->tx[i];
++              struct idpf_tx_queue *txq = q_vector->tx[i];
+               unsigned int start;
+               do {
+                       start = u64_stats_fetch_begin(&txq->stats_sync);
+-                      packets += u64_stats_read(&txq->q_stats.tx.packets);
+-                      bytes += u64_stats_read(&txq->q_stats.tx.bytes);
++                      packets += u64_stats_read(&txq->q_stats.packets);
++                      bytes += u64_stats_read(&txq->q_stats.bytes);
+               } while (u64_stats_fetch_retry(&txq->stats_sync, start));
+       }
+@@ -3574,13 +3742,13 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector)
+               return;
+       for (i = 0, packets = 0, bytes = 0; i < q_vector->num_rxq; i++) {
+-              struct idpf_queue *rxq = q_vector->rx[i];
++              struct idpf_rx_queue *rxq = q_vector->rx[i];
+               unsigned int start;
+               do {
+                       start = u64_stats_fetch_begin(&rxq->stats_sync);
+-                      packets += u64_stats_read(&rxq->q_stats.rx.packets);
+-                      bytes += u64_stats_read(&rxq->q_stats.rx.bytes);
++                      packets += u64_stats_read(&rxq->q_stats.packets);
++                      bytes += u64_stats_read(&rxq->q_stats.bytes);
+               } while (u64_stats_fetch_retry(&rxq->stats_sync, start));
+       }
+@@ -3824,16 +3992,17 @@ static void idpf_vport_intr_napi_ena_all(struct idpf_vport *vport)
+ static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec,
+                                    int budget, int *cleaned)
+ {
+-      u16 num_txq = q_vec->num_txq;
++      u16 num_complq = q_vec->num_complq;
+       bool clean_complete = true;
+       int i, budget_per_q;
+-      if (unlikely(!num_txq))
++      if (unlikely(!num_complq))
+               return true;
+-      budget_per_q = DIV_ROUND_UP(budget, num_txq);
+-      for (i = 0; i < num_txq; i++)
+-              clean_complete &= idpf_tx_clean_complq(q_vec->tx[i],
++      budget_per_q = DIV_ROUND_UP(budget, num_complq);
++
++      for (i = 0; i < num_complq; i++)
++              clean_complete &= idpf_tx_clean_complq(q_vec->complq[i],
+                                                      budget_per_q, cleaned);
+       return clean_complete;
+@@ -3860,7 +4029,7 @@ static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget,
+        */
+       budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
+       for (i = 0; i < num_rxq; i++) {
+-              struct idpf_queue *rxq = q_vec->rx[i];
++              struct idpf_rx_queue *rxq = q_vec->rx[i];
+               int pkts_cleaned_per_q;
+               pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q);
+@@ -3915,8 +4084,8 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
+        * queues virtchnl message, as the interrupts will be disabled after
+        * that
+        */
+-      if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE,
+-                                                 q_vector->tx[0]->flags)))
++      if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
++                                                       q_vector->tx[0])))
+               return budget;
+       else
+               return work_done;
+@@ -3930,27 +4099,28 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
+  */
+ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport)
+ {
++      bool split = idpf_is_queue_model_split(vport->rxq_model);
+       u16 num_txq_grp = vport->num_txq_grp;
+-      int i, j, qv_idx, bufq_vidx = 0;
+       struct idpf_rxq_group *rx_qgrp;
+       struct idpf_txq_group *tx_qgrp;
+-      struct idpf_queue *q, *bufq;
+-      u16 q_index;
++      u32 i, qv_idx, q_index;
+       for (i = 0, qv_idx = 0; i < vport->num_rxq_grp; i++) {
+               u16 num_rxq;
++              if (qv_idx >= vport->num_q_vectors)
++                      qv_idx = 0;
++
+               rx_qgrp = &vport->rxq_grps[i];
+-              if (idpf_is_queue_model_split(vport->rxq_model))
++              if (split)
+                       num_rxq = rx_qgrp->splitq.num_rxq_sets;
+               else
+                       num_rxq = rx_qgrp->singleq.num_rxq;
+-              for (j = 0; j < num_rxq; j++) {
+-                      if (qv_idx >= vport->num_q_vectors)
+-                              qv_idx = 0;
++              for (u32 j = 0; j < num_rxq; j++) {
++                      struct idpf_rx_queue *q;
+-                      if (idpf_is_queue_model_split(vport->rxq_model))
++                      if (split)
+                               q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+                       else
+                               q = rx_qgrp->singleq.rxqs[j];
+@@ -3958,52 +4128,53 @@ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport)
+                       q_index = q->q_vector->num_rxq;
+                       q->q_vector->rx[q_index] = q;
+                       q->q_vector->num_rxq++;
+-                      qv_idx++;
++
++                      if (split)
++                              q->napi = &q->q_vector->napi;
+               }
+-              if (idpf_is_queue_model_split(vport->rxq_model)) {
+-                      for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
++              if (split) {
++                      for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) {
++                              struct idpf_buf_queue *bufq;
++
+                               bufq = &rx_qgrp->splitq.bufq_sets[j].bufq;
+-                              bufq->q_vector = &vport->q_vectors[bufq_vidx];
++                              bufq->q_vector = &vport->q_vectors[qv_idx];
+                               q_index = bufq->q_vector->num_bufq;
+                               bufq->q_vector->bufq[q_index] = bufq;
+                               bufq->q_vector->num_bufq++;
+                       }
+-                      if (++bufq_vidx >= vport->num_q_vectors)
+-                              bufq_vidx = 0;
+               }
++
++              qv_idx++;
+       }
++      split = idpf_is_queue_model_split(vport->txq_model);
++
+       for (i = 0, qv_idx = 0; i < num_txq_grp; i++) {
+               u16 num_txq;
++              if (qv_idx >= vport->num_q_vectors)
++                      qv_idx = 0;
++
+               tx_qgrp = &vport->txq_grps[i];
+               num_txq = tx_qgrp->num_txq;
+-              if (idpf_is_queue_model_split(vport->txq_model)) {
+-                      if (qv_idx >= vport->num_q_vectors)
+-                              qv_idx = 0;
++              for (u32 j = 0; j < num_txq; j++) {
++                      struct idpf_tx_queue *q;
+-                      q = tx_qgrp->complq;
++                      q = tx_qgrp->txqs[j];
+                       q->q_vector = &vport->q_vectors[qv_idx];
+-                      q_index = q->q_vector->num_txq;
+-                      q->q_vector->tx[q_index] = q;
+-                      q->q_vector->num_txq++;
+-                      qv_idx++;
+-              } else {
+-                      for (j = 0; j < num_txq; j++) {
+-                              if (qv_idx >= vport->num_q_vectors)
+-                                      qv_idx = 0;
++                      q->q_vector->tx[q->q_vector->num_txq++] = q;
++              }
+-                              q = tx_qgrp->txqs[j];
+-                              q->q_vector = &vport->q_vectors[qv_idx];
+-                              q_index = q->q_vector->num_txq;
+-                              q->q_vector->tx[q_index] = q;
+-                              q->q_vector->num_txq++;
++              if (split) {
++                      struct idpf_compl_queue *q = tx_qgrp->complq;
+-                              qv_idx++;
+-                      }
++                      q->q_vector = &vport->q_vectors[qv_idx];
++                      q->q_vector->complq[q->q_vector->num_complq++] = q;
+               }
++
++              qv_idx++;
+       }
+ }
+@@ -4079,18 +4250,22 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport)
+ {
+       u16 txqs_per_vector, rxqs_per_vector, bufqs_per_vector;
+       struct idpf_q_vector *q_vector;
+-      int v_idx, err;
++      u32 complqs_per_vector, v_idx;
+       vport->q_vectors = kcalloc(vport->num_q_vectors,
+                                  sizeof(struct idpf_q_vector), GFP_KERNEL);
+       if (!vport->q_vectors)
+               return -ENOMEM;
+-      txqs_per_vector = DIV_ROUND_UP(vport->num_txq, vport->num_q_vectors);
+-      rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq, vport->num_q_vectors);
++      txqs_per_vector = DIV_ROUND_UP(vport->num_txq_grp,
++                                     vport->num_q_vectors);
++      rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq_grp,
++                                     vport->num_q_vectors);
+       bufqs_per_vector = vport->num_bufqs_per_qgrp *
+                          DIV_ROUND_UP(vport->num_rxq_grp,
+                                       vport->num_q_vectors);
++      complqs_per_vector = DIV_ROUND_UP(vport->num_txq_grp,
++                                        vport->num_q_vectors);
+       for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+               q_vector = &vport->q_vectors[v_idx];
+@@ -4104,32 +4279,30 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport)
+               q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC;
+               q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0;
+-              q_vector->tx = kcalloc(txqs_per_vector,
+-                                     sizeof(struct idpf_queue *),
++              q_vector->tx = kcalloc(txqs_per_vector, sizeof(*q_vector->tx),
+                                      GFP_KERNEL);
+-              if (!q_vector->tx) {
+-                      err = -ENOMEM;
++              if (!q_vector->tx)
+                       goto error;
+-              }
+-              q_vector->rx = kcalloc(rxqs_per_vector,
+-                                     sizeof(struct idpf_queue *),
++              q_vector->rx = kcalloc(rxqs_per_vector, sizeof(*q_vector->rx),
+                                      GFP_KERNEL);
+-              if (!q_vector->rx) {
+-                      err = -ENOMEM;
++              if (!q_vector->rx)
+                       goto error;
+-              }
+               if (!idpf_is_queue_model_split(vport->rxq_model))
+                       continue;
+               q_vector->bufq = kcalloc(bufqs_per_vector,
+-                                       sizeof(struct idpf_queue *),
++                                       sizeof(*q_vector->bufq),
+                                        GFP_KERNEL);
+-              if (!q_vector->bufq) {
+-                      err = -ENOMEM;
++              if (!q_vector->bufq)
++                      goto error;
++
++              q_vector->complq = kcalloc(complqs_per_vector,
++                                         sizeof(*q_vector->complq),
++                                         GFP_KERNEL);
++              if (!q_vector->complq)
+                       goto error;
+-              }
+       }
+       return 0;
+@@ -4137,7 +4310,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport)
+ error:
+       idpf_vport_intr_rel(vport);
+-      return err;
++      return -ENOMEM;
+ }
+ /**
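Everything in the idpf_txrx.c hunks above follows from one change: the catch-all struct idpf_queue is split into strictly typed RX, TX, buffer and completion queue structures, so each function signature now names the only queue kind it can legally receive. A tiny standalone sketch of why that matters; the demo_* types are invented purely to illustrate the effect:

/*
 * With a single shared queue struct, passing an RX queue into a TX path
 * compiled cleanly and failed at run time; with one struct per queue
 * kind the same mistake becomes a type error. These structs and
 * functions are illustrative stand-ins, not driver code.
 */
struct demo_tx_queue {
	unsigned int next_to_use;
};

struct demo_rx_queue {
	unsigned int next_to_clean;
};

static void demo_tx_bump(struct demo_tx_queue *txq)
{
	txq->next_to_use++;
}

static void demo_poll(struct demo_rx_queue *rxq)
{
	/* demo_tx_bump(rxq); -- now rejected: incompatible pointer type */
	rxq->next_to_clean++;
}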
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+index 6dce14483215f..704aec5c383b6 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+@@ -4,6 +4,8 @@
+ #ifndef _IDPF_TXRX_H_
+ #define _IDPF_TXRX_H_
++#include <linux/dim.h>
++
+ #include <net/page_pool/helpers.h>
+ #include <net/tcp.h>
+ #include <net/netdev_queues.h>
+@@ -84,7 +86,7 @@
+ do {                                                          \
+       if (unlikely(++(ntc) == (rxq)->desc_count)) {           \
+               ntc = 0;                                        \
+-              change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags);     \
++              idpf_queue_change(GEN_CHK, rxq);                \
+       }                                                       \
+ } while (0)
+@@ -111,10 +113,9 @@ do {                                                              \
+  */
+ #define IDPF_TX_SPLITQ_RE_MIN_GAP     64
+-#define IDPF_RX_BI_BUFID_S            0
+-#define IDPF_RX_BI_BUFID_M            GENMASK(14, 0)
+-#define IDPF_RX_BI_GEN_S              15
+-#define IDPF_RX_BI_GEN_M              BIT(IDPF_RX_BI_GEN_S)
++#define IDPF_RX_BI_GEN_M              BIT(16)
++#define IDPF_RX_BI_BUFID_M            GENMASK(15, 0)
++
+ #define IDPF_RXD_EOF_SPLITQ           VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M
+ #define IDPF_RXD_EOF_SINGLEQ          VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M
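With the old 15-bit buffer ID plus bit-15 generation layout replaced by a 16-bit buffer ID and a generation flag in bit 16, a refill ring entry no longer fits in a u16, which is why the allocation hunk earlier switched to sizeof(*refillq->ring). A standalone sketch of the new encoding; the demo_* helpers are hypothetical, the driver extracts the fields with FIELD_GET() and the masks above:

/*
 * Refill-descriptor layout implied by IDPF_RX_BI_BUFID_M (GENMASK(15, 0))
 * and IDPF_RX_BI_GEN_M (BIT(16)): buffer ID in bits 15:0, generation flag
 * in bit 16, carried in a 32-bit ring entry.
 */
#include <stdbool.h>
#include <stdint.h>

#define DEMO_RX_BI_BUFID_M	0x0000ffffu	/* GENMASK(15, 0) */
#define DEMO_RX_BI_GEN_M	0x00010000u	/* BIT(16) */

static inline uint32_t demo_refill_desc(uint16_t buf_id, bool gen)
{
	return (uint32_t)buf_id | (gen ? DEMO_RX_BI_GEN_M : 0);
}

static inline uint16_t demo_refill_buf_id(uint32_t refill_desc)
{
	return refill_desc & DEMO_RX_BI_BUFID_M;
}

static inline bool demo_refill_gen(uint32_t refill_desc)
{
	return !!(refill_desc & DEMO_RX_BI_GEN_M);
}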
+@@ -122,7 +123,7 @@ do {                                                               \
+       ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \
+       (txq)->next_to_clean - (txq)->next_to_use - 1)
+-#define IDPF_TX_BUF_RSV_UNUSED(txq)   ((txq)->buf_stack.top)
++#define IDPF_TX_BUF_RSV_UNUSED(txq)   ((txq)->stash->buf_stack.top)
+ #define IDPF_TX_BUF_RSV_LOW(txq)      (IDPF_TX_BUF_RSV_UNUSED(txq) < \
+                                        (txq)->desc_count >> 2)
+@@ -433,23 +434,37 @@ struct idpf_rx_ptype_decoded {
+  *                  to 1 and knows that reading a gen bit of 1 in any
+  *                  descriptor on the initial pass of the ring indicates a
+  *                  writeback. It also flips on every ring wrap.
+- * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit
+- *                     and RFLGQ_GEN is the SW bit.
++ * @__IDPF_Q_RFL_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW
++ *                      bit and Q_RFL_GEN is the SW bit.
+  * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
+  * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
+  * @__IDPF_Q_POLL_MODE: Enable poll mode
++ * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode
++ * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq)
+  * @__IDPF_Q_FLAGS_NBITS: Must be last
+  */
+ enum idpf_queue_flags_t {
+       __IDPF_Q_GEN_CHK,
+-      __IDPF_RFLQ_GEN_CHK,
++      __IDPF_Q_RFL_GEN_CHK,
+       __IDPF_Q_FLOW_SCH_EN,
+       __IDPF_Q_SW_MARKER,
+       __IDPF_Q_POLL_MODE,
++      __IDPF_Q_CRC_EN,
++      __IDPF_Q_HSPLIT_EN,
+       __IDPF_Q_FLAGS_NBITS,
+ };
++#define idpf_queue_set(f, q)          __set_bit(__IDPF_Q_##f, (q)->flags)
++#define idpf_queue_clear(f, q)                __clear_bit(__IDPF_Q_##f, (q)->flags)
++#define idpf_queue_change(f, q)               __change_bit(__IDPF_Q_##f, (q)->flags)
++#define idpf_queue_has(f, q)          test_bit(__IDPF_Q_##f, (q)->flags)
++
++#define idpf_queue_has_clear(f, q)                    \
++      __test_and_clear_bit(__IDPF_Q_##f, (q)->flags)
++#define idpf_queue_assign(f, q, v)                    \
++      __assign_bit(__IDPF_Q_##f, (q)->flags, v)
++
+ /**
+  * struct idpf_vec_regs
+  * @dyn_ctl_reg: Dynamic control interrupt register offset
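The idpf_queue_*() wrappers above rely on token pasting, so a call site spells only the short flag name and the macro expands it to the full __IDPF_Q_* enumerator handed to the bitops. A standalone sketch of the same pattern with invented demo_* names:

/*
 * The pattern behind the idpf_queue_*() helpers, rebuilt outside the
 * kernel: the flag argument is token-pasted onto the enum prefix, so
 * demo_queue_has(GEN_CHK, q) tests the __DEMO_Q_GEN_CHK bit. Everything
 * here is a demo reimplementation using plain bit arithmetic.
 */
#include <stdbool.h>

enum demo_queue_flags {
	__DEMO_Q_GEN_CHK,
	__DEMO_Q_FLOW_SCH_EN,
	__DEMO_Q_FLAGS_NBITS,
};

struct demo_queue {
	unsigned long flags;			/* enough bits for this demo */
};

#define demo_queue_set(f, q)	((void)((q)->flags |=  (1UL << __DEMO_Q_##f)))
#define demo_queue_clear(f, q)	((void)((q)->flags &= ~(1UL << __DEMO_Q_##f)))
#define demo_queue_change(f, q)	((void)((q)->flags ^=  (1UL << __DEMO_Q_##f)))
#define demo_queue_has(f, q)	(!!((q)->flags & (1UL << __DEMO_Q_##f)))

static bool demo_example(struct demo_queue *q)
{
	demo_queue_set(FLOW_SCH_EN, q);
	demo_queue_change(GEN_CHK, q);

	return demo_queue_has(GEN_CHK, q);
}

As in the driver, these are plain read-modify-write operations (the real helpers wrap the non-atomic __set_bit()/__change_bit() family), which presumes a given queue's flags are only modified from one context at a time.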
+@@ -495,7 +510,9 @@ struct idpf_intr_reg {
+  * @v_idx: Vector index
+  * @intr_reg: See struct idpf_intr_reg
+  * @num_txq: Number of TX queues
++ * @num_complq: number of completion queues
+  * @tx: Array of TX queues to service
++ * @complq: array of completion queues
+  * @tx_dim: Data for TX net_dim algorithm
+  * @tx_itr_value: TX interrupt throttling rate
+  * @tx_intr_mode: Dynamic ITR or not
+@@ -519,21 +536,24 @@ struct idpf_q_vector {
+       struct idpf_intr_reg intr_reg;
+       u16 num_txq;
+-      struct idpf_queue **tx;
++      u16 num_complq;
++      struct idpf_tx_queue **tx;
++      struct idpf_compl_queue **complq;
++
+       struct dim tx_dim;
+       u16 tx_itr_value;
+       bool tx_intr_mode;
+       u32 tx_itr_idx;
+       u16 num_rxq;
+-      struct idpf_queue **rx;
++      struct idpf_rx_queue **rx;
+       struct dim rx_dim;
+       u16 rx_itr_value;
+       bool rx_intr_mode;
+       u32 rx_itr_idx;
+       u16 num_bufq;
+-      struct idpf_queue **bufq;
++      struct idpf_buf_queue **bufq;
+       u16 total_events;
+       char *name;
+@@ -564,11 +584,6 @@ struct idpf_cleaned_stats {
+       u32 bytes;
+ };
+-union idpf_queue_stats {
+-      struct idpf_rx_queue_stats rx;
+-      struct idpf_tx_queue_stats tx;
+-};
+-
+ #define IDPF_ITR_DYNAMIC      1
+ #define IDPF_ITR_MAX          0x1FE0
+ #define IDPF_ITR_20K          0x0032
+@@ -584,39 +599,114 @@ union idpf_queue_stats {
+ #define IDPF_DIM_DEFAULT_PROFILE_IX           1
+ /**
+- * struct idpf_queue
+- * @dev: Device back pointer for DMA mapping
+- * @vport: Back pointer to associated vport
+- * @txq_grp: See struct idpf_txq_group
+- * @rxq_grp: See struct idpf_rxq_group
+- * @idx: For buffer queue, it is used as group id, either 0 or 1. On clean,
+- *     buffer queue uses this index to determine which group of refill queues
+- *     to clean.
+- *     For TX queue, it is used as index to map between TX queue group and
+- *     hot path TX pointers stored in vport. Used in both singleq/splitq.
+- *     For RX queue, it is used to index to total RX queue across groups and
++ * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode
++ * @buf_stack: Stack of empty buffers to store buffer info for out of order
++ *           buffer completions. See struct idpf_buf_lifo
++ * @sched_buf_hash: Hash table to store buffers
++ */
++struct idpf_txq_stash {
++      struct idpf_buf_lifo buf_stack;
++      DECLARE_HASHTABLE(sched_buf_hash, 12);
++} ____cacheline_aligned;
++
++/**
++ * struct idpf_rx_queue - software structure representing a receive queue
++ * @rx: universal receive descriptor array
++ * @single_buf: buffer descriptor array in singleq
++ * @desc_ring: virtual descriptor ring address
++ * @bufq_sets: Pointer to the array of buffer queues in splitq mode
++ * @napi: NAPI instance corresponding to this queue (splitq)
++ * @rx_buf: See struct idpf_rx_buf
++ * @pp: Page pool pointer in singleq mode
++ * @netdev: &net_device corresponding to this queue
++ * @tail: Tail offset. Used for both queue models single and split.
++ * @flags: See enum idpf_queue_flags_t
++ * @idx: For RX queue, it is used to index to total RX queue across groups and
+  *     used for skb reporting.
+- * @tail: Tail offset. Used for both queue models single and split. In splitq
+- *      model relevant only for TX queue and RX queue.
+- * @tx_buf: See struct idpf_tx_buf
+- * @rx_buf: Struct with RX buffer related members
+- * @rx_buf.buf: See struct idpf_rx_buf
+- * @rx_buf.hdr_buf_pa: DMA handle
+- * @rx_buf.hdr_buf_va: Virtual address
+- * @pp: Page pool pointer
++ * @desc_count: Number of descriptors
++ * @next_to_use: Next descriptor to use
++ * @next_to_clean: Next descriptor to clean
++ * @next_to_alloc: RX buffer to allocate at
++ * @rxdids: Supported RX descriptor ids
++ * @rx_ptype_lkup: LUT of Rx ptypes
+  * @skb: Pointer to the skb
+- * @q_type: Queue type (TX, RX, TX completion, RX buffer)
++ * @stats_sync: See struct u64_stats_sync
++ * @q_stats: See union idpf_rx_queue_stats
+  * @q_id: Queue id
+- * @desc_count: Number of descriptors
+- * @next_to_use: Next descriptor to use. Relevant in both split & single txq
+- *             and bufq.
+- * @next_to_clean: Next descriptor to clean. In split queue model, only
+- *               relevant to TX completion queue and RX queue.
+- * @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model
+- *               only relevant to RX queue.
++ * @size: Length of descriptor ring in bytes
++ * @dma: Physical address of ring
++ * @q_vector: Backreference to associated vector
++ * @rx_buffer_low_watermark: RX buffer low watermark
++ * @rx_hbuf_size: Header buffer size
++ * @rx_buf_size: Buffer size
++ * @rx_max_pkt_size: RX max packet size
++ */
++struct idpf_rx_queue {
++      union {
++              union virtchnl2_rx_desc *rx;
++              struct virtchnl2_singleq_rx_buf_desc *single_buf;
++
++              void *desc_ring;
++      };
++      union {
++              struct {
++                      struct idpf_bufq_set *bufq_sets;
++                      struct napi_struct *napi;
++              };
++              struct {
++                      struct idpf_rx_buf *rx_buf;
++                      struct page_pool *pp;
++              };
++      };
++      struct net_device *netdev;
++      void __iomem *tail;
++
++      DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
++      u16 idx;
++      u16 desc_count;
++      u16 next_to_use;
++      u16 next_to_clean;
++      u16 next_to_alloc;
++
++      u32 rxdids;
++
++      const struct idpf_rx_ptype_decoded *rx_ptype_lkup;
++      struct sk_buff *skb;
++
++      struct u64_stats_sync stats_sync;
++      struct idpf_rx_queue_stats q_stats;
++
++      /* Slowpath */
++      u32 q_id;
++      u32 size;
++      dma_addr_t dma;
++
++      struct idpf_q_vector *q_vector;
++
++      u16 rx_buffer_low_watermark;
++      u16 rx_hbuf_size;
++      u16 rx_buf_size;
++      u16 rx_max_pkt_size;
++} ____cacheline_aligned;
++
++/**
++ * struct idpf_tx_queue - software structure representing a transmit queue
++ * @base_tx: base Tx descriptor array
++ * @base_ctx: base Tx context descriptor array
++ * @flex_tx: flex Tx descriptor array
++ * @flex_ctx: flex Tx context descriptor array
++ * @desc_ring: virtual descriptor ring address
++ * @tx_buf: See struct idpf_tx_buf
++ * @txq_grp: See struct idpf_txq_group
++ * @dev: Device back pointer for DMA mapping
++ * @tail: Tail offset. Used for both queue models single and split
+  * @flags: See enum idpf_queue_flags_t
+- * @q_stats: See union idpf_queue_stats
+- * @stats_sync: See struct u64_stats_sync
++ * @idx: For TX queue, it is used as index to map between TX queue group and
++ *     hot path TX pointers stored in vport. Used in both singleq/splitq.
++ * @desc_count: Number of descriptors
++ * @next_to_use: Next descriptor to use
++ * @next_to_clean: Next descriptor to clean
++ * @netdev: &net_device corresponding to this queue
+  * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on
+  *               the TX completion queue, it can be for any TXQ associated
+  *               with that completion queue. This means we can clean up to
+@@ -625,34 +715,10 @@ union idpf_queue_stats {
+  *               that single call to clean the completion queue. By doing so,
+  *               we can update BQL with aggregate cleaned stats for each TXQ
+  *               only once at the end of the cleaning routine.
++ * @clean_budget: singleq only, queue cleaning budget
+  * @cleaned_pkts: Number of packets cleaned for the above said case
+- * @rx_hsplit_en: RX headsplit enable
+- * @rx_hbuf_size: Header buffer size
+- * @rx_buf_size: Buffer size
+- * @rx_max_pkt_size: RX max packet size
+- * @rx_buf_stride: RX buffer stride
+- * @rx_buffer_low_watermark: RX buffer low watermark
+- * @rxdids: Supported RX descriptor ids
+- * @q_vector: Backreference to associated vector
+- * @size: Length of descriptor ring in bytes
+- * @dma: Physical address of ring
+- * @rx: universal receive descriptor array
+- * @single_buf: Rx buffer descriptor array in singleq
+- * @split_buf: Rx buffer descriptor array in splitq
+- * @base_tx: basic Tx descriptor array
+- * @base_ctx: basic Tx context descriptor array
+- * @flex_tx: flex Tx descriptor array
+- * @flex_ctx: flex Tx context descriptor array
+- * @comp: completion descriptor array
+- * @desc_ring: virtual descriptor ring address
+  * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
+  * @tx_min_pkt_len: Min supported packet length
+- * @num_completions: Only relevant for TX completion queue. It tracks the
+- *                 number of completions received to compare against the
+- *                 number of completions pending, as accumulated by the
+- *                 TX queues.
+- * @buf_stack: Stack of empty buffers to store buffer info for out of order
+- *           buffer completions. See struct idpf_buf_lifo.
+  * @compl_tag_bufid_m: Completion tag buffer id mask
+  * @compl_tag_gen_s: Completion tag generation bit
+  *    The format of the completion tag will change based on the TXQ
+@@ -676,120 +742,188 @@ union idpf_queue_stats {
+  *    This gives us 8*8160 = 65280 possible unique values.
+  * @compl_tag_cur_gen: Used to keep track of current completion tag generation
+  * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset
+- * @sched_buf_hash: Hash table to stores buffers
++ * @stash: Tx buffer stash for Flow-based scheduling mode
++ * @stats_sync: See struct u64_stats_sync
++ * @q_stats: See union idpf_tx_queue_stats
++ * @q_id: Queue id
++ * @size: Length of descriptor ring in bytes
++ * @dma: Physical address of ring
++ * @q_vector: Backreference to associated vector
+  */
+-struct idpf_queue {
+-      struct device *dev;
+-      struct idpf_vport *vport;
++struct idpf_tx_queue {
+       union {
+-              struct idpf_txq_group *txq_grp;
+-              struct idpf_rxq_group *rxq_grp;
++              struct idpf_base_tx_desc *base_tx;
++              struct idpf_base_tx_ctx_desc *base_ctx;
++              union idpf_tx_flex_desc *flex_tx;
++              struct idpf_flex_tx_ctx_desc *flex_ctx;
++
++              void *desc_ring;
+       };
+-      u16 idx;
++      struct idpf_tx_buf *tx_buf;
++      struct idpf_txq_group *txq_grp;
++      struct device *dev;
+       void __iomem *tail;
+-      union {
+-              struct idpf_tx_buf *tx_buf;
+-              struct {
+-                      struct idpf_rx_buf *buf;
+-                      dma_addr_t hdr_buf_pa;
+-                      void *hdr_buf_va;
+-              } rx_buf;
+-      };
+-      struct page_pool *pp;
+-      struct sk_buff *skb;
+-      u16 q_type;
+-      u32 q_id;
+-      u16 desc_count;
++      DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
++      u16 idx;
++      u16 desc_count;
+       u16 next_to_use;
+       u16 next_to_clean;
+-      u16 next_to_alloc;
+-      DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
+-      union idpf_queue_stats q_stats;
+-      struct u64_stats_sync stats_sync;
++      struct net_device *netdev;
+-      u32 cleaned_bytes;
++      union {
++              u32 cleaned_bytes;
++              u32 clean_budget;
++      };
+       u16 cleaned_pkts;
+-      bool rx_hsplit_en;
+-      u16 rx_hbuf_size;
+-      u16 rx_buf_size;
+-      u16 rx_max_pkt_size;
+-      u16 rx_buf_stride;
+-      u8 rx_buffer_low_watermark;
+-      u64 rxdids;
+-      struct idpf_q_vector *q_vector;
+-      unsigned int size;
++      u16 tx_max_bufs;
++      u16 tx_min_pkt_len;
++
++      u16 compl_tag_bufid_m;
++      u16 compl_tag_gen_s;
++
++      u16 compl_tag_cur_gen;
++      u16 compl_tag_gen_max;
++
++      struct idpf_txq_stash *stash;
++
++      struct u64_stats_sync stats_sync;
++      struct idpf_tx_queue_stats q_stats;
++
++      /* Slowpath */
++      u32 q_id;
++      u32 size;
+       dma_addr_t dma;
+-      union {
+-              union virtchnl2_rx_desc *rx;
+-              struct virtchnl2_singleq_rx_buf_desc *single_buf;
+-              struct virtchnl2_splitq_rx_buf_desc *split_buf;
++      struct idpf_q_vector *q_vector;
++} ____cacheline_aligned;
+-              struct idpf_base_tx_desc *base_tx;
+-              struct idpf_base_tx_ctx_desc *base_ctx;
+-              union idpf_tx_flex_desc *flex_tx;
+-              struct idpf_flex_tx_ctx_desc *flex_ctx;
++/**
++ * struct idpf_buf_queue - software structure representing a buffer queue
++ * @split_buf: buffer descriptor array
++ * @rx_buf: Struct with RX buffer related members
++ * @rx_buf.buf: See struct idpf_rx_buf
++ * @rx_buf.hdr_buf_pa: DMA handle
++ * @rx_buf.hdr_buf_va: Virtual address
++ * @pp: Page pool pointer
++ * @tail: Tail offset
++ * @flags: See enum idpf_queue_flags_t
++ * @desc_count: Number of descriptors
++ * @next_to_use: Next descriptor to use
++ * @next_to_clean: Next descriptor to clean
++ * @next_to_alloc: RX buffer to allocate at
++ * @q_id: Queue id
++ * @size: Length of descriptor ring in bytes
++ * @dma: Physical address of ring
++ * @q_vector: Backreference to associated vector
++ * @rx_buffer_low_watermark: RX buffer low watermark
++ * @rx_hbuf_size: Header buffer size
++ * @rx_buf_size: Buffer size
++ */
++struct idpf_buf_queue {
++      struct virtchnl2_splitq_rx_buf_desc *split_buf;
++      struct {
++              struct idpf_rx_buf *buf;
++              dma_addr_t hdr_buf_pa;
++              void *hdr_buf_va;
++      } rx_buf;
++      struct page_pool *pp;
++      void __iomem *tail;
+-              struct idpf_splitq_tx_compl_desc *comp;
++      DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
++      u16 desc_count;
++      u16 next_to_use;
++      u16 next_to_clean;
++      u16 next_to_alloc;
+-              void *desc_ring;
+-      };
++      /* Slowpath */
++      u32 q_id;
++      u32 size;
++      dma_addr_t dma;
+-      u16 tx_max_bufs;
+-      u8 tx_min_pkt_len;
++      struct idpf_q_vector *q_vector;
+-      u32 num_completions;
++      u16 rx_buffer_low_watermark;
++      u16 rx_hbuf_size;
++      u16 rx_buf_size;
++} ____cacheline_aligned;
+-      struct idpf_buf_lifo buf_stack;
++/**
++ * struct idpf_compl_queue - software structure representing a completion queue
++ * @comp: completion descriptor array
++ * @txq_grp: See struct idpf_txq_group
++ * @flags: See enum idpf_queue_flags_t
++ * @desc_count: Number of descriptors
++ * @next_to_use: Next descriptor to use. Relevant in both split & single txq
++ *             and bufq.
++ * @next_to_clean: Next descriptor to clean
++ * @netdev: &net_device corresponding to this queue
++ * @clean_budget: queue cleaning budget
++ * @num_completions: Only relevant for TX completion queue. It tracks the
++ *                 number of completions received to compare against the
++ *                 number of completions pending, as accumulated by the
++ *                 TX queues.
++ * @q_id: Queue id
++ * @size: Length of descriptor ring in bytes
++ * @dma: Physical address of ring
++ * @q_vector: Backreference to associated vector
++ */
++struct idpf_compl_queue {
++      struct idpf_splitq_tx_compl_desc *comp;
++      struct idpf_txq_group *txq_grp;
+-      u16 compl_tag_bufid_m;
+-      u16 compl_tag_gen_s;
++      DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
++      u16 desc_count;
++      u16 next_to_use;
++      u16 next_to_clean;
+-      u16 compl_tag_cur_gen;
+-      u16 compl_tag_gen_max;
++      struct net_device *netdev;
++      u32 clean_budget;
++      u32 num_completions;
+-      DECLARE_HASHTABLE(sched_buf_hash, 12);
+-} ____cacheline_internodealigned_in_smp;
++      /* Slowpath */
++      u32 q_id;
++      u32 size;
++      dma_addr_t dma;
++
++      struct idpf_q_vector *q_vector;
++} ____cacheline_aligned;
+ /**
+  * struct idpf_sw_queue
+- * @next_to_clean: Next descriptor to clean
+- * @next_to_alloc: Buffer to allocate at
+- * @flags: See enum idpf_queue_flags_t
+  * @ring: Pointer to the ring
++ * @flags: See enum idpf_queue_flags_t
+  * @desc_count: Descriptor count
+- * @dev: Device back pointer for DMA mapping
++ * @next_to_use: Buffer to allocate at
++ * @next_to_clean: Next descriptor to clean
+  *
+  * Software queues are used in splitq mode to manage buffers between rxq
+  * producer and the bufq consumer.  These are required in order to maintain a
+  * lockless buffer management system and are strictly software only constructs.
+  */
+ struct idpf_sw_queue {
+-      u16 next_to_clean;
+-      u16 next_to_alloc;
++      u32 *ring;
++
+       DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
+-      u16 *ring;
+       u16 desc_count;
+-      struct device *dev;
+-} ____cacheline_internodealigned_in_smp;
++      u16 next_to_use;
++      u16 next_to_clean;
++} ____cacheline_aligned;
+ /**
+  * struct idpf_rxq_set
+  * @rxq: RX queue
+- * @refillq0: Pointer to refill queue 0
+- * @refillq1: Pointer to refill queue 1
++ * @refillq: pointers to refill queues
+  *
+  * Splitq only.  idpf_rxq_set associates an rxq with an array of refillqs.
+  * Each rxq needs a refillq to return used buffers back to the respective bufq.
+  * Bufqs then clean these refillqs for buffers to give to hardware.
+  */
+ struct idpf_rxq_set {
+-      struct idpf_queue rxq;
+-      struct idpf_sw_queue *refillq0;
+-      struct idpf_sw_queue *refillq1;
++      struct idpf_rx_queue rxq;
++      struct idpf_sw_queue *refillq[IDPF_MAX_BUFQS_PER_RXQ_GRP];
+ };
+ /**
+@@ -808,7 +942,7 @@ struct idpf_rxq_set {
+  * managed by at most two bufqs (depending on performance configuration).
+  */
+ struct idpf_bufq_set {
+-      struct idpf_queue bufq;
++      struct idpf_buf_queue bufq;
+       int num_refillqs;
+       struct idpf_sw_queue *refillqs;
+ };
+@@ -834,7 +968,7 @@ struct idpf_rxq_group {
+       union {
+               struct {
+                       u16 num_rxq;
+-                      struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q];
++                      struct idpf_rx_queue *rxqs[IDPF_LARGE_MAX_Q];
+               } singleq;
+               struct {
+                       u16 num_rxq_sets;
+@@ -849,6 +983,7 @@ struct idpf_rxq_group {
+  * @vport: Vport back pointer
+  * @num_txq: Number of TX queues associated
+  * @txqs: Array of TX queue pointers
++ * @stashes: array of OOO stashes for the queues
+  * @complq: Associated completion queue pointer, split queue only
+  * @num_completions_pending: Total number of completions pending for the
+  *                         completion queue, accumulated for all TX queues
+@@ -862,9 +997,10 @@ struct idpf_txq_group {
+       struct idpf_vport *vport;
+       u16 num_txq;
+-      struct idpf_queue *txqs[IDPF_LARGE_MAX_Q];
++      struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q];
++      struct idpf_txq_stash *stashes;
+-      struct idpf_queue *complq;
++      struct idpf_compl_queue *complq;
+       u32 num_completions_pending;
+ };
+@@ -1001,28 +1137,26 @@ void idpf_deinit_rss(struct idpf_vport *vport);
+ int idpf_rx_bufs_init_all(struct idpf_vport *vport);
+ void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
+                     unsigned int size);
+-struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
++struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq,
+                                     struct idpf_rx_buf *rx_buf,
+                                     unsigned int size);
+-bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf);
+-void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val);
+-void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
++void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
+                          bool xmit_more);
+ unsigned int idpf_size_to_txd_count(unsigned int size);
+-netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb);
+-void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
++netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb);
++void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb,
+                          struct idpf_tx_buf *first, u16 ring_idx);
+-unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
++unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq,
+                                        struct sk_buff *skb);
+ bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+                       unsigned int count);
+-int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size);
++int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size);
+ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue);
+ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+                                struct net_device *netdev);
+ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+                                 struct net_device *netdev);
+-bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq,
++bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
+                                     u16 cleaned_count);
+ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+index a5f9b7a5effe7..44602b87cd411 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+@@ -750,7 +750,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport)
+       int i;
+       for (i = 0; i < vport->num_txq; i++)
+-              set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags);
++              idpf_queue_set(SW_MARKER, vport->txqs[i]);
+       event = wait_event_timeout(vport->sw_marker_wq,
+                                  test_and_clear_bit(IDPF_VPORT_SW_MARKER,
+@@ -758,7 +758,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport)
+                                  msecs_to_jiffies(500));
+       for (i = 0; i < vport->num_txq; i++)
+-              clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
++              idpf_queue_clear(POLL_MODE, vport->txqs[i]);
+       if (event)
+               return 0;
+@@ -1092,7 +1092,6 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
+                                int num_regs, u32 q_type)
+ {
+       struct idpf_adapter *adapter = vport->adapter;
+-      struct idpf_queue *q;
+       int i, j, k = 0;
+       switch (q_type) {
+@@ -1111,6 +1110,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
+                       u16 num_rxq = rx_qgrp->singleq.num_rxq;
+                       for (j = 0; j < num_rxq && k < num_regs; j++, k++) {
++                              struct idpf_rx_queue *q;
++
+                               q = rx_qgrp->singleq.rxqs[j];
+                               q->tail = idpf_get_reg_addr(adapter,
+                                                           reg_vals[k]);
+@@ -1123,6 +1124,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
+                       u8 num_bufqs = vport->num_bufqs_per_qgrp;
+                       for (j = 0; j < num_bufqs && k < num_regs; j++, k++) {
++                              struct idpf_buf_queue *q;
++
+                               q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+                               q->tail = idpf_get_reg_addr(adapter,
+                                                           reg_vals[k]);
+@@ -1449,19 +1452,19 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
+                       qi[k].model =
+                               cpu_to_le16(vport->txq_model);
+                       qi[k].type =
+-                              cpu_to_le32(tx_qgrp->txqs[j]->q_type);
++                              cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX);
+                       qi[k].ring_len =
+                               cpu_to_le16(tx_qgrp->txqs[j]->desc_count);
+                       qi[k].dma_ring_addr =
+                               cpu_to_le64(tx_qgrp->txqs[j]->dma);
+                       if (idpf_is_queue_model_split(vport->txq_model)) {
+-                              struct idpf_queue *q = tx_qgrp->txqs[j];
++                              struct idpf_tx_queue *q = tx_qgrp->txqs[j];
+                               qi[k].tx_compl_queue_id =
+                                       cpu_to_le16(tx_qgrp->complq->q_id);
+                               qi[k].relative_queue_id = cpu_to_le16(j);
+-                              if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags))
++                              if (idpf_queue_has(FLOW_SCH_EN, q))
+                                       qi[k].sched_mode =
+                                       cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW);
+                               else
+@@ -1478,11 +1481,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
+               qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
+               qi[k].model = cpu_to_le16(vport->txq_model);
+-              qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
++              qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION);
+               qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count);
+               qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma);
+-              if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags))
++              if (idpf_queue_has(FLOW_SCH_EN, tx_qgrp->complq))
+                       sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+               else
+                       sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE;
+@@ -1567,17 +1570,18 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
+                       goto setup_rxqs;
+               for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
+-                      struct idpf_queue *bufq =
++                      struct idpf_buf_queue *bufq =
+                               &rx_qgrp->splitq.bufq_sets[j].bufq;
+                       qi[k].queue_id = cpu_to_le32(bufq->q_id);
+                       qi[k].model = cpu_to_le16(vport->rxq_model);
+-                      qi[k].type = cpu_to_le32(bufq->q_type);
++                      qi[k].type =
++                              cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER);
+                       qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M);
+                       qi[k].ring_len = cpu_to_le16(bufq->desc_count);
+                       qi[k].dma_ring_addr = cpu_to_le64(bufq->dma);
+                       qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size);
+-                      qi[k].buffer_notif_stride = bufq->rx_buf_stride;
++                      qi[k].buffer_notif_stride = IDPF_RX_BUF_STRIDE;
+                       qi[k].rx_buffer_low_watermark =
+                               cpu_to_le16(bufq->rx_buffer_low_watermark);
+                       if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW))
+@@ -1591,7 +1595,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
+                       num_rxq = rx_qgrp->singleq.num_rxq;
+               for (j = 0; j < num_rxq; j++, k++) {
+-                      struct idpf_queue *rxq;
++                      struct idpf_rx_queue *rxq;
+                       if (!idpf_is_queue_model_split(vport->rxq_model)) {
+                               rxq = rx_qgrp->singleq.rxqs[j];
+@@ -1599,11 +1603,11 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
+                       }
+                       rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+                       qi[k].rx_bufq1_id =
+-                        cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id);
++                        cpu_to_le16(rxq->bufq_sets[0].bufq.q_id);
+                       if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) {
+                               qi[k].bufq2_ena = IDPF_BUFQ2_ENA;
+                               qi[k].rx_bufq2_id =
+-                                cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id);
++                                cpu_to_le16(rxq->bufq_sets[1].bufq.q_id);
+                       }
+                       qi[k].rx_buffer_low_watermark =
+                               cpu_to_le16(rxq->rx_buffer_low_watermark);
+@@ -1611,7 +1615,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
+                               qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC);
+ common_qi_fields:
+-                      if (rxq->rx_hsplit_en) {
++                      if (idpf_queue_has(HSPLIT_EN, rxq)) {
+                               qi[k].qflags |=
+                                       cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT);
+                               qi[k].hdr_buffer_size =
+@@ -1619,7 +1623,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
+                       }
+                       qi[k].queue_id = cpu_to_le32(rxq->q_id);
+                       qi[k].model = cpu_to_le16(vport->rxq_model);
+-                      qi[k].type = cpu_to_le32(rxq->q_type);
++                      qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
+                       qi[k].ring_len = cpu_to_le16(rxq->desc_count);
+                       qi[k].dma_ring_addr = cpu_to_le64(rxq->dma);
+                       qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size);
+@@ -1706,7 +1710,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
+               struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+               for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+-                      qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
++                      qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX);
+                       qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+                       qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+               }
+@@ -1720,7 +1724,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
+       for (i = 0; i < vport->num_txq_grp; i++, k++) {
+               struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+-              qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
++              qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION);
+               qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
+               qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+       }
+@@ -1741,12 +1745,12 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
+                               qc[k].start_queue_id =
+                               cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id);
+                               qc[k].type =
+-                              cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type);
++                              cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
+                       } else {
+                               qc[k].start_queue_id =
+                               cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id);
+                               qc[k].type =
+-                              cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type);
++                              cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
+                       }
+                       qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+               }
+@@ -1761,10 +1765,11 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
+               struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+               for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
+-                      struct idpf_queue *q;
++                      const struct idpf_buf_queue *q;
+                       q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+-                      qc[k].type = cpu_to_le32(q->q_type);
++                      qc[k].type =
++                              cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER);
+                       qc[k].start_queue_id = cpu_to_le32(q->q_id);
+                       qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+               }
+@@ -1849,7 +1854,8 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
+               struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+               for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+-                      vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
++                      vqv[k].queue_type =
++                              cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX);
+                       vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+                       if (idpf_is_queue_model_split(vport->txq_model)) {
+@@ -1879,14 +1885,15 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
+                       num_rxq = rx_qgrp->singleq.num_rxq;
+               for (j = 0; j < num_rxq; j++, k++) {
+-                      struct idpf_queue *rxq;
++                      struct idpf_rx_queue *rxq;
+                       if (idpf_is_queue_model_split(vport->rxq_model))
+                               rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+                       else
+                               rxq = rx_qgrp->singleq.rxqs[j];
+-                      vqv[k].queue_type = cpu_to_le32(rxq->q_type);
++                      vqv[k].queue_type =
++                              cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
+                       vqv[k].queue_id = cpu_to_le32(rxq->q_id);
+                       vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx);
+                       vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx);
+@@ -1975,7 +1982,7 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport)
+        * queues virtchnl message is sent
+        */
+       for (i = 0; i < vport->num_txq; i++)
+-              set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
++              idpf_queue_set(POLL_MODE, vport->txqs[i]);
+       /* schedule the napi to receive all the marker packets */
+       local_bh_disable();
+@@ -3242,7 +3249,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
+                                      int num_qids,
+                                      u32 q_type)
+ {
+-      struct idpf_queue *q;
+       int i, j, k = 0;
+       switch (q_type) {
+@@ -3250,11 +3256,8 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
+               for (i = 0; i < vport->num_txq_grp; i++) {
+                       struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+-                      for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) {
++                      for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++)
+                               tx_qgrp->txqs[j]->q_id = qids[k];
+-                              tx_qgrp->txqs[j]->q_type =
+-                                      VIRTCHNL2_QUEUE_TYPE_TX;
+-                      }
+               }
+               break;
+       case VIRTCHNL2_QUEUE_TYPE_RX:
+@@ -3268,12 +3271,13 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
+                               num_rxq = rx_qgrp->singleq.num_rxq;
+                       for (j = 0; j < num_rxq && k < num_qids; j++, k++) {
++                              struct idpf_rx_queue *q;
++
+                               if (idpf_is_queue_model_split(vport->rxq_model))
+                                       q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+                               else
+                                       q = rx_qgrp->singleq.rxqs[j];
+                               q->q_id = qids[k];
+-                              q->q_type = VIRTCHNL2_QUEUE_TYPE_RX;
+                       }
+               }
+               break;
+@@ -3282,8 +3286,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
+                       struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+                       tx_qgrp->complq->q_id = qids[k];
+-                      tx_qgrp->complq->q_type =
+-                              VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+               }
+               break;
+       case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+@@ -3292,9 +3294,10 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
+                       u8 num_bufqs = vport->num_bufqs_per_qgrp;
+                       for (j = 0; j < num_bufqs && k < num_qids; j++, k++) {
++                              struct idpf_buf_queue *q;
++
+                               q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+                               q->q_id = qids[k];
+-                              q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+                       }
+               }
+               break;
+-- 
+2.43.0
+
diff --git a/queue-6.10/idpf-stop-using-macros-for-accessing-queue-descripto.patch b/queue-6.10/idpf-stop-using-macros-for-accessing-queue-descripto.patch
new file mode 100644 (file)
index 0000000..5614cb6
--- /dev/null
@@ -0,0 +1,374 @@
+From f243a142c50fb6f93429320be341c8c8b3043ed8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2024 15:53:37 +0200
+Subject: idpf: stop using macros for accessing queue descriptors
+
+From: Alexander Lobakin <aleksander.lobakin@intel.com>
+
+[ Upstream commit 66c27e3b19d5aae58d7f0145113de61d6fba5e09 ]
+
+In C, we have structures and unions.
+Casting `void *` via macros is not only error-prone, but also looks
+confusing and awful in general.
+In preparation for splitting the queue structs, replace it with a
+union and direct array dereferences.
+
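+As a rough sketch of the pattern this describes (example names only, not the
+driver's real types; the removed IDPF_*_DESC() macros appear in the
+idpf_txrx.h hunk below):
+
+	struct ex_tx_desc { unsigned long long qw; };
+
+	/* before: every access casts the untyped ring through a macro */
+	#define EX_TX_DESC(q, i) \
+		(&(((struct ex_tx_desc *)((q)->desc_ring))[(i)]))
+
+	/* after: a union of typed pointers aliasing the same ring memory */
+	struct ex_queue {
+		union {
+			struct ex_tx_desc *tx;
+			void *desc_ring;	/* kept for alloc/free paths */
+		};
+	};
+
+	/* direct, type-checked dereference, no cast needed */
+	static inline struct ex_tx_desc *ex_tx_desc(struct ex_queue *q, int i)
+	{
+		return &q->tx[i];
+	}
+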
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Mina Almasry <almasrymina@google.com>
+Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: e4b398dd82f5 ("idpf: fix netdev Tx queue stop/wake")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/idpf/idpf.h        |  1 -
+ .../net/ethernet/intel/idpf/idpf_lan_txrx.h   |  2 +
+ .../ethernet/intel/idpf/idpf_singleq_txrx.c   | 20 ++++----
+ drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 32 ++++++-------
+ drivers/net/ethernet/intel/idpf/idpf_txrx.h   | 47 ++++++++++---------
+ 5 files changed, 52 insertions(+), 50 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
+index e7a0365382465..0b26dd9b8a512 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf.h
++++ b/drivers/net/ethernet/intel/idpf/idpf.h
+@@ -20,7 +20,6 @@ struct idpf_vport_max_q;
+ #include <linux/dim.h>
+ #include "virtchnl2.h"
+-#include "idpf_lan_txrx.h"
+ #include "idpf_txrx.h"
+ #include "idpf_controlq.h"
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h
+index a5752dcab8887..8c7f8ef8f1a15 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h
++++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h
+@@ -4,6 +4,8 @@
+ #ifndef _IDPF_LAN_TXRX_H_
+ #define _IDPF_LAN_TXRX_H_
++#include <linux/bits.h>
++
+ enum idpf_rss_hash {
+       IDPF_HASH_INVALID                       = 0,
+       /* Values 1 - 28 are reserved for future use */
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+index 27b93592c4bab..b17d88e150006 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+@@ -205,7 +205,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+       data_len = skb->data_len;
+       size = skb_headlen(skb);
+-      tx_desc = IDPF_BASE_TX_DESC(tx_q, i);
++      tx_desc = &tx_q->base_tx[i];
+       dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
+@@ -239,7 +239,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+                       i++;
+                       if (i == tx_q->desc_count) {
+-                              tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
++                              tx_desc = &tx_q->base_tx[0];
+                               i = 0;
+                       }
+@@ -259,7 +259,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+               i++;
+               if (i == tx_q->desc_count) {
+-                      tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
++                      tx_desc = &tx_q->base_tx[0];
+                       i = 0;
+               }
+@@ -307,7 +307,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
+       memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf));
+       txq->tx_buf[ntu].ctx_entry = true;
+-      ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu);
++      ctx_desc = &txq->base_ctx[ntu];
+       IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
+       txq->next_to_use = ntu;
+@@ -455,7 +455,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
+       struct netdev_queue *nq;
+       bool dont_wake;
+-      tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc);
++      tx_desc = &tx_q->base_tx[ntc];
+       tx_buf = &tx_q->tx_buf[ntc];
+       ntc -= tx_q->desc_count;
+@@ -517,7 +517,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
+                       if (unlikely(!ntc)) {
+                               ntc -= tx_q->desc_count;
+                               tx_buf = tx_q->tx_buf;
+-                              tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
++                              tx_desc = &tx_q->base_tx[0];
+                       }
+                       /* unmap any remaining paged data */
+@@ -540,7 +540,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
+               if (unlikely(!ntc)) {
+                       ntc -= tx_q->desc_count;
+                       tx_buf = tx_q->tx_buf;
+-                      tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
++                      tx_desc = &tx_q->base_tx[0];
+               }
+       } while (likely(budget));
+@@ -895,7 +895,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+       if (!cleaned_count)
+               return false;
+-      desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta);
++      desc = &rx_q->single_buf[nta];
+       buf = &rx_q->rx_buf.buf[nta];
+       do {
+@@ -915,7 +915,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+               buf++;
+               nta++;
+               if (unlikely(nta == rx_q->desc_count)) {
+-                      desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0);
++                      desc = &rx_q->single_buf[0];
+                       buf = rx_q->rx_buf.buf;
+                       nta = 0;
+               }
+@@ -1016,7 +1016,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
+               struct idpf_rx_buf *rx_buf;
+               /* get the Rx desc from Rx queue based on 'next_to_clean' */
+-              rx_desc = IDPF_RX_DESC(rx_q, ntc);
++              rx_desc = &rx_q->rx[ntc];
+               /* status_error_ptype_len will always be zero for unused
+                * descriptors because it's cleared in cleanup, and overlaps
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+index 20ca04320d4bd..948b485da539c 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+@@ -531,7 +531,7 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id)
+       struct idpf_rx_buf *buf;
+       dma_addr_t addr;
+-      splitq_rx_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, nta);
++      splitq_rx_desc = &bufq->split_buf[nta];
+       buf = &bufq->rx_buf.buf[buf_id];
+       if (bufq->rx_hsplit_en) {
+@@ -1584,7 +1584,7 @@ do {                                                             \
+       if (unlikely(!(ntc))) {                                 \
+               ntc -= (txq)->desc_count;                       \
+               buf = (txq)->tx_buf;                            \
+-              desc = IDPF_FLEX_TX_DESC(txq, 0);               \
++              desc = &(txq)->flex_tx[0];                      \
+       } else {                                                \
+               (buf)++;                                        \
+               (desc)++;                                       \
+@@ -1617,8 +1617,8 @@ static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end,
+       s16 ntc = tx_q->next_to_clean;
+       struct idpf_tx_buf *tx_buf;
+-      tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc);
+-      next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end);
++      tx_desc = &tx_q->flex_tx[ntc];
++      next_pending_desc = &tx_q->flex_tx[end];
+       tx_buf = &tx_q->tx_buf[ntc];
+       ntc -= tx_q->desc_count;
+@@ -1814,7 +1814,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+       int i;
+       complq_budget = vport->compln_clean_budget;
+-      tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, ntc);
++      tx_desc = &complq->comp[ntc];
+       ntc -= complq->desc_count;
+       do {
+@@ -1879,7 +1879,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+               ntc++;
+               if (unlikely(!ntc)) {
+                       ntc -= complq->desc_count;
+-                      tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, 0);
++                      tx_desc = &complq->comp[0];
+                       change_bit(__IDPF_Q_GEN_CHK, complq->flags);
+               }
+@@ -2143,7 +2143,7 @@ void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
+                * used one additional descriptor for a context
+                * descriptor. Reset that here.
+                */
+-              tx_desc = IDPF_FLEX_TX_DESC(txq, idx);
++              tx_desc = &txq->flex_tx[idx];
+               memset(tx_desc, 0, sizeof(struct idpf_flex_tx_ctx_desc));
+               if (idx == 0)
+                       idx = txq->desc_count;
+@@ -2202,7 +2202,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
+       data_len = skb->data_len;
+       size = skb_headlen(skb);
+-      tx_desc = IDPF_FLEX_TX_DESC(tx_q, i);
++      tx_desc = &tx_q->flex_tx[i];
+       dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
+@@ -2275,7 +2275,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
+                       i++;
+                       if (i == tx_q->desc_count) {
+-                              tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0);
++                              tx_desc = &tx_q->flex_tx[0];
+                               i = 0;
+                               tx_q->compl_tag_cur_gen =
+                                       IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q);
+@@ -2320,7 +2320,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
+               i++;
+               if (i == tx_q->desc_count) {
+-                      tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0);
++                      tx_desc = &tx_q->flex_tx[0];
+                       i = 0;
+                       tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q);
+               }
+@@ -2553,7 +2553,7 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq)
+       txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+       /* grab the next descriptor */
+-      desc = IDPF_FLEX_TX_CTX_DESC(txq, i);
++      desc = &txq->flex_ctx[i];
+       txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i);
+       return desc;
+@@ -3128,7 +3128,6 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+               struct idpf_sw_queue *refillq = NULL;
+               struct idpf_rxq_set *rxq_set = NULL;
+               struct idpf_rx_buf *rx_buf = NULL;
+-              union virtchnl2_rx_desc *desc;
+               unsigned int pkt_len = 0;
+               unsigned int hdr_len = 0;
+               u16 gen_id, buf_id = 0;
+@@ -3138,8 +3137,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+               u8 rxdid;
+               /* get the Rx desc from Rx queue based on 'next_to_clean' */
+-              desc = IDPF_RX_DESC(rxq, ntc);
+-              rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)desc;
++              rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb;
+               /* This memory barrier is needed to keep us from reading
+                * any other fields out of the rx_desc
+@@ -3320,11 +3318,11 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq,
+       int cleaned = 0;
+       u16 gen;
+-      buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, bufq_nta);
++      buf_desc = &bufq->split_buf[bufq_nta];
+       /* make sure we stop at ring wrap in the unlikely case ring is full */
+       while (likely(cleaned < refillq->desc_count)) {
+-              u16 refill_desc = IDPF_SPLITQ_RX_BI_DESC(refillq, ntc);
++              u16 refill_desc = refillq->ring[ntc];
+               bool failure;
+               gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc);
+@@ -3342,7 +3340,7 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq,
+               }
+               if (unlikely(++bufq_nta == bufq->desc_count)) {
+-                      buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, 0);
++                      buf_desc = &bufq->split_buf[0];
+                       bufq_nta = 0;
+               } else {
+                       buf_desc++;
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+index 551391e204647..6dce14483215f 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+@@ -8,6 +8,7 @@
+ #include <net/tcp.h>
+ #include <net/netdev_queues.h>
++#include "idpf_lan_txrx.h"
+ #include "virtchnl2_lan_desc.h"
+ #define IDPF_LARGE_MAX_Q                      256
+@@ -117,24 +118,6 @@ do {                                                              \
+ #define IDPF_RXD_EOF_SPLITQ           VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M
+ #define IDPF_RXD_EOF_SINGLEQ          VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M
+-#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i)      \
+-      (&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i]))
+-#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i)       \
+-      (&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i]))
+-#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i])
+-
+-#define IDPF_BASE_TX_DESC(txq, i)     \
+-      (&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i]))
+-#define IDPF_BASE_TX_CTX_DESC(txq, i) \
+-      (&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i]))
+-#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i)   \
+-      (&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i]))
+-
+-#define IDPF_FLEX_TX_DESC(txq, i) \
+-      (&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i]))
+-#define IDPF_FLEX_TX_CTX_DESC(txq, i) \
+-      (&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i]))
+-
+ #define IDPF_DESC_UNUSED(txq)     \
+       ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \
+       (txq)->next_to_clean - (txq)->next_to_use - 1)
+@@ -317,8 +300,6 @@ struct idpf_rx_extracted {
+ #define IDPF_RX_DMA_ATTR \
+       (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+-#define IDPF_RX_DESC(rxq, i)  \
+-      (&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i]))
+ struct idpf_rx_buf {
+       struct page *page;
+@@ -655,7 +636,15 @@ union idpf_queue_stats {
+  * @q_vector: Backreference to associated vector
+  * @size: Length of descriptor ring in bytes
+  * @dma: Physical address of ring
+- * @desc_ring: Descriptor ring memory
++ * @rx: universal receive descriptor array
++ * @single_buf: Rx buffer descriptor array in singleq
++ * @split_buf: Rx buffer descriptor array in splitq
++ * @base_tx: basic Tx descriptor array
++ * @base_ctx: basic Tx context descriptor array
++ * @flex_tx: flex Tx descriptor array
++ * @flex_ctx: flex Tx context descriptor array
++ * @comp: completion descriptor array
++ * @desc_ring: virtual descriptor ring address
+  * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
+  * @tx_min_pkt_len: Min supported packet length
+  * @num_completions: Only relevant for TX completion queue. It tracks the
+@@ -733,7 +722,21 @@ struct idpf_queue {
+       struct idpf_q_vector *q_vector;
+       unsigned int size;
+       dma_addr_t dma;
+-      void *desc_ring;
++      union {
++              union virtchnl2_rx_desc *rx;
++
++              struct virtchnl2_singleq_rx_buf_desc *single_buf;
++              struct virtchnl2_splitq_rx_buf_desc *split_buf;
++
++              struct idpf_base_tx_desc *base_tx;
++              struct idpf_base_tx_ctx_desc *base_ctx;
++              union idpf_tx_flex_desc *flex_tx;
++              struct idpf_flex_tx_ctx_desc *flex_ctx;
++
++              struct idpf_splitq_tx_compl_desc *comp;
++
++              void *desc_ring;
++      };
+       u16 tx_max_bufs;
+       u8 tx_min_pkt_len;
+-- 
+2.43.0
+
diff --git a/queue-6.10/kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch b/queue-6.10/kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch
new file mode 100644 (file)
index 0000000..c146abe
--- /dev/null
@@ -0,0 +1,55 @@
+From 4f6130a16e1a443d0d3580cc59467478cdf9e865 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 May 2024 14:35:02 +0800
+Subject: KVM: x86: Drop unused check_apicv_inhibit_reasons() callback
+ definition
+
+From: Hou Wenlong <houwenlong.hwl@antgroup.com>
+
+[ Upstream commit c7d4c5f01961cdc4f1d29525e2b0d71f62c5bc33 ]
+
+The check_apicv_inhibit_reasons() callback implementation was dropped in
+the commit b3f257a84696 ("KVM: x86: Track required APICv inhibits with
+variable, not callback"), but the definition removal was missed in the
+final version patch (it was removed in the v4). Therefore, it should be
+dropped, and the vmx_check_apicv_inhibit_reasons() function declaration
+should also be removed.
+
+Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
+Reviewed-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Link: https://lore.kernel.org/r/54abd1d0ccaba4d532f81df61259b9c0e021fbde.1714977229.git.houwenlong.hwl@antgroup.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Stable-dep-of: 73b42dc69be8 ("KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm_host.h | 1 -
+ arch/x86/kvm/vmx/x86_ops.h      | 1 -
+ 2 files changed, 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index d0274b3be2c40..a571f89db6977 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1708,7 +1708,6 @@ struct kvm_x86_ops {
+       void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+       void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+       void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+-      bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
+       const unsigned long required_apicv_inhibits;
+       bool allow_apicv_in_x2apic_without_x2apic_virtualization;
+       void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
+diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
+index d404227c164d6..e46aba18600e7 100644
+--- a/arch/x86/kvm/vmx/x86_ops.h
++++ b/arch/x86/kvm/vmx/x86_ops.h
+@@ -46,7 +46,6 @@ bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu);
+ void vmx_migrate_timers(struct kvm_vcpu *vcpu);
+ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
+ void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu);
+-bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason);
+ void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
+ void vmx_hwapic_isr_update(int max_isr);
+ int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu);
+-- 
+2.43.0
+
diff --git a/queue-6.10/kvm-x86-make-x2apic-id-100-readonly.patch b/queue-6.10/kvm-x86-make-x2apic-id-100-readonly.patch
new file mode 100644 (file)
index 0000000..a7c8d78
--- /dev/null
@@ -0,0 +1,128 @@
+From 4b84e6390a908a3653b1ae74c37856ea4da0a5c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Aug 2024 13:29:40 -0700
+Subject: KVM: x86: Make x2APIC ID 100% readonly
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 ]
+
+Ignore the userspace provided x2APIC ID when fixing up APIC state for
+KVM_SET_LAPIC, i.e. make the x2APIC ID fully readonly in KVM.  Commit
+a92e2543d6a8 ("KVM: x86: use hardware-compatible format for APIC ID
+register"), which added the fixup, didn't intend to allow userspace to
+modify the x2APIC ID.  In fact, that commit is when KVM first started
+treating the x2APIC ID as readonly, apparently to fix some race:
+
+ static inline u32 kvm_apic_id(struct kvm_lapic *apic)
+ {
+-       return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
++       /* To avoid a race between apic_base and following APIC_ID update when
++        * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
++        */
++       if (apic_x2apic_mode(apic))
++               return apic->vcpu->vcpu_id;
++
++       return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
+ }
+
+Furthermore, KVM doesn't support delivering interrupts to vCPUs with a
+modified x2APIC ID, but KVM *does* return the modified value on a guest
+RDMSR and for KVM_GET_LAPIC.  I.e. no remotely sane setup can actually
+work with a modified x2APIC ID.
+
+Making the x2APIC ID fully readonly fixes a WARN in KVM's optimized map
+calculation, which expects the LDR to align with the x2APIC ID.
+
+  WARNING: CPU: 2 PID: 958 at arch/x86/kvm/lapic.c:331 kvm_recalculate_apic_map+0x609/0xa00 [kvm]
+  CPU: 2 PID: 958 Comm: recalc_apic_map Not tainted 6.4.0-rc3-vanilla+ #35
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.2-1-1 04/01/2014
+  RIP: 0010:kvm_recalculate_apic_map+0x609/0xa00 [kvm]
+  Call Trace:
+   <TASK>
+   kvm_apic_set_state+0x1cf/0x5b0 [kvm]
+   kvm_arch_vcpu_ioctl+0x1806/0x2100 [kvm]
+   kvm_vcpu_ioctl+0x663/0x8a0 [kvm]
+   __x64_sys_ioctl+0xb8/0xf0
+   do_syscall_64+0x56/0x80
+   entry_SYSCALL_64_after_hwframe+0x46/0xb0
+  RIP: 0033:0x7fade8b9dd6f
+
+Unfortunately, the WARN can still trigger for other CPUs than the current
+one by racing against KVM_SET_LAPIC, so remove it completely.
+
+Reported-by: Michal Luczaj <mhal@rbox.co>
+Closes: https://lore.kernel.org/all/814baa0c-1eaa-4503-129f-059917365e80@rbox.co
+Reported-by: Haoyu Wu <haoyuwu254@gmail.com>
+Closes: https://lore.kernel.org/all/20240126161633.62529-1-haoyuwu254@gmail.com
+Reported-by: syzbot+545f1326f405db4e1c3e@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/000000000000c2a6b9061cbca3c3@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-ID: <20240802202941.344889-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 73b42dc69be8 ("KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/lapic.c | 22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
+index f1f54218b0603..9392d6e3d8e37 100644
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
+        * reversing the LDR calculation to get cluster of APICs, i.e. no
+        * additional work is required.
+        */
+-      if (apic_x2apic_mode(apic)) {
+-              WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
++      if (apic_x2apic_mode(apic))
+               return;
+-      }
+       if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
+                                                       &cluster, &mask))) {
+@@ -2987,18 +2985,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
+               struct kvm_lapic_state *s, bool set)
+ {
+       if (apic_x2apic_mode(vcpu->arch.apic)) {
++              u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
+               u32 *id = (u32 *)(s->regs + APIC_ID);
+               u32 *ldr = (u32 *)(s->regs + APIC_LDR);
+               u64 icr;
+               if (vcpu->kvm->arch.x2apic_format) {
+-                      if (*id != vcpu->vcpu_id)
++                      if (*id != x2apic_id)
+                               return -EINVAL;
+               } else {
++                      /*
++                       * Ignore the userspace value when setting APIC state.
++                       * KVM's model is that the x2APIC ID is readonly, e.g.
++                       * KVM only supports delivering interrupts to KVM's
++                       * version of the x2APIC ID.  However, for backwards
++                       * compatibility, don't reject attempts to set a
++                       * mismatched ID for userspace that hasn't opted into
++                       * x2apic_format.
++                       */
+                       if (set)
+-                              *id >>= 24;
++                              *id = x2apic_id;
+                       else
+-                              *id <<= 24;
++                              *id = x2apic_id << 24;
+               }
+               /*
+@@ -3007,7 +3015,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
+                * split to ICR+ICR2 in userspace for backwards compatibility.
+                */
+               if (set) {
+-                      *ldr = kvm_apic_calc_x2apic_ldr(*id);
++                      *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
+                       icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+                             (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+-- 
+2.43.0
+
diff --git a/queue-6.10/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch b/queue-6.10/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch
new file mode 100644 (file)
index 0000000..882b166
--- /dev/null
@@ -0,0 +1,160 @@
+From 405f1ac9c78994085351ac64ccab5331d42128af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Jul 2024 16:51:00 -0700
+Subject: KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 73b42dc69be8564d4951a14d00f827929fe5ef79 ]
+
+Re-introduce the "split" x2APIC ICR storage that KVM used prior to Intel's
+IPI virtualization support, but only for AMD.  While not stated anywhere
+in the APM, despite stating the ICR is a single 64-bit register, AMD CPUs
+store the 64-bit ICR as two separate 32-bit values in ICR and ICR2.  When
+IPI virtualization (IPIv on Intel, all AVIC flavors on AMD) is enabled,
+KVM needs to match CPU behavior as some ICR writes will be handled by
+the CPU, not by KVM.
+
+Add a kvm_x86_ops knob to control the underlying format used by the CPU to
+store the x2APIC ICR, and tune it to AMD vs. Intel regardless of whether
+or not x2AVIC is enabled.  If KVM is handling all ICR writes, the storage
+format for x2APIC mode doesn't matter, and having the behavior follow AMD
+versus Intel will provide better test coverage and ease debugging.
+
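+As a rough sketch of the two storage formats described above (illustrative
+names only; the real logic is kvm_x2apic_icr_write()/kvm_x2apic_icr_read()
+in the lapic.c hunk below):
+
+	#include <stdint.h>
+
+	/* split format (AMD): low/high halves in two 32-bit registers */
+	static void ex_icr_store_split(uint32_t *icr, uint32_t *icr2, uint64_t data)
+	{
+		*icr  = (uint32_t)data;
+		*icr2 = (uint32_t)(data >> 32);
+	}
+
+	static uint64_t ex_icr_load_split(uint32_t icr, uint32_t icr2)
+	{
+		return (uint64_t)icr | ((uint64_t)icr2 << 32);
+	}
+
+	/* combined format (Intel): the full value lives in one 64-bit register */
+	static void ex_icr_store_64(uint64_t *icr64, uint64_t data)
+	{
+		*icr64 = data;
+	}
+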
+Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
+Cc: stable@vger.kernel.org
+Cc: Maxim Levitsky <mlevitsk@redhat.com>
+Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Link: https://lore.kernel.org/r/20240719235107.3023592-4-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm_host.h |  2 ++
+ arch/x86/kvm/lapic.c            | 42 +++++++++++++++++++++++----------
+ arch/x86/kvm/svm/svm.c          |  2 ++
+ arch/x86/kvm/vmx/main.c         |  2 ++
+ 4 files changed, 36 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index a571f89db6977..e18399d08fb17 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1708,6 +1708,8 @@ struct kvm_x86_ops {
+       void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+       void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+       void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
++
++      const bool x2apic_icr_is_split;
+       const unsigned long required_apicv_inhibits;
+       bool allow_apicv_in_x2apic_without_x2apic_virtualization;
+       void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
+diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
+index 9392d6e3d8e37..523d02c50562f 100644
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2469,11 +2469,25 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+       data &= ~APIC_ICR_BUSY;
+       kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+-      kvm_lapic_set_reg64(apic, APIC_ICR, data);
++      if (kvm_x86_ops.x2apic_icr_is_split) {
++              kvm_lapic_set_reg(apic, APIC_ICR, data);
++              kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
++      } else {
++              kvm_lapic_set_reg64(apic, APIC_ICR, data);
++      }
+       trace_kvm_apic_write(APIC_ICR, data);
+       return 0;
+ }
++static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
++{
++      if (kvm_x86_ops.x2apic_icr_is_split)
++              return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
++                     (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
++
++      return kvm_lapic_get_reg64(apic, APIC_ICR);
++}
++
+ /* emulate APIC access in a trap manner */
+ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+ {
+@@ -2491,7 +2505,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+        * maybe-unecessary write, and both are in the noise anyways.
+        */
+       if (apic_x2apic_mode(apic) && offset == APIC_ICR)
+-              WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)));
++              WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
+       else
+               kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
+ }
+@@ -3011,18 +3025,22 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
+               /*
+                * In x2APIC mode, the LDR is fixed and based on the id.  And
+-               * ICR is internally a single 64-bit register, but needs to be
+-               * split to ICR+ICR2 in userspace for backwards compatibility.
++               * if the ICR is _not_ split, ICR is internally a single 64-bit
++               * register, but needs to be split to ICR+ICR2 in userspace for
++               * backwards compatibility.
+                */
+-              if (set) {
++              if (set)
+                       *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
+-                      icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+-                            (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+-                      __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+-              } else {
+-                      icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+-                      __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
++              if (!kvm_x86_ops.x2apic_icr_is_split) {
++                      if (set) {
++                              icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
++                                    (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
++                              __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
++                      } else {
++                              icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
++                              __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
++                      }
+               }
+       }
+@@ -3219,7 +3237,7 @@ static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
+       u32 low;
+       if (reg == APIC_ICR) {
+-              *data = kvm_lapic_get_reg64(apic, APIC_ICR);
++              *data = kvm_x2apic_icr_read(apic);
+               return 0;
+       }
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 0357f7af55966..6d5da700268a5 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -5051,6 +5051,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+       .enable_nmi_window = svm_enable_nmi_window,
+       .enable_irq_window = svm_enable_irq_window,
+       .update_cr8_intercept = svm_update_cr8_intercept,
++
++      .x2apic_icr_is_split = true,
+       .set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
+       .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
+       .apicv_post_state_restore = avic_apicv_post_state_restore,
+diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
+index 547fca3709feb..35c2c004dacd2 100644
+--- a/arch/x86/kvm/vmx/main.c
++++ b/arch/x86/kvm/vmx/main.c
+@@ -89,6 +89,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
+       .enable_nmi_window = vmx_enable_nmi_window,
+       .enable_irq_window = vmx_enable_irq_window,
+       .update_cr8_intercept = vmx_update_cr8_intercept,
++
++      .x2apic_icr_is_split = false,
+       .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
+       .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
+       .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
+-- 
+2.43.0
+
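The storage difference selected by the new x2apic_icr_is_split knob can be modelled in isolation: split storage keeps the 64-bit ICR as two 32-bit halves (ICR + ICR2), the non-split form as one 64-bit value. A self-contained sketch under that assumption, using a toy struct rather than the real virtual-APIC page:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical backing store standing in for the virtual APIC page. */
struct apic_model {
    bool icr_is_split;       /* AMD-like: ICR kept as two 32-bit halves */
    uint32_t icr_lo, icr_hi; /* used when split */
    uint64_t icr64;          /* used when not split (Intel-like) */
};

static void icr_write(struct apic_model *a, uint64_t data)
{
    if (a->icr_is_split) {
        a->icr_lo = (uint32_t)data;
        a->icr_hi = (uint32_t)(data >> 32);
    } else {
        a->icr64 = data;
    }
}

static uint64_t icr_read(const struct apic_model *a)
{
    if (a->icr_is_split)
        return (uint64_t)a->icr_lo | ((uint64_t)a->icr_hi << 32);
    return a->icr64;
}

int main(void)
{
    struct apic_model amd = { .icr_is_split = true };
    struct apic_model intel = { .icr_is_split = false };
    uint64_t val = 0x00000003000040fdULL;

    icr_write(&amd, val);
    icr_write(&intel, val);
    assert(icr_read(&amd) == val && icr_read(&intel) == val);
    printf("both layouts round-trip 0x%016llx\n", (unsigned long long)val);
    return 0;
}
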
diff --git a/queue-6.10/lsm-infrastructure-management-of-the-sock-security.patch b/queue-6.10/lsm-infrastructure-management-of-the-sock-security.patch
new file mode 100644
index 0000000..5d1d5b5
--- /dev/null
@@ -0,0 +1,933 @@
+From d24e5d45e9ad144b89c1f1442d35ada883c2839a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2024 14:32:25 -0700
+Subject: lsm: infrastructure management of the sock security
+
+From: Casey Schaufler <casey@schaufler-ca.com>
+
+[ Upstream commit 2aff9d20d50ac45dd13a013ef5231f4fb8912356 ]
+
+Move management of the sock->sk_security blob out
+of the individual security modules and into the security
+infrastructure. Instead of allocating the blobs from within
+the modules the modules tell the infrastructure how much
+space is required, and the space is allocated there.
+
+Acked-by: Paul Moore <paul@paul-moore.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: John Johansen <john.johansen@canonical.com>
+Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
+[PM: subject tweak]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Stable-dep-of: 63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/lsm_hooks.h         |  1 +
+ security/apparmor/include/net.h   |  3 +-
+ security/apparmor/lsm.c           | 17 +------
+ security/apparmor/net.c           |  2 +-
+ security/security.c               | 36 +++++++++++++-
+ security/selinux/hooks.c          | 80 ++++++++++++++-----------------
+ security/selinux/include/objsec.h |  5 ++
+ security/selinux/netlabel.c       | 23 ++++-----
+ security/smack/smack.h            |  5 ++
+ security/smack/smack_lsm.c        | 70 +++++++++++++--------------
+ security/smack/smack_netfilter.c  |  4 +-
+ 11 files changed, 133 insertions(+), 113 deletions(-)
+
+diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
+index a2ade0ffe9e7d..efd4a0655159c 100644
+--- a/include/linux/lsm_hooks.h
++++ b/include/linux/lsm_hooks.h
+@@ -73,6 +73,7 @@ struct lsm_blob_sizes {
+       int     lbs_cred;
+       int     lbs_file;
+       int     lbs_inode;
++      int     lbs_sock;
+       int     lbs_superblock;
+       int     lbs_ipc;
+       int     lbs_msg_msg;
+diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
+index 67bf888c3bd6b..c42ed8a73f1ce 100644
+--- a/security/apparmor/include/net.h
++++ b/security/apparmor/include/net.h
+@@ -51,10 +51,9 @@ struct aa_sk_ctx {
+       struct aa_label *peer;
+ };
+-#define SK_CTX(X) ((X)->sk_security)
+ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk)
+ {
+-      return sk->sk_security;
++      return sk->sk_security + apparmor_blob_sizes.lbs_sock;
+ }
+ #define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P)                                 \
+diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
+index 4373b914acf20..b8366fca98d23 100644
+--- a/security/apparmor/lsm.c
++++ b/security/apparmor/lsm.c
+@@ -1057,27 +1057,12 @@ static int apparmor_userns_create(const struct cred *cred)
+       return error;
+ }
+-static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags)
+-{
+-      struct aa_sk_ctx *ctx;
+-
+-      ctx = kzalloc(sizeof(*ctx), flags);
+-      if (!ctx)
+-              return -ENOMEM;
+-
+-      sk->sk_security = ctx;
+-
+-      return 0;
+-}
+-
+ static void apparmor_sk_free_security(struct sock *sk)
+ {
+       struct aa_sk_ctx *ctx = aa_sock(sk);
+-      sk->sk_security = NULL;
+       aa_put_label(ctx->label);
+       aa_put_label(ctx->peer);
+-      kfree(ctx);
+ }
+ /**
+@@ -1432,6 +1417,7 @@ struct lsm_blob_sizes apparmor_blob_sizes __ro_after_init = {
+       .lbs_cred = sizeof(struct aa_label *),
+       .lbs_file = sizeof(struct aa_file_ctx),
+       .lbs_task = sizeof(struct aa_task_ctx),
++      .lbs_sock = sizeof(struct aa_sk_ctx),
+ };
+ static const struct lsm_id apparmor_lsmid = {
+@@ -1477,7 +1463,6 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = {
+       LSM_HOOK_INIT(getprocattr, apparmor_getprocattr),
+       LSM_HOOK_INIT(setprocattr, apparmor_setprocattr),
+-      LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security),
+       LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security),
+       LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security),
+diff --git a/security/apparmor/net.c b/security/apparmor/net.c
+index 87e934b2b5488..77413a5191179 100644
+--- a/security/apparmor/net.c
++++ b/security/apparmor/net.c
+@@ -151,7 +151,7 @@ static int aa_label_sk_perm(const struct cred *subj_cred,
+                           const char *op, u32 request,
+                           struct sock *sk)
+ {
+-      struct aa_sk_ctx *ctx = SK_CTX(sk);
++      struct aa_sk_ctx *ctx = aa_sock(sk);
+       int error = 0;
+       AA_BUG(!label);
+diff --git a/security/security.c b/security/security.c
+index 41ab07eafc7fa..43166e341526c 100644
+--- a/security/security.c
++++ b/security/security.c
+@@ -29,6 +29,7 @@
+ #include <linux/msg.h>
+ #include <linux/overflow.h>
+ #include <net/flow.h>
++#include <net/sock.h>
+ /* How many LSMs were built into the kernel? */
+ #define LSM_COUNT (__end_lsm_info - __start_lsm_info)
+@@ -227,6 +228,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
+       lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode);
+       lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc);
+       lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg);
++      lsm_set_blob_size(&needed->lbs_sock, &blob_sizes.lbs_sock);
+       lsm_set_blob_size(&needed->lbs_superblock, &blob_sizes.lbs_superblock);
+       lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task);
+       lsm_set_blob_size(&needed->lbs_xattr_count,
+@@ -401,6 +403,7 @@ static void __init ordered_lsm_init(void)
+       init_debug("inode blob size      = %d\n", blob_sizes.lbs_inode);
+       init_debug("ipc blob size        = %d\n", blob_sizes.lbs_ipc);
+       init_debug("msg_msg blob size    = %d\n", blob_sizes.lbs_msg_msg);
++      init_debug("sock blob size       = %d\n", blob_sizes.lbs_sock);
+       init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock);
+       init_debug("task blob size       = %d\n", blob_sizes.lbs_task);
+       init_debug("xattr slots          = %d\n", blob_sizes.lbs_xattr_count);
+@@ -4673,6 +4676,28 @@ int security_socket_getpeersec_dgram(struct socket *sock,
+ }
+ EXPORT_SYMBOL(security_socket_getpeersec_dgram);
++/**
++ * lsm_sock_alloc - allocate a composite sock blob
++ * @sock: the sock that needs a blob
++ * @priority: allocation mode
++ *
++ * Allocate the sock blob for all the modules
++ *
++ * Returns 0, or -ENOMEM if memory can't be allocated.
++ */
++static int lsm_sock_alloc(struct sock *sock, gfp_t priority)
++{
++      if (blob_sizes.lbs_sock == 0) {
++              sock->sk_security = NULL;
++              return 0;
++      }
++
++      sock->sk_security = kzalloc(blob_sizes.lbs_sock, priority);
++      if (sock->sk_security == NULL)
++              return -ENOMEM;
++      return 0;
++}
++
+ /**
+  * security_sk_alloc() - Allocate and initialize a sock's LSM blob
+  * @sk: sock
+@@ -4686,7 +4711,14 @@ EXPORT_SYMBOL(security_socket_getpeersec_dgram);
+  */
+ int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+ {
+-      return call_int_hook(sk_alloc_security, sk, family, priority);
++      int rc = lsm_sock_alloc(sk, priority);
++
++      if (unlikely(rc))
++              return rc;
++      rc = call_int_hook(sk_alloc_security, sk, family, priority);
++      if (unlikely(rc))
++              security_sk_free(sk);
++      return rc;
+ }
+ /**
+@@ -4698,6 +4730,8 @@ int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+ void security_sk_free(struct sock *sk)
+ {
+       call_void_hook(sk_free_security, sk);
++      kfree(sk->sk_security);
++      sk->sk_security = NULL;
+ }
+ /**
+diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
+index 400eca4ad0fb6..c11303d662d80 100644
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -4594,7 +4594,7 @@ static int socket_sockcreate_sid(const struct task_security_struct *tsec,
+ static int sock_has_perm(struct sock *sk, u32 perms)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       struct common_audit_data ad;
+       struct lsm_network_audit net;
+@@ -4662,7 +4662,7 @@ static int selinux_socket_post_create(struct socket *sock, int family,
+       isec->initialized = LABEL_INITIALIZED;
+       if (sock->sk) {
+-              sksec = sock->sk->sk_security;
++              sksec = selinux_sock(sock->sk);
+               sksec->sclass = sclass;
+               sksec->sid = sid;
+               /* Allows detection of the first association on this socket */
+@@ -4678,8 +4678,8 @@ static int selinux_socket_post_create(struct socket *sock, int family,
+ static int selinux_socket_socketpair(struct socket *socka,
+                                    struct socket *sockb)
+ {
+-      struct sk_security_struct *sksec_a = socka->sk->sk_security;
+-      struct sk_security_struct *sksec_b = sockb->sk->sk_security;
++      struct sk_security_struct *sksec_a = selinux_sock(socka->sk);
++      struct sk_security_struct *sksec_b = selinux_sock(sockb->sk);
+       sksec_a->peer_sid = sksec_b->sid;
+       sksec_b->peer_sid = sksec_a->sid;
+@@ -4694,7 +4694,7 @@ static int selinux_socket_socketpair(struct socket *socka,
+ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
+ {
+       struct sock *sk = sock->sk;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       u16 family;
+       int err;
+@@ -4834,7 +4834,7 @@ static int selinux_socket_connect_helper(struct socket *sock,
+                                        struct sockaddr *address, int addrlen)
+ {
+       struct sock *sk = sock->sk;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       int err;
+       err = sock_has_perm(sk, SOCKET__CONNECT);
+@@ -5012,9 +5012,9 @@ static int selinux_socket_unix_stream_connect(struct sock *sock,
+                                             struct sock *other,
+                                             struct sock *newsk)
+ {
+-      struct sk_security_struct *sksec_sock = sock->sk_security;
+-      struct sk_security_struct *sksec_other = other->sk_security;
+-      struct sk_security_struct *sksec_new = newsk->sk_security;
++      struct sk_security_struct *sksec_sock = selinux_sock(sock);
++      struct sk_security_struct *sksec_other = selinux_sock(other);
++      struct sk_security_struct *sksec_new = selinux_sock(newsk);
+       struct common_audit_data ad;
+       struct lsm_network_audit net;
+       int err;
+@@ -5043,8 +5043,8 @@ static int selinux_socket_unix_stream_connect(struct sock *sock,
+ static int selinux_socket_unix_may_send(struct socket *sock,
+                                       struct socket *other)
+ {
+-      struct sk_security_struct *ssec = sock->sk->sk_security;
+-      struct sk_security_struct *osec = other->sk->sk_security;
++      struct sk_security_struct *ssec = selinux_sock(sock->sk);
++      struct sk_security_struct *osec = selinux_sock(other->sk);
+       struct common_audit_data ad;
+       struct lsm_network_audit net;
+@@ -5081,7 +5081,7 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
+                                      u16 family)
+ {
+       int err = 0;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       u32 sk_sid = sksec->sid;
+       struct common_audit_data ad;
+       struct lsm_network_audit net;
+@@ -5110,7 +5110,7 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
+ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ {
+       int err, peerlbl_active, secmark_active;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       u16 family = sk->sk_family;
+       u32 sk_sid = sksec->sid;
+       struct common_audit_data ad;
+@@ -5178,7 +5178,7 @@ static int selinux_socket_getpeersec_stream(struct socket *sock,
+       int err = 0;
+       char *scontext = NULL;
+       u32 scontext_len;
+-      struct sk_security_struct *sksec = sock->sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sock->sk);
+       u32 peer_sid = SECSID_NULL;
+       if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET ||
+@@ -5238,34 +5238,27 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock,
+ static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
+ {
+-      struct sk_security_struct *sksec;
+-
+-      sksec = kzalloc(sizeof(*sksec), priority);
+-      if (!sksec)
+-              return -ENOMEM;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       sksec->peer_sid = SECINITSID_UNLABELED;
+       sksec->sid = SECINITSID_UNLABELED;
+       sksec->sclass = SECCLASS_SOCKET;
+       selinux_netlbl_sk_security_reset(sksec);
+-      sk->sk_security = sksec;
+       return 0;
+ }
+ static void selinux_sk_free_security(struct sock *sk)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+-      sk->sk_security = NULL;
+       selinux_netlbl_sk_security_free(sksec);
+-      kfree(sksec);
+ }
+ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
+-      struct sk_security_struct *newsksec = newsk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
++      struct sk_security_struct *newsksec = selinux_sock(newsk);
+       newsksec->sid = sksec->sid;
+       newsksec->peer_sid = sksec->peer_sid;
+@@ -5279,7 +5272,7 @@ static void selinux_sk_getsecid(const struct sock *sk, u32 *secid)
+       if (!sk)
+               *secid = SECINITSID_ANY_SOCKET;
+       else {
+-              const struct sk_security_struct *sksec = sk->sk_security;
++              const struct sk_security_struct *sksec = selinux_sock(sk);
+               *secid = sksec->sid;
+       }
+@@ -5289,7 +5282,7 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent)
+ {
+       struct inode_security_struct *isec =
+               inode_security_novalidate(SOCK_INODE(parent));
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 ||
+           sk->sk_family == PF_UNIX)
+@@ -5306,7 +5299,7 @@ static int selinux_sctp_process_new_assoc(struct sctp_association *asoc,
+ {
+       struct sock *sk = asoc->base.sk;
+       u16 family = sk->sk_family;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       struct common_audit_data ad;
+       struct lsm_network_audit net;
+       int err;
+@@ -5361,7 +5354,7 @@ static int selinux_sctp_process_new_assoc(struct sctp_association *asoc,
+ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
+                                     struct sk_buff *skb)
+ {
+-      struct sk_security_struct *sksec = asoc->base.sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
+       u32 conn_sid;
+       int err;
+@@ -5394,7 +5387,7 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
+ static int selinux_sctp_assoc_established(struct sctp_association *asoc,
+                                         struct sk_buff *skb)
+ {
+-      struct sk_security_struct *sksec = asoc->base.sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
+       if (!selinux_policycap_extsockclass())
+               return 0;
+@@ -5493,8 +5486,8 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
+ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
+                                 struct sock *newsk)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
+-      struct sk_security_struct *newsksec = newsk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
++      struct sk_security_struct *newsksec = selinux_sock(newsk);
+       /* If policy does not support SECCLASS_SCTP_SOCKET then call
+        * the non-sctp clone version.
+@@ -5510,8 +5503,8 @@ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk
+ static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
+ {
+-      struct sk_security_struct *ssksec = ssk->sk_security;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *ssksec = selinux_sock(ssk);
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       ssksec->sclass = sksec->sclass;
+       ssksec->sid = sksec->sid;
+@@ -5526,7 +5519,7 @@ static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
+ static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb,
+                                    struct request_sock *req)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       int err;
+       u16 family = req->rsk_ops->family;
+       u32 connsid;
+@@ -5547,7 +5540,7 @@ static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb,
+ static void selinux_inet_csk_clone(struct sock *newsk,
+                                  const struct request_sock *req)
+ {
+-      struct sk_security_struct *newsksec = newsk->sk_security;
++      struct sk_security_struct *newsksec = selinux_sock(newsk);
+       newsksec->sid = req->secid;
+       newsksec->peer_sid = req->peer_secid;
+@@ -5564,7 +5557,7 @@ static void selinux_inet_csk_clone(struct sock *newsk,
+ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
+ {
+       u16 family = sk->sk_family;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       /* handle mapped IPv4 packets arriving via IPv6 sockets */
+       if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
+@@ -5639,7 +5632,7 @@ static int selinux_tun_dev_attach_queue(void *security)
+ static int selinux_tun_dev_attach(struct sock *sk, void *security)
+ {
+       struct tun_security_struct *tunsec = security;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       /* we don't currently perform any NetLabel based labeling here and it
+        * isn't clear that we would want to do so anyway; while we could apply
+@@ -5762,7 +5755,7 @@ static unsigned int selinux_ip_output(void *priv, struct sk_buff *skb,
+                       return NF_ACCEPT;
+               /* standard practice, label using the parent socket */
+-              sksec = sk->sk_security;
++              sksec = selinux_sock(sk);
+               sid = sksec->sid;
+       } else
+               sid = SECINITSID_KERNEL;
+@@ -5785,7 +5778,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
+       sk = skb_to_full_sk(skb);
+       if (sk == NULL)
+               return NF_ACCEPT;
+-      sksec = sk->sk_security;
++      sksec = selinux_sock(sk);
+       ad_net_init_from_iif(&ad, &net, state->out->ifindex, state->pf);
+       if (selinux_parse_skb(skb, &ad, NULL, 0, &proto))
+@@ -5874,7 +5867,7 @@ static unsigned int selinux_ip_postroute(void *priv,
+               u32 skb_sid;
+               struct sk_security_struct *sksec;
+-              sksec = sk->sk_security;
++              sksec = selinux_sock(sk);
+               if (selinux_skb_peerlbl_sid(skb, family, &skb_sid))
+                       return NF_DROP;
+               /* At this point, if the returned skb peerlbl is SECSID_NULL
+@@ -5903,7 +5896,7 @@ static unsigned int selinux_ip_postroute(void *priv,
+       } else {
+               /* Locally generated packet, fetch the security label from the
+                * associated socket. */
+-              struct sk_security_struct *sksec = sk->sk_security;
++              struct sk_security_struct *sksec = selinux_sock(sk);
+               peer_sid = sksec->sid;
+               secmark_perm = PACKET__SEND;
+       }
+@@ -5946,7 +5939,7 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
+       unsigned int data_len = skb->len;
+       unsigned char *data = skb->data;
+       struct nlmsghdr *nlh;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       u16 sclass = sksec->sclass;
+       u32 perm;
+@@ -7004,6 +6997,7 @@ struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
+       .lbs_inode = sizeof(struct inode_security_struct),
+       .lbs_ipc = sizeof(struct ipc_security_struct),
+       .lbs_msg_msg = sizeof(struct msg_security_struct),
++      .lbs_sock = sizeof(struct sk_security_struct),
+       .lbs_superblock = sizeof(struct superblock_security_struct),
+       .lbs_xattr_count = SELINUX_INODE_INIT_XATTRS,
+ };
+diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
+index dea1d6f3ed2d3..b074099acbaf7 100644
+--- a/security/selinux/include/objsec.h
++++ b/security/selinux/include/objsec.h
+@@ -195,4 +195,9 @@ selinux_superblock(const struct super_block *superblock)
+       return superblock->s_security + selinux_blob_sizes.lbs_superblock;
+ }
++static inline struct sk_security_struct *selinux_sock(const struct sock *sock)
++{
++      return sock->sk_security + selinux_blob_sizes.lbs_sock;
++}
++
+ #endif /* _SELINUX_OBJSEC_H_ */
+diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
+index 55885634e8804..fbe5f8c29f813 100644
+--- a/security/selinux/netlabel.c
++++ b/security/selinux/netlabel.c
+@@ -17,6 +17,7 @@
+ #include <linux/gfp.h>
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
++#include <linux/lsm_hooks.h>
+ #include <net/sock.h>
+ #include <net/netlabel.h>
+ #include <net/ip.h>
+@@ -68,7 +69,7 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb,
+ static struct netlbl_lsm_secattr *selinux_netlbl_sock_genattr(struct sock *sk)
+ {
+       int rc;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       struct netlbl_lsm_secattr *secattr;
+       if (sksec->nlbl_secattr != NULL)
+@@ -100,7 +101,7 @@ static struct netlbl_lsm_secattr *selinux_netlbl_sock_getattr(
+                                                       const struct sock *sk,
+                                                       u32 sid)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       struct netlbl_lsm_secattr *secattr = sksec->nlbl_secattr;
+       if (secattr == NULL)
+@@ -240,7 +241,7 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
+        * being labeled by it's parent socket, if it is just exit */
+       sk = skb_to_full_sk(skb);
+       if (sk != NULL) {
+-              struct sk_security_struct *sksec = sk->sk_security;
++              struct sk_security_struct *sksec = selinux_sock(sk);
+               if (sksec->nlbl_state != NLBL_REQSKB)
+                       return 0;
+@@ -277,7 +278,7 @@ int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc,
+ {
+       int rc;
+       struct netlbl_lsm_secattr secattr;
+-      struct sk_security_struct *sksec = asoc->base.sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
+       struct sockaddr_in addr4;
+       struct sockaddr_in6 addr6;
+@@ -356,7 +357,7 @@ int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family)
+  */
+ void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       if (family == PF_INET)
+               sksec->nlbl_state = NLBL_LABELED;
+@@ -374,8 +375,8 @@ void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family)
+  */
+ void selinux_netlbl_sctp_sk_clone(struct sock *sk, struct sock *newsk)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
+-      struct sk_security_struct *newsksec = newsk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
++      struct sk_security_struct *newsksec = selinux_sock(newsk);
+       newsksec->nlbl_state = sksec->nlbl_state;
+ }
+@@ -393,7 +394,7 @@ void selinux_netlbl_sctp_sk_clone(struct sock *sk, struct sock *newsk)
+ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family)
+ {
+       int rc;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       struct netlbl_lsm_secattr *secattr;
+       if (family != PF_INET && family != PF_INET6)
+@@ -510,7 +511,7 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock,
+ {
+       int rc = 0;
+       struct sock *sk = sock->sk;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       struct netlbl_lsm_secattr secattr;
+       if (selinux_netlbl_option(level, optname) &&
+@@ -548,7 +549,7 @@ static int selinux_netlbl_socket_connect_helper(struct sock *sk,
+                                               struct sockaddr *addr)
+ {
+       int rc;
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       struct netlbl_lsm_secattr *secattr;
+       /* connected sockets are allowed to disconnect when the address family
+@@ -587,7 +588,7 @@ static int selinux_netlbl_socket_connect_helper(struct sock *sk,
+ int selinux_netlbl_socket_connect_locked(struct sock *sk,
+                                        struct sockaddr *addr)
+ {
+-      struct sk_security_struct *sksec = sk->sk_security;
++      struct sk_security_struct *sksec = selinux_sock(sk);
+       if (sksec->nlbl_state != NLBL_REQSKB &&
+           sksec->nlbl_state != NLBL_CONNLABELED)
+diff --git a/security/smack/smack.h b/security/smack/smack.h
+index 041688e5a77a3..297f21446f456 100644
+--- a/security/smack/smack.h
++++ b/security/smack/smack.h
+@@ -355,6 +355,11 @@ static inline struct superblock_smack *smack_superblock(
+       return superblock->s_security + smack_blob_sizes.lbs_superblock;
+ }
++static inline struct socket_smack *smack_sock(const struct sock *sock)
++{
++      return sock->sk_security + smack_blob_sizes.lbs_sock;
++}
++
+ /*
+  * Is the directory transmuting?
+  */
+diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
+index 002a1b9ed83a5..6ec9a40f3ec59 100644
+--- a/security/smack/smack_lsm.c
++++ b/security/smack/smack_lsm.c
+@@ -1606,7 +1606,7 @@ static int smack_inode_getsecurity(struct mnt_idmap *idmap,
+               if (sock == NULL || sock->sk == NULL)
+                       return -EOPNOTSUPP;
+-              ssp = sock->sk->sk_security;
++              ssp = smack_sock(sock->sk);
+               if (strcmp(name, XATTR_SMACK_IPIN) == 0)
+                       isp = ssp->smk_in;
+@@ -1994,7 +1994,7 @@ static int smack_file_receive(struct file *file)
+       if (inode->i_sb->s_magic == SOCKFS_MAGIC) {
+               sock = SOCKET_I(inode);
+-              ssp = sock->sk->sk_security;
++              ssp = smack_sock(sock->sk);
+               tsp = smack_cred(current_cred());
+               /*
+                * If the receiving process can't write to the
+@@ -2409,11 +2409,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
+ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
+ {
+       struct smack_known *skp = smk_of_current();
+-      struct socket_smack *ssp;
+-
+-      ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
+-      if (ssp == NULL)
+-              return -ENOMEM;
++      struct socket_smack *ssp = smack_sock(sk);
+       /*
+        * Sockets created by kernel threads receive web label.
+@@ -2427,11 +2423,10 @@ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
+       }
+       ssp->smk_packet = NULL;
+-      sk->sk_security = ssp;
+-
+       return 0;
+ }
++#ifdef SMACK_IPV6_PORT_LABELING
+ /**
+  * smack_sk_free_security - Free a socket blob
+  * @sk: the socket
+@@ -2440,7 +2435,6 @@ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
+  */
+ static void smack_sk_free_security(struct sock *sk)
+ {
+-#ifdef SMACK_IPV6_PORT_LABELING
+       struct smk_port_label *spp;
+       if (sk->sk_family == PF_INET6) {
+@@ -2453,9 +2447,8 @@ static void smack_sk_free_security(struct sock *sk)
+               }
+               rcu_read_unlock();
+       }
+-#endif
+-      kfree(sk->sk_security);
+ }
++#endif
+ /**
+  * smack_sk_clone_security - Copy security context
+@@ -2466,8 +2459,8 @@ static void smack_sk_free_security(struct sock *sk)
+  */
+ static void smack_sk_clone_security(const struct sock *sk, struct sock *newsk)
+ {
+-      struct socket_smack *ssp_old = sk->sk_security;
+-      struct socket_smack *ssp_new = newsk->sk_security;
++      struct socket_smack *ssp_old = smack_sock(sk);
++      struct socket_smack *ssp_new = smack_sock(newsk);
+       *ssp_new = *ssp_old;
+ }
+@@ -2583,7 +2576,7 @@ static struct smack_known *smack_ipv6host_label(struct sockaddr_in6 *sip)
+  */
+ static int smack_netlbl_add(struct sock *sk)
+ {
+-      struct socket_smack *ssp = sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sk);
+       struct smack_known *skp = ssp->smk_out;
+       int rc;
+@@ -2616,7 +2609,7 @@ static int smack_netlbl_add(struct sock *sk)
+  */
+ static void smack_netlbl_delete(struct sock *sk)
+ {
+-      struct socket_smack *ssp = sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sk);
+       /*
+        * Take the label off the socket if one is set.
+@@ -2648,7 +2641,7 @@ static int smk_ipv4_check(struct sock *sk, struct sockaddr_in *sap)
+       struct smack_known *skp;
+       int rc = 0;
+       struct smack_known *hkp;
+-      struct socket_smack *ssp = sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sk);
+       struct smk_audit_info ad;
+       rcu_read_lock();
+@@ -2721,7 +2714,7 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address)
+ {
+       struct sock *sk = sock->sk;
+       struct sockaddr_in6 *addr6;
+-      struct socket_smack *ssp = sock->sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sock->sk);
+       struct smk_port_label *spp;
+       unsigned short port = 0;
+@@ -2809,7 +2802,7 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address,
+                               int act)
+ {
+       struct smk_port_label *spp;
+-      struct socket_smack *ssp = sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sk);
+       struct smack_known *skp = NULL;
+       unsigned short port;
+       struct smack_known *object;
+@@ -2912,7 +2905,7 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name,
+       if (sock == NULL || sock->sk == NULL)
+               return -EOPNOTSUPP;
+-      ssp = sock->sk->sk_security;
++      ssp = smack_sock(sock->sk);
+       if (strcmp(name, XATTR_SMACK_IPIN) == 0)
+               ssp->smk_in = skp;
+@@ -2960,7 +2953,7 @@ static int smack_socket_post_create(struct socket *sock, int family,
+        * Sockets created by kernel threads receive web label.
+        */
+       if (unlikely(current->flags & PF_KTHREAD)) {
+-              ssp = sock->sk->sk_security;
++              ssp = smack_sock(sock->sk);
+               ssp->smk_in = &smack_known_web;
+               ssp->smk_out = &smack_known_web;
+       }
+@@ -2985,8 +2978,8 @@ static int smack_socket_post_create(struct socket *sock, int family,
+ static int smack_socket_socketpair(struct socket *socka,
+                                  struct socket *sockb)
+ {
+-      struct socket_smack *asp = socka->sk->sk_security;
+-      struct socket_smack *bsp = sockb->sk->sk_security;
++      struct socket_smack *asp = smack_sock(socka->sk);
++      struct socket_smack *bsp = smack_sock(sockb->sk);
+       asp->smk_packet = bsp->smk_out;
+       bsp->smk_packet = asp->smk_out;
+@@ -3049,7 +3042,7 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap,
+               if (__is_defined(SMACK_IPV6_SECMARK_LABELING))
+                       rsp = smack_ipv6host_label(sip);
+               if (rsp != NULL) {
+-                      struct socket_smack *ssp = sock->sk->sk_security;
++                      struct socket_smack *ssp = smack_sock(sock->sk);
+                       rc = smk_ipv6_check(ssp->smk_out, rsp, sip,
+                                           SMK_CONNECTING);
+@@ -3844,9 +3837,9 @@ static int smack_unix_stream_connect(struct sock *sock,
+ {
+       struct smack_known *skp;
+       struct smack_known *okp;
+-      struct socket_smack *ssp = sock->sk_security;
+-      struct socket_smack *osp = other->sk_security;
+-      struct socket_smack *nsp = newsk->sk_security;
++      struct socket_smack *ssp = smack_sock(sock);
++      struct socket_smack *osp = smack_sock(other);
++      struct socket_smack *nsp = smack_sock(newsk);
+       struct smk_audit_info ad;
+       int rc = 0;
+ #ifdef CONFIG_AUDIT
+@@ -3898,8 +3891,8 @@ static int smack_unix_stream_connect(struct sock *sock,
+  */
+ static int smack_unix_may_send(struct socket *sock, struct socket *other)
+ {
+-      struct socket_smack *ssp = sock->sk->sk_security;
+-      struct socket_smack *osp = other->sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sock->sk);
++      struct socket_smack *osp = smack_sock(other->sk);
+       struct smk_audit_info ad;
+       int rc;
+@@ -3936,7 +3929,7 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg,
+       struct sockaddr_in6 *sap = (struct sockaddr_in6 *) msg->msg_name;
+ #endif
+ #ifdef SMACK_IPV6_SECMARK_LABELING
+-      struct socket_smack *ssp = sock->sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sock->sk);
+       struct smack_known *rsp;
+ #endif
+       int rc = 0;
+@@ -4148,7 +4141,7 @@ static struct smack_known *smack_from_netlbl(const struct sock *sk, u16 family,
+       netlbl_secattr_init(&secattr);
+       if (sk)
+-              ssp = sk->sk_security;
++              ssp = smack_sock(sk);
+       if (netlbl_skbuff_getattr(skb, family, &secattr) == 0) {
+               skp = smack_from_secattr(&secattr, ssp);
+@@ -4170,7 +4163,7 @@ static struct smack_known *smack_from_netlbl(const struct sock *sk, u16 family,
+  */
+ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ {
+-      struct socket_smack *ssp = sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sk);
+       struct smack_known *skp = NULL;
+       int rc = 0;
+       struct smk_audit_info ad;
+@@ -4274,7 +4267,7 @@ static int smack_socket_getpeersec_stream(struct socket *sock,
+       u32 slen = 1;
+       int rc = 0;
+-      ssp = sock->sk->sk_security;
++      ssp = smack_sock(sock->sk);
+       if (ssp->smk_packet != NULL) {
+               rcp = ssp->smk_packet->smk_known;
+               slen = strlen(rcp) + 1;
+@@ -4324,7 +4317,7 @@ static int smack_socket_getpeersec_dgram(struct socket *sock,
+       switch (family) {
+       case PF_UNIX:
+-              ssp = sock->sk->sk_security;
++              ssp = smack_sock(sock->sk);
+               s = ssp->smk_out->smk_secid;
+               break;
+       case PF_INET:
+@@ -4373,7 +4366,7 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent)
+           (sk->sk_family != PF_INET && sk->sk_family != PF_INET6))
+               return;
+-      ssp = sk->sk_security;
++      ssp = smack_sock(sk);
+       ssp->smk_in = skp;
+       ssp->smk_out = skp;
+       /* cssp->smk_packet is already set in smack_inet_csk_clone() */
+@@ -4393,7 +4386,7 @@ static int smack_inet_conn_request(const struct sock *sk, struct sk_buff *skb,
+ {
+       u16 family = sk->sk_family;
+       struct smack_known *skp;
+-      struct socket_smack *ssp = sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sk);
+       struct sockaddr_in addr;
+       struct iphdr *hdr;
+       struct smack_known *hskp;
+@@ -4479,7 +4472,7 @@ static int smack_inet_conn_request(const struct sock *sk, struct sk_buff *skb,
+ static void smack_inet_csk_clone(struct sock *sk,
+                                const struct request_sock *req)
+ {
+-      struct socket_smack *ssp = sk->sk_security;
++      struct socket_smack *ssp = smack_sock(sk);
+       struct smack_known *skp;
+       if (req->peer_secid != 0) {
+@@ -5049,6 +5042,7 @@ struct lsm_blob_sizes smack_blob_sizes __ro_after_init = {
+       .lbs_inode = sizeof(struct inode_smack),
+       .lbs_ipc = sizeof(struct smack_known *),
+       .lbs_msg_msg = sizeof(struct smack_known *),
++      .lbs_sock = sizeof(struct socket_smack),
+       .lbs_superblock = sizeof(struct superblock_smack),
+       .lbs_xattr_count = SMACK_INODE_INIT_XATTRS,
+ };
+@@ -5173,7 +5167,9 @@ static struct security_hook_list smack_hooks[] __ro_after_init = {
+       LSM_HOOK_INIT(socket_getpeersec_stream, smack_socket_getpeersec_stream),
+       LSM_HOOK_INIT(socket_getpeersec_dgram, smack_socket_getpeersec_dgram),
+       LSM_HOOK_INIT(sk_alloc_security, smack_sk_alloc_security),
++#ifdef SMACK_IPV6_PORT_LABELING
+       LSM_HOOK_INIT(sk_free_security, smack_sk_free_security),
++#endif
+       LSM_HOOK_INIT(sk_clone_security, smack_sk_clone_security),
+       LSM_HOOK_INIT(sock_graft, smack_sock_graft),
+       LSM_HOOK_INIT(inet_conn_request, smack_inet_conn_request),
+diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
+index b945c1d3a7431..bad71b7e648da 100644
+--- a/security/smack/smack_netfilter.c
++++ b/security/smack/smack_netfilter.c
+@@ -26,8 +26,8 @@ static unsigned int smack_ip_output(void *priv,
+       struct socket_smack *ssp;
+       struct smack_known *skp;
+-      if (sk && sk->sk_security) {
+-              ssp = sk->sk_security;
++      if (sk) {
++              ssp = smack_sock(sk);
+               skp = ssp->smk_out;
+               skb->secmark = skp->smk_secid;
+       }
+-- 
+2.43.0
+
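The allocation pattern introduced above — each module declares how much sock space it needs, the infrastructure sizes one blob, and every module indexes into it at its recorded offset — can be shown with a small stand-alone model (hypothetical module contexts, not the kernel LSM API):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Each "module" asks for space; the infrastructure records the offset
 * at which that module's slice will live inside one shared blob. */
static size_t blob_total;

static size_t reserve(size_t need)
{
    size_t off = blob_total;
    blob_total += need;
    return off;
}

struct mod_a_ctx { int label; };       /* stand-ins for aa_sk_ctx etc. */
struct mod_b_ctx { char name[16]; };

int main(void)
{
    size_t off_a = reserve(sizeof(struct mod_a_ctx));
    size_t off_b = reserve(sizeof(struct mod_b_ctx));

    /* One allocation for the whole composite blob (kzalloc in the kernel). */
    unsigned char *blob = calloc(1, blob_total);
    if (!blob)
        return 1;

    /* Each module derives its own view from base + offset, as
     * selinux_sock()/smack_sock()/aa_sock() do with lbs_sock. */
    struct mod_a_ctx *a = (struct mod_a_ctx *)(blob + off_a);
    struct mod_b_ctx *b = (struct mod_b_ctx *)(blob + off_b);

    a->label = 42;
    snprintf(b->name, sizeof(b->name), "web");
    assert(a->label == 42 && strcmp(b->name, "web") == 0);

    free(blob);   /* a single kfree() releases every module's slice */
    return 0;
}
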
diff --git a/queue-6.10/serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch b/queue-6.10/serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch
new file mode 100644
index 0000000..13b8c15
--- /dev/null
@@ -0,0 +1,48 @@
+From 18028110d3062573f5df7e1ce692782babda7533 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 15:13:32 +0200
+Subject: serial: qcom-geni: fix arg types for qcom_geni_serial_poll_bit()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit c2eaf5e01275ae13f1ec5b1434f6c49cfff57430 ]
+
+The "offset" passed in should be unsigned since it's always a positive
+offset from our memory mapped IO.
+
+The "field" should be u32 since we're anding it with a 32-bit value
+read from the device.
+
+Suggested-by: Stephen Boyd <swboyd@chromium.org>
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
+Link: https://lore.kernel.org/r/20240610152420.v4.4.I24a0de52dd7336908df180fa6b698e001f3aff82@changeid
+Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/20240906131336.23625-5-johan+linaro@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: cc4a0e5754a1 ("serial: qcom-geni: fix console corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/qcom_geni_serial.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
+index b88435c0ea507..54052c68555d7 100644
+--- a/drivers/tty/serial/qcom_geni_serial.c
++++ b/drivers/tty/serial/qcom_geni_serial.c
+@@ -266,7 +266,7 @@ static bool qcom_geni_serial_secondary_active(struct uart_port *uport)
+ }
+ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
+-                              int offset, int field, bool set)
++                                    unsigned int offset, u32 field, bool set)
+ {
+       u32 reg;
+       struct qcom_geni_serial_port *port;
+-- 
+2.43.0
+
diff --git a/queue-6.10/serial-qcom-geni-fix-console-corruption.patch b/queue-6.10/serial-qcom-geni-fix-console-corruption.patch
new file mode 100644
index 0000000..0ab674f
--- /dev/null
@@ -0,0 +1,169 @@
+From 9fd9b8c3e4ea317dd5ecb85b2d14186818de310b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 15:13:34 +0200
+Subject: serial: qcom-geni: fix console corruption
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit cc4a0e5754a16bbc1e215c091349a7c83a2c5e14 ]
+
+The Qualcomm serial console implementation is broken and can lose
+characters when the serial port is also used for tty output.
+
+Specifically, the console code only waits for the current tx command to
+complete when all data has already been written to the fifo. When there
+are on-going longer transfers this often means that console output is
+lost when the console code inadvertently "hijacks" the current tx
+command instead of starting a new one.
+
+This can, for example, be observed during boot when console output that
+should have been interspersed with init output is truncated:
+
+       [    9.462317] qcom-snps-eusb2-hsphy fde000.phy: Registered Qcom-eUSB2 phy
+       [  OK  ] Found device KBG50ZNS256G KIOXIA Wi[    9.471743ndows.
+       [    9.539915] xhci-hcd xhci-hcd.0.auto: xHCI Host Controller
+
+Add a new state variable to track how much data has been written to the
+fifo and use it to determine when the fifo and shift register are both
+empty. This is needed since there is currently no other known way to
+determine when the shift register is empty.
+
+This in turn allows the console code to interrupt long transfers without
+losing data.
+
+Note that the oops-in-progress case is similarly broken as it does not
+cancel any active command and also waits for the wrong status flag when
+attempting to drain the fifo (TX_FIFO_NOT_EMPTY_EN is only set when
+cancelling a command leaves data in the fifo).
+
+Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
+Fixes: a1fee899e5be ("tty: serial: qcom_geni_serial: Fix softlock")
+Fixes: 9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get the port lock")
+Cc: stable@vger.kernel.org     # 4.17
+Reviewed-by: Douglas Anderson <dianders@chromium.org>
+Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/20240906131336.23625-7-johan+linaro@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/qcom_geni_serial.c | 45 +++++++++++++--------------
+ 1 file changed, 22 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
+index 7bbd70c306201..f8f6e9466b400 100644
+--- a/drivers/tty/serial/qcom_geni_serial.c
++++ b/drivers/tty/serial/qcom_geni_serial.c
+@@ -131,6 +131,7 @@ struct qcom_geni_serial_port {
+       bool brk;
+       unsigned int tx_remaining;
++      unsigned int tx_queued;
+       int wakeup_irq;
+       bool rx_tx_swap;
+       bool cts_rts_swap;
+@@ -144,6 +145,8 @@ static const struct uart_ops qcom_geni_uart_pops;
+ static struct uart_driver qcom_geni_console_driver;
+ static struct uart_driver qcom_geni_uart_driver;
++static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
++
+ static inline struct qcom_geni_serial_port *to_dev_port(struct uart_port *uport)
+ {
+       return container_of(uport, struct qcom_geni_serial_port, uport);
+@@ -393,6 +396,14 @@ static void qcom_geni_serial_poll_put_char(struct uart_port *uport,
+ #endif
+ #ifdef CONFIG_SERIAL_QCOM_GENI_CONSOLE
++static void qcom_geni_serial_drain_fifo(struct uart_port *uport)
++{
++      struct qcom_geni_serial_port *port = to_dev_port(uport);
++
++      qcom_geni_serial_poll_bitfield(uport, SE_GENI_M_GP_LENGTH, GP_LENGTH,
++                      port->tx_queued);
++}
++
+ static void qcom_geni_serial_wr_char(struct uart_port *uport, unsigned char ch)
+ {
+       struct qcom_geni_private_data *private_data = uport->private_data;
+@@ -468,7 +479,6 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
+       struct qcom_geni_serial_port *port;
+       bool locked = true;
+       unsigned long flags;
+-      u32 geni_status;
+       WARN_ON(co->index < 0 || co->index >= GENI_UART_CONS_PORTS);
+@@ -482,34 +492,20 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
+       else
+               uart_port_lock_irqsave(uport, &flags);
+-      geni_status = readl(uport->membase + SE_GENI_STATUS);
++      if (qcom_geni_serial_main_active(uport)) {
++              /* Wait for completion or drain FIFO */
++              if (!locked || port->tx_remaining == 0)
++                      qcom_geni_serial_poll_tx_done(uport);
++              else
++                      qcom_geni_serial_drain_fifo(uport);
+-      if (!locked) {
+-              /*
+-               * We can only get here if an oops is in progress then we were
+-               * unable to get the lock. This means we can't safely access
+-               * our state variables like tx_remaining. About the best we
+-               * can do is wait for the FIFO to be empty before we start our
+-               * transfer, so we'll do that.
+-               */
+-              qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
+-                                        M_TX_FIFO_NOT_EMPTY_EN, false);
+-      } else if ((geni_status & M_GENI_CMD_ACTIVE) && !port->tx_remaining) {
+-              /*
+-               * It seems we can't interrupt existing transfers if all data
+-               * has been sent, in which case we need to look for done first.
+-               */
+-              qcom_geni_serial_poll_tx_done(uport);
++              qcom_geni_serial_cancel_tx_cmd(uport);
+       }
+       __qcom_geni_serial_console_write(uport, s, count);
+-
+-      if (locked) {
+-              if (port->tx_remaining)
+-                      qcom_geni_serial_setup_tx(uport, port->tx_remaining);
++      if (locked)
+               uart_port_unlock_irqrestore(uport, flags);
+-      }
+ }
+ static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop)
+@@ -690,6 +686,7 @@ static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
+       writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+       port->tx_remaining = 0;
++      port->tx_queued = 0;
+ }
+ static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop)
+@@ -916,6 +913,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
+       if (!port->tx_remaining) {
+               qcom_geni_serial_setup_tx(uport, pending);
+               port->tx_remaining = pending;
++              port->tx_queued = 0;
+               irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
+               if (!(irq_en & M_TX_FIFO_WATERMARK_EN))
+@@ -924,6 +922,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
+       }
+       qcom_geni_serial_send_chunk_fifo(uport, chunk);
++      port->tx_queued += chunk;
+       /*
+        * The tx fifo watermark is level triggered and latched. Though we had
+-- 
+2.43.0
+
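The drain logic added above boils down to comparing a hardware word counter against the driver's own tx_queued bookkeeping. A toy model of that idea, with a simulated counter standing in for M_GP_LENGTH:

#include <assert.h>
#include <stdio.h>

/* 'hw_sent' stands in for the hardware's transmitted-word counter,
 * 'tx_queued' for the new driver-side bookkeeping. */
struct toy_port {
    unsigned int tx_queued;  /* words handed to the FIFO this command */
    unsigned int hw_sent;    /* words the "hardware" has shifted out */
};

static void queue_chunk(struct toy_port *p, unsigned int words)
{
    p->tx_queued += words;
}

static void hw_tick(struct toy_port *p)
{
    if (p->hw_sent < p->tx_queued)
        p->hw_sent++;        /* one word leaves the shift register */
}

/* Drain: wait until the hardware count matches what was queued,
 * i.e. both the FIFO and the shift register are empty. */
static void drain(struct toy_port *p)
{
    while (p->hw_sent != p->tx_queued)
        hw_tick(p);
}

int main(void)
{
    struct toy_port p = { 0, 0 };

    queue_chunk(&p, 16);
    queue_chunk(&p, 8);
    drain(&p);
    assert(p.hw_sent == 24);
    printf("drained after %u words\n", p.hw_sent);
    return 0;
}
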
diff --git a/queue-6.10/serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch b/queue-6.10/serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch
new file mode 100644
index 0000000..3ec8119
--- /dev/null
@@ -0,0 +1,70 @@
+From c6511f4d88c312a11be28f152c02f6bade9d8e64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 15:13:33 +0200
+Subject: serial: qcom-geni: introduce qcom_geni_serial_poll_bitfield()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit b26d1ad1221273c88c2c4f5b4080338b8ca23859 ]
+
+With a small modification the qcom_geni_serial_poll_bit() function
+could be used to poll more than just a single bit. Let's generalize
+it. We'll make the qcom_geni_serial_poll_bit() into just a wrapper of
+the general function.
+
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
+Link: https://lore.kernel.org/r/20240610152420.v4.5.Ic6411eab8d9d37acc451705f583fb535cd6dadb2@changeid
+Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/20240906131336.23625-6-johan+linaro@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: cc4a0e5754a1 ("serial: qcom-geni: fix console corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/qcom_geni_serial.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
+index 54052c68555d7..7bbd70c306201 100644
+--- a/drivers/tty/serial/qcom_geni_serial.c
++++ b/drivers/tty/serial/qcom_geni_serial.c
+@@ -265,8 +265,8 @@ static bool qcom_geni_serial_secondary_active(struct uart_port *uport)
+       return readl(uport->membase + SE_GENI_STATUS) & S_GENI_CMD_ACTIVE;
+ }
+-static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
+-                                    unsigned int offset, u32 field, bool set)
++static bool qcom_geni_serial_poll_bitfield(struct uart_port *uport,
++                                         unsigned int offset, u32 field, u32 val)
+ {
+       u32 reg;
+       struct qcom_geni_serial_port *port;
+@@ -286,7 +286,7 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
+       timeout_us = DIV_ROUND_UP(timeout_us, 10) * 10;
+       while (timeout_us) {
+               reg = readl(uport->membase + offset);
+-              if ((bool)(reg & field) == set)
++              if ((reg & field) == val)
+                       return true;
+               udelay(10);
+               timeout_us -= 10;
+@@ -294,6 +294,12 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
+       return false;
+ }
++static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
++                                    unsigned int offset, u32 field, bool set)
++{
++      return qcom_geni_serial_poll_bitfield(uport, offset, field, set ? field : 0);
++}
++
+ static void qcom_geni_serial_setup_tx(struct uart_port *uport, u32 xmit_size)
+ {
+       u32 m_cmd;
+-- 
+2.43.0
+
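As a quick illustration of what the generalized helper enables, here is a sketch (not driver code; HYP_STATUS_REG and the two-bit field are hypothetical stand-ins, while the first call mirrors an existing single-bit wait in the driver):

	bool done, ok;

	/* single-bit wait, now just a wrapper around the new helper */
	done = qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
					 M_CMD_DONE_EN, true);

	/* new capability: wait for a multi-bit field to reach a value,
	 * something a bool set/clear argument could not express */
	ok = qcom_geni_serial_poll_bitfield(uport, HYP_STATUS_REG,
					    GENMASK(1, 0), 0x2);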
diff --git a/queue-6.10/series b/queue-6.10/series
index 36de71ca69f5d9746939e4f175ff24105f2a6452..d083557f860e115f411af4d50115c403b6549ae6 100644 (file)
--- a/queue-6.10/series
@@ -596,3 +596,27 @@ dt-bindings-spi-nxp-fspi-add-imx8ulp-support.patch
 arm-dts-imx6ul-geam-fix-fsl-pins-property-in-tscgrp-pinctrl.patch
 arm-dts-imx6ull-seeed-npi-fix-fsl-pins-property-in-tscgrp-pinctrl.patch
 tools-nolibc-include-arch.h-from-string.h.patch
+soc-versatile-realview-fix-memory-leak-during-device.patch
+soc-versatile-realview-fix-soc_dev-leak-during-devic.patch
+kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch
+kvm-x86-make-x2apic-id-100-readonly.patch
+kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch
+x86-mm-make-x86_platform.guest.enc_status_change_-re.patch
+x86-tdx-account-shared-memory.patch
+x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch
+x86-tdx-convert-shared-memory-back-to-private-on-kex.patch
+x86-tdx-fix-in-kernel-mmio-check.patch
+xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch
+usb-xhci-fix-loss-of-data-on-cadence-xhc.patch
+soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch
+serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch
+serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch
+serial-qcom-geni-fix-console-corruption.patch
+idpf-stop-using-macros-for-accessing-queue-descripto.patch
+idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch
+idpf-merge-singleq-and-splitq-net_device_ops.patch
+idpf-fix-netdev-tx-queue-stop-wake.patch
+fs_parse-add-uid-gid-option-option-parsing-helpers.patch
+debugfs-convert-to-new-uid-gid-option-parsing-helper.patch
+debugfs-show-actual-source-in-proc-mounts.patch
+lsm-infrastructure-management-of-the-sock-security.patch
diff --git a/queue-6.10/soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch b/queue-6.10/soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch
new file mode 100644 (file)
index 0000000..5211e14
--- /dev/null
@@ -0,0 +1,75 @@
+From 429c30fd2323f3e1c0e151a27221ed6d24f8b82a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 15:13:31 +0200
+Subject: soc: qcom: geni-se: add GP_LENGTH/IRQ_EN_SET/IRQ_EN_CLEAR registers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit b03ffc76b83c1a7d058454efbcf1bf0e345ef1c2 ]
+
+For UART devices the M_GP_LENGTH is the TX word count. For other
+devices this is the transaction word count.
+
+For UART devices the S_GP_LENGTH is the RX word count.
+
+The IRQ_EN set/clear registers allow you to set or clear bits in the
+IRQ_EN register without needing a read-modify-write.
+
+Acked-by: Bjorn Andersson <andersson@kernel.org>
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Link: https://lore.kernel.org/r/20240610152420.v4.1.Ife7ced506aef1be3158712aa3ff34a006b973559@changeid
+Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/20240906131336.23625-4-johan+linaro@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: cc4a0e5754a1 ("serial: qcom-geni: fix console corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/soc/qcom/geni-se.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h
+index 0f038a1a03309..c3bca9c0bf2cf 100644
+--- a/include/linux/soc/qcom/geni-se.h
++++ b/include/linux/soc/qcom/geni-se.h
+@@ -88,11 +88,15 @@ struct geni_se {
+ #define SE_GENI_M_IRQ_STATUS          0x610
+ #define SE_GENI_M_IRQ_EN              0x614
+ #define SE_GENI_M_IRQ_CLEAR           0x618
++#define SE_GENI_M_IRQ_EN_SET          0x61c
++#define SE_GENI_M_IRQ_EN_CLEAR                0x620
+ #define SE_GENI_S_CMD0                        0x630
+ #define SE_GENI_S_CMD_CTRL_REG                0x634
+ #define SE_GENI_S_IRQ_STATUS          0x640
+ #define SE_GENI_S_IRQ_EN              0x644
+ #define SE_GENI_S_IRQ_CLEAR           0x648
++#define SE_GENI_S_IRQ_EN_SET          0x64c
++#define SE_GENI_S_IRQ_EN_CLEAR                0x650
+ #define SE_GENI_TX_FIFOn              0x700
+ #define SE_GENI_RX_FIFOn              0x780
+ #define SE_GENI_TX_FIFO_STATUS                0x800
+@@ -101,6 +105,8 @@ struct geni_se {
+ #define SE_GENI_RX_WATERMARK_REG      0x810
+ #define SE_GENI_RX_RFR_WATERMARK_REG  0x814
+ #define SE_GENI_IOS                   0x908
++#define SE_GENI_M_GP_LENGTH           0x910
++#define SE_GENI_S_GP_LENGTH           0x914
+ #define SE_DMA_TX_IRQ_STAT            0xc40
+ #define SE_DMA_TX_IRQ_CLR             0xc44
+ #define SE_DMA_TX_FSM_RST             0xc58
+@@ -234,6 +240,9 @@ struct geni_se {
+ #define IO2_DATA_IN                   BIT(1)
+ #define RX_DATA_IN                    BIT(0)
++/* SE_GENI_M_GP_LENGTH and SE_GENI_S_GP_LENGTH fields */
++#define GP_LENGTH                     GENMASK(31, 0)
++
+ /* SE_DMA_TX_IRQ_STAT Register fields */
+ #define TX_DMA_DONE                   BIT(0)
+ #define TX_EOT                                BIT(1)
+-- 
+2.43.0
+
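A short sketch of why the new set/clear registers matter (assuming a qcom-geni UART context where uport->membase and M_TX_FIFO_WATERMARK_EN are already available):

	u32 irq_en;

	/* before: read-modify-write of SE_GENI_M_IRQ_EN, two accesses */
	irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
	writel(irq_en | M_TX_FIFO_WATERMARK_EN,
	       uport->membase + SE_GENI_M_IRQ_EN);

	/* after: one write enables the bit ... */
	writel(M_TX_FIFO_WATERMARK_EN,
	       uport->membase + SE_GENI_M_IRQ_EN_SET);

	/* ... and one write clears it, no read needed */
	writel(M_TX_FIFO_WATERMARK_EN,
	       uport->membase + SE_GENI_M_IRQ_EN_CLEAR);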
diff --git a/queue-6.10/soc-versatile-realview-fix-memory-leak-during-device.patch b/queue-6.10/soc-versatile-realview-fix-memory-leak-during-device.patch
new file mode 100644 (file)
index 0000000..c8d4ed7
--- /dev/null
@@ -0,0 +1,50 @@
+From 7c463388c4faca9c1d47db60f2888eeef1e00c94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 25 Aug 2024 20:05:23 +0200
+Subject: soc: versatile: realview: fix memory leak during device remove
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+[ Upstream commit 1c4f26a41f9d052f334f6ae629e01f598ed93508 ]
+
+If the device is unbound, the memory allocated for soc_dev_attr
+should be freed to prevent leaks.
+
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Link: https://lore.kernel.org/20240825-soc-dev-fixes-v1-2-ff4b35abed83@linaro.org
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Stable-dep-of: c774f2564c00 ("soc: versatile: realview: fix soc_dev leak during device remove")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/versatile/soc-realview.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/soc/versatile/soc-realview.c b/drivers/soc/versatile/soc-realview.c
+index c6876d232d8fd..d304ee69287af 100644
+--- a/drivers/soc/versatile/soc-realview.c
++++ b/drivers/soc/versatile/soc-realview.c
+@@ -93,7 +93,7 @@ static int realview_soc_probe(struct platform_device *pdev)
+       if (IS_ERR(syscon_regmap))
+               return PTR_ERR(syscon_regmap);
+-      soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
++      soc_dev_attr = devm_kzalloc(&pdev->dev, sizeof(*soc_dev_attr), GFP_KERNEL);
+       if (!soc_dev_attr)
+               return -ENOMEM;
+@@ -106,10 +106,9 @@ static int realview_soc_probe(struct platform_device *pdev)
+       soc_dev_attr->family = "Versatile";
+       soc_dev_attr->custom_attr_group = realview_groups[0];
+       soc_dev = soc_device_register(soc_dev_attr);
+-      if (IS_ERR(soc_dev)) {
+-              kfree(soc_dev_attr);
++      if (IS_ERR(soc_dev))
+               return -ENODEV;
+-      }
++
+       ret = regmap_read(syscon_regmap, REALVIEW_SYS_ID_OFFSET,
+                         &realview_coreid);
+       if (ret)
+-- 
+2.43.0
+
diff --git a/queue-6.10/soc-versatile-realview-fix-soc_dev-leak-during-devic.patch b/queue-6.10/soc-versatile-realview-fix-soc_dev-leak-during-devic.patch
new file mode 100644 (file)
index 0000000..7c730ce
--- /dev/null
@@ -0,0 +1,63 @@
+From 015be69c0db89a0fa48410c1745b32504a95ee02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 25 Aug 2024 20:05:24 +0200
+Subject: soc: versatile: realview: fix soc_dev leak during device remove
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+[ Upstream commit c774f2564c0086c23f5269fd4691f233756bf075 ]
+
+If the device is unbound, the soc_dev should be unregistered to
+prevent a memory leak.
+
+Fixes: a2974c9c1f83 ("soc: add driver for the ARM RealView")
+Cc: stable@vger.kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Link: https://lore.kernel.org/20240825-soc-dev-fixes-v1-3-ff4b35abed83@linaro.org
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/versatile/soc-realview.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/drivers/soc/versatile/soc-realview.c b/drivers/soc/versatile/soc-realview.c
+index d304ee69287af..cf91abe07d38d 100644
+--- a/drivers/soc/versatile/soc-realview.c
++++ b/drivers/soc/versatile/soc-realview.c
+@@ -4,6 +4,7 @@
+  *
+  * Author: Linus Walleij <linus.walleij@linaro.org>
+  */
++#include <linux/device.h>
+ #include <linux/init.h>
+ #include <linux/io.h>
+ #include <linux/slab.h>
+@@ -81,6 +82,13 @@ static struct attribute *realview_attrs[] = {
+ ATTRIBUTE_GROUPS(realview);
++static void realview_soc_socdev_release(void *data)
++{
++      struct soc_device *soc_dev = data;
++
++      soc_device_unregister(soc_dev);
++}
++
+ static int realview_soc_probe(struct platform_device *pdev)
+ {
+       struct regmap *syscon_regmap;
+@@ -109,6 +117,11 @@ static int realview_soc_probe(struct platform_device *pdev)
+       if (IS_ERR(soc_dev))
+               return -ENODEV;
++      ret = devm_add_action_or_reset(&pdev->dev, realview_soc_socdev_release,
++                                     soc_dev);
++      if (ret)
++              return ret;
++
+       ret = regmap_read(syscon_regmap, REALVIEW_SYS_ID_OFFSET,
+                         &realview_coreid);
+       if (ret)
+-- 
+2.43.0
+
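The fix relies on the generic devm release-action pattern; a minimal sketch of that pattern in isolation (my_obj_release(), my_obj_unregister() and obj are placeholder names, not from the driver):

	static void my_obj_release(void *data)
	{
		my_obj_unregister(data);	/* runs when the device is unbound */
	}

	/* in probe(): tie the object's lifetime to the bound device */
	ret = devm_add_action_or_reset(&pdev->dev, my_obj_release, obj);
	if (ret)
		return ret;	/* on failure the release action has already run */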
diff --git a/queue-6.10/usb-xhci-fix-loss-of-data-on-cadence-xhc.patch b/queue-6.10/usb-xhci-fix-loss-of-data-on-cadence-xhc.patch
new file mode 100644 (file)
index 0000000..c49526a
--- /dev/null
@@ -0,0 +1,117 @@
+From 65a6a1f627626dee5f356a04001d03c25bfacd7d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Sep 2024 07:03:28 +0000
+Subject: usb: xhci: fix loss of data on Cadence xHC
+
+From: Pawel Laszczak <pawell@cadence.com>
+
+[ Upstream commit e5fa8db0be3e8757e8641600c518425a4589b85c ]
+
+Streams should flush their TRB cache, re-read TRBs, and start executing
+TRBs from the beginning of the new dequeue pointer after a 'Set TR Dequeue
+Pointer' command.
+
+Cadence controllers may fail to start from the beginning of the dequeue
+TRB because they don't clear the opaque 'RsvdO' field of the stream
+context during the 'Set TR Dequeue' command. This stream context area is
+where the xHC stores information about the last partially executed TD
+when a stream is stopped. The xHC uses this information to resume the
+transfer where it left off mid-TD when the stream is restarted.
+
+The patch fixes this by clearing out all RsvdO fields before initializing
+a new stream transfer using a 'Set TR Dequeue Pointer' command.
+
+Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
+cc: stable@vger.kernel.org
+Signed-off-by: Pawel Laszczak <pawell@cadence.com>
+Reviewed-by: Peter Chen <peter.chen@kernel.org>
+Link: https://lore.kernel.org/r/PH7PR07MB95386A40146E3EC64086F409DD9D2@PH7PR07MB9538.namprd07.prod.outlook.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/cdns3/host.c     |  4 +++-
+ drivers/usb/host/xhci-pci.c  |  7 +++++++
+ drivers/usb/host/xhci-ring.c | 14 ++++++++++++++
+ drivers/usb/host/xhci.h      |  1 +
+ 4 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c
+index ceca4d839dfd4..7ba760ee62e33 100644
+--- a/drivers/usb/cdns3/host.c
++++ b/drivers/usb/cdns3/host.c
+@@ -62,7 +62,9 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = {
+       .resume_quirk = xhci_cdns3_resume_quirk,
+ };
+-static const struct xhci_plat_priv xhci_plat_cdnsp_xhci;
++static const struct xhci_plat_priv xhci_plat_cdnsp_xhci = {
++      .quirks = XHCI_CDNS_SCTX_QUIRK,
++};
+ static int __cdns_host_init(struct cdns *cdns)
+ {
+diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
+index 19527b856c550..994fd8b38bd01 100644
+--- a/drivers/usb/host/xhci-pci.c
++++ b/drivers/usb/host/xhci-pci.c
+@@ -81,6 +81,9 @@
+ #define PCI_DEVICE_ID_ASMEDIA_2142_XHCI                       0x2142
+ #define PCI_DEVICE_ID_ASMEDIA_3242_XHCI                       0x3242
++#define PCI_DEVICE_ID_CADENCE                         0x17CD
++#define PCI_DEVICE_ID_CADENCE_SSP                     0x0200
++
+ static const char hcd_name[] = "xhci_hcd";
+ static struct hc_driver __read_mostly xhci_pci_hc_driver;
+@@ -480,6 +483,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+                       xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH;
+       }
++      if (pdev->vendor == PCI_DEVICE_ID_CADENCE &&
++          pdev->device == PCI_DEVICE_ID_CADENCE_SSP)
++              xhci->quirks |= XHCI_CDNS_SCTX_QUIRK;
++
+       /* xHC spec requires PCI devices to support D3hot and D3cold */
+       if (xhci->hci_version >= 0x120)
+               xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index fd0cde3d1569c..0fe6bef6c3980 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -1426,6 +1426,20 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
+                       struct xhci_stream_ctx *ctx =
+                               &ep->stream_info->stream_ctx_array[stream_id];
+                       deq = le64_to_cpu(ctx->stream_ring) & SCTX_DEQ_MASK;
++
++                      /*
++                       * Cadence xHCI controllers store some endpoint state
++                       * information within Rsvd0 fields of Stream Endpoint
++                       * context. This field is not cleared during Set TR
++                       * Dequeue Pointer command which causes XDMA to skip
++                       * over transfer ring and leads to data loss on stream
++                       * pipe.
++                       * To fix this issue driver must clear Rsvd0 field.
++                       */
++                      if (xhci->quirks & XHCI_CDNS_SCTX_QUIRK) {
++                              ctx->reserved[0] = 0;
++                              ctx->reserved[1] = 0;
++                      }
+               } else {
+                       deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK;
+               }
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 5a8925474176d..ac8da8a7df86b 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1630,6 +1630,7 @@ struct xhci_hcd {
+ #define XHCI_ZHAOXIN_TRB_FETCH        BIT_ULL(45)
+ #define XHCI_ZHAOXIN_HOST     BIT_ULL(46)
+ #define XHCI_WRITE_64_HI_LO   BIT_ULL(47)
++#define XHCI_CDNS_SCTX_QUIRK  BIT_ULL(48)
+       unsigned int            num_active_eps;
+       unsigned int            limit_active_eps;
+-- 
+2.43.0
+
diff --git a/queue-6.10/x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch b/queue-6.10/x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch
new file mode 100644 (file)
index 0000000..15dd21c
--- /dev/null
@@ -0,0 +1,155 @@
+From cbe17f625208e4cb8eaa9c9c53ef0a457a559f50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jun 2024 12:58:55 +0300
+Subject: x86/mm: Add callbacks to prepare encrypted memory for kexec
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+[ Upstream commit 22daa42294b419a0d8060a3870285e7a72aa63e4 ]
+
+AMD SEV and Intel TDX guests allocate shared buffers for performing I/O.
+This is done by allocating pages normally from the buddy allocator and
+then converting them to shared using set_memory_decrypted().
+
+On kexec, the second kernel is unaware of which memory has been
+converted in this manner. It only sees E820_TYPE_RAM. Accessing shared
+memory as private is fatal.
+
+Therefore, the memory state must be reset to its original state before
+starting the new kernel with kexec.
+
+The process of converting shared memory back to private occurs in two
+steps:
+
+- enc_kexec_begin() stops new conversions.
+
+- enc_kexec_finish() unshares all existing shared memory, reverting it
+  back to private.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Tested-by: Tao Liu <ltao@redhat.com>
+Link: https://lore.kernel.org/r/20240614095904.1345461-11-kirill.shutemov@linux.intel.com
+Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/x86_init.h | 10 ++++++++++
+ arch/x86/kernel/crash.c         | 12 ++++++++++++
+ arch/x86/kernel/reboot.c        | 12 ++++++++++++
+ arch/x86/kernel/x86_init.c      |  4 ++++
+ 4 files changed, 38 insertions(+)
+
+diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
+index 28ac3cb9b987b..213cf5379a5a6 100644
+--- a/arch/x86/include/asm/x86_init.h
++++ b/arch/x86/include/asm/x86_init.h
+@@ -149,12 +149,22 @@ struct x86_init_acpi {
+  * @enc_status_change_finish  Notify HV after the encryption status of a range is changed
+  * @enc_tlb_flush_required    Returns true if a TLB flush is needed before changing page encryption status
+  * @enc_cache_flush_required  Returns true if a cache flush is needed before changing page encryption status
++ * @enc_kexec_begin           Begin the two-step process of converting shared memory back
++ *                            to private. It stops the new conversions from being started
++ *                            and waits in-flight conversions to finish, if possible.
++ * @enc_kexec_finish          Finish the two-step process of converting shared memory to
++ *                            private. All memory is private after the call when
++ *                            the function returns.
++ *                            It is called on only one CPU while the others are shut down
++ *                            and with interrupts disabled.
+  */
+ struct x86_guest {
+       int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+       int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+       bool (*enc_tlb_flush_required)(bool enc);
+       bool (*enc_cache_flush_required)(void);
++      void (*enc_kexec_begin)(void);
++      void (*enc_kexec_finish)(void);
+ };
+ /**
+diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
+index f06501445cd98..340af81556584 100644
+--- a/arch/x86/kernel/crash.c
++++ b/arch/x86/kernel/crash.c
+@@ -128,6 +128,18 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
+ #ifdef CONFIG_HPET_TIMER
+       hpet_disable();
+ #endif
++
++      /*
++       * Non-crash kexec calls enc_kexec_begin() while scheduling is still
++       * active. This allows the callback to wait until all in-flight
++       * shared<->private conversions are complete. In a crash scenario,
++       * enc_kexec_begin() gets called after all but one CPU have been shut
++       * down and interrupts have been disabled. This allows the callback to
++       * detect a race with the conversion and report it.
++       */
++      x86_platform.guest.enc_kexec_begin();
++      x86_platform.guest.enc_kexec_finish();
++
+       crash_save_cpu(regs, safe_smp_processor_id());
+ }
+diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
+index f3130f762784a..bb7a44af7efd1 100644
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -12,6 +12,7 @@
+ #include <linux/delay.h>
+ #include <linux/objtool.h>
+ #include <linux/pgtable.h>
++#include <linux/kexec.h>
+ #include <acpi/reboot.h>
+ #include <asm/io.h>
+ #include <asm/apic.h>
+@@ -716,6 +717,14 @@ static void native_machine_emergency_restart(void)
+ void native_machine_shutdown(void)
+ {
++      /*
++       * Call enc_kexec_begin() while all CPUs are still active and
++       * interrupts are enabled. This will allow all in-flight memory
++       * conversions to finish cleanly.
++       */
++      if (kexec_in_progress)
++              x86_platform.guest.enc_kexec_begin();
++
+       /* Stop the cpus and apics */
+ #ifdef CONFIG_X86_IO_APIC
+       /*
+@@ -752,6 +761,9 @@ void native_machine_shutdown(void)
+ #ifdef CONFIG_X86_64
+       x86_platform.iommu_shutdown();
+ #endif
++
++      if (kexec_in_progress)
++              x86_platform.guest.enc_kexec_finish();
+ }
+ static void __machine_emergency_restart(int emergency)
+diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
+index 3f95e32dd288e..0a2bbd674a6d9 100644
+--- a/arch/x86/kernel/x86_init.c
++++ b/arch/x86/kernel/x86_init.c
+@@ -139,6 +139,8 @@ static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool
+ static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
+ static bool enc_tlb_flush_required_noop(bool enc) { return false; }
+ static bool enc_cache_flush_required_noop(void) { return false; }
++static void enc_kexec_begin_noop(void) {}
++static void enc_kexec_finish_noop(void) {}
+ static bool is_private_mmio_noop(u64 addr) {return false; }
+ struct x86_platform_ops x86_platform __ro_after_init = {
+@@ -162,6 +164,8 @@ struct x86_platform_ops x86_platform __ro_after_init = {
+               .enc_status_change_finish  = enc_status_change_finish_noop,
+               .enc_tlb_flush_required    = enc_tlb_flush_required_noop,
+               .enc_cache_flush_required  = enc_cache_flush_required_noop,
++              .enc_kexec_begin           = enc_kexec_begin_noop,
++              .enc_kexec_finish          = enc_kexec_finish_noop,
+       },
+ };
+-- 
+2.43.0
+
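For orientation, a guest backend opts in by pointing the new hooks at its own handlers during early init; the TDX patch later in this queue does exactly this:

	/* e.g. in tdx_early_init() */
	x86_platform.guest.enc_kexec_begin  = tdx_kexec_begin;
	x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;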
diff --git a/queue-6.10/x86-mm-make-x86_platform.guest.enc_status_change_-re.patch b/queue-6.10/x86-mm-make-x86_platform.guest.enc_status_change_-re.patch
new file mode 100644 (file)
index 0000000..03459c6
--- /dev/null
@@ -0,0 +1,248 @@
+From dfaedba14744b7e751e19d1cbcf05c623f8e0f17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jun 2024 12:58:52 +0300
+Subject: x86/mm: Make x86_platform.guest.enc_status_change_*() return an error
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+[ Upstream commit 99c5c4c60e0db1d2ff58b8a61c93b6851146469f ]
+
+TDX is going to have more than one reason to fail enc_status_change_prepare().
+
+Change the callback to return an errno instead of assuming -EIO. Change
+enc_status_change_finish() as well to keep the interface symmetric.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Reviewed-by: Michael Kelley <mhklinux@outlook.com>
+Tested-by: Tao Liu <ltao@redhat.com>
+Link: https://lore.kernel.org/r/20240614095904.1345461-8-kirill.shutemov@linux.intel.com
+Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/coco/tdx/tdx.c         | 20 +++++++++++---------
+ arch/x86/hyperv/ivm.c           | 22 ++++++++++------------
+ arch/x86/include/asm/x86_init.h |  4 ++--
+ arch/x86/kernel/x86_init.c      |  4 ++--
+ arch/x86/mm/mem_encrypt_amd.c   |  8 ++++----
+ arch/x86/mm/pat/set_memory.c    | 12 +++++++-----
+ 6 files changed, 36 insertions(+), 34 deletions(-)
+
+diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
+index 8fe4c2b07128e..fdcc081317764 100644
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -797,28 +797,30 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+       return true;
+ }
+-static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
+-                                        bool enc)
++static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
++                                       bool enc)
+ {
+       /*
+        * Only handle shared->private conversion here.
+        * See the comment in tdx_early_init().
+        */
+-      if (enc)
+-              return tdx_enc_status_changed(vaddr, numpages, enc);
+-      return true;
++      if (enc && !tdx_enc_status_changed(vaddr, numpages, enc))
++              return -EIO;
++
++      return 0;
+ }
+-static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
++static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+                                        bool enc)
+ {
+       /*
+        * Only handle private->shared conversion here.
+        * See the comment in tdx_early_init().
+        */
+-      if (!enc)
+-              return tdx_enc_status_changed(vaddr, numpages, enc);
+-      return true;
++      if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc))
++              return -EIO;
++
++      return 0;
+ }
+ void __init tdx_early_init(void)
+diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
+index 768d73de0d098..b4a851d27c7cb 100644
+--- a/arch/x86/hyperv/ivm.c
++++ b/arch/x86/hyperv/ivm.c
+@@ -523,9 +523,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
+  * transition is complete, hv_vtom_set_host_visibility() marks the pages
+  * as "present" again.
+  */
+-static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
++static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
+ {
+-      return !set_memory_np(kbuffer, pagecount);
++      return set_memory_np(kbuffer, pagecount);
+ }
+ /*
+@@ -536,20 +536,19 @@ static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc
+  * with host. This function works as wrap of hv_mark_gpa_visibility()
+  * with memory base and size.
+  */
+-static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
++static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
+ {
+       enum hv_mem_host_visibility visibility = enc ?
+                       VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
+       u64 *pfn_array;
+       phys_addr_t paddr;
++      int i, pfn, err;
+       void *vaddr;
+       int ret = 0;
+-      bool result = true;
+-      int i, pfn;
+       pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
+       if (!pfn_array) {
+-              result = false;
++              ret = -ENOMEM;
+               goto err_set_memory_p;
+       }
+@@ -568,10 +567,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
+               if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
+                       ret = hv_mark_gpa_visibility(pfn, pfn_array,
+                                                    visibility);
+-                      if (ret) {
+-                              result = false;
++                      if (ret)
+                               goto err_free_pfn_array;
+-                      }
+                       pfn = 0;
+               }
+       }
+@@ -586,10 +583,11 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
+        * order to avoid leaving the memory range in a "broken" state. Setting
+        * the PRESENT bits shouldn't fail, but return an error if it does.
+        */
+-      if (set_memory_p(kbuffer, pagecount))
+-              result = false;
++      err = set_memory_p(kbuffer, pagecount);
++      if (err && !ret)
++              ret = err;
+-      return result;
++      return ret;
+ }
+ static bool hv_vtom_tlb_flush_required(bool private)
+diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
+index 6149eabe200f5..28ac3cb9b987b 100644
+--- a/arch/x86/include/asm/x86_init.h
++++ b/arch/x86/include/asm/x86_init.h
+@@ -151,8 +151,8 @@ struct x86_init_acpi {
+  * @enc_cache_flush_required  Returns true if a cache flush is needed before changing page encryption status
+  */
+ struct x86_guest {
+-      bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+-      bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
++      int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
++      int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+       bool (*enc_tlb_flush_required)(bool enc);
+       bool (*enc_cache_flush_required)(void);
+ };
+diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
+index 5474a6fdd6895..3f95e32dd288e 100644
+--- a/arch/x86/kernel/x86_init.c
++++ b/arch/x86/kernel/x86_init.c
+@@ -135,8 +135,8 @@ struct x86_cpuinit_ops x86_cpuinit = {
+ static void default_nmi_init(void) { };
+-static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
+-static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
++static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
++static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
+ static bool enc_tlb_flush_required_noop(bool enc) { return false; }
+ static bool enc_cache_flush_required_noop(void) { return false; }
+ static bool is_private_mmio_noop(u64 addr) {return false; }
+diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
+index 422602f6039b8..e7b67519ddb5d 100644
+--- a/arch/x86/mm/mem_encrypt_amd.c
++++ b/arch/x86/mm/mem_encrypt_amd.c
+@@ -283,7 +283,7 @@ static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
+ #endif
+ }
+-static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
++static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+ {
+       /*
+        * To maintain the security guarantees of SEV-SNP guests, make sure
+@@ -292,11 +292,11 @@ static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
+               snp_set_memory_shared(vaddr, npages);
+-      return true;
++      return 0;
+ }
+ /* Return true unconditionally: return value doesn't matter for the SEV side */
+-static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
++static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
+ {
+       /*
+        * After memory is mapped encrypted in the page table, validate it
+@@ -308,7 +308,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e
+       if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+               enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);
+-      return true;
++      return 0;
+ }
+ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
+index 19fdfbb171ed6..498812f067cd5 100644
+--- a/arch/x86/mm/pat/set_memory.c
++++ b/arch/x86/mm/pat/set_memory.c
+@@ -2196,7 +2196,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
+               cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
+       /* Notify hypervisor that we are about to set/clr encryption attribute. */
+-      if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
++      ret = x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
++      if (ret)
+               goto vmm_fail;
+       ret = __change_page_attr_set_clr(&cpa, 1);
+@@ -2214,16 +2215,17 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
+               return ret;
+       /* Notify hypervisor that we have successfully set/clr encryption attribute. */
+-      if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
++      ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc);
++      if (ret)
+               goto vmm_fail;
+       return 0;
+ vmm_fail:
+-      WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s.\n",
+-                (void *)addr, numpages, enc ? "private" : "shared");
++      WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s: %d\n",
++                (void *)addr, numpages, enc ? "private" : "shared", ret);
+-      return -EIO;
++      return ret;
+ }
+ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
+-- 
+2.43.0
+
diff --git a/queue-6.10/x86-tdx-account-shared-memory.patch b/queue-6.10/x86-tdx-account-shared-memory.patch
new file mode 100644 (file)
index 0000000..e1d0dd1
--- /dev/null
@@ -0,0 +1,62 @@
+From 8066e22947aa074499e766c86d887e3809015968 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jun 2024 12:58:54 +0300
+Subject: x86/tdx: Account shared memory
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+[ Upstream commit c3abbf1376874f0d6eb22859a8655831644efa42 ]
+
+The kernel will convert all shared memory back to private during kexec.
+The direct mapping page tables will provide information on which memory
+is shared.
+
+It is extremely important to convert all shared memory. If a page is
+missed, it will cause the second kernel to crash when it accesses it.
+
+Keep track of the number of shared pages. This will allow for
+cross-checking against the shared information in the direct mapping and
+reporting if the shared bit is lost.
+
+Memory conversion is slow and does not happen often, so a global atomic
+is not going to be a bottleneck.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Tested-by: Tao Liu <ltao@redhat.com>
+Link: https://lore.kernel.org/r/20240614095904.1345461-10-kirill.shutemov@linux.intel.com
+Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/coco/tdx/tdx.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
+index fdcc081317764..729ef77b65865 100644
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -38,6 +38,8 @@
+ #define TDREPORT_SUBTYPE_0    0
++static atomic_long_t nr_shared;
++
+ /* Called from __tdx_hypercall() for unrecoverable failure */
+ noinstr void __noreturn __tdx_hypercall_failed(void)
+ {
+@@ -820,6 +822,11 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+       if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc))
+               return -EIO;
++      if (enc)
++              atomic_long_sub(numpages, &nr_shared);
++      else
++              atomic_long_add(numpages, &nr_shared);
++
+       return 0;
+ }
+-- 
+2.43.0
+
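A condensed view of the counter's life cycle, combining this patch with the kexec-time cross-check added in the next one:

	static atomic_long_t nr_shared;

	/* private -> shared conversion completed */
	atomic_long_add(numpages, &nr_shared);

	/* shared -> private conversion completed */
	atomic_long_sub(numpages, &nr_shared);

	/* on kexec: compare with what the direct mapping reports */
	if (atomic_long_read(&nr_shared) != found)
		pr_err("shared page accounting is off\n");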
diff --git a/queue-6.10/x86-tdx-convert-shared-memory-back-to-private-on-kex.patch b/queue-6.10/x86-tdx-convert-shared-memory-back-to-private-on-kex.patch
new file mode 100644 (file)
index 0000000..d83bfce
--- /dev/null
@@ -0,0 +1,261 @@
+From 69bd596b0d0d2d6f12594826ab1ff5d9cb412f10 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jun 2024 12:58:56 +0300
+Subject: x86/tdx: Convert shared memory back to private on kexec
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+[ Upstream commit 859e63b789d6b17b3c64e51a0aabdc58752a0254 ]
+
+TDX guests allocate shared buffers to perform I/O. This is done by
+allocating pages normally from the buddy allocator and converting them
+to shared with set_memory_decrypted().
+
+The second, kexec-ed kernel has no idea which memory was converted this
+way. It only sees E820_TYPE_RAM.
+
+Accessing shared memory via a private mapping is fatal. It leads to an
+unrecoverable TD exit.
+
+On kexec, walk the direct mapping and convert all shared memory back to
+private. This makes all RAM private again so the second kernel can use
+it normally.
+
+The conversion occurs in two steps: stopping new conversions and unsharing all
+memory. In the case of normal kexec, the stopping of conversions takes place
+while scheduling is still functioning. This allows for waiting until any ongoing
+conversions are finished. The second step is carried out when all CPUs except one
+are inactive and interrupts are disabled. This prevents any conflicts with code
+that may access shared memory.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Tested-by: Tao Liu <ltao@redhat.com>
+Link: https://lore.kernel.org/r/20240614095904.1345461-12-kirill.shutemov@linux.intel.com
+Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/coco/tdx/tdx.c           | 94 +++++++++++++++++++++++++++++++
+ arch/x86/include/asm/pgtable.h    |  5 ++
+ arch/x86/include/asm/set_memory.h |  3 +
+ arch/x86/mm/pat/set_memory.c      | 42 +++++++++++++-
+ 4 files changed, 141 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
+index 729ef77b65865..da8b66dce0da5 100644
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -7,6 +7,7 @@
+ #include <linux/cpufeature.h>
+ #include <linux/export.h>
+ #include <linux/io.h>
++#include <linux/kexec.h>
+ #include <asm/coco.h>
+ #include <asm/tdx.h>
+ #include <asm/vmx.h>
+@@ -14,6 +15,7 @@
+ #include <asm/insn.h>
+ #include <asm/insn-eval.h>
+ #include <asm/pgtable.h>
++#include <asm/set_memory.h>
+ /* MMIO direction */
+ #define EPT_READ      0
+@@ -830,6 +832,95 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+       return 0;
+ }
++/* Stop new private<->shared conversions */
++static void tdx_kexec_begin(void)
++{
++      if (!IS_ENABLED(CONFIG_KEXEC_CORE))
++              return;
++
++      /*
++       * Crash kernel reaches here with interrupts disabled: can't wait for
++       * conversions to finish.
++       *
++       * If race happened, just report and proceed.
++       */
++      if (!set_memory_enc_stop_conversion())
++              pr_warn("Failed to stop shared<->private conversions\n");
++}
++
++/* Walk direct mapping and convert all shared memory back to private */
++static void tdx_kexec_finish(void)
++{
++      unsigned long addr, end;
++      long found = 0, shared;
++
++      if (!IS_ENABLED(CONFIG_KEXEC_CORE))
++              return;
++
++      lockdep_assert_irqs_disabled();
++
++      addr = PAGE_OFFSET;
++      end  = PAGE_OFFSET + get_max_mapped();
++
++      while (addr < end) {
++              unsigned long size;
++              unsigned int level;
++              pte_t *pte;
++
++              pte = lookup_address(addr, &level);
++              size = page_level_size(level);
++
++              if (pte && pte_decrypted(*pte)) {
++                      int pages = size / PAGE_SIZE;
++
++                      /*
++                       * Touching memory with shared bit set triggers implicit
++                       * conversion to shared.
++                       *
++                       * Make sure nobody touches the shared range from
++                       * now on.
++                       */
++                      set_pte(pte, __pte(0));
++
++                      /*
++                       * Memory encryption state persists across kexec.
++                       * If tdx_enc_status_changed() fails in the first
++                       * kernel, it leaves memory in an unknown state.
++                       *
++                       * If that memory remains shared, accessing it in the
++                       * *next* kernel through a private mapping will result
++                       * in an unrecoverable guest shutdown.
++                       *
++                       * The kdump kernel boot is not impacted as it uses
++                       * a pre-reserved memory range that is always private.
++                       * However, gathering crash information could lead to
++                       * a crash if it accesses unconverted memory through
++                       * a private mapping which is possible when accessing
++                       * that memory through /proc/vmcore, for example.
++                       *
++                       * In all cases, print error info in order to leave
++                       * enough bread crumbs for debugging.
++                       */
++                      if (!tdx_enc_status_changed(addr, pages, true)) {
++                              pr_err("Failed to unshare range %#lx-%#lx\n",
++                                     addr, addr + size);
++                      }
++
++                      found += pages;
++              }
++
++              addr += size;
++      }
++
++      __flush_tlb_all();
++
++      shared = atomic_long_read(&nr_shared);
++      if (shared != found) {
++              pr_err("shared page accounting is off\n");
++              pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found);
++      }
++}
++
+ void __init tdx_early_init(void)
+ {
+       struct tdx_module_args args = {
+@@ -889,6 +980,9 @@ void __init tdx_early_init(void)
+       x86_platform.guest.enc_cache_flush_required  = tdx_cache_flush_required;
+       x86_platform.guest.enc_tlb_flush_required    = tdx_tlb_flush_required;
++      x86_platform.guest.enc_kexec_begin           = tdx_kexec_begin;
++      x86_platform.guest.enc_kexec_finish          = tdx_kexec_finish;
++
+       /*
+        * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
+        * bringup low level code. That raises #VE which cannot be handled
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 65b8e5bb902cc..e39311a89bf47 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte)
+       return pte_flags(pte) & _PAGE_ACCESSED;
+ }
++static inline bool pte_decrypted(pte_t pte)
++{
++      return cc_mkdec(pte_val(pte)) == pte_val(pte);
++}
++
+ #define pmd_dirty pmd_dirty
+ static inline bool pmd_dirty(pmd_t pmd)
+ {
+diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
+index 9aee31862b4a8..4b2abce2e3e7d 100644
+--- a/arch/x86/include/asm/set_memory.h
++++ b/arch/x86/include/asm/set_memory.h
+@@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages);
+ int set_memory_np(unsigned long addr, int numpages);
+ int set_memory_p(unsigned long addr, int numpages);
+ int set_memory_4k(unsigned long addr, int numpages);
++
++bool set_memory_enc_stop_conversion(void);
+ int set_memory_encrypted(unsigned long addr, int numpages);
+ int set_memory_decrypted(unsigned long addr, int numpages);
++
+ int set_memory_np_noalias(unsigned long addr, int numpages);
+ int set_memory_nonglobal(unsigned long addr, int numpages);
+ int set_memory_global(unsigned long addr, int numpages);
+diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
+index 498812f067cd5..1356e25e6d125 100644
+--- a/arch/x86/mm/pat/set_memory.c
++++ b/arch/x86/mm/pat/set_memory.c
+@@ -2228,12 +2228,48 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
+       return ret;
+ }
++/*
++ * The lock serializes conversions between private and shared memory.
++ *
++ * It is taken for read on conversion. A write lock guarantees that no
++ * concurrent conversions are in progress.
++ */
++static DECLARE_RWSEM(mem_enc_lock);
++
++/*
++ * Stop new private<->shared conversions.
++ *
++ * Taking the exclusive mem_enc_lock waits for in-flight conversions to complete.
++ * The lock is not released to prevent new conversions from being started.
++ */
++bool set_memory_enc_stop_conversion(void)
++{
++      /*
++       * In a crash scenario, sleep is not allowed. Try to take the lock.
++       * Failure indicates that there is a race with the conversion.
++       */
++      if (oops_in_progress)
++              return down_write_trylock(&mem_enc_lock);
++
++      down_write(&mem_enc_lock);
++
++      return true;
++}
++
+ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
+ {
+-      if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+-              return __set_memory_enc_pgtable(addr, numpages, enc);
++      int ret = 0;
+-      return 0;
++      if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
++              if (!down_read_trylock(&mem_enc_lock))
++                      return -EBUSY;
++
++              ret = __set_memory_enc_pgtable(addr, numpages, enc);
++
++              up_read(&mem_enc_lock);
++      }
++
++      return ret;
+ }
+ int set_memory_encrypted(unsigned long addr, int numpages)
+-- 
+2.43.0
+
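The serialization added above follows the usual rwsem reader/writer split; condensed here from the hunks for readability:

	/* every conversion runs under the lock taken for read ... */
	if (!down_read_trylock(&mem_enc_lock))
		return -EBUSY;
	ret = __set_memory_enc_pgtable(addr, numpages, enc);
	up_read(&mem_enc_lock);

	/* ... while kexec takes it for write and never drops it, so no
	 * new conversion can start once shutdown has begun */
	down_write(&mem_enc_lock);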
diff --git a/queue-6.10/x86-tdx-fix-in-kernel-mmio-check.patch b/queue-6.10/x86-tdx-fix-in-kernel-mmio-check.patch
new file mode 100644 (file)
index 0000000..06aeb59
--- /dev/null
@@ -0,0 +1,60 @@
+From 602d64b8731b4e2df83b0f8416a0539a22775a59 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Sep 2024 19:05:56 +0200
+Subject: x86/tdx: Fix "in-kernel MMIO" check
+
+From: Alexey Gladkov (Intel) <legion@kernel.org>
+
+[ Upstream commit d4fc4d01471528da8a9797a065982e05090e1d81 ]
+
+TDX only supports kernel-initiated MMIO operations. The handle_mmio()
+function checks if the #VE exception occurred in the kernel and rejects
+the operation if it did not.
+
+However, userspace can deceive the kernel into performing MMIO on its
+behalf. For example, if userspace can point a syscall to an MMIO
+address, the syscall does get_user() or put_user() on it, triggering an
+MMIO #VE. The kernel will treat the #VE as in-kernel MMIO.
+
+Ensure that the target MMIO address is within the kernel before
+decoding the instruction.
+
+Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO")
+Signed-off-by: Alexey Gladkov (Intel) <legion@kernel.org>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/565a804b80387970460a4ebc67c88d1380f61ad1.1726237595.git.legion%40kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/coco/tdx/tdx.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
+index da8b66dce0da5..327c45c5013fe 100644
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -16,6 +16,7 @@
+ #include <asm/insn-eval.h>
+ #include <asm/pgtable.h>
+ #include <asm/set_memory.h>
++#include <asm/traps.h>
+ /* MMIO direction */
+ #define EPT_READ      0
+@@ -433,6 +434,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+                       return -EINVAL;
+       }
++      if (!fault_in_kernel_space(ve->gla)) {
++              WARN_ONCE(1, "Access to userspace address is not supported");
++              return -EINVAL;
++      }
++
+       /*
+        * Reject EPT violation #VEs that split pages.
+        *
+-- 
+2.43.0
+
diff --git a/queue-6.10/xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch b/queue-6.10/xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch
new file mode 100644 (file)
index 0000000..f9692da
--- /dev/null
@@ -0,0 +1,65 @@
+From 525222814f03c36309bea0992d0e53f045f9031a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jun 2024 20:39:12 +0900
+Subject: xhci: Add a quirk for writing ERST in high-low order
+
+From: Daehwan Jung <dh10.jung@samsung.com>
+
+[ Upstream commit bc162403e33e1d57e40994977acaf19f1434e460 ]
+
+This quirk is for controllers that have a limitation in supporting
+separate ERSTBA_HI and ERSTBA_LO programming: the ERSTBA is only
+supported when ERSTBA_HI is programmed before ERSTBA_LO. That's because
+the controller's internal event ring initialization fetches the
+"Event Ring Segment Table Entry" when ERSTBA_LO is written.
+
+Signed-off-by: Daehwan Jung <dh10.jung@samsung.com>
+Link: https://lore.kernel.org/r/1718019553-111939-3-git-send-email-dh10.jung@samsung.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: e5fa8db0be3e ("usb: xhci: fix loss of data on Cadence xHC")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-mem.c | 5 ++++-
+ drivers/usb/host/xhci.h     | 2 ++
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
+index f591ddd086627..fa3ee53df0ecc 100644
+--- a/drivers/usb/host/xhci-mem.c
++++ b/drivers/usb/host/xhci-mem.c
+@@ -2325,7 +2325,10 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir,
+       erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base);
+       erst_base &= ERST_BASE_RSVDP;
+       erst_base |= ir->erst.erst_dma_addr & ~ERST_BASE_RSVDP;
+-      xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base);
++      if (xhci->quirks & XHCI_WRITE_64_HI_LO)
++              hi_lo_writeq(erst_base, &ir->ir_set->erst_base);
++      else
++              xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base);
+       /* Set the event ring dequeue address of this interrupter */
+       xhci_set_hc_event_deq(xhci, ir);
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 78d014c4d884a..5a8925474176d 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -17,6 +17,7 @@
+ #include <linux/kernel.h>
+ #include <linux/usb/hcd.h>
+ #include <linux/io-64-nonatomic-lo-hi.h>
++#include <linux/io-64-nonatomic-hi-lo.h>
+ /* Code sharing between pci-quirks and xhci hcd */
+ #include      "xhci-ext-caps.h"
+@@ -1628,6 +1629,7 @@ struct xhci_hcd {
+ #define XHCI_RESET_TO_DEFAULT BIT_ULL(44)
+ #define XHCI_ZHAOXIN_TRB_FETCH        BIT_ULL(45)
+ #define XHCI_ZHAOXIN_HOST     BIT_ULL(46)
++#define XHCI_WRITE_64_HI_LO   BIT_ULL(47)
+       unsigned int            num_active_eps;
+       unsigned int            limit_active_eps;
+-- 
+2.43.0
+
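For reference, hi_lo_writeq() from <linux/io-64-nonatomic-hi-lo.h> performs the split write in exactly the order the quirk needs, which is why a single branch on XHCI_WRITE_64_HI_LO suffices (body reproduced from memory as a sketch):

	static inline void hi_lo_writeq(__u64 val, volatile void __iomem *addr)
	{
		writel(val >> 32, addr + 4);	/* ERSTBA_HI first */
		writel(val, addr);		/* ERSTBA_LO last, triggering the fetch */
	}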