From: Sasha Levin Date: Wed, 2 Oct 2024 05:17:33 +0000 (-0400) Subject: Fixes for 6.10 X-Git-Tag: v6.6.54~50 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0cd0ebeb8c395440f500c80203586a334edbe31b;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.10 Signed-off-by: Sasha Levin --- diff --git a/queue-6.10/debugfs-convert-to-new-uid-gid-option-parsing-helper.patch b/queue-6.10/debugfs-convert-to-new-uid-gid-option-parsing-helper.patch new file mode 100644 index 00000000000..c80ce05959e --- /dev/null +++ b/queue-6.10/debugfs-convert-to-new-uid-gid-option-parsing-helper.patch @@ -0,0 +1,67 @@ +From 52fe235e918c637b013c3382dc951842c8318ce3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Jun 2024 19:29:46 -0500 +Subject: debugfs: Convert to new uid/gid option parsing helpers + +From: Eric Sandeen + +[ Upstream commit 49abee5991e18f14ec822ef53acd173ae58ff594 ] + +Convert to new uid/gid option parsing helpers + +Signed-off-by: Eric Sandeen +Link: https://lore.kernel.org/r/b2f44ee0-3cee-49eb-a416-f26a9306eb56@redhat.com +Signed-off-by: Christian Brauner +Stable-dep-of: 3a987b88a425 ("debugfs show actual source in /proc/mounts") +Signed-off-by: Sasha Levin +--- + fs/debugfs/inode.c | 16 ++++------------ + 1 file changed, 4 insertions(+), 12 deletions(-) + +diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c +index 8fd928899a59e..91521576f5003 100644 +--- a/fs/debugfs/inode.c ++++ b/fs/debugfs/inode.c +@@ -92,9 +92,9 @@ enum { + }; + + static const struct fs_parameter_spec debugfs_param_specs[] = { +- fsparam_u32 ("gid", Opt_gid), ++ fsparam_gid ("gid", Opt_gid), + fsparam_u32oct ("mode", Opt_mode), +- fsparam_u32 ("uid", Opt_uid), ++ fsparam_uid ("uid", Opt_uid), + {} + }; + +@@ -102,8 +102,6 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param + { + struct debugfs_fs_info *opts = fc->s_fs_info; + struct fs_parse_result result; +- kuid_t uid; +- kgid_t gid; + int opt; + + opt = fs_parse(fc, debugfs_param_specs, param, &result); +@@ -120,16 +118,10 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param + + switch (opt) { + case Opt_uid: +- uid = make_kuid(current_user_ns(), result.uint_32); +- if (!uid_valid(uid)) +- return invalf(fc, "Unknown uid"); +- opts->uid = uid; ++ opts->uid = result.uid; + break; + case Opt_gid: +- gid = make_kgid(current_user_ns(), result.uint_32); +- if (!gid_valid(gid)) +- return invalf(fc, "Unknown gid"); +- opts->gid = gid; ++ opts->gid = result.gid; + break; + case Opt_mode: + opts->mode = result.uint_32 & S_IALLUGO; +-- +2.43.0 + diff --git a/queue-6.10/debugfs-show-actual-source-in-proc-mounts.patch b/queue-6.10/debugfs-show-actual-source-in-proc-mounts.patch new file mode 100644 index 00000000000..9a637365445 --- /dev/null +++ b/queue-6.10/debugfs-show-actual-source-in-proc-mounts.patch @@ -0,0 +1,61 @@ +From f0b5b89d0a0c152c4e411edfbc647e8ad98b179e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 10 Aug 2024 13:25:27 -0600 +Subject: debugfs show actual source in /proc/mounts +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Marc Aurèle La France + +[ Upstream commit 3a987b88a42593875f6345188ca33731c7df728c ] + +After its conversion to the new mount API, debugfs displays "none" in +/proc/mounts instead of the actual source. Fix this by recognising its +"source" mount option. 
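For orientation, a condensed sketch of the pattern this patch applies, written against the new mount API; example_param_specs and example_parse_param are placeholder names, and only the fc->source handling mirrors the actual hunk below:

  #include <linux/fs_context.h>
  #include <linux/fs_parser.h>

  enum { Opt_source };

  static const struct fs_parameter_spec example_param_specs[] = {
          fsparam_string("source", Opt_source),
          {}
  };

  static int example_parse_param(struct fs_context *fc, struct fs_parameter *param)
  {
          struct fs_parse_result result;
          int opt;

          opt = fs_parse(fc, example_param_specs, param, &result);
          if (opt < 0)
                  return opt;

          if (opt == Opt_source) {
                  if (fc->source)
                          return invalfc(fc, "Multiple sources specified");
                  /* hand the string over; the fs_context owns and frees it now */
                  fc->source = param->string;
                  param->string = NULL;
          }

          return 0;
  }

With the source string stored in fc->source, the VFS reports it in /proc/mounts instead of "none".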
+ +Signed-off-by: Marc Aurèle La France +Link: https://lore.kernel.org/r/e439fae2-01da-234b-75b9-2a7951671e27@tuyoix.net +Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API") +Cc: stable@vger.kernel.org # 6.10.x: 49abee5991e1: debugfs: Convert to new uid/gid option parsing helpers +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/debugfs/inode.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c +index 91521576f5003..66d9b3b4c5881 100644 +--- a/fs/debugfs/inode.c ++++ b/fs/debugfs/inode.c +@@ -89,12 +89,14 @@ enum { + Opt_uid, + Opt_gid, + Opt_mode, ++ Opt_source, + }; + + static const struct fs_parameter_spec debugfs_param_specs[] = { + fsparam_gid ("gid", Opt_gid), + fsparam_u32oct ("mode", Opt_mode), + fsparam_uid ("uid", Opt_uid), ++ fsparam_string ("source", Opt_source), + {} + }; + +@@ -126,6 +128,12 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param + case Opt_mode: + opts->mode = result.uint_32 & S_IALLUGO; + break; ++ case Opt_source: ++ if (fc->source) ++ return invalfc(fc, "Multiple sources specified"); ++ fc->source = param->string; ++ param->string = NULL; ++ break; + /* + * We might like to report bad mount options here; + * but traditionally debugfs has ignored all mount options +-- +2.43.0 + diff --git a/queue-6.10/fs_parse-add-uid-gid-option-option-parsing-helpers.patch b/queue-6.10/fs_parse-add-uid-gid-option-option-parsing-helpers.patch new file mode 100644 index 00000000000..bb53d9ff12a --- /dev/null +++ b/queue-6.10/fs_parse-add-uid-gid-option-option-parsing-helpers.patch @@ -0,0 +1,143 @@ +From f1c7aae89ddbb223fac562b507d8dd8463f268df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Jun 2024 19:26:24 -0500 +Subject: fs_parse: add uid & gid option option parsing helpers + +From: Eric Sandeen + +[ Upstream commit 9f111059e725f7ca79a136bfc734da3c8c1838b4 ] + +Multiple filesystems take uid and gid as options, and the code to +create the ID from an integer and validate it is standard boilerplate +that can be moved into common helper functions, so do that for +consistency and less cut&paste. + +This also helps avoid the buggy pattern noted by Seth Jenkins at +https://lore.kernel.org/lkml/CALxfFW4BXhEwxR0Q5LSkg-8Vb4r2MONKCcUCVioehXQKr35eHg@mail.gmail.com/ +because uid/gid parsing will fail before any assignment in most +filesystems. 
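A minimal usage sketch of the new helpers from a filesystem's point of view, assuming an fs_context-based filesystem; example_fs_ctx, example_param_specs and example_parse_param are illustrative names (the debugfs conversion above is the real example):

  #include <linux/fs_context.h>
  #include <linux/fs_parser.h>
  #include <linux/uidgid.h>

  struct example_fs_ctx {
          kuid_t uid;
          kgid_t gid;
  };

  enum { Opt_uid, Opt_gid };

  static const struct fs_parameter_spec example_param_specs[] = {
          fsparam_uid("uid", Opt_uid),
          fsparam_gid("gid", Opt_gid),
          {}
  };

  static int example_parse_param(struct fs_context *fc, struct fs_parameter *param)
  {
          struct example_fs_ctx *ctx = fc->fs_private;
          struct fs_parse_result result;
          int opt;

          opt = fs_parse(fc, example_param_specs, param, &result);
          if (opt < 0)
                  return opt;

          switch (opt) {
          case Opt_uid:
                  /* already mapped into current_user_ns() and validated */
                  ctx->uid = result.uid;
                  break;
          case Opt_gid:
                  ctx->gid = result.gid;
                  break;
          }

          return 0;
  }

Compared with the old open-coded make_kuid()/uid_valid() sequence, a bad value now fails inside fs_parse() before anything is assigned.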
+ +Signed-off-by: Eric Sandeen +Link: https://lore.kernel.org/r/de859d0a-feb9-473d-a5e2-c195a3d47abb@redhat.com +Signed-off-by: Christian Brauner +Stable-dep-of: 3a987b88a425 ("debugfs show actual source in /proc/mounts") +Signed-off-by: Sasha Levin +--- + Documentation/filesystems/mount_api.rst | 9 +++++-- + fs/fs_parser.c | 34 +++++++++++++++++++++++++ + include/linux/fs_parser.h | 6 ++++- + 3 files changed, 46 insertions(+), 3 deletions(-) + +diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst +index 9aaf6ef75eb53..317934c9e8fca 100644 +--- a/Documentation/filesystems/mount_api.rst ++++ b/Documentation/filesystems/mount_api.rst +@@ -645,6 +645,8 @@ The members are as follows: + fs_param_is_blockdev Blockdev path * Needs lookup + fs_param_is_path Path * Needs lookup + fs_param_is_fd File descriptor result->int_32 ++ fs_param_is_uid User ID (u32) result->uid ++ fs_param_is_gid Group ID (u32) result->gid + ======================= ======================= ===================== + + Note that if the value is of fs_param_is_bool type, fs_parse() will try +@@ -678,6 +680,8 @@ The members are as follows: + fsparam_bdev() fs_param_is_blockdev + fsparam_path() fs_param_is_path + fsparam_fd() fs_param_is_fd ++ fsparam_uid() fs_param_is_uid ++ fsparam_gid() fs_param_is_gid + ======================= =============================================== + + all of which take two arguments, name string and option number - for +@@ -784,8 +788,9 @@ process the parameters it is given. + option number (which it returns). + + If successful, and if the parameter type indicates the result is a +- boolean, integer or enum type, the value is converted by this function and +- the result stored in result->{boolean,int_32,uint_32,uint_64}. ++ boolean, integer, enum, uid, or gid type, the value is converted by this ++ function and the result stored in ++ result->{boolean,int_32,uint_32,uint_64,uid,gid}. 
+ + If a match isn't initially made, the key is prefixed with "no" and no + value is present then an attempt will be made to look up the key with the +diff --git a/fs/fs_parser.c b/fs/fs_parser.c +index a4d6ca0b8971e..24727ec34e5aa 100644 +--- a/fs/fs_parser.c ++++ b/fs/fs_parser.c +@@ -308,6 +308,40 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p, + } + EXPORT_SYMBOL(fs_param_is_fd); + ++int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p, ++ struct fs_parameter *param, struct fs_parse_result *result) ++{ ++ kuid_t uid; ++ ++ if (fs_param_is_u32(log, p, param, result) != 0) ++ return fs_param_bad_value(log, param); ++ ++ uid = make_kuid(current_user_ns(), result->uint_32); ++ if (!uid_valid(uid)) ++ return inval_plog(log, "Invalid uid '%s'", param->string); ++ ++ result->uid = uid; ++ return 0; ++} ++EXPORT_SYMBOL(fs_param_is_uid); ++ ++int fs_param_is_gid(struct p_log *log, const struct fs_parameter_spec *p, ++ struct fs_parameter *param, struct fs_parse_result *result) ++{ ++ kgid_t gid; ++ ++ if (fs_param_is_u32(log, p, param, result) != 0) ++ return fs_param_bad_value(log, param); ++ ++ gid = make_kgid(current_user_ns(), result->uint_32); ++ if (!gid_valid(gid)) ++ return inval_plog(log, "Invalid gid '%s'", param->string); ++ ++ result->gid = gid; ++ return 0; ++} ++EXPORT_SYMBOL(fs_param_is_gid); ++ + int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) + { +diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h +index d3350979115f0..6cf713a7e6c6f 100644 +--- a/include/linux/fs_parser.h ++++ b/include/linux/fs_parser.h +@@ -28,7 +28,7 @@ typedef int fs_param_type(struct p_log *, + */ + fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, + fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, +- fs_param_is_path, fs_param_is_fd; ++ fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid; + + /* + * Specification of the type of value a parameter wants. 
+@@ -57,6 +57,8 @@ struct fs_parse_result { + int int_32; /* For spec_s32/spec_enum */ + unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */ + u64 uint_64; /* For spec_u64 */ ++ kuid_t uid; ++ kgid_t gid; + }; + }; + +@@ -131,6 +133,8 @@ static inline bool fs_validate_description(const char *name, + #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) + #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) + #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) ++#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL) ++#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL) + + /* String parameter that allows empty argument */ + #define fsparam_string_empty(NAME, OPT) \ +-- +2.43.0 + diff --git a/queue-6.10/idpf-fix-netdev-tx-queue-stop-wake.patch b/queue-6.10/idpf-fix-netdev-tx-queue-stop-wake.patch new file mode 100644 index 00000000000..6549f575681 --- /dev/null +++ b/queue-6.10/idpf-fix-netdev-tx-queue-stop-wake.patch @@ -0,0 +1,145 @@ +From c453341c47dccfbc87bf7c67ed0be13c8e5de572 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Sep 2024 17:47:47 +0200 +Subject: idpf: fix netdev Tx queue stop/wake + +From: Michal Kubiak + +[ Upstream commit e4b398dd82f5d5867bc5f442c43abc8fba30ed2c ] + +netif_txq_maybe_stop() returns -1, 0, or 1, while +idpf_tx_maybe_stop_common() says it returns 0 or -EBUSY. As a result, +there sometimes are Tx queue timeout warnings despite that the queue +is empty or there is at least enough space to restart it. +Make idpf_tx_maybe_stop_common() inline and returning true or false, +handling the return of netif_txq_maybe_stop() properly. Use a correct +goto in idpf_tx_maybe_stop_splitq() to avoid stopping the queue or +incrementing the stops counter twice. 
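For reference, the corrected helper added at the end of this patch, restated here with the return-value mapping (per the netif_txq_maybe_stop()/netif_subqueue_maybe_stop() helpers in include/net/netdev_queues.h) spelled out in comments:

  /*
   *  1  - enough descriptors, queue left running
   *  0  - queue was stopped
   * -1  - queue was stopped, then re-enabled by the post-stop recheck
   *
   * Only the "0" case means the caller has to back off, hence the "!".
   */
  static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
                                               u32 needed)
  {
          return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
                                            IDPF_DESC_UNUSED(tx_q),
                                            needed, needed);
  }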
+ +Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit") +Fixes: a5ab9ee0df0b ("idpf: add singleq start_xmit and napi poll") +Cc: stable@vger.kernel.org # 6.7+ +Signed-off-by: Michal Kubiak +Reviewed-by: Przemek Kitszel +Signed-off-by: Alexander Lobakin +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + .../ethernet/intel/idpf/idpf_singleq_txrx.c | 4 +++ + drivers/net/ethernet/intel/idpf/idpf_txrx.c | 35 +++++-------------- + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 9 ++++- + 3 files changed, 21 insertions(+), 27 deletions(-) + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index 8630db24f63a7..5e5fa2d0aa4d1 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@ -369,6 +369,10 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + IDPF_TX_DESCS_FOR_CTX)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + ++ u64_stats_update_begin(&tx_q->stats_sync); ++ u64_stats_inc(&tx_q->q_stats.q_busy); ++ u64_stats_update_end(&tx_q->stats_sync); ++ + return NETDEV_TX_BUSY; + } + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 7b06ca7b9732a..9b7e67d0f38be 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -2149,29 +2149,6 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, + desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); + } + +-/** +- * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions +- * @tx_q: the queue to be checked +- * @size: number of descriptors we want to assure is available +- * +- * Returns 0 if stop is not needed +- */ +-int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size) +-{ +- struct netdev_queue *nq; +- +- if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) +- return 0; +- +- u64_stats_update_begin(&tx_q->stats_sync); +- u64_stats_inc(&tx_q->q_stats.q_busy); +- u64_stats_update_end(&tx_q->stats_sync); +- +- nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); +- +- return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); +-} +- + /** + * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions + * @tx_q: the queue to be checked +@@ -2183,7 +2160,7 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, + unsigned int descs_needed) + { + if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) +- goto splitq_stop; ++ goto out; + + /* If there are too many outstanding completions expected on the + * completion queue, stop the TX queue to give the device some time to +@@ -2202,10 +2179,12 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, + return 0; + + splitq_stop: ++ netif_stop_subqueue(tx_q->netdev, tx_q->idx); ++ ++out: + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); +- netif_stop_subqueue(tx_q->netdev, tx_q->idx); + + return -EBUSY; + } +@@ -2228,7 +2207,11 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + tx_q->next_to_use = val; + +- idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); ++ if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) { ++ u64_stats_update_begin(&tx_q->stats_sync); ++ u64_stats_inc(&tx_q->q_stats.q_busy); ++ u64_stats_update_end(&tx_q->stats_sync); ++ } + + /* Force memory writes to 
complete before letting h/w + * know there are new descriptors to fetch. (Only +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 5b3f19200255a..214a24e684634 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@ -1148,7 +1148,6 @@ void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 ring_idx); + unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb); +-int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size); + void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); + netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_tx_queue *tx_q); +@@ -1157,4 +1156,12 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, + u16 cleaned_count); + int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); + ++static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, ++ u32 needed) ++{ ++ return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, ++ IDPF_DESC_UNUSED(tx_q), ++ needed, needed); ++} ++ + #endif /* !_IDPF_TXRX_H_ */ +-- +2.43.0 + diff --git a/queue-6.10/idpf-merge-singleq-and-splitq-net_device_ops.patch b/queue-6.10/idpf-merge-singleq-and-splitq-net_device_ops.patch new file mode 100644 index 00000000000..125e46b18d6 --- /dev/null +++ b/queue-6.10/idpf-merge-singleq-and-splitq-net_device_ops.patch @@ -0,0 +1,211 @@ +From ba7332fc1b466a126d4a986d059fb6f7c0dff12d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jun 2024 15:53:41 +0200 +Subject: idpf: merge singleq and splitq &net_device_ops + +From: Alexander Lobakin + +[ Upstream commit 14f662b43bf8c765114f73d184af2702b2280436 ] + +It makes no sense to have a second &net_device_ops struct (800 bytes of +rodata) with only one difference in .ndo_start_xmit, which can easily +be just one `if`. This `if` is a drop in the ocean and you won't see +any difference. +Define unified idpf_xmit_start(). The preparation for sending is the +same, just call either idpf_tx_splitq_frame() or idpf_tx_singleq_frame() +depending on the active model to actually map and send the skb. 
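Trimmed down, the unified entry point (idpf_xmit_start() in the prose above, implemented as idpf_tx_start() in the diff below) is the old prologue plus a single dispatch; a sketch that elides the common checks:

  netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev)
  {
          struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
          struct idpf_tx_queue *tx_q;

          tx_q = vport->txqs[skb_get_queue_mapping(skb)];

          /* ... short-frame padding check as before ... */

          if (idpf_is_queue_model_split(vport->txq_model))
                  return idpf_tx_splitq_frame(skb, tx_q);
          else
                  return idpf_tx_singleq_frame(skb, tx_q);
  }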
+ +Reviewed-by: Przemek Kitszel +Reviewed-by: Jacob Keller +Signed-off-by: Alexander Lobakin +Signed-off-by: Tony Nguyen +Stable-dep-of: e4b398dd82f5 ("idpf: fix netdev Tx queue stop/wake") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/idpf/idpf_lib.c | 26 +++------------- + .../ethernet/intel/idpf/idpf_singleq_txrx.c | 31 ++----------------- + drivers/net/ethernet/intel/idpf/idpf_txrx.c | 17 ++++++---- + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 9 ++---- + 4 files changed, 20 insertions(+), 63 deletions(-) + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c +index 1ab679a719c77..5e336f64bc25e 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@ -4,8 +4,7 @@ + #include "idpf.h" + #include "idpf_virtchnl.h" + +-static const struct net_device_ops idpf_netdev_ops_splitq; +-static const struct net_device_ops idpf_netdev_ops_singleq; ++static const struct net_device_ops idpf_netdev_ops; + + /** + * idpf_init_vector_stack - Fill the MSIX vector stack with vector index +@@ -765,10 +764,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) + } + + /* assign netdev_ops */ +- if (idpf_is_queue_model_split(vport->txq_model)) +- netdev->netdev_ops = &idpf_netdev_ops_splitq; +- else +- netdev->netdev_ops = &idpf_netdev_ops_singleq; ++ netdev->netdev_ops = &idpf_netdev_ops; + + /* setup watchdog timeout value to be 5 second */ + netdev->watchdog_timeo = 5 * HZ; +@@ -2353,24 +2349,10 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) + mem->pa = 0; + } + +-static const struct net_device_ops idpf_netdev_ops_splitq = { +- .ndo_open = idpf_open, +- .ndo_stop = idpf_stop, +- .ndo_start_xmit = idpf_tx_splitq_start, +- .ndo_features_check = idpf_features_check, +- .ndo_set_rx_mode = idpf_set_rx_mode, +- .ndo_validate_addr = eth_validate_addr, +- .ndo_set_mac_address = idpf_set_mac, +- .ndo_change_mtu = idpf_change_mtu, +- .ndo_get_stats64 = idpf_get_stats64, +- .ndo_set_features = idpf_set_features, +- .ndo_tx_timeout = idpf_tx_timeout, +-}; +- +-static const struct net_device_ops idpf_netdev_ops_singleq = { ++static const struct net_device_ops idpf_netdev_ops = { + .ndo_open = idpf_open, + .ndo_stop = idpf_stop, +- .ndo_start_xmit = idpf_tx_singleq_start, ++ .ndo_start_xmit = idpf_tx_start, + .ndo_features_check = idpf_features_check, + .ndo_set_rx_mode = idpf_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, +diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index 9864a3992f0c3..8630db24f63a7 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@ -351,8 +351,8 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq, + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +-static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, +- struct idpf_tx_queue *tx_q) ++netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, ++ struct idpf_tx_queue *tx_q) + { + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; +@@ -408,33 +408,6 @@ static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + return idpf_tx_drop_skb(tx_q, skb); + } + +-/** +- * idpf_tx_singleq_start - Selects the right Tx queue to send buffer +- * @skb: send buffer +- * @netdev: network interface device structure +- * +- * Returns NETDEV_TX_OK if sent, else an error code +- */ +-netdev_tx_t 
idpf_tx_singleq_start(struct sk_buff *skb, +- struct net_device *netdev) +-{ +- struct idpf_vport *vport = idpf_netdev_to_vport(netdev); +- struct idpf_tx_queue *tx_q; +- +- tx_q = vport->txqs[skb_get_queue_mapping(skb)]; +- +- /* hardware can't handle really short frames, hardware padding works +- * beyond this point +- */ +- if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) { +- idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); +- +- return NETDEV_TX_OK; +- } +- +- return idpf_tx_singleq_frame(skb, tx_q); +-} +- + /** + * idpf_tx_singleq_clean - Reclaim resources from queue + * @tx_q: Tx queue to clean +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index cdb01c54213f9..7b06ca7b9732a 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -4,6 +4,9 @@ + #include "idpf.h" + #include "idpf_virtchnl.h" + ++static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, ++ unsigned int count); ++ + /** + * idpf_buf_lifo_push - push a buffer pointer onto stack + * @stack: pointer to stack struct +@@ -2702,8 +2705,8 @@ static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) + * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO + * header, 1 for segment payload, and then 7 for the fragments. + */ +-bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, +- unsigned int count) ++static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, ++ unsigned int count) + { + if (likely(count < max_bufs)) + return false; +@@ -2849,14 +2852,13 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, + } + + /** +- * idpf_tx_splitq_start - Selects the right Tx queue to send buffer ++ * idpf_tx_start - Selects the right Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +-netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, +- struct net_device *netdev) ++netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev) + { + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_tx_queue *tx_q; +@@ -2878,7 +2880,10 @@ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + return NETDEV_TX_OK; + } + +- return idpf_tx_splitq_frame(skb, tx_q); ++ if (idpf_is_queue_model_split(vport->txq_model)) ++ return idpf_tx_splitq_frame(skb, tx_q); ++ else ++ return idpf_tx_singleq_frame(skb, tx_q); + } + + /** +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 704aec5c383b6..5b3f19200255a 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@ -1148,14 +1148,11 @@ void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 ring_idx); + unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb); +-bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, +- unsigned int count); + int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size); + void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); +-netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, +- struct net_device *netdev); +-netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, +- struct net_device *netdev); ++netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, ++ struct idpf_tx_queue *tx_q); 
++netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev); + bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, + u16 cleaned_count); + int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); +-- +2.43.0 + diff --git a/queue-6.10/idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch b/queue-6.10/idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch new file mode 100644 index 00000000000..6748d8ca2b1 --- /dev/null +++ b/queue-6.10/idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch @@ -0,0 +1,3988 @@ +From 006b3857a893f911630279c3590f415040801b6b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jun 2024 15:53:38 +0200 +Subject: idpf: split &idpf_queue into 4 strictly-typed queue structures + +From: Alexander Lobakin + +[ Upstream commit e4891e4687c8dd136d80d6c1b857a02931ed6fc8 ] + +Currently, sizeof(struct idpf_queue) is 32 Kb. +This is due to the 12-bit hashtable declaration at the end of the queue. +This HT is needed only for Tx queues when the flow scheduling mode is +enabled. But &idpf_queue is unified for all of the queue types, +provoking excessive memory usage. +The unified structure in general makes the code less effective via +suboptimal fields placement. You can't avoid that unless you make unions +each 2 fields. Even then, different field alignment etc., doesn't allow +you to optimize things to the limit. +Split &idpf_queue into 4 structures corresponding to the queue types: +RQ (Rx queue), SQ (Tx queue), FQ (buffer queue), and CQ (completion +queue). Place only needed fields there and shortcuts handy for hotpath. +Allocate the abovementioned hashtable dynamically and only when needed, +keeping &idpf_tx_queue relatively short (192 bytes, same as Rx). This HT +is used only for OOO completions, which aren't really hotpath anyway. +Note that this change must be done atomically, otherwise it's really +easy to get lost and miss something. 
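Where the 32 Kb per queue comes from, as a back-of-the-envelope illustration (64-bit build assumed; the struct and field names here are placeholders, not the driver's):

  #include <linux/hashtable.h>

  struct example_queue {
          /* ... fields shared by RX, TX, buffer and completion queues ... */
          DECLARE_HASHTABLE(sched_bufs, 12);      /* struct hlist_head[1 << 12] */
  };

  /*
   * (1 << 12) = 4096 buckets * 8 bytes per struct hlist_head
   * = 32768 bytes = 32 Kb embedded in every queue, even though only
   * flow-scheduled Tx queues ever use the table.  After the split it
   * sits behind the new "stash" pointer of struct idpf_tx_queue and is
   * allocated only when flow scheduling is enabled.
   */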
+ +Signed-off-by: Alexander Lobakin +Signed-off-by: Tony Nguyen +Stable-dep-of: e4b398dd82f5 ("idpf: fix netdev Tx queue stop/wake") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/idpf/idpf.h | 3 +- + .../net/ethernet/intel/idpf/idpf_ethtool.c | 125 +-- + drivers/net/ethernet/intel/idpf/idpf_lib.c | 46 +- + .../ethernet/intel/idpf/idpf_singleq_txrx.c | 144 +-- + drivers/net/ethernet/intel/idpf/idpf_txrx.c | 915 +++++++++++------- + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 440 ++++++--- + .../net/ethernet/intel/idpf/idpf_virtchnl.c | 73 +- + 7 files changed, 1018 insertions(+), 728 deletions(-) + +diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h +index 0b26dd9b8a512..f9e43d171f171 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf.h ++++ b/drivers/net/ethernet/intel/idpf/idpf.h +@@ -17,7 +17,6 @@ struct idpf_vport_max_q; + #include + #include + #include +-#include + + #include "virtchnl2.h" + #include "idpf_txrx.h" +@@ -301,7 +300,7 @@ struct idpf_vport { + u16 num_txq_grp; + struct idpf_txq_group *txq_grps; + u32 txq_model; +- struct idpf_queue **txqs; ++ struct idpf_tx_queue **txqs; + bool crc_enable; + + u16 num_rxq; +diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +index 1885ba618981d..e933fed16c7ea 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +@@ -437,22 +437,24 @@ struct idpf_stats { + .stat_offset = offsetof(_type, _stat) \ + } + +-/* Helper macro for defining some statistics related to queues */ +-#define IDPF_QUEUE_STAT(_name, _stat) \ +- IDPF_STAT(struct idpf_queue, _name, _stat) ++/* Helper macros for defining some statistics related to queues */ ++#define IDPF_RX_QUEUE_STAT(_name, _stat) \ ++ IDPF_STAT(struct idpf_rx_queue, _name, _stat) ++#define IDPF_TX_QUEUE_STAT(_name, _stat) \ ++ IDPF_STAT(struct idpf_tx_queue, _name, _stat) + + /* Stats associated with a Tx queue */ + static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = { +- IDPF_QUEUE_STAT("pkts", q_stats.tx.packets), +- IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes), +- IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts), ++ IDPF_TX_QUEUE_STAT("pkts", q_stats.packets), ++ IDPF_TX_QUEUE_STAT("bytes", q_stats.bytes), ++ IDPF_TX_QUEUE_STAT("lso_pkts", q_stats.lso_pkts), + }; + + /* Stats associated with an Rx queue */ + static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = { +- IDPF_QUEUE_STAT("pkts", q_stats.rx.packets), +- IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes), +- IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts), ++ IDPF_RX_QUEUE_STAT("pkts", q_stats.packets), ++ IDPF_RX_QUEUE_STAT("bytes", q_stats.bytes), ++ IDPF_RX_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rsc_pkts), + }; + + #define IDPF_TX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_tx_queue_stats) +@@ -633,7 +635,7 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset) + * Copies the stat data defined by the pointer and stat structure pair into + * the memory supplied as data. If the pointer is null, data will be zero'd. 
+ */ +-static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, ++static void idpf_add_one_ethtool_stat(u64 *data, const void *pstat, + const struct idpf_stats *stat) + { + char *p; +@@ -671,6 +673,7 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, + * idpf_add_queue_stats - copy queue statistics into supplied buffer + * @data: ethtool stats buffer + * @q: the queue to copy ++ * @type: type of the queue + * + * Queue statistics must be copied while protected by u64_stats_fetch_begin, + * so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats +@@ -681,19 +684,23 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, + * + * This function expects to be called while under rcu_read_lock(). + */ +-static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q) ++static void idpf_add_queue_stats(u64 **data, const void *q, ++ enum virtchnl2_queue_type type) + { ++ const struct u64_stats_sync *stats_sync; + const struct idpf_stats *stats; + unsigned int start; + unsigned int size; + unsigned int i; + +- if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { ++ if (type == VIRTCHNL2_QUEUE_TYPE_RX) { + size = IDPF_RX_QUEUE_STATS_LEN; + stats = idpf_gstrings_rx_queue_stats; ++ stats_sync = &((const struct idpf_rx_queue *)q)->stats_sync; + } else { + size = IDPF_TX_QUEUE_STATS_LEN; + stats = idpf_gstrings_tx_queue_stats; ++ stats_sync = &((const struct idpf_tx_queue *)q)->stats_sync; + } + + /* To avoid invalid statistics values, ensure that we keep retrying +@@ -701,10 +708,10 @@ static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q) + * u64_stats_fetch_retry. + */ + do { +- start = u64_stats_fetch_begin(&q->stats_sync); ++ start = u64_stats_fetch_begin(stats_sync); + for (i = 0; i < size; i++) + idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]); +- } while (u64_stats_fetch_retry(&q->stats_sync, start)); ++ } while (u64_stats_fetch_retry(stats_sync, start)); + + /* Once we successfully copy the stats in, update the data pointer */ + *data += size; +@@ -793,7 +800,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) + for (j = 0; j < num_rxq; j++) { + u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs; + struct idpf_rx_queue_stats *stats; +- struct idpf_queue *rxq; ++ struct idpf_rx_queue *rxq; + unsigned int start; + + if (idpf_is_queue_model_split(vport->rxq_model)) +@@ -807,7 +814,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) + do { + start = u64_stats_fetch_begin(&rxq->stats_sync); + +- stats = &rxq->q_stats.rx; ++ stats = &rxq->q_stats; + hw_csum_err = u64_stats_read(&stats->hw_csum_err); + hsplit = u64_stats_read(&stats->hsplit_pkts); + hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf); +@@ -828,7 +835,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) + + for (j = 0; j < txq_grp->num_txq; j++) { + u64 linearize, qbusy, skb_drops, dma_map_errs; +- struct idpf_queue *txq = txq_grp->txqs[j]; ++ struct idpf_tx_queue *txq = txq_grp->txqs[j]; + struct idpf_tx_queue_stats *stats; + unsigned int start; + +@@ -838,7 +845,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport) + do { + start = u64_stats_fetch_begin(&txq->stats_sync); + +- stats = &txq->q_stats.tx; ++ stats = &txq->q_stats; + linearize = u64_stats_read(&stats->linearize); + qbusy = u64_stats_read(&stats->q_busy); + skb_drops = u64_stats_read(&stats->skb_drops); +@@ -896,12 +903,12 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, + qtype = VIRTCHNL2_QUEUE_TYPE_TX; + + for (j = 0; j < txq_grp->num_txq; 
j++, total++) { +- struct idpf_queue *txq = txq_grp->txqs[j]; ++ struct idpf_tx_queue *txq = txq_grp->txqs[j]; + + if (!txq) + idpf_add_empty_queue_stats(&data, qtype); + else +- idpf_add_queue_stats(&data, txq); ++ idpf_add_queue_stats(&data, txq, qtype); + } + } + +@@ -929,7 +936,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, + num_rxq = rxq_grp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, total++) { +- struct idpf_queue *rxq; ++ struct idpf_rx_queue *rxq; + + if (is_splitq) + rxq = &rxq_grp->splitq.rxq_sets[j]->rxq; +@@ -938,7 +945,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, + if (!rxq) + idpf_add_empty_queue_stats(&data, qtype); + else +- idpf_add_queue_stats(&data, rxq); ++ idpf_add_queue_stats(&data, rxq, qtype); + + /* In splitq mode, don't get page pool stats here since + * the pools are attached to the buffer queues +@@ -953,7 +960,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, + + for (i = 0; i < vport->num_rxq_grp; i++) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { +- struct idpf_queue *rxbufq = ++ struct idpf_buf_queue *rxbufq = + &vport->rxq_grps[i].splitq.bufq_sets[j].bufq; + + page_pool_get_stats(rxbufq->pp, &pp_stats); +@@ -971,60 +978,64 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, + } + + /** +- * idpf_find_rxq - find rxq from q index ++ * idpf_find_rxq_vec - find rxq vector from q index + * @vport: virtual port associated to queue + * @q_num: q index used to find queue + * +- * returns pointer to rx queue ++ * returns pointer to rx vector + */ +-static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num) ++static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, ++ int q_num) + { + int q_grp, q_idx; + + if (!idpf_is_queue_model_split(vport->rxq_model)) +- return vport->rxq_grps->singleq.rxqs[q_num]; ++ return vport->rxq_grps->singleq.rxqs[q_num]->q_vector; + + q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + +- return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq; ++ return vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq.q_vector; + } + + /** +- * idpf_find_txq - find txq from q index ++ * idpf_find_txq_vec - find txq vector from q index + * @vport: virtual port associated to queue + * @q_num: q index used to find queue + * +- * returns pointer to tx queue ++ * returns pointer to tx vector + */ +-static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num) ++static struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, ++ int q_num) + { + int q_grp; + + if (!idpf_is_queue_model_split(vport->txq_model)) +- return vport->txqs[q_num]; ++ return vport->txqs[q_num]->q_vector; + + q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP; + +- return vport->txq_grps[q_grp].complq; ++ return vport->txq_grps[q_grp].complq->q_vector; + } + + /** + * __idpf_get_q_coalesce - get ITR values for specific queue + * @ec: ethtool structure to fill with driver's coalesce settings +- * @q: quuee of Rx or Tx ++ * @q_vector: queue vector corresponding to this queue ++ * @type: queue type + */ + static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec, +- struct idpf_queue *q) ++ const struct idpf_q_vector *q_vector, ++ enum virtchnl2_queue_type type) + { +- if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { ++ if (type == VIRTCHNL2_QUEUE_TYPE_RX) { + ec->use_adaptive_rx_coalesce = +- IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode); +- ec->rx_coalesce_usecs = 
q->q_vector->rx_itr_value; ++ IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode); ++ ec->rx_coalesce_usecs = q_vector->rx_itr_value; + } else { + ec->use_adaptive_tx_coalesce = +- IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode); +- ec->tx_coalesce_usecs = q->q_vector->tx_itr_value; ++ IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode); ++ ec->tx_coalesce_usecs = q_vector->tx_itr_value; + } + } + +@@ -1040,8 +1051,8 @@ static int idpf_get_q_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + u32 q_num) + { +- struct idpf_netdev_priv *np = netdev_priv(netdev); +- struct idpf_vport *vport; ++ const struct idpf_netdev_priv *np = netdev_priv(netdev); ++ const struct idpf_vport *vport; + int err = 0; + + idpf_vport_ctrl_lock(netdev); +@@ -1056,10 +1067,12 @@ static int idpf_get_q_coalesce(struct net_device *netdev, + } + + if (q_num < vport->num_rxq) +- __idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num)); ++ __idpf_get_q_coalesce(ec, idpf_find_rxq_vec(vport, q_num), ++ VIRTCHNL2_QUEUE_TYPE_RX); + + if (q_num < vport->num_txq) +- __idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num)); ++ __idpf_get_q_coalesce(ec, idpf_find_txq_vec(vport, q_num), ++ VIRTCHNL2_QUEUE_TYPE_TX); + + unlock_mutex: + idpf_vport_ctrl_unlock(netdev); +@@ -1103,16 +1116,15 @@ static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num, + /** + * __idpf_set_q_coalesce - set ITR values for specific queue + * @ec: ethtool structure from user to update ITR settings +- * @q: queue for which itr values has to be set ++ * @qv: queue vector for which itr values has to be set + * @is_rxq: is queue type rx + * + * Returns 0 on success, negative otherwise. + */ +-static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, +- struct idpf_queue *q, bool is_rxq) ++static int __idpf_set_q_coalesce(const struct ethtool_coalesce *ec, ++ struct idpf_q_vector *qv, bool is_rxq) + { + u32 use_adaptive_coalesce, coalesce_usecs; +- struct idpf_q_vector *qv = q->q_vector; + bool is_dim_ena = false; + u16 itr_val; + +@@ -1128,7 +1140,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, + itr_val = qv->tx_itr_value; + } + if (coalesce_usecs != itr_val && use_adaptive_coalesce) { +- netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n"); ++ netdev_err(qv->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n"); + + return -EINVAL; + } +@@ -1137,7 +1149,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, + return 0; + + if (coalesce_usecs > IDPF_ITR_MAX) { +- netdev_err(q->vport->netdev, ++ netdev_err(qv->vport->netdev, + "Invalid value, %d-usecs range is 0-%d\n", + coalesce_usecs, IDPF_ITR_MAX); + +@@ -1146,7 +1158,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, + + if (coalesce_usecs % 2) { + coalesce_usecs--; +- netdev_info(q->vport->netdev, ++ netdev_info(qv->vport->netdev, + "HW only supports even ITR values, ITR rounded to %d\n", + coalesce_usecs); + } +@@ -1185,15 +1197,16 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, + * + * Return 0 on success, and negative on failure + */ +-static int idpf_set_q_coalesce(struct idpf_vport *vport, +- struct ethtool_coalesce *ec, ++static int idpf_set_q_coalesce(const struct idpf_vport *vport, ++ const struct ethtool_coalesce *ec, + int q_num, bool is_rxq) + { +- struct idpf_queue *q; ++ struct idpf_q_vector *qv; + +- q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num); ++ qv = is_rxq ? 
idpf_find_rxq_vec(vport, q_num) : ++ idpf_find_txq_vec(vport, q_num); + +- if (q && __idpf_set_q_coalesce(ec, q, is_rxq)) ++ if (qv && __idpf_set_q_coalesce(ec, qv, is_rxq)) + return -EINVAL; + + return 0; +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c +index 3ac9d7ab83f20..1ab679a719c77 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@ -1318,14 +1318,14 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) + + if (idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { +- struct idpf_queue *q = ++ const struct idpf_buf_queue *q = + &grp->splitq.bufq_sets[j].bufq; + + writel(q->next_to_alloc, q->tail); + } + } else { + for (j = 0; j < grp->singleq.num_rxq; j++) { +- struct idpf_queue *q = ++ const struct idpf_rx_queue *q = + grp->singleq.rxqs[j]; + + writel(q->next_to_alloc, q->tail); +@@ -1852,7 +1852,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, + enum idpf_vport_state current_state = np->state; + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport *new_vport; +- int err, i; ++ int err; + + /* If the system is low on memory, we can end up in bad state if we + * free all the memory for queue resources and try to allocate them +@@ -1923,46 +1923,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, + */ + memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps)); + +- /* Since idpf_vport_queues_alloc was called with new_port, the queue +- * back pointers are currently pointing to the local new_vport. Reset +- * the backpointers to the original vport here +- */ +- for (i = 0; i < vport->num_txq_grp; i++) { +- struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; +- int j; +- +- tx_qgrp->vport = vport; +- for (j = 0; j < tx_qgrp->num_txq; j++) +- tx_qgrp->txqs[j]->vport = vport; +- +- if (idpf_is_queue_model_split(vport->txq_model)) +- tx_qgrp->complq->vport = vport; +- } +- +- for (i = 0; i < vport->num_rxq_grp; i++) { +- struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; +- struct idpf_queue *q; +- u16 num_rxq; +- int j; +- +- rx_qgrp->vport = vport; +- for (j = 0; j < vport->num_bufqs_per_qgrp; j++) +- rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport; +- +- if (idpf_is_queue_model_split(vport->rxq_model)) +- num_rxq = rx_qgrp->splitq.num_rxq_sets; +- else +- num_rxq = rx_qgrp->singleq.num_rxq; +- +- for (j = 0; j < num_rxq; j++) { +- if (idpf_is_queue_model_split(vport->rxq_model)) +- q = &rx_qgrp->splitq.rxq_sets[j]->rxq; +- else +- q = rx_qgrp->singleq.rxqs[j]; +- q->vport = vport; +- } +- } +- + if (reset_cause == IDPF_SR_Q_CHANGE) + idpf_vport_alloc_vec_indexes(vport); + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index b17d88e150006..9864a3992f0c3 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@ -186,7 +186,7 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb, + * and gets a physical address for each memory location and programs + * it and the length into the transmit base mode descriptor. 
+ */ +-static void idpf_tx_singleq_map(struct idpf_queue *tx_q, ++static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, + struct idpf_tx_buf *first, + struct idpf_tx_offload_params *offloads) + { +@@ -210,7 +210,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + /* write each descriptor with CRC bit */ +- if (tx_q->vport->crc_enable) ++ if (idpf_queue_has(CRC_EN, tx_q)) + td_cmd |= IDPF_TX_DESC_CMD_ICRC; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { +@@ -285,7 +285,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + +- nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +@@ -299,7 +299,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + * ring entry to reflect that this index is a context descriptor + */ + static struct idpf_base_tx_ctx_desc * +-idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) ++idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq) + { + struct idpf_base_tx_ctx_desc *ctx_desc; + int ntu = txq->next_to_use; +@@ -320,7 +320,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) + * @txq: queue to send buffer on + * @offload: offload parameter structure + **/ +-static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, ++static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq, + struct idpf_tx_offload_params *offload) + { + struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq); +@@ -333,7 +333,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss); + + u64_stats_update_begin(&txq->stats_sync); +- u64_stats_inc(&txq->q_stats.tx.lso_pkts); ++ u64_stats_inc(&txq->q_stats.lso_pkts); + u64_stats_update_end(&txq->stats_sync); + } + +@@ -352,7 +352,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, + * Returns NETDEV_TX_OK if sent, else an error code + */ + static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, +- struct idpf_queue *tx_q) ++ struct idpf_tx_queue *tx_q) + { + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; +@@ -419,7 +419,7 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev) + { + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); +- struct idpf_queue *tx_q; ++ struct idpf_tx_queue *tx_q; + + tx_q = vport->txqs[skb_get_queue_mapping(skb)]; + +@@ -442,16 +442,15 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + * @cleaned: returns number of packets cleaned + * + */ +-static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, ++static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget, + int *cleaned) + { +- unsigned int budget = tx_q->vport->compln_clean_budget; + unsigned int total_bytes = 0, total_pkts = 0; + struct idpf_base_tx_desc *tx_desc; ++ u32 budget = tx_q->clean_budget; + s16 ntc = tx_q->next_to_clean; + struct idpf_netdev_priv *np; + struct idpf_tx_buf *tx_buf; +- struct idpf_vport *vport; + struct netdev_queue *nq; + bool dont_wake; + +@@ -550,16 +549,15 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, + *cleaned += total_pkts; + + u64_stats_update_begin(&tx_q->stats_sync); +- 
u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts); +- u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes); ++ u64_stats_add(&tx_q->q_stats.packets, total_pkts); ++ u64_stats_add(&tx_q->q_stats.bytes, total_bytes); + u64_stats_update_end(&tx_q->stats_sync); + +- vport = tx_q->vport; +- np = netdev_priv(vport->netdev); +- nq = netdev_get_tx_queue(vport->netdev, tx_q->idx); ++ np = netdev_priv(tx_q->netdev); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + + dont_wake = np->state != __IDPF_VPORT_UP || +- !netif_carrier_ok(vport->netdev); ++ !netif_carrier_ok(tx_q->netdev); + __netif_txq_completed_wake(nq, total_pkts, total_bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, + dont_wake); +@@ -584,7 +582,7 @@ static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + + budget_per_q = num_txq ? max(budget / num_txq, 1) : 0; + for (i = 0; i < num_txq; i++) { +- struct idpf_queue *q; ++ struct idpf_tx_queue *q; + + q = q_vec->tx[i]; + clean_complete &= idpf_tx_singleq_clean(q, budget_per_q, +@@ -614,14 +612,9 @@ static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc, + + /** + * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers +- * @rxq: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer +- * @skb: Current socket buffer containing buffer in progress +- * @ntc: next to clean + */ +-static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, +- union virtchnl2_rx_desc *rx_desc, +- struct sk_buff *skb, u16 ntc) ++static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc) + { + /* if we are the last buffer then there is nothing else to do */ + if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ))) +@@ -639,7 +632,7 @@ static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, + * + * skb->protocol must be set before this function is called + */ +-static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, ++static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, + struct idpf_rx_csum_decoded *csum_bits, + u16 ptype) + { +@@ -647,14 +640,14 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ +- if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM))) ++ if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM))) + return; + + /* check if HW has decoded the packet and checksum */ + if (unlikely(!(csum_bits->l3l4p))) + return; + +- decoded = rxq->vport->rx_ptype_lkup[ptype]; ++ decoded = rxq->rx_ptype_lkup[ptype]; + if (unlikely(!(decoded.known && decoded.outer_ip))) + return; + +@@ -707,7 +700,7 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + + checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); ++ u64_stats_inc(&rxq->q_stats.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); + } + +@@ -721,9 +714,9 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. 
+ **/ +-static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_base_csum(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + u16 ptype) + { + struct idpf_rx_csum_decoded csum_bits; +@@ -761,9 +754,9 @@ static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +-static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_flex_csum(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + u16 ptype) + { + struct idpf_rx_csum_decoded csum_bits; +@@ -801,14 +794,14 @@ static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + **/ +-static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) + { + u64 mask, qw1; + +- if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) ++ if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH))) + return; + + mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; +@@ -831,12 +824,12 @@ static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +-static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, ++static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, ++ const union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) + { +- if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) ++ if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH))) + return; + + if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, +@@ -857,16 +850,16 @@ static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. 
+ */ +-static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, +- struct sk_buff *skb, +- union virtchnl2_rx_desc *rx_desc, +- u16 ptype) ++static void ++idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, ++ struct sk_buff *skb, ++ const union virtchnl2_rx_desc *rx_desc, ++ u16 ptype) + { +- struct idpf_rx_ptype_decoded decoded = +- rx_q->vport->rx_ptype_lkup[ptype]; ++ struct idpf_rx_ptype_decoded decoded = rx_q->rx_ptype_lkup[ptype]; + + /* modifies the skb - consumes the enet header */ +- skb->protocol = eth_type_trans(skb, rx_q->vport->netdev); ++ skb->protocol = eth_type_trans(skb, rx_q->netdev); + + /* Check if we're using base mode descriptor IDs */ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { +@@ -878,6 +871,22 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, + } + } + ++/** ++ * idpf_rx_buf_hw_update - Store the new tail and head values ++ * @rxq: queue to bump ++ * @val: new head index ++ */ ++static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val) ++{ ++ rxq->next_to_use = val; ++ ++ if (unlikely(!rxq->tail)) ++ return; ++ ++ /* writel has an implicit memory barrier */ ++ writel(val, rxq->tail); ++} ++ + /** + * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers + * @rx_q: queue for which the hw buffers are allocated +@@ -885,7 +894,7 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, + * + * Returns false if all allocations were successful, true if any fail + */ +-bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, ++bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, + u16 cleaned_count) + { + struct virtchnl2_singleq_rx_buf_desc *desc; +@@ -896,7 +905,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + return false; + + desc = &rx_q->single_buf[nta]; +- buf = &rx_q->rx_buf.buf[nta]; ++ buf = &rx_q->rx_buf[nta]; + + do { + dma_addr_t addr; +@@ -916,7 +925,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + nta++; + if (unlikely(nta == rx_q->desc_count)) { + desc = &rx_q->single_buf[0]; +- buf = rx_q->rx_buf.buf; ++ buf = rx_q->rx_buf; + nta = 0; + } + +@@ -933,7 +942,6 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + + /** + * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor +- * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * +@@ -943,9 +951,9 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + */ +-static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, +- union virtchnl2_rx_desc *rx_desc, +- struct idpf_rx_extracted *fields) ++static void ++idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, ++ struct idpf_rx_extracted *fields) + { + u64 qword; + +@@ -957,7 +965,6 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, + + /** + * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor +- * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * +@@ -967,9 +974,9 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. 
+ */ +-static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, +- union virtchnl2_rx_desc *rx_desc, +- struct idpf_rx_extracted *fields) ++static void ++idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, ++ struct idpf_rx_extracted *fields) + { + fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, + le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); +@@ -984,14 +991,15 @@ static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, + * @fields: storage for extracted values + * + */ +-static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, +- union virtchnl2_rx_desc *rx_desc, +- struct idpf_rx_extracted *fields) ++static void ++idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, ++ const union virtchnl2_rx_desc *rx_desc, ++ struct idpf_rx_extracted *fields) + { + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) +- idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields); ++ idpf_rx_singleq_extract_base_fields(rx_desc, fields); + else +- idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields); ++ idpf_rx_singleq_extract_flex_fields(rx_desc, fields); + } + + /** +@@ -1001,7 +1009,7 @@ static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +-static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) ++static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) + { + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + struct sk_buff *skb = rx_q->skb; +@@ -1036,7 +1044,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + + idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); + +- rx_buf = &rx_q->rx_buf.buf[ntc]; ++ rx_buf = &rx_q->rx_buf[ntc]; + if (!fields.size) { + idpf_rx_put_page(rx_buf); + goto skip_data; +@@ -1058,7 +1066,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + cleaned_count++; + + /* skip if it is non EOP desc */ +- if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc)) ++ if (idpf_rx_singleq_is_non_eop(rx_desc)) + continue; + + #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ +@@ -1084,7 +1092,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + rx_desc, fields.rx_ptype); + + /* send completed skb up the stack */ +- napi_gro_receive(&rx_q->q_vector->napi, skb); ++ napi_gro_receive(rx_q->pp->p.napi, skb); + skb = NULL; + + /* update budget accounting */ +@@ -1099,8 +1107,8 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); + + u64_stats_update_begin(&rx_q->stats_sync); +- u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts); +- u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes); ++ u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts); ++ u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes); + u64_stats_update_end(&rx_q->stats_sync); + + /* guarantee a trip back through this routine if there was a failure */ +@@ -1127,7 +1135,7 @@ static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + */ + budget_per_q = num_rxq ? 
max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { +- struct idpf_queue *rxq = q_vec->rx[i]; ++ struct idpf_rx_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q); +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 948b485da539c..cdb01c54213f9 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -60,7 +60,8 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) + * @tx_q: the queue that owns the buffer + * @tx_buf: the buffer to free + */ +-static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) ++static void idpf_tx_buf_rel(struct idpf_tx_queue *tx_q, ++ struct idpf_tx_buf *tx_buf) + { + if (tx_buf->skb) { + if (dma_unmap_len(tx_buf, len)) +@@ -86,8 +87,9 @@ static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) + * idpf_tx_buf_rel_all - Free any empty Tx buffers + * @txq: queue to be cleaned + */ +-static void idpf_tx_buf_rel_all(struct idpf_queue *txq) ++static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) + { ++ struct idpf_buf_lifo *buf_stack; + u16 i; + + /* Buffers already cleared, nothing to do */ +@@ -101,38 +103,57 @@ static void idpf_tx_buf_rel_all(struct idpf_queue *txq) + kfree(txq->tx_buf); + txq->tx_buf = NULL; + +- if (!txq->buf_stack.bufs) ++ if (!idpf_queue_has(FLOW_SCH_EN, txq)) + return; + +- for (i = 0; i < txq->buf_stack.size; i++) +- kfree(txq->buf_stack.bufs[i]); ++ buf_stack = &txq->stash->buf_stack; ++ if (!buf_stack->bufs) ++ return; ++ ++ for (i = 0; i < buf_stack->size; i++) ++ kfree(buf_stack->bufs[i]); + +- kfree(txq->buf_stack.bufs); +- txq->buf_stack.bufs = NULL; ++ kfree(buf_stack->bufs); ++ buf_stack->bufs = NULL; + } + + /** + * idpf_tx_desc_rel - Free Tx resources per queue + * @txq: Tx descriptor ring for a specific queue +- * @bufq: buffer q or completion q + * + * Free all transmit software resources + */ +-static void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq) ++static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) + { +- if (bufq) +- idpf_tx_buf_rel_all(txq); ++ idpf_tx_buf_rel_all(txq); + + if (!txq->desc_ring) + return; + + dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); + txq->desc_ring = NULL; +- txq->next_to_alloc = 0; + txq->next_to_use = 0; + txq->next_to_clean = 0; + } + ++/** ++ * idpf_compl_desc_rel - Free completion resources per queue ++ * @complq: completion queue ++ * ++ * Free all completion software resources. 
++ */ ++static void idpf_compl_desc_rel(struct idpf_compl_queue *complq) ++{ ++ if (!complq->comp) ++ return; ++ ++ dma_free_coherent(complq->netdev->dev.parent, complq->size, ++ complq->comp, complq->dma); ++ complq->comp = NULL; ++ complq->next_to_use = 0; ++ complq->next_to_clean = 0; ++} ++ + /** + * idpf_tx_desc_rel_all - Free Tx Resources for All Queues + * @vport: virtual port structure +@@ -150,10 +171,10 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (j = 0; j < txq_grp->num_txq; j++) +- idpf_tx_desc_rel(txq_grp->txqs[j], true); ++ idpf_tx_desc_rel(txq_grp->txqs[j]); + + if (idpf_is_queue_model_split(vport->txq_model)) +- idpf_tx_desc_rel(txq_grp->complq, false); ++ idpf_compl_desc_rel(txq_grp->complq); + } + } + +@@ -163,8 +184,9 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) + * + * Returns 0 on success, negative on failure + */ +-static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) ++static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) + { ++ struct idpf_buf_lifo *buf_stack; + int buf_size; + int i; + +@@ -180,22 +202,26 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) + for (i = 0; i < tx_q->desc_count; i++) + tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + ++ if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) ++ return 0; ++ ++ buf_stack = &tx_q->stash->buf_stack; ++ + /* Initialize tx buf stack for out-of-order completions if + * flow scheduling offload is enabled + */ +- tx_q->buf_stack.bufs = +- kcalloc(tx_q->desc_count, sizeof(struct idpf_tx_stash *), +- GFP_KERNEL); +- if (!tx_q->buf_stack.bufs) ++ buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs), ++ GFP_KERNEL); ++ if (!buf_stack->bufs) + return -ENOMEM; + +- tx_q->buf_stack.size = tx_q->desc_count; +- tx_q->buf_stack.top = tx_q->desc_count; ++ buf_stack->size = tx_q->desc_count; ++ buf_stack->top = tx_q->desc_count; + + for (i = 0; i < tx_q->desc_count; i++) { +- tx_q->buf_stack.bufs[i] = kzalloc(sizeof(*tx_q->buf_stack.bufs[i]), +- GFP_KERNEL); +- if (!tx_q->buf_stack.bufs[i]) ++ buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]), ++ GFP_KERNEL); ++ if (!buf_stack->bufs[i]) + return -ENOMEM; + } + +@@ -204,28 +230,22 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) + + /** + * idpf_tx_desc_alloc - Allocate the Tx descriptors ++ * @vport: vport to allocate resources for + * @tx_q: the tx ring to set up +- * @bufq: buffer or completion queue + * + * Returns 0 on success, negative on failure + */ +-static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) ++static int idpf_tx_desc_alloc(const struct idpf_vport *vport, ++ struct idpf_tx_queue *tx_q) + { + struct device *dev = tx_q->dev; +- u32 desc_sz; + int err; + +- if (bufq) { +- err = idpf_tx_buf_alloc_all(tx_q); +- if (err) +- goto err_alloc; +- +- desc_sz = sizeof(struct idpf_base_tx_desc); +- } else { +- desc_sz = sizeof(struct idpf_splitq_tx_compl_desc); +- } ++ err = idpf_tx_buf_alloc_all(tx_q); ++ if (err) ++ goto err_alloc; + +- tx_q->size = tx_q->desc_count * desc_sz; ++ tx_q->size = tx_q->desc_count * sizeof(*tx_q->base_tx); + + /* Allocate descriptors also round up to nearest 4K */ + tx_q->size = ALIGN(tx_q->size, 4096); +@@ -238,19 +258,43 @@ static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) + goto err_alloc; + } + +- tx_q->next_to_alloc = 0; + tx_q->next_to_use = 0; + tx_q->next_to_clean = 0; +- set_bit(__IDPF_Q_GEN_CHK, tx_q->flags); ++ idpf_queue_set(GEN_CHK, tx_q); + + 
return 0; + + err_alloc: +- idpf_tx_desc_rel(tx_q, bufq); ++ idpf_tx_desc_rel(tx_q); + + return err; + } + ++/** ++ * idpf_compl_desc_alloc - allocate completion descriptors ++ * @vport: vport to allocate resources for ++ * @complq: completion queue to set up ++ * ++ * Return: 0 on success, -errno on failure. ++ */ ++static int idpf_compl_desc_alloc(const struct idpf_vport *vport, ++ struct idpf_compl_queue *complq) ++{ ++ complq->size = array_size(complq->desc_count, sizeof(*complq->comp)); ++ ++ complq->comp = dma_alloc_coherent(complq->netdev->dev.parent, ++ complq->size, &complq->dma, ++ GFP_KERNEL); ++ if (!complq->comp) ++ return -ENOMEM; ++ ++ complq->next_to_use = 0; ++ complq->next_to_clean = 0; ++ idpf_queue_set(GEN_CHK, complq); ++ ++ return 0; ++} ++ + /** + * idpf_tx_desc_alloc_all - allocate all queues Tx resources + * @vport: virtual port private structure +@@ -259,7 +303,6 @@ static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) + */ + static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) + { +- struct device *dev = &vport->adapter->pdev->dev; + int err = 0; + int i, j; + +@@ -268,13 +311,14 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) + */ + for (i = 0; i < vport->num_txq_grp; i++) { + for (j = 0; j < vport->txq_grps[i].num_txq; j++) { +- struct idpf_queue *txq = vport->txq_grps[i].txqs[j]; ++ struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j]; + u8 gen_bits = 0; + u16 bufidx_mask; + +- err = idpf_tx_desc_alloc(txq, true); ++ err = idpf_tx_desc_alloc(vport, txq); + if (err) { +- dev_err(dev, "Allocation for Tx Queue %u failed\n", ++ pci_err(vport->adapter->pdev, ++ "Allocation for Tx Queue %u failed\n", + i); + goto err_out; + } +@@ -312,9 +356,10 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) + continue; + + /* Setup completion queues */ +- err = idpf_tx_desc_alloc(vport->txq_grps[i].complq, false); ++ err = idpf_compl_desc_alloc(vport, vport->txq_grps[i].complq); + if (err) { +- dev_err(dev, "Allocation for Tx Completion Queue %u failed\n", ++ pci_err(vport->adapter->pdev, ++ "Allocation for Tx Completion Queue %u failed\n", + i); + goto err_out; + } +@@ -329,15 +374,14 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) + + /** + * idpf_rx_page_rel - Release an rx buffer page +- * @rxq: the queue that owns the buffer + * @rx_buf: the buffer to free + */ +-static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf) ++static void idpf_rx_page_rel(struct idpf_rx_buf *rx_buf) + { + if (unlikely(!rx_buf->page)) + return; + +- page_pool_put_full_page(rxq->pp, rx_buf->page, false); ++ page_pool_put_full_page(rx_buf->page->pp, rx_buf->page, false); + + rx_buf->page = NULL; + rx_buf->page_offset = 0; +@@ -345,54 +389,72 @@ static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf) + + /** + * idpf_rx_hdr_buf_rel_all - Release header buffer memory +- * @rxq: queue to use ++ * @bufq: queue to use ++ * @dev: device to free DMA memory + */ +-static void idpf_rx_hdr_buf_rel_all(struct idpf_queue *rxq) ++static void idpf_rx_hdr_buf_rel_all(struct idpf_buf_queue *bufq, ++ struct device *dev) + { +- struct idpf_adapter *adapter = rxq->vport->adapter; +- +- dma_free_coherent(&adapter->pdev->dev, +- rxq->desc_count * IDPF_HDR_BUF_SIZE, +- rxq->rx_buf.hdr_buf_va, +- rxq->rx_buf.hdr_buf_pa); +- rxq->rx_buf.hdr_buf_va = NULL; ++ dma_free_coherent(dev, bufq->desc_count * IDPF_HDR_BUF_SIZE, ++ bufq->rx_buf.hdr_buf_va, bufq->rx_buf.hdr_buf_pa); ++ bufq->rx_buf.hdr_buf_va = NULL; + } + + 
/** +- * idpf_rx_buf_rel_all - Free all Rx buffer resources for a queue +- * @rxq: queue to be cleaned ++ * idpf_rx_buf_rel_bufq - Free all Rx buffer resources for a buffer queue ++ * @bufq: queue to be cleaned ++ * @dev: device to free DMA memory + */ +-static void idpf_rx_buf_rel_all(struct idpf_queue *rxq) ++static void idpf_rx_buf_rel_bufq(struct idpf_buf_queue *bufq, ++ struct device *dev) + { +- u16 i; +- + /* queue already cleared, nothing to do */ +- if (!rxq->rx_buf.buf) ++ if (!bufq->rx_buf.buf) + return; + + /* Free all the bufs allocated and given to hw on Rx queue */ +- for (i = 0; i < rxq->desc_count; i++) +- idpf_rx_page_rel(rxq, &rxq->rx_buf.buf[i]); ++ for (u32 i = 0; i < bufq->desc_count; i++) ++ idpf_rx_page_rel(&bufq->rx_buf.buf[i]); ++ ++ if (idpf_queue_has(HSPLIT_EN, bufq)) ++ idpf_rx_hdr_buf_rel_all(bufq, dev); ++ ++ page_pool_destroy(bufq->pp); ++ bufq->pp = NULL; ++ ++ kfree(bufq->rx_buf.buf); ++ bufq->rx_buf.buf = NULL; ++} + +- if (rxq->rx_hsplit_en) +- idpf_rx_hdr_buf_rel_all(rxq); ++/** ++ * idpf_rx_buf_rel_all - Free all Rx buffer resources for a receive queue ++ * @rxq: queue to be cleaned ++ */ ++static void idpf_rx_buf_rel_all(struct idpf_rx_queue *rxq) ++{ ++ if (!rxq->rx_buf) ++ return; ++ ++ for (u32 i = 0; i < rxq->desc_count; i++) ++ idpf_rx_page_rel(&rxq->rx_buf[i]); + + page_pool_destroy(rxq->pp); + rxq->pp = NULL; + +- kfree(rxq->rx_buf.buf); +- rxq->rx_buf.buf = NULL; ++ kfree(rxq->rx_buf); ++ rxq->rx_buf = NULL; + } + + /** + * idpf_rx_desc_rel - Free a specific Rx q resources + * @rxq: queue to clean the resources from +- * @bufq: buffer q or completion q +- * @q_model: single or split q model ++ * @dev: device to free DMA memory ++ * @model: single or split queue model + * + * Free a specific rx queue resources + */ +-static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) ++static void idpf_rx_desc_rel(struct idpf_rx_queue *rxq, struct device *dev, ++ u32 model) + { + if (!rxq) + return; +@@ -402,7 +464,7 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) + rxq->skb = NULL; + } + +- if (bufq || !idpf_is_queue_model_split(q_model)) ++ if (!idpf_is_queue_model_split(model)) + idpf_rx_buf_rel_all(rxq); + + rxq->next_to_alloc = 0; +@@ -411,10 +473,34 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) + if (!rxq->desc_ring) + return; + +- dmam_free_coherent(rxq->dev, rxq->size, rxq->desc_ring, rxq->dma); ++ dmam_free_coherent(dev, rxq->size, rxq->desc_ring, rxq->dma); + rxq->desc_ring = NULL; + } + ++/** ++ * idpf_rx_desc_rel_bufq - free buffer queue resources ++ * @bufq: buffer queue to clean the resources from ++ * @dev: device to free DMA memory ++ */ ++static void idpf_rx_desc_rel_bufq(struct idpf_buf_queue *bufq, ++ struct device *dev) ++{ ++ if (!bufq) ++ return; ++ ++ idpf_rx_buf_rel_bufq(bufq, dev); ++ ++ bufq->next_to_alloc = 0; ++ bufq->next_to_clean = 0; ++ bufq->next_to_use = 0; ++ ++ if (!bufq->split_buf) ++ return; ++ ++ dma_free_coherent(dev, bufq->size, bufq->split_buf, bufq->dma); ++ bufq->split_buf = NULL; ++} ++ + /** + * idpf_rx_desc_rel_all - Free Rx Resources for All Queues + * @vport: virtual port structure +@@ -423,6 +509,7 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) + */ + static void idpf_rx_desc_rel_all(struct idpf_vport *vport) + { ++ struct device *dev = &vport->adapter->pdev->dev; + struct idpf_rxq_group *rx_qgrp; + u16 num_rxq; + int i, j; +@@ -435,15 +522,15 @@ static void idpf_rx_desc_rel_all(struct 
idpf_vport *vport) + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) +- idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j], +- false, vport->rxq_model); ++ idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j], dev, ++ VIRTCHNL2_QUEUE_MODEL_SINGLE); + continue; + } + + num_rxq = rx_qgrp->splitq.num_rxq_sets; + for (j = 0; j < num_rxq; j++) + idpf_rx_desc_rel(&rx_qgrp->splitq.rxq_sets[j]->rxq, +- false, vport->rxq_model); ++ dev, VIRTCHNL2_QUEUE_MODEL_SPLIT); + + if (!rx_qgrp->splitq.bufq_sets) + continue; +@@ -452,44 +539,40 @@ static void idpf_rx_desc_rel_all(struct idpf_vport *vport) + struct idpf_bufq_set *bufq_set = + &rx_qgrp->splitq.bufq_sets[j]; + +- idpf_rx_desc_rel(&bufq_set->bufq, true, +- vport->rxq_model); ++ idpf_rx_desc_rel_bufq(&bufq_set->bufq, dev); + } + } + } + + /** + * idpf_rx_buf_hw_update - Store the new tail and head values +- * @rxq: queue to bump ++ * @bufq: queue to bump + * @val: new head index + */ +-void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val) ++static void idpf_rx_buf_hw_update(struct idpf_buf_queue *bufq, u32 val) + { +- rxq->next_to_use = val; ++ bufq->next_to_use = val; + +- if (unlikely(!rxq->tail)) ++ if (unlikely(!bufq->tail)) + return; + + /* writel has an implicit memory barrier */ +- writel(val, rxq->tail); ++ writel(val, bufq->tail); + } + + /** + * idpf_rx_hdr_buf_alloc_all - Allocate memory for header buffers +- * @rxq: ring to use ++ * @bufq: ring to use + * + * Returns 0 on success, negative on failure. + */ +-static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq) ++static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) + { +- struct idpf_adapter *adapter = rxq->vport->adapter; +- +- rxq->rx_buf.hdr_buf_va = +- dma_alloc_coherent(&adapter->pdev->dev, +- IDPF_HDR_BUF_SIZE * rxq->desc_count, +- &rxq->rx_buf.hdr_buf_pa, +- GFP_KERNEL); +- if (!rxq->rx_buf.hdr_buf_va) ++ bufq->rx_buf.hdr_buf_va = ++ dma_alloc_coherent(bufq->q_vector->vport->netdev->dev.parent, ++ IDPF_HDR_BUF_SIZE * bufq->desc_count, ++ &bufq->rx_buf.hdr_buf_pa, GFP_KERNEL); ++ if (!bufq->rx_buf.hdr_buf_va) + return -ENOMEM; + + return 0; +@@ -502,19 +585,20 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq) + */ + static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) + { +- u16 nta = refillq->next_to_alloc; ++ u32 nta = refillq->next_to_use; + + /* store the buffer ID and the SW maintained GEN bit to the refillq */ + refillq->ring[nta] = + FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | + FIELD_PREP(IDPF_RX_BI_GEN_M, +- test_bit(__IDPF_Q_GEN_CHK, refillq->flags)); ++ idpf_queue_has(GEN_CHK, refillq)); + + if (unlikely(++nta == refillq->desc_count)) { + nta = 0; +- change_bit(__IDPF_Q_GEN_CHK, refillq->flags); ++ idpf_queue_change(GEN_CHK, refillq); + } +- refillq->next_to_alloc = nta; ++ ++ refillq->next_to_use = nta; + } + + /** +@@ -524,7 +608,7 @@ static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) + * + * Returns false if buffer could not be allocated, true otherwise. 
+ */ +-static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) ++static bool idpf_rx_post_buf_desc(struct idpf_buf_queue *bufq, u16 buf_id) + { + struct virtchnl2_splitq_rx_buf_desc *splitq_rx_desc = NULL; + u16 nta = bufq->next_to_alloc; +@@ -534,11 +618,10 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) + splitq_rx_desc = &bufq->split_buf[nta]; + buf = &bufq->rx_buf.buf[buf_id]; + +- if (bufq->rx_hsplit_en) { ++ if (idpf_queue_has(HSPLIT_EN, bufq)) + splitq_rx_desc->hdr_addr = + cpu_to_le64(bufq->rx_buf.hdr_buf_pa + + (u32)buf_id * IDPF_HDR_BUF_SIZE); +- } + + addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size); + if (unlikely(addr == DMA_MAPPING_ERROR)) +@@ -562,7 +645,8 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) + * + * Returns true if @working_set bufs were posted successfully, false otherwise. + */ +-static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set) ++static bool idpf_rx_post_init_bufs(struct idpf_buf_queue *bufq, ++ u16 working_set) + { + int i; + +@@ -571,26 +655,28 @@ static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set) + return false; + } + +- idpf_rx_buf_hw_update(bufq, +- bufq->next_to_alloc & ~(bufq->rx_buf_stride - 1)); ++ idpf_rx_buf_hw_update(bufq, ALIGN_DOWN(bufq->next_to_alloc, ++ IDPF_RX_BUF_STRIDE)); + + return true; + } + + /** + * idpf_rx_create_page_pool - Create a page pool +- * @rxbufq: RX queue to create page pool for ++ * @napi: NAPI of the associated queue vector ++ * @count: queue descriptor count + * + * Returns &page_pool on success, casted -errno on failure + */ +-static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq) ++static struct page_pool *idpf_rx_create_page_pool(struct napi_struct *napi, ++ u32 count) + { + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, +- .pool_size = rxbufq->desc_count, ++ .pool_size = count, + .nid = NUMA_NO_NODE, +- .dev = rxbufq->vport->netdev->dev.parent, ++ .dev = napi->dev->dev.parent, + .max_len = PAGE_SIZE, + .dma_dir = DMA_FROM_DEVICE, + .offset = 0, +@@ -599,15 +685,58 @@ static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq) + return page_pool_create(&pp); + } + ++/** ++ * idpf_rx_buf_alloc_singleq - Allocate memory for all buffer resources ++ * @rxq: queue for which the buffers are allocated ++ * ++ * Return: 0 on success, -ENOMEM on failure. ++ */ ++static int idpf_rx_buf_alloc_singleq(struct idpf_rx_queue *rxq) ++{ ++ rxq->rx_buf = kcalloc(rxq->desc_count, sizeof(*rxq->rx_buf), ++ GFP_KERNEL); ++ if (!rxq->rx_buf) ++ return -ENOMEM; ++ ++ if (idpf_rx_singleq_buf_hw_alloc_all(rxq, rxq->desc_count - 1)) ++ goto err; ++ ++ return 0; ++ ++err: ++ idpf_rx_buf_rel_all(rxq); ++ ++ return -ENOMEM; ++} ++ ++/** ++ * idpf_rx_bufs_init_singleq - Initialize page pool and allocate Rx bufs ++ * @rxq: buffer queue to create page pool for ++ * ++ * Return: 0 on success, -errno on failure. 
++ */ ++static int idpf_rx_bufs_init_singleq(struct idpf_rx_queue *rxq) ++{ ++ struct page_pool *pool; ++ ++ pool = idpf_rx_create_page_pool(&rxq->q_vector->napi, rxq->desc_count); ++ if (IS_ERR(pool)) ++ return PTR_ERR(pool); ++ ++ rxq->pp = pool; ++ ++ return idpf_rx_buf_alloc_singleq(rxq); ++} ++ + /** + * idpf_rx_buf_alloc_all - Allocate memory for all buffer resources +- * @rxbufq: queue for which the buffers are allocated; equivalent to +- * rxq when operating in singleq mode ++ * @rxbufq: queue for which the buffers are allocated + * + * Returns 0 on success, negative on failure + */ +-static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq) ++static int idpf_rx_buf_alloc_all(struct idpf_buf_queue *rxbufq) + { ++ struct device *dev = rxbufq->q_vector->vport->netdev->dev.parent; + int err = 0; + + /* Allocate book keeping buffers */ +@@ -618,48 +747,41 @@ static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq) + goto rx_buf_alloc_all_out; + } + +- if (rxbufq->rx_hsplit_en) { ++ if (idpf_queue_has(HSPLIT_EN, rxbufq)) { + err = idpf_rx_hdr_buf_alloc_all(rxbufq); + if (err) + goto rx_buf_alloc_all_out; + } + + /* Allocate buffers to be given to HW. */ +- if (idpf_is_queue_model_split(rxbufq->vport->rxq_model)) { +- int working_set = IDPF_RX_BUFQ_WORKING_SET(rxbufq); +- +- if (!idpf_rx_post_init_bufs(rxbufq, working_set)) +- err = -ENOMEM; +- } else { +- if (idpf_rx_singleq_buf_hw_alloc_all(rxbufq, +- rxbufq->desc_count - 1)) +- err = -ENOMEM; +- } ++ if (!idpf_rx_post_init_bufs(rxbufq, IDPF_RX_BUFQ_WORKING_SET(rxbufq))) ++ err = -ENOMEM; + + rx_buf_alloc_all_out: + if (err) +- idpf_rx_buf_rel_all(rxbufq); ++ idpf_rx_buf_rel_bufq(rxbufq, dev); + + return err; + } + + /** + * idpf_rx_bufs_init - Initialize page pool, allocate rx bufs, and post to HW +- * @rxbufq: RX queue to create page pool for ++ * @bufq: buffer queue to create page pool for + * + * Returns 0 on success, negative on failure + */ +-static int idpf_rx_bufs_init(struct idpf_queue *rxbufq) ++static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq) + { + struct page_pool *pool; + +- pool = idpf_rx_create_page_pool(rxbufq); ++ pool = idpf_rx_create_page_pool(&bufq->q_vector->napi, ++ bufq->desc_count); + if (IS_ERR(pool)) + return PTR_ERR(pool); + +- rxbufq->pp = pool; ++ bufq->pp = pool; + +- return idpf_rx_buf_alloc_all(rxbufq); ++ return idpf_rx_buf_alloc_all(bufq); + } + + /** +@@ -671,7 +793,6 @@ static int idpf_rx_bufs_init(struct idpf_queue *rxbufq) + int idpf_rx_bufs_init_all(struct idpf_vport *vport) + { + struct idpf_rxq_group *rx_qgrp; +- struct idpf_queue *q; + int i, j, err; + + for (i = 0; i < vport->num_rxq_grp; i++) { +@@ -682,8 +803,10 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) + int num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { ++ struct idpf_rx_queue *q; ++ + q = rx_qgrp->singleq.rxqs[j]; +- err = idpf_rx_bufs_init(q); ++ err = idpf_rx_bufs_init_singleq(q); + if (err) + return err; + } +@@ -693,6 +816,8 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) + + /* Otherwise, allocate bufs for the buffer queues */ + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { ++ struct idpf_buf_queue *q; ++ + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + err = idpf_rx_bufs_init(q); + if (err) +@@ -705,22 +830,17 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) + + /** + * idpf_rx_desc_alloc - Allocate queue Rx resources ++ * @vport: vport to allocate resources for + * @rxq: Rx queue for which the resources are setup +- * @bufq: buffer or completion queue +- * @q_model: 
single or split queue model + * + * Returns 0 on success, negative on failure + */ +-static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) ++static int idpf_rx_desc_alloc(const struct idpf_vport *vport, ++ struct idpf_rx_queue *rxq) + { +- struct device *dev = rxq->dev; ++ struct device *dev = &vport->adapter->pdev->dev; + +- if (bufq) +- rxq->size = rxq->desc_count * +- sizeof(struct virtchnl2_splitq_rx_buf_desc); +- else +- rxq->size = rxq->desc_count * +- sizeof(union virtchnl2_rx_desc); ++ rxq->size = rxq->desc_count * sizeof(union virtchnl2_rx_desc); + + /* Allocate descriptors and also round up to nearest 4K */ + rxq->size = ALIGN(rxq->size, 4096); +@@ -735,7 +855,35 @@ static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) + rxq->next_to_alloc = 0; + rxq->next_to_clean = 0; + rxq->next_to_use = 0; +- set_bit(__IDPF_Q_GEN_CHK, rxq->flags); ++ idpf_queue_set(GEN_CHK, rxq); ++ ++ return 0; ++} ++ ++/** ++ * idpf_bufq_desc_alloc - Allocate buffer queue descriptor ring ++ * @vport: vport to allocate resources for ++ * @bufq: buffer queue for which the resources are set up ++ * ++ * Return: 0 on success, -ENOMEM on failure. ++ */ ++static int idpf_bufq_desc_alloc(const struct idpf_vport *vport, ++ struct idpf_buf_queue *bufq) ++{ ++ struct device *dev = &vport->adapter->pdev->dev; ++ ++ bufq->size = array_size(bufq->desc_count, sizeof(*bufq->split_buf)); ++ ++ bufq->split_buf = dma_alloc_coherent(dev, bufq->size, &bufq->dma, ++ GFP_KERNEL); ++ if (!bufq->split_buf) ++ return -ENOMEM; ++ ++ bufq->next_to_alloc = 0; ++ bufq->next_to_clean = 0; ++ bufq->next_to_use = 0; ++ ++ idpf_queue_set(GEN_CHK, bufq); + + return 0; + } +@@ -748,9 +896,7 @@ static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) + */ + static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) + { +- struct device *dev = &vport->adapter->pdev->dev; + struct idpf_rxq_group *rx_qgrp; +- struct idpf_queue *q; + int i, j, err; + u16 num_rxq; + +@@ -762,13 +908,17 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { ++ struct idpf_rx_queue *q; ++ + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; +- err = idpf_rx_desc_alloc(q, false, vport->rxq_model); ++ ++ err = idpf_rx_desc_alloc(vport, q); + if (err) { +- dev_err(dev, "Memory allocation for Rx Queue %u failed\n", ++ pci_err(vport->adapter->pdev, ++ "Memory allocation for Rx Queue %u failed\n", + i); + goto err_out; + } +@@ -778,10 +928,14 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) + continue; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { ++ struct idpf_buf_queue *q; ++ + q = &rx_qgrp->splitq.bufq_sets[j].bufq; +- err = idpf_rx_desc_alloc(q, true, vport->rxq_model); ++ ++ err = idpf_bufq_desc_alloc(vport, q); + if (err) { +- dev_err(dev, "Memory allocation for Rx Buffer Queue %u failed\n", ++ pci_err(vport->adapter->pdev, ++ "Memory allocation for Rx Buffer Queue %u failed\n", + i); + goto err_out; + } +@@ -802,11 +956,16 @@ static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) + */ + static void idpf_txq_group_rel(struct idpf_vport *vport) + { ++ bool split, flow_sch_en; + int i, j; + + if (!vport->txq_grps) + return; + ++ split = idpf_is_queue_model_split(vport->txq_model); ++ flow_sch_en = !idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS, ++ VIRTCHNL2_CAP_SPLITQ_QSCHED); ++ + for (i = 0; i < 
vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + +@@ -814,8 +973,15 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) + kfree(txq_grp->txqs[j]); + txq_grp->txqs[j] = NULL; + } ++ ++ if (!split) ++ continue; ++ + kfree(txq_grp->complq); + txq_grp->complq = NULL; ++ ++ if (flow_sch_en) ++ kfree(txq_grp->stashes); + } + kfree(vport->txq_grps); + vport->txq_grps = NULL; +@@ -919,7 +1085,7 @@ static int idpf_vport_init_fast_path_txqs(struct idpf_vport *vport) + { + int i, j, k = 0; + +- vport->txqs = kcalloc(vport->num_txq, sizeof(struct idpf_queue *), ++ vport->txqs = kcalloc(vport->num_txq, sizeof(*vport->txqs), + GFP_KERNEL); + + if (!vport->txqs) +@@ -1137,7 +1303,8 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, + * @q: rx queue for which descids are set + * + */ +-static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q) ++static void idpf_rxq_set_descids(const struct idpf_vport *vport, ++ struct idpf_rx_queue *q) + { + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; +@@ -1158,20 +1325,22 @@ static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q) + */ + static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) + { +- bool flow_sch_en; +- int err, i; ++ bool split, flow_sch_en; ++ int i; + + vport->txq_grps = kcalloc(vport->num_txq_grp, + sizeof(*vport->txq_grps), GFP_KERNEL); + if (!vport->txq_grps) + return -ENOMEM; + ++ split = idpf_is_queue_model_split(vport->txq_model); + flow_sch_en = !idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_SPLITQ_QSCHED); + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + struct idpf_adapter *adapter = vport->adapter; ++ struct idpf_txq_stash *stashes; + int j; + + tx_qgrp->vport = vport; +@@ -1180,45 +1349,62 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) + for (j = 0; j < tx_qgrp->num_txq; j++) { + tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]), + GFP_KERNEL); +- if (!tx_qgrp->txqs[j]) { +- err = -ENOMEM; ++ if (!tx_qgrp->txqs[j]) + goto err_alloc; +- } ++ } ++ ++ if (split && flow_sch_en) { ++ stashes = kcalloc(num_txq, sizeof(*stashes), ++ GFP_KERNEL); ++ if (!stashes) ++ goto err_alloc; ++ ++ tx_qgrp->stashes = stashes; + } + + for (j = 0; j < tx_qgrp->num_txq; j++) { +- struct idpf_queue *q = tx_qgrp->txqs[j]; ++ struct idpf_tx_queue *q = tx_qgrp->txqs[j]; + + q->dev = &adapter->pdev->dev; + q->desc_count = vport->txq_desc_count; + q->tx_max_bufs = idpf_get_max_tx_bufs(adapter); + q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter); +- q->vport = vport; ++ q->netdev = vport->netdev; + q->txq_grp = tx_qgrp; +- hash_init(q->sched_buf_hash); + +- if (flow_sch_en) +- set_bit(__IDPF_Q_FLOW_SCH_EN, q->flags); ++ if (!split) { ++ q->clean_budget = vport->compln_clean_budget; ++ idpf_queue_assign(CRC_EN, q, ++ vport->crc_enable); ++ } ++ ++ if (!flow_sch_en) ++ continue; ++ ++ if (split) { ++ q->stash = &stashes[j]; ++ hash_init(q->stash->sched_buf_hash); ++ } ++ ++ idpf_queue_set(FLOW_SCH_EN, q); + } + +- if (!idpf_is_queue_model_split(vport->txq_model)) ++ if (!split) + continue; + + tx_qgrp->complq = kcalloc(IDPF_COMPLQ_PER_GROUP, + sizeof(*tx_qgrp->complq), + GFP_KERNEL); +- if (!tx_qgrp->complq) { +- err = -ENOMEM; ++ if (!tx_qgrp->complq) + goto err_alloc; +- } + +- tx_qgrp->complq->dev = &adapter->pdev->dev; + tx_qgrp->complq->desc_count = vport->complq_desc_count; +- 
tx_qgrp->complq->vport = vport; + tx_qgrp->complq->txq_grp = tx_qgrp; ++ tx_qgrp->complq->netdev = vport->netdev; ++ tx_qgrp->complq->clean_budget = vport->compln_clean_budget; + + if (flow_sch_en) +- __set_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags); ++ idpf_queue_set(FLOW_SCH_EN, tx_qgrp->complq); + } + + return 0; +@@ -1226,7 +1412,7 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) + err_alloc: + idpf_txq_group_rel(vport); + +- return err; ++ return -ENOMEM; + } + + /** +@@ -1238,8 +1424,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) + */ + static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) + { +- struct idpf_adapter *adapter = vport->adapter; +- struct idpf_queue *q; + int i, k, err = 0; + bool hs; + +@@ -1292,21 +1476,15 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) + struct idpf_bufq_set *bufq_set = + &rx_qgrp->splitq.bufq_sets[j]; + int swq_size = sizeof(struct idpf_sw_queue); ++ struct idpf_buf_queue *q; + + q = &rx_qgrp->splitq.bufq_sets[j].bufq; +- q->dev = &adapter->pdev->dev; + q->desc_count = vport->bufq_desc_count[j]; +- q->vport = vport; +- q->rxq_grp = rx_qgrp; +- q->idx = j; + q->rx_buf_size = vport->bufq_size[j]; + q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; +- q->rx_buf_stride = IDPF_RX_BUF_STRIDE; + +- if (hs) { +- q->rx_hsplit_en = true; +- q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; +- } ++ idpf_queue_assign(HSPLIT_EN, q, hs); ++ q->rx_hbuf_size = hs ? IDPF_HDR_BUF_SIZE : 0; + + bufq_set->num_refillqs = num_rxq; + bufq_set->refillqs = kcalloc(num_rxq, swq_size, +@@ -1319,13 +1497,12 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) + struct idpf_sw_queue *refillq = + &bufq_set->refillqs[k]; + +- refillq->dev = &vport->adapter->pdev->dev; + refillq->desc_count = + vport->bufq_desc_count[j]; +- set_bit(__IDPF_Q_GEN_CHK, refillq->flags); +- set_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); ++ idpf_queue_set(GEN_CHK, refillq); ++ idpf_queue_set(RFL_GEN_CHK, refillq); + refillq->ring = kcalloc(refillq->desc_count, +- sizeof(u16), ++ sizeof(*refillq->ring), + GFP_KERNEL); + if (!refillq->ring) { + err = -ENOMEM; +@@ -1336,27 +1513,27 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) + + skip_splitq_rx_init: + for (j = 0; j < num_rxq; j++) { ++ struct idpf_rx_queue *q; ++ + if (!idpf_is_queue_model_split(vport->rxq_model)) { + q = rx_qgrp->singleq.rxqs[j]; + goto setup_rxq; + } + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; +- rx_qgrp->splitq.rxq_sets[j]->refillq0 = ++ rx_qgrp->splitq.rxq_sets[j]->refillq[0] = + &rx_qgrp->splitq.bufq_sets[0].refillqs[j]; + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) +- rx_qgrp->splitq.rxq_sets[j]->refillq1 = ++ rx_qgrp->splitq.rxq_sets[j]->refillq[1] = + &rx_qgrp->splitq.bufq_sets[1].refillqs[j]; + +- if (hs) { +- q->rx_hsplit_en = true; +- q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; +- } ++ idpf_queue_assign(HSPLIT_EN, q, hs); ++ q->rx_hbuf_size = hs ? 
IDPF_HDR_BUF_SIZE : 0; + + setup_rxq: +- q->dev = &adapter->pdev->dev; + q->desc_count = vport->rxq_desc_count; +- q->vport = vport; +- q->rxq_grp = rx_qgrp; ++ q->rx_ptype_lkup = vport->rx_ptype_lkup; ++ q->netdev = vport->netdev; ++ q->bufq_sets = rx_qgrp->splitq.bufq_sets; + q->idx = (i * num_rxq) + j; + /* In splitq mode, RXQ buffer size should be + * set to that of the first buffer queue +@@ -1445,12 +1622,13 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport) + * idpf_tx_handle_sw_marker - Handle queue marker packet + * @tx_q: tx queue to handle software marker + */ +-static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) ++static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q) + { +- struct idpf_vport *vport = tx_q->vport; ++ struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev); ++ struct idpf_vport *vport = priv->vport; + int i; + +- clear_bit(__IDPF_Q_SW_MARKER, tx_q->flags); ++ idpf_queue_clear(SW_MARKER, tx_q); + /* Hardware must write marker packets to all queues associated with + * completion queues. So check if all queues received marker packets + */ +@@ -1458,7 +1636,7 @@ static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) + /* If we're still waiting on any other TXQ marker completions, + * just return now since we cannot wake up the marker_wq yet. + */ +- if (test_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags)) ++ if (idpf_queue_has(SW_MARKER, vport->txqs[i])) + return; + + /* Drain complete */ +@@ -1474,7 +1652,7 @@ static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @napi_budget: Used to determine if we are in netpoll + */ +-static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q, ++static void idpf_tx_splitq_clean_hdr(struct idpf_tx_queue *tx_q, + struct idpf_tx_buf *tx_buf, + struct idpf_cleaned_stats *cleaned, + int napi_budget) +@@ -1505,7 +1683,8 @@ static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q, + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @budget: Used to determine if we are in netpoll + */ +-static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, ++static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, ++ u16 compl_tag, + struct idpf_cleaned_stats *cleaned, + int budget) + { +@@ -1513,7 +1692,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, + struct hlist_node *tmp_buf; + + /* Buffer completion */ +- hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf, ++ hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, + hlist, compl_tag) { + if (unlikely(stash->buf.compl_tag != (int)compl_tag)) + continue; +@@ -1530,7 +1709,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, + } + + /* Push shadow buf back onto stack */ +- idpf_buf_lifo_push(&txq->buf_stack, stash); ++ idpf_buf_lifo_push(&txq->stash->buf_stack, stash); + + hash_del(&stash->hlist); + } +@@ -1542,7 +1721,7 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, + * @txq: Tx queue to clean + * @tx_buf: buffer to store + */ +-static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, ++static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, + struct idpf_tx_buf *tx_buf) + { + struct idpf_tx_stash *stash; +@@ -1551,10 +1730,10 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, + !dma_unmap_len(tx_buf, len))) + return 0; + +- stash = idpf_buf_lifo_pop(&txq->buf_stack); 
++ stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); + if (unlikely(!stash)) { + net_err_ratelimited("%s: No out-of-order TX buffers left!\n", +- txq->vport->netdev->name); ++ netdev_name(txq->netdev)); + + return -ENOMEM; + } +@@ -1568,7 +1747,8 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, + stash->buf.compl_tag = tx_buf->compl_tag; + + /* Add buffer to buf_hash table to be freed later */ +- hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag); ++ hash_add(txq->stash->sched_buf_hash, &stash->hlist, ++ stash->buf.compl_tag); + + memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); + +@@ -1607,7 +1787,7 @@ do { \ + * and the buffers will be cleaned separately. The stats are not updated from + * this function when using flow-based scheduling. + */ +-static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, ++static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, + int napi_budget, + struct idpf_cleaned_stats *cleaned, + bool descs_only) +@@ -1703,7 +1883,7 @@ do { \ + * stashed. Returns the byte/segment count for the cleaned packet associated + * this completion tag. + */ +-static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, ++static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, + struct idpf_cleaned_stats *cleaned, + int budget) + { +@@ -1772,14 +1952,14 @@ static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, + * + * Returns bytes/packets cleaned + */ +-static void idpf_tx_handle_rs_completion(struct idpf_queue *txq, ++static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, + struct idpf_splitq_tx_compl_desc *desc, + struct idpf_cleaned_stats *cleaned, + int budget) + { + u16 compl_tag; + +- if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) { ++ if (!idpf_queue_has(FLOW_SCH_EN, txq)) { + u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); + + return idpf_tx_splitq_clean(txq, head, budget, cleaned, false); +@@ -1802,24 +1982,23 @@ static void idpf_tx_handle_rs_completion(struct idpf_queue *txq, + * + * Returns true if there's any budget left (e.g. 
the clean is finished) + */ +-static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, ++static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, + int *cleaned) + { + struct idpf_splitq_tx_compl_desc *tx_desc; +- struct idpf_vport *vport = complq->vport; + s16 ntc = complq->next_to_clean; + struct idpf_netdev_priv *np; + unsigned int complq_budget; + bool complq_ok = true; + int i; + +- complq_budget = vport->compln_clean_budget; ++ complq_budget = complq->clean_budget; + tx_desc = &complq->comp[ntc]; + ntc -= complq->desc_count; + + do { + struct idpf_cleaned_stats cleaned_stats = { }; +- struct idpf_queue *tx_q; ++ struct idpf_tx_queue *tx_q; + int rel_tx_qid; + u16 hw_head; + u8 ctype; /* completion type */ +@@ -1828,7 +2007,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + /* if the descriptor isn't done, no work yet to do */ + gen = le16_get_bits(tx_desc->qid_comptype_gen, + IDPF_TXD_COMPLQ_GEN_M); +- if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen) ++ if (idpf_queue_has(GEN_CHK, complq) != gen) + break; + + /* Find necessary info of TX queue to clean buffers */ +@@ -1836,8 +2015,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + IDPF_TXD_COMPLQ_QID_M); + if (rel_tx_qid >= complq->txq_grp->num_txq || + !complq->txq_grp->txqs[rel_tx_qid]) { +- dev_err(&complq->vport->adapter->pdev->dev, +- "TxQ not found\n"); ++ netdev_err(complq->netdev, "TxQ not found\n"); + goto fetch_next_desc; + } + tx_q = complq->txq_grp->txqs[rel_tx_qid]; +@@ -1860,15 +2038,14 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + idpf_tx_handle_sw_marker(tx_q); + break; + default: +- dev_err(&tx_q->vport->adapter->pdev->dev, +- "Unknown TX completion type: %d\n", +- ctype); ++ netdev_err(tx_q->netdev, ++ "Unknown TX completion type: %d\n", ctype); + goto fetch_next_desc; + } + + u64_stats_update_begin(&tx_q->stats_sync); +- u64_stats_add(&tx_q->q_stats.tx.packets, cleaned_stats.packets); +- u64_stats_add(&tx_q->q_stats.tx.bytes, cleaned_stats.bytes); ++ u64_stats_add(&tx_q->q_stats.packets, cleaned_stats.packets); ++ u64_stats_add(&tx_q->q_stats.bytes, cleaned_stats.bytes); + tx_q->cleaned_pkts += cleaned_stats.packets; + tx_q->cleaned_bytes += cleaned_stats.bytes; + complq->num_completions++; +@@ -1880,7 +2057,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + if (unlikely(!ntc)) { + ntc -= complq->desc_count; + tx_desc = &complq->comp[0]; +- change_bit(__IDPF_Q_GEN_CHK, complq->flags); ++ idpf_queue_change(GEN_CHK, complq); + } + + prefetch(tx_desc); +@@ -1896,9 +2073,9 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + IDPF_TX_COMPLQ_OVERFLOW_THRESH(complq))) + complq_ok = false; + +- np = netdev_priv(complq->vport->netdev); ++ np = netdev_priv(complq->netdev); + for (i = 0; i < complq->txq_grp->num_txq; ++i) { +- struct idpf_queue *tx_q = complq->txq_grp->txqs[i]; ++ struct idpf_tx_queue *tx_q = complq->txq_grp->txqs[i]; + struct netdev_queue *nq; + bool dont_wake; + +@@ -1909,11 +2086,11 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + *cleaned += tx_q->cleaned_pkts; + + /* Update BQL */ +- nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + + dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || + np->state != __IDPF_VPORT_UP || +- !netif_carrier_ok(tx_q->vport->netdev); ++ !netif_carrier_ok(tx_q->netdev); + /* Check if the TXQ needs to and can 
be restarted */ + __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, +@@ -1976,7 +2153,7 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, + * + * Returns 0 if stop is not needed + */ +-int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) ++int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size) + { + struct netdev_queue *nq; + +@@ -1984,10 +2161,10 @@ int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) + return 0; + + u64_stats_update_begin(&tx_q->stats_sync); +- u64_stats_inc(&tx_q->q_stats.tx.q_busy); ++ u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + +- nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + + return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); + } +@@ -1999,7 +2176,7 @@ int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) + * + * Returns 0 if stop is not needed + */ +-static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, ++static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, + unsigned int descs_needed) + { + if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) +@@ -2023,9 +2200,9 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, + + splitq_stop: + u64_stats_update_begin(&tx_q->stats_sync); +- u64_stats_inc(&tx_q->q_stats.tx.q_busy); ++ u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); +- netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx); ++ netif_stop_subqueue(tx_q->netdev, tx_q->idx); + + return -EBUSY; + } +@@ -2040,12 +2217,12 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, + * to do a register write to update our queue status. We know this can only + * mean tail here as HW should be owning head for TX. + */ +-void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, ++void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, + bool xmit_more) + { + struct netdev_queue *nq; + +- nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + tx_q->next_to_use = val; + + idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); +@@ -2069,7 +2246,7 @@ void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, + * + * Returns number of data descriptors needed for this skb. 
+ */ +-unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, ++unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb) + { + const struct skb_shared_info *shinfo; +@@ -2102,7 +2279,7 @@ unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, + + count = idpf_size_to_txd_count(skb->len); + u64_stats_update_begin(&txq->stats_sync); +- u64_stats_inc(&txq->q_stats.tx.linearize); ++ u64_stats_inc(&txq->q_stats.linearize); + u64_stats_update_end(&txq->stats_sync); + } + +@@ -2116,11 +2293,11 @@ unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +-void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, ++void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) + { + u64_stats_update_begin(&txq->stats_sync); +- u64_stats_inc(&txq->q_stats.tx.dma_map_errs); ++ u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ +@@ -2159,7 +2336,7 @@ void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + * @txq: the tx ring to wrap + * @ntu: ring index to bump + */ +-static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu) ++static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) + { + ntu++; + +@@ -2181,7 +2358,7 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu) + * and gets a physical address for each memory location and programs + * it and the length into the transmit flex descriptor. + */ +-static void idpf_tx_splitq_map(struct idpf_queue *tx_q, ++static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, + struct idpf_tx_splitq_params *params, + struct idpf_tx_buf *first) + { +@@ -2348,7 +2525,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, + tx_q->txq_grp->num_completions_pending++; + + /* record bytecount for BQL */ +- nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); ++ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +@@ -2544,7 +2721,7 @@ bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + * ring entry to reflect that this index is a context descriptor + */ + static struct idpf_flex_tx_ctx_desc * +-idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) ++idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq) + { + struct idpf_flex_tx_ctx_desc *desc; + int i = txq->next_to_use; +@@ -2564,10 +2741,10 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) + * @tx_q: queue to send buffer on + * @skb: pointer to skb + */ +-netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb) ++netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb) + { + u64_stats_update_begin(&tx_q->stats_sync); +- u64_stats_inc(&tx_q->q_stats.tx.skb_drops); ++ u64_stats_inc(&tx_q->q_stats.skb_drops); + u64_stats_update_end(&tx_q->stats_sync); + + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); +@@ -2585,7 +2762,7 @@ netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb) + * Returns NETDEV_TX_OK if sent, else an error code + */ + static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, +- struct idpf_queue *tx_q) ++ struct idpf_tx_queue *tx_q) + { + struct idpf_tx_splitq_params tx_params = { }; + struct idpf_tx_buf 
*first; +@@ -2625,7 +2802,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, + ctx_desc->tso.qw0.hdr_len = tx_params.offload.tso_hdr_len; + + u64_stats_update_begin(&tx_q->stats_sync); +- u64_stats_inc(&tx_q->q_stats.tx.lso_pkts); ++ u64_stats_inc(&tx_q->q_stats.lso_pkts); + u64_stats_update_end(&tx_q->stats_sync); + } + +@@ -2642,7 +2819,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + } + +- if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_q->flags)) { ++ if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; + tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; + /* Set the RE bit to catch any packets that may have not been +@@ -2682,7 +2859,7 @@ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev) + { + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); +- struct idpf_queue *tx_q; ++ struct idpf_tx_queue *tx_q; + + if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) { + dev_kfree_skb_any(skb); +@@ -2735,13 +2912,14 @@ enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *deco + * @rx_desc: Receive descriptor + * @decoded: Decoded Rx packet type related fields + */ +-static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb, +- struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, +- struct idpf_rx_ptype_decoded *decoded) ++static void ++idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb, ++ const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, ++ struct idpf_rx_ptype_decoded *decoded) + { + u32 hash; + +- if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXHASH))) ++ if (unlikely(!(rxq->netdev->features & NETIF_F_RXHASH))) + return; + + hash = le16_to_cpu(rx_desc->hash1) | +@@ -2760,14 +2938,14 @@ static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb, + * + * skb->protocol must be set before this function is called + */ +-static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, ++static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, + struct idpf_rx_csum_decoded *csum_bits, + struct idpf_rx_ptype_decoded *decoded) + { + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ +- if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXCSUM))) ++ if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM))) + return; + + /* check if HW has decoded the packet and checksum */ +@@ -2814,7 +2992,7 @@ static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, + + checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); ++ u64_stats_inc(&rxq->q_stats.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); + } + +@@ -2824,8 +3002,9 @@ static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, + * @csum: structure to extract checksum fields + * + **/ +-static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, +- struct idpf_rx_csum_decoded *csum) ++static void ++idpf_rx_splitq_extract_csum_bits(const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, ++ struct idpf_rx_csum_decoded *csum) + { + u8 qword0, qword1; + +@@ -2860,8 +3039,8 @@ static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_n + * Populate the skb fields with the total number of RSC segments, RSC payload + * length and packet type. 
+ */ +-static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, +- struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, ++static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, ++ const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_ptype_decoded *decoded) + { + u16 rsc_segments, rsc_seg_len; +@@ -2914,7 +3093,7 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, + tcp_gro_complete(skb); + + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_inc(&rxq->q_stats.rx.rsc_pkts); ++ u64_stats_inc(&rxq->q_stats.rsc_pkts); + u64_stats_update_end(&rxq->stats_sync); + + return 0; +@@ -2930,9 +3109,9 @@ static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, + * order to populate the hash, checksum, protocol, and + * other fields within the skb. + */ +-static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, +- struct sk_buff *skb, +- struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) ++static int ++idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, ++ const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) + { + struct idpf_rx_csum_decoded csum_bits = { }; + struct idpf_rx_ptype_decoded decoded; +@@ -2940,19 +3119,13 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, + + rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0, + VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M); +- +- skb->protocol = eth_type_trans(skb, rxq->vport->netdev); +- +- decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; +- /* If we don't know the ptype we can't do anything else with it. Just +- * pass it up the stack as-is. +- */ +- if (!decoded.known) +- return 0; ++ decoded = rxq->rx_ptype_lkup[rx_ptype]; + + /* process RSS/hash */ + idpf_rx_hash(rxq, skb, rx_desc, &decoded); + ++ skb->protocol = eth_type_trans(skb, rxq->netdev); ++ + if (le16_get_bits(rx_desc->hdrlen_flags, + VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) + return idpf_rx_rsc(rxq, skb, rx_desc, &decoded); +@@ -2992,7 +3165,7 @@ void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + * data from the current receive descriptor, taking care to set up the + * skb correctly. + */ +-struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, ++struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq, + struct idpf_rx_buf *rx_buf, + unsigned int size) + { +@@ -3005,7 +3178,7 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + /* prefetch first cache line of first page */ + net_prefetch(va); + /* allocate a skb to store the frags */ +- skb = napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE); ++ skb = napi_alloc_skb(rxq->napi, IDPF_RX_HDR_SIZE); + if (unlikely(!skb)) { + idpf_rx_put_page(rx_buf); + +@@ -3052,14 +3225,14 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + * the current receive descriptor, taking care to set up the skb correctly. + * This specifically uses a header buffer to start building the skb. 
+ */ +-static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq, +- const void *va, +- unsigned int size) ++static struct sk_buff * ++idpf_rx_hdr_construct_skb(const struct idpf_rx_queue *rxq, const void *va, ++ unsigned int size) + { + struct sk_buff *skb; + + /* allocate a skb to store the frags */ +- skb = napi_alloc_skb(&rxq->q_vector->napi, size); ++ skb = napi_alloc_skb(rxq->napi, size); + if (unlikely(!skb)) + return NULL; + +@@ -3115,10 +3288,10 @@ static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_de + * + * Returns amount of work completed + */ +-static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) ++static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) + { + int total_rx_bytes = 0, total_rx_pkts = 0; +- struct idpf_queue *rx_bufq = NULL; ++ struct idpf_buf_queue *rx_bufq = NULL; + struct sk_buff *skb = rxq->skb; + u16 ntc = rxq->next_to_clean; + +@@ -3148,7 +3321,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, + VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M); + +- if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id) ++ if (idpf_queue_has(GEN_CHK, rxq) != gen_id) + break; + + rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M, +@@ -3156,7 +3329,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) { + IDPF_RX_BUMP_NTC(rxq, ntc); + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_inc(&rxq->q_stats.rx.bad_descs); ++ u64_stats_inc(&rxq->q_stats.bad_descs); + u64_stats_update_end(&rxq->stats_sync); + continue; + } +@@ -3174,7 +3347,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + * data/payload buffer. + */ + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_inc(&rxq->q_stats.rx.hsplit_buf_ovf); ++ u64_stats_inc(&rxq->q_stats.hsplit_buf_ovf); + u64_stats_update_end(&rxq->stats_sync); + goto bypass_hsplit; + } +@@ -3187,13 +3360,10 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M); + + rxq_set = container_of(rxq, struct idpf_rxq_set, rxq); +- if (!bufq_id) +- refillq = rxq_set->refillq0; +- else +- refillq = rxq_set->refillq1; ++ refillq = rxq_set->refillq[bufq_id]; + + /* retrieve buffer from the rxq */ +- rx_bufq = &rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq; ++ rx_bufq = &rxq->bufq_sets[bufq_id].bufq; + + buf_id = le16_to_cpu(rx_desc->buf_id); + +@@ -3205,7 +3375,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + + skb = idpf_rx_hdr_construct_skb(rxq, va, hdr_len); + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_inc(&rxq->q_stats.rx.hsplit_pkts); ++ u64_stats_inc(&rxq->q_stats.hsplit_pkts); + u64_stats_update_end(&rxq->stats_sync); + } + +@@ -3248,7 +3418,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + } + + /* send completed skb up the stack */ +- napi_gro_receive(&rxq->q_vector->napi, skb); ++ napi_gro_receive(rxq->napi, skb); + skb = NULL; + + /* update budget accounting */ +@@ -3259,8 +3429,8 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + + rxq->skb = skb; + u64_stats_update_begin(&rxq->stats_sync); +- u64_stats_add(&rxq->q_stats.rx.packets, total_rx_pkts); +- u64_stats_add(&rxq->q_stats.rx.bytes, total_rx_bytes); ++ u64_stats_add(&rxq->q_stats.packets, total_rx_pkts); ++ u64_stats_add(&rxq->q_stats.bytes, total_rx_bytes); + u64_stats_update_end(&rxq->stats_sync); + + /* guarantee a trip 
back through this routine if there was a failure */ +@@ -3270,19 +3440,16 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + /** + * idpf_rx_update_bufq_desc - Update buffer queue descriptor + * @bufq: Pointer to the buffer queue +- * @refill_desc: SW Refill queue descriptor containing buffer ID ++ * @buf_id: buffer ID + * @buf_desc: Buffer queue descriptor + * + * Return 0 on success and negative on failure. + */ +-static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, ++static int idpf_rx_update_bufq_desc(struct idpf_buf_queue *bufq, u32 buf_id, + struct virtchnl2_splitq_rx_buf_desc *buf_desc) + { + struct idpf_rx_buf *buf; + dma_addr_t addr; +- u16 buf_id; +- +- buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); + + buf = &bufq->rx_buf.buf[buf_id]; + +@@ -3293,7 +3460,7 @@ static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, + buf_desc->pkt_addr = cpu_to_le64(addr); + buf_desc->qword0.buf_id = cpu_to_le16(buf_id); + +- if (!bufq->rx_hsplit_en) ++ if (!idpf_queue_has(HSPLIT_EN, bufq)) + return 0; + + buf_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa + +@@ -3309,33 +3476,32 @@ static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, + * + * This function takes care of the buffer refill management + */ +-static void idpf_rx_clean_refillq(struct idpf_queue *bufq, ++static void idpf_rx_clean_refillq(struct idpf_buf_queue *bufq, + struct idpf_sw_queue *refillq) + { + struct virtchnl2_splitq_rx_buf_desc *buf_desc; + u16 bufq_nta = bufq->next_to_alloc; + u16 ntc = refillq->next_to_clean; + int cleaned = 0; +- u16 gen; + + buf_desc = &bufq->split_buf[bufq_nta]; + + /* make sure we stop at ring wrap in the unlikely case ring is full */ + while (likely(cleaned < refillq->desc_count)) { +- u16 refill_desc = refillq->ring[ntc]; ++ u32 buf_id, refill_desc = refillq->ring[ntc]; + bool failure; + +- gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc); +- if (test_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags) != gen) ++ if (idpf_queue_has(RFL_GEN_CHK, refillq) != ++ !!(refill_desc & IDPF_RX_BI_GEN_M)) + break; + +- failure = idpf_rx_update_bufq_desc(bufq, refill_desc, +- buf_desc); ++ buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); ++ failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc); + if (failure) + break; + + if (unlikely(++ntc == refillq->desc_count)) { +- change_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); ++ idpf_queue_change(RFL_GEN_CHK, refillq); + ntc = 0; + } + +@@ -3374,7 +3540,7 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq, + * this vector. Returns true if clean is complete within budget, false + * otherwise. 
+ */ +-static void idpf_rx_clean_refillq_all(struct idpf_queue *bufq) ++static void idpf_rx_clean_refillq_all(struct idpf_buf_queue *bufq) + { + struct idpf_bufq_set *bufq_set; + int i; +@@ -3437,6 +3603,8 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + ++ kfree(q_vector->complq); ++ q_vector->complq = NULL; + kfree(q_vector->bufq); + q_vector->bufq = NULL; + kfree(q_vector->tx); +@@ -3555,13 +3723,13 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector) + goto check_rx_itr; + + for (i = 0, packets = 0, bytes = 0; i < q_vector->num_txq; i++) { +- struct idpf_queue *txq = q_vector->tx[i]; ++ struct idpf_tx_queue *txq = q_vector->tx[i]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&txq->stats_sync); +- packets += u64_stats_read(&txq->q_stats.tx.packets); +- bytes += u64_stats_read(&txq->q_stats.tx.bytes); ++ packets += u64_stats_read(&txq->q_stats.packets); ++ bytes += u64_stats_read(&txq->q_stats.bytes); + } while (u64_stats_fetch_retry(&txq->stats_sync, start)); + } + +@@ -3574,13 +3742,13 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector) + return; + + for (i = 0, packets = 0, bytes = 0; i < q_vector->num_rxq; i++) { +- struct idpf_queue *rxq = q_vector->rx[i]; ++ struct idpf_rx_queue *rxq = q_vector->rx[i]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rxq->stats_sync); +- packets += u64_stats_read(&rxq->q_stats.rx.packets); +- bytes += u64_stats_read(&rxq->q_stats.rx.bytes); ++ packets += u64_stats_read(&rxq->q_stats.packets); ++ bytes += u64_stats_read(&rxq->q_stats.bytes); + } while (u64_stats_fetch_retry(&rxq->stats_sync, start)); + } + +@@ -3824,16 +3992,17 @@ static void idpf_vport_intr_napi_ena_all(struct idpf_vport *vport) + static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec, + int budget, int *cleaned) + { +- u16 num_txq = q_vec->num_txq; ++ u16 num_complq = q_vec->num_complq; + bool clean_complete = true; + int i, budget_per_q; + +- if (unlikely(!num_txq)) ++ if (unlikely(!num_complq)) + return true; + +- budget_per_q = DIV_ROUND_UP(budget, num_txq); +- for (i = 0; i < num_txq; i++) +- clean_complete &= idpf_tx_clean_complq(q_vec->tx[i], ++ budget_per_q = DIV_ROUND_UP(budget, num_complq); ++ ++ for (i = 0; i < num_complq; i++) ++ clean_complete &= idpf_tx_clean_complq(q_vec->complq[i], + budget_per_q, cleaned); + + return clean_complete; +@@ -3860,7 +4029,7 @@ static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, + */ + budget_per_q = num_rxq ? 
max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { +- struct idpf_queue *rxq = q_vec->rx[i]; ++ struct idpf_rx_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q); +@@ -3915,8 +4084,8 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) + * queues virtchnl message, as the interrupts will be disabled after + * that + */ +- if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE, +- q_vector->tx[0]->flags))) ++ if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, ++ q_vector->tx[0]))) + return budget; + else + return work_done; +@@ -3930,27 +4099,28 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) + */ + static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) + { ++ bool split = idpf_is_queue_model_split(vport->rxq_model); + u16 num_txq_grp = vport->num_txq_grp; +- int i, j, qv_idx, bufq_vidx = 0; + struct idpf_rxq_group *rx_qgrp; + struct idpf_txq_group *tx_qgrp; +- struct idpf_queue *q, *bufq; +- u16 q_index; ++ u32 i, qv_idx, q_index; + + for (i = 0, qv_idx = 0; i < vport->num_rxq_grp; i++) { + u16 num_rxq; + ++ if (qv_idx >= vport->num_q_vectors) ++ qv_idx = 0; ++ + rx_qgrp = &vport->rxq_grps[i]; +- if (idpf_is_queue_model_split(vport->rxq_model)) ++ if (split) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + +- for (j = 0; j < num_rxq; j++) { +- if (qv_idx >= vport->num_q_vectors) +- qv_idx = 0; ++ for (u32 j = 0; j < num_rxq; j++) { ++ struct idpf_rx_queue *q; + +- if (idpf_is_queue_model_split(vport->rxq_model)) ++ if (split) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; +@@ -3958,52 +4128,53 @@ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) + q_index = q->q_vector->num_rxq; + q->q_vector->rx[q_index] = q; + q->q_vector->num_rxq++; +- qv_idx++; ++ ++ if (split) ++ q->napi = &q->q_vector->napi; + } + +- if (idpf_is_queue_model_split(vport->rxq_model)) { +- for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { ++ if (split) { ++ for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) { ++ struct idpf_buf_queue *bufq; ++ + bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; +- bufq->q_vector = &vport->q_vectors[bufq_vidx]; ++ bufq->q_vector = &vport->q_vectors[qv_idx]; + q_index = bufq->q_vector->num_bufq; + bufq->q_vector->bufq[q_index] = bufq; + bufq->q_vector->num_bufq++; + } +- if (++bufq_vidx >= vport->num_q_vectors) +- bufq_vidx = 0; + } ++ ++ qv_idx++; + } + ++ split = idpf_is_queue_model_split(vport->txq_model); ++ + for (i = 0, qv_idx = 0; i < num_txq_grp; i++) { + u16 num_txq; + ++ if (qv_idx >= vport->num_q_vectors) ++ qv_idx = 0; ++ + tx_qgrp = &vport->txq_grps[i]; + num_txq = tx_qgrp->num_txq; + +- if (idpf_is_queue_model_split(vport->txq_model)) { +- if (qv_idx >= vport->num_q_vectors) +- qv_idx = 0; ++ for (u32 j = 0; j < num_txq; j++) { ++ struct idpf_tx_queue *q; + +- q = tx_qgrp->complq; ++ q = tx_qgrp->txqs[j]; + q->q_vector = &vport->q_vectors[qv_idx]; +- q_index = q->q_vector->num_txq; +- q->q_vector->tx[q_index] = q; +- q->q_vector->num_txq++; +- qv_idx++; +- } else { +- for (j = 0; j < num_txq; j++) { +- if (qv_idx >= vport->num_q_vectors) +- qv_idx = 0; ++ q->q_vector->tx[q->q_vector->num_txq++] = q; ++ } + +- q = tx_qgrp->txqs[j]; +- q->q_vector = &vport->q_vectors[qv_idx]; +- q_index = q->q_vector->num_txq; +- q->q_vector->tx[q_index] = q; +- q->q_vector->num_txq++; ++ if (split) { ++ struct idpf_compl_queue *q = 
tx_qgrp->complq; + +- qv_idx++; +- } ++ q->q_vector = &vport->q_vectors[qv_idx]; ++ q->q_vector->complq[q->q_vector->num_complq++] = q; + } ++ ++ qv_idx++; + } + } + +@@ -4079,18 +4250,22 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) + { + u16 txqs_per_vector, rxqs_per_vector, bufqs_per_vector; + struct idpf_q_vector *q_vector; +- int v_idx, err; ++ u32 complqs_per_vector, v_idx; + + vport->q_vectors = kcalloc(vport->num_q_vectors, + sizeof(struct idpf_q_vector), GFP_KERNEL); + if (!vport->q_vectors) + return -ENOMEM; + +- txqs_per_vector = DIV_ROUND_UP(vport->num_txq, vport->num_q_vectors); +- rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq, vport->num_q_vectors); ++ txqs_per_vector = DIV_ROUND_UP(vport->num_txq_grp, ++ vport->num_q_vectors); ++ rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq_grp, ++ vport->num_q_vectors); + bufqs_per_vector = vport->num_bufqs_per_qgrp * + DIV_ROUND_UP(vport->num_rxq_grp, + vport->num_q_vectors); ++ complqs_per_vector = DIV_ROUND_UP(vport->num_txq_grp, ++ vport->num_q_vectors); + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + q_vector = &vport->q_vectors[v_idx]; +@@ -4104,32 +4279,30 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) + q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC; + q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0; + +- q_vector->tx = kcalloc(txqs_per_vector, +- sizeof(struct idpf_queue *), ++ q_vector->tx = kcalloc(txqs_per_vector, sizeof(*q_vector->tx), + GFP_KERNEL); +- if (!q_vector->tx) { +- err = -ENOMEM; ++ if (!q_vector->tx) + goto error; +- } + +- q_vector->rx = kcalloc(rxqs_per_vector, +- sizeof(struct idpf_queue *), ++ q_vector->rx = kcalloc(rxqs_per_vector, sizeof(*q_vector->rx), + GFP_KERNEL); +- if (!q_vector->rx) { +- err = -ENOMEM; ++ if (!q_vector->rx) + goto error; +- } + + if (!idpf_is_queue_model_split(vport->rxq_model)) + continue; + + q_vector->bufq = kcalloc(bufqs_per_vector, +- sizeof(struct idpf_queue *), ++ sizeof(*q_vector->bufq), + GFP_KERNEL); +- if (!q_vector->bufq) { +- err = -ENOMEM; ++ if (!q_vector->bufq) ++ goto error; ++ ++ q_vector->complq = kcalloc(complqs_per_vector, ++ sizeof(*q_vector->complq), ++ GFP_KERNEL); ++ if (!q_vector->complq) + goto error; +- } + } + + return 0; +@@ -4137,7 +4310,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) + error: + idpf_vport_intr_rel(vport); + +- return err; ++ return -ENOMEM; + } + + /** +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 6dce14483215f..704aec5c383b6 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@ -4,6 +4,8 @@ + #ifndef _IDPF_TXRX_H_ + #define _IDPF_TXRX_H_ + ++#include ++ + #include + #include + #include +@@ -84,7 +86,7 @@ + do { \ + if (unlikely(++(ntc) == (rxq)->desc_count)) { \ + ntc = 0; \ +- change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags); \ ++ idpf_queue_change(GEN_CHK, rxq); \ + } \ + } while (0) + +@@ -111,10 +113,9 @@ do { \ + */ + #define IDPF_TX_SPLITQ_RE_MIN_GAP 64 + +-#define IDPF_RX_BI_BUFID_S 0 +-#define IDPF_RX_BI_BUFID_M GENMASK(14, 0) +-#define IDPF_RX_BI_GEN_S 15 +-#define IDPF_RX_BI_GEN_M BIT(IDPF_RX_BI_GEN_S) ++#define IDPF_RX_BI_GEN_M BIT(16) ++#define IDPF_RX_BI_BUFID_M GENMASK(15, 0) ++ + #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M + #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M + +@@ -122,7 +123,7 @@ do { \ + ((((txq)->next_to_clean > (txq)->next_to_use) ? 
0 : (txq)->desc_count) + \ + (txq)->next_to_clean - (txq)->next_to_use - 1) + +-#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->buf_stack.top) ++#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top) + #define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ + (txq)->desc_count >> 2) + +@@ -433,23 +434,37 @@ struct idpf_rx_ptype_decoded { + * to 1 and knows that reading a gen bit of 1 in any + * descriptor on the initial pass of the ring indicates a + * writeback. It also flips on every ring wrap. +- * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit +- * and RFLGQ_GEN is the SW bit. ++ * @__IDPF_Q_RFL_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW ++ * bit and Q_RFL_GEN is the SW bit. + * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling + * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions + * @__IDPF_Q_POLL_MODE: Enable poll mode ++ * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode ++ * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq) + * @__IDPF_Q_FLAGS_NBITS: Must be last + */ + enum idpf_queue_flags_t { + __IDPF_Q_GEN_CHK, +- __IDPF_RFLQ_GEN_CHK, ++ __IDPF_Q_RFL_GEN_CHK, + __IDPF_Q_FLOW_SCH_EN, + __IDPF_Q_SW_MARKER, + __IDPF_Q_POLL_MODE, ++ __IDPF_Q_CRC_EN, ++ __IDPF_Q_HSPLIT_EN, + + __IDPF_Q_FLAGS_NBITS, + }; + ++#define idpf_queue_set(f, q) __set_bit(__IDPF_Q_##f, (q)->flags) ++#define idpf_queue_clear(f, q) __clear_bit(__IDPF_Q_##f, (q)->flags) ++#define idpf_queue_change(f, q) __change_bit(__IDPF_Q_##f, (q)->flags) ++#define idpf_queue_has(f, q) test_bit(__IDPF_Q_##f, (q)->flags) ++ ++#define idpf_queue_has_clear(f, q) \ ++ __test_and_clear_bit(__IDPF_Q_##f, (q)->flags) ++#define idpf_queue_assign(f, q, v) \ ++ __assign_bit(__IDPF_Q_##f, (q)->flags, v) ++ + /** + * struct idpf_vec_regs + * @dyn_ctl_reg: Dynamic control interrupt register offset +@@ -495,7 +510,9 @@ struct idpf_intr_reg { + * @v_idx: Vector index + * @intr_reg: See struct idpf_intr_reg + * @num_txq: Number of TX queues ++ * @num_complq: number of completion queues + * @tx: Array of TX queues to service ++ * @complq: array of completion queues + * @tx_dim: Data for TX net_dim algorithm + * @tx_itr_value: TX interrupt throttling rate + * @tx_intr_mode: Dynamic ITR or not +@@ -519,21 +536,24 @@ struct idpf_q_vector { + struct idpf_intr_reg intr_reg; + + u16 num_txq; +- struct idpf_queue **tx; ++ u16 num_complq; ++ struct idpf_tx_queue **tx; ++ struct idpf_compl_queue **complq; ++ + struct dim tx_dim; + u16 tx_itr_value; + bool tx_intr_mode; + u32 tx_itr_idx; + + u16 num_rxq; +- struct idpf_queue **rx; ++ struct idpf_rx_queue **rx; + struct dim rx_dim; + u16 rx_itr_value; + bool rx_intr_mode; + u32 rx_itr_idx; + + u16 num_bufq; +- struct idpf_queue **bufq; ++ struct idpf_buf_queue **bufq; + + u16 total_events; + char *name; +@@ -564,11 +584,6 @@ struct idpf_cleaned_stats { + u32 bytes; + }; + +-union idpf_queue_stats { +- struct idpf_rx_queue_stats rx; +- struct idpf_tx_queue_stats tx; +-}; +- + #define IDPF_ITR_DYNAMIC 1 + #define IDPF_ITR_MAX 0x1FE0 + #define IDPF_ITR_20K 0x0032 +@@ -584,39 +599,114 @@ union idpf_queue_stats { + #define IDPF_DIM_DEFAULT_PROFILE_IX 1 + + /** +- * struct idpf_queue +- * @dev: Device back pointer for DMA mapping +- * @vport: Back pointer to associated vport +- * @txq_grp: See struct idpf_txq_group +- * @rxq_grp: See struct idpf_rxq_group +- * @idx: For buffer queue, it is used as group id, either 0 or 1. On clean, +- * buffer queue uses this index to determine which group of refill queues +- * to clean. 
+- * For TX queue, it is used as index to map between TX queue group and +- * hot path TX pointers stored in vport. Used in both singleq/splitq. +- * For RX queue, it is used to index to total RX queue across groups and ++ * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode ++ * @buf_stack: Stack of empty buffers to store buffer info for out of order ++ * buffer completions. See struct idpf_buf_lifo ++ * @sched_buf_hash: Hash table to store buffers ++ */ ++struct idpf_txq_stash { ++ struct idpf_buf_lifo buf_stack; ++ DECLARE_HASHTABLE(sched_buf_hash, 12); ++} ____cacheline_aligned; ++ ++/** ++ * struct idpf_rx_queue - software structure representing a receive queue ++ * @rx: universal receive descriptor array ++ * @single_buf: buffer descriptor array in singleq ++ * @desc_ring: virtual descriptor ring address ++ * @bufq_sets: Pointer to the array of buffer queues in splitq mode ++ * @napi: NAPI instance corresponding to this queue (splitq) ++ * @rx_buf: See struct idpf_rx_buf ++ * @pp: Page pool pointer in singleq mode ++ * @netdev: &net_device corresponding to this queue ++ * @tail: Tail offset. Used for both queue models single and split. ++ * @flags: See enum idpf_queue_flags_t ++ * @idx: For RX queue, it is used to index to total RX queue across groups and + * used for skb reporting. +- * @tail: Tail offset. Used for both queue models single and split. In splitq +- * model relevant only for TX queue and RX queue. +- * @tx_buf: See struct idpf_tx_buf +- * @rx_buf: Struct with RX buffer related members +- * @rx_buf.buf: See struct idpf_rx_buf +- * @rx_buf.hdr_buf_pa: DMA handle +- * @rx_buf.hdr_buf_va: Virtual address +- * @pp: Page pool pointer ++ * @desc_count: Number of descriptors ++ * @next_to_use: Next descriptor to use ++ * @next_to_clean: Next descriptor to clean ++ * @next_to_alloc: RX buffer to allocate at ++ * @rxdids: Supported RX descriptor ids ++ * @rx_ptype_lkup: LUT of Rx ptypes + * @skb: Pointer to the skb +- * @q_type: Queue type (TX, RX, TX completion, RX buffer) ++ * @stats_sync: See struct u64_stats_sync ++ * @q_stats: See union idpf_rx_queue_stats + * @q_id: Queue id +- * @desc_count: Number of descriptors +- * @next_to_use: Next descriptor to use. Relevant in both split & single txq +- * and bufq. +- * @next_to_clean: Next descriptor to clean. In split queue model, only +- * relevant to TX completion queue and RX queue. +- * @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model +- * only relevant to RX queue. 
++ * @size: Length of descriptor ring in bytes ++ * @dma: Physical address of ring ++ * @q_vector: Backreference to associated vector ++ * @rx_buffer_low_watermark: RX buffer low watermark ++ * @rx_hbuf_size: Header buffer size ++ * @rx_buf_size: Buffer size ++ * @rx_max_pkt_size: RX max packet size ++ */ ++struct idpf_rx_queue { ++ union { ++ union virtchnl2_rx_desc *rx; ++ struct virtchnl2_singleq_rx_buf_desc *single_buf; ++ ++ void *desc_ring; ++ }; ++ union { ++ struct { ++ struct idpf_bufq_set *bufq_sets; ++ struct napi_struct *napi; ++ }; ++ struct { ++ struct idpf_rx_buf *rx_buf; ++ struct page_pool *pp; ++ }; ++ }; ++ struct net_device *netdev; ++ void __iomem *tail; ++ ++ DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); ++ u16 idx; ++ u16 desc_count; ++ u16 next_to_use; ++ u16 next_to_clean; ++ u16 next_to_alloc; ++ ++ u32 rxdids; ++ ++ const struct idpf_rx_ptype_decoded *rx_ptype_lkup; ++ struct sk_buff *skb; ++ ++ struct u64_stats_sync stats_sync; ++ struct idpf_rx_queue_stats q_stats; ++ ++ /* Slowpath */ ++ u32 q_id; ++ u32 size; ++ dma_addr_t dma; ++ ++ struct idpf_q_vector *q_vector; ++ ++ u16 rx_buffer_low_watermark; ++ u16 rx_hbuf_size; ++ u16 rx_buf_size; ++ u16 rx_max_pkt_size; ++} ____cacheline_aligned; ++ ++/** ++ * struct idpf_tx_queue - software structure representing a transmit queue ++ * @base_tx: base Tx descriptor array ++ * @base_ctx: base Tx context descriptor array ++ * @flex_tx: flex Tx descriptor array ++ * @flex_ctx: flex Tx context descriptor array ++ * @desc_ring: virtual descriptor ring address ++ * @tx_buf: See struct idpf_tx_buf ++ * @txq_grp: See struct idpf_txq_group ++ * @dev: Device back pointer for DMA mapping ++ * @tail: Tail offset. Used for both queue models single and split + * @flags: See enum idpf_queue_flags_t +- * @q_stats: See union idpf_queue_stats +- * @stats_sync: See struct u64_stats_sync ++ * @idx: For TX queue, it is used as index to map between TX queue group and ++ * hot path TX pointers stored in vport. Used in both singleq/splitq. ++ * @desc_count: Number of descriptors ++ * @next_to_use: Next descriptor to use ++ * @next_to_clean: Next descriptor to clean ++ * @netdev: &net_device corresponding to this queue + * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on + * the TX completion queue, it can be for any TXQ associated + * with that completion queue. This means we can clean up to +@@ -625,34 +715,10 @@ union idpf_queue_stats { + * that single call to clean the completion queue. By doing so, + * we can update BQL with aggregate cleaned stats for each TXQ + * only once at the end of the cleaning routine. 
++ * @clean_budget: singleq only, queue cleaning budget + * @cleaned_pkts: Number of packets cleaned for the above said case +- * @rx_hsplit_en: RX headsplit enable +- * @rx_hbuf_size: Header buffer size +- * @rx_buf_size: Buffer size +- * @rx_max_pkt_size: RX max packet size +- * @rx_buf_stride: RX buffer stride +- * @rx_buffer_low_watermark: RX buffer low watermark +- * @rxdids: Supported RX descriptor ids +- * @q_vector: Backreference to associated vector +- * @size: Length of descriptor ring in bytes +- * @dma: Physical address of ring +- * @rx: universal receive descriptor array +- * @single_buf: Rx buffer descriptor array in singleq +- * @split_buf: Rx buffer descriptor array in splitq +- * @base_tx: basic Tx descriptor array +- * @base_ctx: basic Tx context descriptor array +- * @flex_tx: flex Tx descriptor array +- * @flex_ctx: flex Tx context descriptor array +- * @comp: completion descriptor array +- * @desc_ring: virtual descriptor ring address + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather + * @tx_min_pkt_len: Min supported packet length +- * @num_completions: Only relevant for TX completion queue. It tracks the +- * number of completions received to compare against the +- * number of completions pending, as accumulated by the +- * TX queues. +- * @buf_stack: Stack of empty buffers to store buffer info for out of order +- * buffer completions. See struct idpf_buf_lifo. + * @compl_tag_bufid_m: Completion tag buffer id mask + * @compl_tag_gen_s: Completion tag generation bit + * The format of the completion tag will change based on the TXQ +@@ -676,120 +742,188 @@ union idpf_queue_stats { + * This gives us 8*8160 = 65280 possible unique values. + * @compl_tag_cur_gen: Used to keep track of current completion tag generation + * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset +- * @sched_buf_hash: Hash table to stores buffers ++ * @stash: Tx buffer stash for Flow-based scheduling mode ++ * @stats_sync: See struct u64_stats_sync ++ * @q_stats: See union idpf_tx_queue_stats ++ * @q_id: Queue id ++ * @size: Length of descriptor ring in bytes ++ * @dma: Physical address of ring ++ * @q_vector: Backreference to associated vector + */ +-struct idpf_queue { +- struct device *dev; +- struct idpf_vport *vport; ++struct idpf_tx_queue { + union { +- struct idpf_txq_group *txq_grp; +- struct idpf_rxq_group *rxq_grp; ++ struct idpf_base_tx_desc *base_tx; ++ struct idpf_base_tx_ctx_desc *base_ctx; ++ union idpf_tx_flex_desc *flex_tx; ++ struct idpf_flex_tx_ctx_desc *flex_ctx; ++ ++ void *desc_ring; + }; +- u16 idx; ++ struct idpf_tx_buf *tx_buf; ++ struct idpf_txq_group *txq_grp; ++ struct device *dev; + void __iomem *tail; +- union { +- struct idpf_tx_buf *tx_buf; +- struct { +- struct idpf_rx_buf *buf; +- dma_addr_t hdr_buf_pa; +- void *hdr_buf_va; +- } rx_buf; +- }; +- struct page_pool *pp; +- struct sk_buff *skb; +- u16 q_type; +- u32 q_id; +- u16 desc_count; + ++ DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); ++ u16 idx; ++ u16 desc_count; + u16 next_to_use; + u16 next_to_clean; +- u16 next_to_alloc; +- DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + +- union idpf_queue_stats q_stats; +- struct u64_stats_sync stats_sync; ++ struct net_device *netdev; + +- u32 cleaned_bytes; ++ union { ++ u32 cleaned_bytes; ++ u32 clean_budget; ++ }; + u16 cleaned_pkts; + +- bool rx_hsplit_en; +- u16 rx_hbuf_size; +- u16 rx_buf_size; +- u16 rx_max_pkt_size; +- u16 rx_buf_stride; +- u8 rx_buffer_low_watermark; +- u64 rxdids; +- struct idpf_q_vector *q_vector; +- 
unsigned int size; ++ u16 tx_max_bufs; ++ u16 tx_min_pkt_len; ++ ++ u16 compl_tag_bufid_m; ++ u16 compl_tag_gen_s; ++ ++ u16 compl_tag_cur_gen; ++ u16 compl_tag_gen_max; ++ ++ struct idpf_txq_stash *stash; ++ ++ struct u64_stats_sync stats_sync; ++ struct idpf_tx_queue_stats q_stats; ++ ++ /* Slowpath */ ++ u32 q_id; ++ u32 size; + dma_addr_t dma; +- union { +- union virtchnl2_rx_desc *rx; + +- struct virtchnl2_singleq_rx_buf_desc *single_buf; +- struct virtchnl2_splitq_rx_buf_desc *split_buf; ++ struct idpf_q_vector *q_vector; ++} ____cacheline_aligned; + +- struct idpf_base_tx_desc *base_tx; +- struct idpf_base_tx_ctx_desc *base_ctx; +- union idpf_tx_flex_desc *flex_tx; +- struct idpf_flex_tx_ctx_desc *flex_ctx; ++/** ++ * struct idpf_buf_queue - software structure representing a buffer queue ++ * @split_buf: buffer descriptor array ++ * @rx_buf: Struct with RX buffer related members ++ * @rx_buf.buf: See struct idpf_rx_buf ++ * @rx_buf.hdr_buf_pa: DMA handle ++ * @rx_buf.hdr_buf_va: Virtual address ++ * @pp: Page pool pointer ++ * @tail: Tail offset ++ * @flags: See enum idpf_queue_flags_t ++ * @desc_count: Number of descriptors ++ * @next_to_use: Next descriptor to use ++ * @next_to_clean: Next descriptor to clean ++ * @next_to_alloc: RX buffer to allocate at ++ * @q_id: Queue id ++ * @size: Length of descriptor ring in bytes ++ * @dma: Physical address of ring ++ * @q_vector: Backreference to associated vector ++ * @rx_buffer_low_watermark: RX buffer low watermark ++ * @rx_hbuf_size: Header buffer size ++ * @rx_buf_size: Buffer size ++ */ ++struct idpf_buf_queue { ++ struct virtchnl2_splitq_rx_buf_desc *split_buf; ++ struct { ++ struct idpf_rx_buf *buf; ++ dma_addr_t hdr_buf_pa; ++ void *hdr_buf_va; ++ } rx_buf; ++ struct page_pool *pp; ++ void __iomem *tail; + +- struct idpf_splitq_tx_compl_desc *comp; ++ DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); ++ u16 desc_count; ++ u16 next_to_use; ++ u16 next_to_clean; ++ u16 next_to_alloc; + +- void *desc_ring; +- }; ++ /* Slowpath */ ++ u32 q_id; ++ u32 size; ++ dma_addr_t dma; + +- u16 tx_max_bufs; +- u8 tx_min_pkt_len; ++ struct idpf_q_vector *q_vector; + +- u32 num_completions; ++ u16 rx_buffer_low_watermark; ++ u16 rx_hbuf_size; ++ u16 rx_buf_size; ++} ____cacheline_aligned; + +- struct idpf_buf_lifo buf_stack; ++/** ++ * struct idpf_compl_queue - software structure representing a completion queue ++ * @comp: completion descriptor array ++ * @txq_grp: See struct idpf_txq_group ++ * @flags: See enum idpf_queue_flags_t ++ * @desc_count: Number of descriptors ++ * @next_to_use: Next descriptor to use. Relevant in both split & single txq ++ * and bufq. ++ * @next_to_clean: Next descriptor to clean ++ * @netdev: &net_device corresponding to this queue ++ * @clean_budget: queue cleaning budget ++ * @num_completions: Only relevant for TX completion queue. It tracks the ++ * number of completions received to compare against the ++ * number of completions pending, as accumulated by the ++ * TX queues. 
++ * @q_id: Queue id ++ * @size: Length of descriptor ring in bytes ++ * @dma: Physical address of ring ++ * @q_vector: Backreference to associated vector ++ */ ++struct idpf_compl_queue { ++ struct idpf_splitq_tx_compl_desc *comp; ++ struct idpf_txq_group *txq_grp; + +- u16 compl_tag_bufid_m; +- u16 compl_tag_gen_s; ++ DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); ++ u16 desc_count; ++ u16 next_to_use; ++ u16 next_to_clean; + +- u16 compl_tag_cur_gen; +- u16 compl_tag_gen_max; ++ struct net_device *netdev; ++ u32 clean_budget; ++ u32 num_completions; + +- DECLARE_HASHTABLE(sched_buf_hash, 12); +-} ____cacheline_internodealigned_in_smp; ++ /* Slowpath */ ++ u32 q_id; ++ u32 size; ++ dma_addr_t dma; ++ ++ struct idpf_q_vector *q_vector; ++} ____cacheline_aligned; + + /** + * struct idpf_sw_queue +- * @next_to_clean: Next descriptor to clean +- * @next_to_alloc: Buffer to allocate at +- * @flags: See enum idpf_queue_flags_t + * @ring: Pointer to the ring ++ * @flags: See enum idpf_queue_flags_t + * @desc_count: Descriptor count +- * @dev: Device back pointer for DMA mapping ++ * @next_to_use: Buffer to allocate at ++ * @next_to_clean: Next descriptor to clean + * + * Software queues are used in splitq mode to manage buffers between rxq + * producer and the bufq consumer. These are required in order to maintain a + * lockless buffer management system and are strictly software only constructs. + */ + struct idpf_sw_queue { +- u16 next_to_clean; +- u16 next_to_alloc; ++ u32 *ring; ++ + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); +- u16 *ring; + u16 desc_count; +- struct device *dev; +-} ____cacheline_internodealigned_in_smp; ++ u16 next_to_use; ++ u16 next_to_clean; ++} ____cacheline_aligned; + + /** + * struct idpf_rxq_set + * @rxq: RX queue +- * @refillq0: Pointer to refill queue 0 +- * @refillq1: Pointer to refill queue 1 ++ * @refillq: pointers to refill queues + * + * Splitq only. idpf_rxq_set associates an rxq with at an array of refillqs. + * Each rxq needs a refillq to return used buffers back to the respective bufq. + * Bufqs then clean these refillqs for buffers to give to hardware. + */ + struct idpf_rxq_set { +- struct idpf_queue rxq; +- struct idpf_sw_queue *refillq0; +- struct idpf_sw_queue *refillq1; ++ struct idpf_rx_queue rxq; ++ struct idpf_sw_queue *refillq[IDPF_MAX_BUFQS_PER_RXQ_GRP]; + }; + + /** +@@ -808,7 +942,7 @@ struct idpf_rxq_set { + * managed by at most two bufqs (depending on performance configuration). 
+ */ + struct idpf_bufq_set { +- struct idpf_queue bufq; ++ struct idpf_buf_queue bufq; + int num_refillqs; + struct idpf_sw_queue *refillqs; + }; +@@ -834,7 +968,7 @@ struct idpf_rxq_group { + union { + struct { + u16 num_rxq; +- struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q]; ++ struct idpf_rx_queue *rxqs[IDPF_LARGE_MAX_Q]; + } singleq; + struct { + u16 num_rxq_sets; +@@ -849,6 +983,7 @@ struct idpf_rxq_group { + * @vport: Vport back pointer + * @num_txq: Number of TX queues associated + * @txqs: Array of TX queue pointers ++ * @stashes: array of OOO stashes for the queues + * @complq: Associated completion queue pointer, split queue only + * @num_completions_pending: Total number of completions pending for the + * completion queue, acculumated for all TX queues +@@ -862,9 +997,10 @@ struct idpf_txq_group { + struct idpf_vport *vport; + + u16 num_txq; +- struct idpf_queue *txqs[IDPF_LARGE_MAX_Q]; ++ struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q]; ++ struct idpf_txq_stash *stashes; + +- struct idpf_queue *complq; ++ struct idpf_compl_queue *complq; + + u32 num_completions_pending; + }; +@@ -1001,28 +1137,26 @@ void idpf_deinit_rss(struct idpf_vport *vport); + int idpf_rx_bufs_init_all(struct idpf_vport *vport); + void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size); +-struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, ++struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq, + struct idpf_rx_buf *rx_buf, + unsigned int size); +-bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf); +-void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val); +-void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, ++void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, + bool xmit_more); + unsigned int idpf_size_to_txd_count(unsigned int size); +-netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb); +-void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, ++netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); ++void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 ring_idx); +-unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, ++unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb); + bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count); +-int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size); ++int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size); + void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); + netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev); + netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev); +-bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq, ++bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, + u16 cleaned_count); + int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +index a5f9b7a5effe7..44602b87cd411 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +@@ -750,7 +750,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport) + int i; + + for (i = 0; i < vport->num_txq; i++) +- set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags); 
++ idpf_queue_set(SW_MARKER, vport->txqs[i]); + + event = wait_event_timeout(vport->sw_marker_wq, + test_and_clear_bit(IDPF_VPORT_SW_MARKER, +@@ -758,7 +758,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport) + msecs_to_jiffies(500)); + + for (i = 0; i < vport->num_txq; i++) +- clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); ++ idpf_queue_clear(POLL_MODE, vport->txqs[i]); + + if (event) + return 0; +@@ -1092,7 +1092,6 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + int num_regs, u32 q_type) + { + struct idpf_adapter *adapter = vport->adapter; +- struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { +@@ -1111,6 +1110,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + u16 num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_regs; j++, k++) { ++ struct idpf_rx_queue *q; ++ + q = rx_qgrp->singleq.rxqs[j]; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); +@@ -1123,6 +1124,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_regs; j++, k++) { ++ struct idpf_buf_queue *q; ++ + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); +@@ -1449,19 +1452,19 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) + qi[k].model = + cpu_to_le16(vport->txq_model); + qi[k].type = +- cpu_to_le32(tx_qgrp->txqs[j]->q_type); ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + qi[k].ring_len = + cpu_to_le16(tx_qgrp->txqs[j]->desc_count); + qi[k].dma_ring_addr = + cpu_to_le64(tx_qgrp->txqs[j]->dma); + if (idpf_is_queue_model_split(vport->txq_model)) { +- struct idpf_queue *q = tx_qgrp->txqs[j]; ++ struct idpf_tx_queue *q = tx_qgrp->txqs[j]; + + qi[k].tx_compl_queue_id = + cpu_to_le16(tx_qgrp->complq->q_id); + qi[k].relative_queue_id = cpu_to_le16(j); + +- if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags)) ++ if (idpf_queue_has(FLOW_SCH_EN, q)) + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW); + else +@@ -1478,11 +1481,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) + + qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qi[k].model = cpu_to_le16(vport->txq_model); +- qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type); ++ qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma); + +- if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags)) ++ if (idpf_queue_has(FLOW_SCH_EN, tx_qgrp->complq)) + sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; + else + sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; +@@ -1567,17 +1570,18 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + goto setup_rxqs; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { +- struct idpf_queue *bufq = ++ struct idpf_buf_queue *bufq = + &rx_qgrp->splitq.bufq_sets[j].bufq; + + qi[k].queue_id = cpu_to_le32(bufq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); +- qi[k].type = cpu_to_le32(bufq->q_type); ++ qi[k].type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + qi[k].ring_len = cpu_to_le16(bufq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(bufq->dma); + qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size); +- qi[k].buffer_notif_stride = bufq->rx_buf_stride; ++ qi[k].buffer_notif_stride = IDPF_RX_BUF_STRIDE; + 
qi[k].rx_buffer_low_watermark = + cpu_to_le16(bufq->rx_buffer_low_watermark); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) +@@ -1591,7 +1595,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { +- struct idpf_queue *rxq; ++ struct idpf_rx_queue *rxq; + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + rxq = rx_qgrp->singleq.rxqs[j]; +@@ -1599,11 +1603,11 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + } + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + qi[k].rx_bufq1_id = +- cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id); ++ cpu_to_le16(rxq->bufq_sets[0].bufq.q_id); + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { + qi[k].bufq2_ena = IDPF_BUFQ2_ENA; + qi[k].rx_bufq2_id = +- cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id); ++ cpu_to_le16(rxq->bufq_sets[1].bufq.q_id); + } + qi[k].rx_buffer_low_watermark = + cpu_to_le16(rxq->rx_buffer_low_watermark); +@@ -1611,7 +1615,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + + common_qi_fields: +- if (rxq->rx_hsplit_en) { ++ if (idpf_queue_has(HSPLIT_EN, rxq)) { + qi[k].qflags |= + cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi[k].hdr_buffer_size = +@@ -1619,7 +1623,7 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) + } + qi[k].queue_id = cpu_to_le32(rxq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); +- qi[k].type = cpu_to_le32(rxq->q_type); ++ qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + qi[k].ring_len = cpu_to_le16(rxq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(rxq->dma); + qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size); +@@ -1706,7 +1710,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { +- qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); ++ qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1720,7 +1724,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + for (i = 0; i < vport->num_txq_grp; i++, k++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + +- qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type); ++ qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1741,12 +1745,12 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id); + qc[k].type = +- cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type); ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + } else { + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id); + qc[k].type = +- cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type); ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + } + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1761,10 +1765,11 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { +- struct idpf_queue *q; ++ const struct idpf_buf_queue *q; + + q = &rx_qgrp->splitq.bufq_sets[j].bufq; +- qc[k].type = 
cpu_to_le32(q->q_type); ++ qc[k].type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + qc[k].start_queue_id = cpu_to_le32(q->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } +@@ -1849,7 +1854,8 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { +- vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); ++ vqv[k].queue_type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + + if (idpf_is_queue_model_split(vport->txq_model)) { +@@ -1879,14 +1885,15 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { +- struct idpf_queue *rxq; ++ struct idpf_rx_queue *rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + rxq = rx_qgrp->singleq.rxqs[j]; + +- vqv[k].queue_type = cpu_to_le32(rxq->q_type); ++ vqv[k].queue_type = ++ cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + vqv[k].queue_id = cpu_to_le32(rxq->q_id); + vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx); + vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx); +@@ -1975,7 +1982,7 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) + * queues virtchnl message is sent + */ + for (i = 0; i < vport->num_txq; i++) +- set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); ++ idpf_queue_set(POLL_MODE, vport->txqs[i]); + + /* schedule the napi to receive all the marker packets */ + local_bh_disable(); +@@ -3242,7 +3249,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + int num_qids, + u32 q_type) + { +- struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { +@@ -3250,11 +3256,8 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + +- for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) { ++ for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) + tx_qgrp->txqs[j]->q_id = qids[k]; +- tx_qgrp->txqs[j]->q_type = +- VIRTCHNL2_QUEUE_TYPE_TX; +- } + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX: +@@ -3268,12 +3271,13 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_qids; j++, k++) { ++ struct idpf_rx_queue *q; ++ + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->q_id = qids[k]; +- q->q_type = VIRTCHNL2_QUEUE_TYPE_RX; + } + } + break; +@@ -3282,8 +3286,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + tx_qgrp->complq->q_id = qids[k]; +- tx_qgrp->complq->q_type = +- VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: +@@ -3292,9 +3294,10 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_qids; j++, k++) { ++ struct idpf_buf_queue *q; ++ + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->q_id = qids[k]; +- q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + } + } + break; +-- +2.43.0 + diff --git a/queue-6.10/idpf-stop-using-macros-for-accessing-queue-descripto.patch b/queue-6.10/idpf-stop-using-macros-for-accessing-queue-descripto.patch new file mode 100644 
index 00000000000..5614cb6692b --- /dev/null +++ b/queue-6.10/idpf-stop-using-macros-for-accessing-queue-descripto.patch @@ -0,0 +1,374 @@ +From f243a142c50fb6f93429320be341c8c8b3043ed8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jun 2024 15:53:37 +0200 +Subject: idpf: stop using macros for accessing queue descriptors + +From: Alexander Lobakin + +[ Upstream commit 66c27e3b19d5aae58d7f0145113de61d6fba5e09 ] + +In C, we have structures and unions. +Casting `void *` via macros is not only error-prone, but also looks +confusing and awful in general. +In preparation for splitting the queue structs, replace it with a +union and direct array dereferences. + +Reviewed-by: Przemek Kitszel +Reviewed-by: Mina Almasry +Signed-off-by: Alexander Lobakin +Signed-off-by: Tony Nguyen +Stable-dep-of: e4b398dd82f5 ("idpf: fix netdev Tx queue stop/wake") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/idpf/idpf.h | 1 - + .../net/ethernet/intel/idpf/idpf_lan_txrx.h | 2 + + .../ethernet/intel/idpf/idpf_singleq_txrx.c | 20 ++++---- + drivers/net/ethernet/intel/idpf/idpf_txrx.c | 32 ++++++------- + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 47 ++++++++++--------- + 5 files changed, 52 insertions(+), 50 deletions(-) + +diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h +index e7a0365382465..0b26dd9b8a512 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf.h ++++ b/drivers/net/ethernet/intel/idpf/idpf.h +@@ -20,7 +20,6 @@ struct idpf_vport_max_q; + #include + + #include "virtchnl2.h" +-#include "idpf_lan_txrx.h" + #include "idpf_txrx.h" + #include "idpf_controlq.h" + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h +index a5752dcab8887..8c7f8ef8f1a15 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h +@@ -4,6 +4,8 @@ + #ifndef _IDPF_LAN_TXRX_H_ + #define _IDPF_LAN_TXRX_H_ + ++#include ++ + enum idpf_rss_hash { + IDPF_HASH_INVALID = 0, + /* Values 1 - 28 are reserved for future use */ +diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +index 27b93592c4bab..b17d88e150006 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +@@ -205,7 +205,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + data_len = skb->data_len; + size = skb_headlen(skb); + +- tx_desc = IDPF_BASE_TX_DESC(tx_q, i); ++ tx_desc = &tx_q->base_tx[i]; + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + +@@ -239,7 +239,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + i++; + + if (i == tx_q->desc_count) { +- tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); ++ tx_desc = &tx_q->base_tx[0]; + i = 0; + } + +@@ -259,7 +259,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + i++; + + if (i == tx_q->desc_count) { +- tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); ++ tx_desc = &tx_q->base_tx[0]; + i = 0; + } + +@@ -307,7 +307,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) + memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf)); + txq->tx_buf[ntu].ctx_entry = true; + +- ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu); ++ ctx_desc = &txq->base_ctx[ntu]; + + IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu); + txq->next_to_use = ntu; +@@ -455,7 +455,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, + struct netdev_queue *nq; + bool dont_wake; + +- tx_desc = 
IDPF_BASE_TX_DESC(tx_q, ntc); ++ tx_desc = &tx_q->base_tx[ntc]; + tx_buf = &tx_q->tx_buf[ntc]; + ntc -= tx_q->desc_count; + +@@ -517,7 +517,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; +- tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); ++ tx_desc = &tx_q->base_tx[0]; + } + + /* unmap any remaining paged data */ +@@ -540,7 +540,7 @@ static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; +- tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); ++ tx_desc = &tx_q->base_tx[0]; + } + } while (likely(budget)); + +@@ -895,7 +895,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + if (!cleaned_count) + return false; + +- desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta); ++ desc = &rx_q->single_buf[nta]; + buf = &rx_q->rx_buf.buf[nta]; + + do { +@@ -915,7 +915,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + buf++; + nta++; + if (unlikely(nta == rx_q->desc_count)) { +- desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0); ++ desc = &rx_q->single_buf[0]; + buf = rx_q->rx_buf.buf; + nta = 0; + } +@@ -1016,7 +1016,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) + struct idpf_rx_buf *rx_buf; + + /* get the Rx desc from Rx queue based on 'next_to_clean' */ +- rx_desc = IDPF_RX_DESC(rx_q, ntc); ++ rx_desc = &rx_q->rx[ntc]; + + /* status_error_ptype_len will always be zero for unused + * descriptors because it's cleared in cleanup, and overlaps +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 20ca04320d4bd..948b485da539c 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -531,7 +531,7 @@ static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) + struct idpf_rx_buf *buf; + dma_addr_t addr; + +- splitq_rx_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, nta); ++ splitq_rx_desc = &bufq->split_buf[nta]; + buf = &bufq->rx_buf.buf[buf_id]; + + if (bufq->rx_hsplit_en) { +@@ -1584,7 +1584,7 @@ do { \ + if (unlikely(!(ntc))) { \ + ntc -= (txq)->desc_count; \ + buf = (txq)->tx_buf; \ +- desc = IDPF_FLEX_TX_DESC(txq, 0); \ ++ desc = &(txq)->flex_tx[0]; \ + } else { \ + (buf)++; \ + (desc)++; \ +@@ -1617,8 +1617,8 @@ static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, + s16 ntc = tx_q->next_to_clean; + struct idpf_tx_buf *tx_buf; + +- tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc); +- next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end); ++ tx_desc = &tx_q->flex_tx[ntc]; ++ next_pending_desc = &tx_q->flex_tx[end]; + tx_buf = &tx_q->tx_buf[ntc]; + ntc -= tx_q->desc_count; + +@@ -1814,7 +1814,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + int i; + + complq_budget = vport->compln_clean_budget; +- tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, ntc); ++ tx_desc = &complq->comp[ntc]; + ntc -= complq->desc_count; + + do { +@@ -1879,7 +1879,7 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + ntc++; + if (unlikely(!ntc)) { + ntc -= complq->desc_count; +- tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, 0); ++ tx_desc = &complq->comp[0]; + change_bit(__IDPF_Q_GEN_CHK, complq->flags); + } + +@@ -2143,7 +2143,7 @@ void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + * used one additional descriptor for a context + * descriptor. Reset that here. 
+ */ +- tx_desc = IDPF_FLEX_TX_DESC(txq, idx); ++ tx_desc = &txq->flex_tx[idx]; + memset(tx_desc, 0, sizeof(struct idpf_flex_tx_ctx_desc)); + if (idx == 0) + idx = txq->desc_count; +@@ -2202,7 +2202,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, + data_len = skb->data_len; + size = skb_headlen(skb); + +- tx_desc = IDPF_FLEX_TX_DESC(tx_q, i); ++ tx_desc = &tx_q->flex_tx[i]; + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + +@@ -2275,7 +2275,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, + i++; + + if (i == tx_q->desc_count) { +- tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); ++ tx_desc = &tx_q->flex_tx[0]; + i = 0; + tx_q->compl_tag_cur_gen = + IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); +@@ -2320,7 +2320,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, + i++; + + if (i == tx_q->desc_count) { +- tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); ++ tx_desc = &tx_q->flex_tx[0]; + i = 0; + tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } +@@ -2553,7 +2553,7 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) + txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + /* grab the next descriptor */ +- desc = IDPF_FLEX_TX_CTX_DESC(txq, i); ++ desc = &txq->flex_ctx[i]; + txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); + + return desc; +@@ -3128,7 +3128,6 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + struct idpf_sw_queue *refillq = NULL; + struct idpf_rxq_set *rxq_set = NULL; + struct idpf_rx_buf *rx_buf = NULL; +- union virtchnl2_rx_desc *desc; + unsigned int pkt_len = 0; + unsigned int hdr_len = 0; + u16 gen_id, buf_id = 0; +@@ -3138,8 +3137,7 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) + u8 rxdid; + + /* get the Rx desc from Rx queue based on 'next_to_clean' */ +- desc = IDPF_RX_DESC(rxq, ntc); +- rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)desc; ++ rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc +@@ -3320,11 +3318,11 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq, + int cleaned = 0; + u16 gen; + +- buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, bufq_nta); ++ buf_desc = &bufq->split_buf[bufq_nta]; + + /* make sure we stop at ring wrap in the unlikely case ring is full */ + while (likely(cleaned < refillq->desc_count)) { +- u16 refill_desc = IDPF_SPLITQ_RX_BI_DESC(refillq, ntc); ++ u16 refill_desc = refillq->ring[ntc]; + bool failure; + + gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc); +@@ -3342,7 +3340,7 @@ static void idpf_rx_clean_refillq(struct idpf_queue *bufq, + } + + if (unlikely(++bufq_nta == bufq->desc_count)) { +- buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, 0); ++ buf_desc = &bufq->split_buf[0]; + bufq_nta = 0; + } else { + buf_desc++; +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +index 551391e204647..6dce14483215f 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@ -8,6 +8,7 @@ + #include + #include + ++#include "idpf_lan_txrx.h" + #include "virtchnl2_lan_desc.h" + + #define IDPF_LARGE_MAX_Q 256 +@@ -117,24 +118,6 @@ do { \ + #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M + #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M + +-#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i) \ +- (&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i])) +-#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i) \ +- (&(((struct 
virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i])) +-#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i]) +- +-#define IDPF_BASE_TX_DESC(txq, i) \ +- (&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i])) +-#define IDPF_BASE_TX_CTX_DESC(txq, i) \ +- (&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i])) +-#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i) \ +- (&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i])) +- +-#define IDPF_FLEX_TX_DESC(txq, i) \ +- (&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i])) +-#define IDPF_FLEX_TX_CTX_DESC(txq, i) \ +- (&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i])) +- + #define IDPF_DESC_UNUSED(txq) \ + ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ + (txq)->next_to_clean - (txq)->next_to_use - 1) +@@ -317,8 +300,6 @@ struct idpf_rx_extracted { + + #define IDPF_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +-#define IDPF_RX_DESC(rxq, i) \ +- (&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i])) + + struct idpf_rx_buf { + struct page *page; +@@ -655,7 +636,15 @@ union idpf_queue_stats { + * @q_vector: Backreference to associated vector + * @size: Length of descriptor ring in bytes + * @dma: Physical address of ring +- * @desc_ring: Descriptor ring memory ++ * @rx: universal receive descriptor array ++ * @single_buf: Rx buffer descriptor array in singleq ++ * @split_buf: Rx buffer descriptor array in splitq ++ * @base_tx: basic Tx descriptor array ++ * @base_ctx: basic Tx context descriptor array ++ * @flex_tx: flex Tx descriptor array ++ * @flex_ctx: flex Tx context descriptor array ++ * @comp: completion descriptor array ++ * @desc_ring: virtual descriptor ring address + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather + * @tx_min_pkt_len: Min supported packet length + * @num_completions: Only relevant for TX completion queue. It tracks the +@@ -733,7 +722,21 @@ struct idpf_queue { + struct idpf_q_vector *q_vector; + unsigned int size; + dma_addr_t dma; +- void *desc_ring; ++ union { ++ union virtchnl2_rx_desc *rx; ++ ++ struct virtchnl2_singleq_rx_buf_desc *single_buf; ++ struct virtchnl2_splitq_rx_buf_desc *split_buf; ++ ++ struct idpf_base_tx_desc *base_tx; ++ struct idpf_base_tx_ctx_desc *base_ctx; ++ union idpf_tx_flex_desc *flex_tx; ++ struct idpf_flex_tx_ctx_desc *flex_ctx; ++ ++ struct idpf_splitq_tx_compl_desc *comp; ++ ++ void *desc_ring; ++ }; + + u16 tx_max_bufs; + u8 tx_min_pkt_len; +-- +2.43.0 + diff --git a/queue-6.10/kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch b/queue-6.10/kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch new file mode 100644 index 00000000000..c146abe3ffe --- /dev/null +++ b/queue-6.10/kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch @@ -0,0 +1,55 @@ +From 4f6130a16e1a443d0d3580cc59467478cdf9e865 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 6 May 2024 14:35:02 +0800 +Subject: KVM: x86: Drop unused check_apicv_inhibit_reasons() callback + definition + +From: Hou Wenlong + +[ Upstream commit c7d4c5f01961cdc4f1d29525e2b0d71f62c5bc33 ] + +The check_apicv_inhibit_reasons() callback implementation was dropped in +the commit b3f257a84696 ("KVM: x86: Track required APICv inhibits with +variable, not callback"), but the definition removal was missed in the +final version patch (it was removed in the v4). Therefore, it should be +dropped, and the vmx_check_apicv_inhibit_reasons() function declaration +should also be removed. 
+ +Signed-off-by: Hou Wenlong +Reviewed-by: Alejandro Jimenez +Link: https://lore.kernel.org/r/54abd1d0ccaba4d532f81df61259b9c0e021fbde.1714977229.git.houwenlong.hwl@antgroup.com +Signed-off-by: Sean Christopherson +Stable-dep-of: 73b42dc69be8 ("KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)") +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/kvm_host.h | 1 - + arch/x86/kvm/vmx/x86_ops.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index d0274b3be2c40..a571f89db6977 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1708,7 +1708,6 @@ struct kvm_x86_ops { + void (*enable_nmi_window)(struct kvm_vcpu *vcpu); + void (*enable_irq_window)(struct kvm_vcpu *vcpu); + void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); +- bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason); + const unsigned long required_apicv_inhibits; + bool allow_apicv_in_x2apic_without_x2apic_virtualization; + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); +diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h +index d404227c164d6..e46aba18600e7 100644 +--- a/arch/x86/kvm/vmx/x86_ops.h ++++ b/arch/x86/kvm/vmx/x86_ops.h +@@ -46,7 +46,6 @@ bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu); + void vmx_migrate_timers(struct kvm_vcpu *vcpu); + void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); + void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu); +-bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason); + void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); + void vmx_hwapic_isr_update(int max_isr); + int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu); +-- +2.43.0 + diff --git a/queue-6.10/kvm-x86-make-x2apic-id-100-readonly.patch b/queue-6.10/kvm-x86-make-x2apic-id-100-readonly.patch new file mode 100644 index 00000000000..a7c8d78ac3b --- /dev/null +++ b/queue-6.10/kvm-x86-make-x2apic-id-100-readonly.patch @@ -0,0 +1,128 @@ +From 4b84e6390a908a3653b1ae74c37856ea4da0a5c6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Aug 2024 13:29:40 -0700 +Subject: KVM: x86: Make x2APIC ID 100% readonly + +From: Sean Christopherson + +[ Upstream commit 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 ] + +Ignore the userspace provided x2APIC ID when fixing up APIC state for +KVM_SET_LAPIC, i.e. make the x2APIC fully readonly in KVM. Commit +a92e2543d6a8 ("KVM: x86: use hardware-compatible format for APIC ID +register"), which added the fixup, didn't intend to allow userspace to +modify the x2APIC ID. In fact, that commit is when KVM first started +treating the x2APIC ID as readonly, apparently to fix some race: + + static inline u32 kvm_apic_id(struct kvm_lapic *apic) + { +- return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff; ++ /* To avoid a race between apic_base and following APIC_ID update when ++ * switching to x2apic_mode, the x2apic mode returns initial x2apic id. ++ */ ++ if (apic_x2apic_mode(apic)) ++ return apic->vcpu->vcpu_id; ++ ++ return kvm_lapic_get_reg(apic, APIC_ID) >> 24; + } + +Furthermore, KVM doesn't support delivering interrupts to vCPUs with a +modified x2APIC ID, but KVM *does* return the modified value on a guest +RDMSR and for KVM_GET_LAPIC. I.e. no remotely sane setup can actually +work with a modified x2APIC ID. + +Making the x2APIC ID fully readonly fixes a WARN in KVM's optimized map +calculation, which expects the LDR to align with the x2APIC ID. 
+ + WARNING: CPU: 2 PID: 958 at arch/x86/kvm/lapic.c:331 kvm_recalculate_apic_map+0x609/0xa00 [kvm] + CPU: 2 PID: 958 Comm: recalc_apic_map Not tainted 6.4.0-rc3-vanilla+ #35 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.2-1-1 04/01/2014 + RIP: 0010:kvm_recalculate_apic_map+0x609/0xa00 [kvm] + Call Trace: + + kvm_apic_set_state+0x1cf/0x5b0 [kvm] + kvm_arch_vcpu_ioctl+0x1806/0x2100 [kvm] + kvm_vcpu_ioctl+0x663/0x8a0 [kvm] + __x64_sys_ioctl+0xb8/0xf0 + do_syscall_64+0x56/0x80 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + RIP: 0033:0x7fade8b9dd6f + +Unfortunately, the WARN can still trigger for other CPUs than the current +one by racing against KVM_SET_LAPIC, so remove it completely. + +Reported-by: Michal Luczaj +Closes: https://lore.kernel.org/all/814baa0c-1eaa-4503-129f-059917365e80@rbox.co +Reported-by: Haoyu Wu +Closes: https://lore.kernel.org/all/20240126161633.62529-1-haoyuwu254@gmail.com +Reported-by: syzbot+545f1326f405db4e1c3e@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/000000000000c2a6b9061cbca3c3@google.com +Signed-off-by: Sean Christopherson +Message-ID: <20240802202941.344889-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Stable-dep-of: 73b42dc69be8 ("KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/lapic.c | 22 +++++++++++++++------- + 1 file changed, 15 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index f1f54218b0603..9392d6e3d8e37 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new, + * reversing the LDR calculation to get cluster of APICs, i.e. no + * additional work is required. + */ +- if (apic_x2apic_mode(apic)) { +- WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic))); ++ if (apic_x2apic_mode(apic)) + return; +- } + + if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr, + &cluster, &mask))) { +@@ -2987,18 +2985,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s, bool set) + { + if (apic_x2apic_mode(vcpu->arch.apic)) { ++ u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic); + u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); + u64 icr; + + if (vcpu->kvm->arch.x2apic_format) { +- if (*id != vcpu->vcpu_id) ++ if (*id != x2apic_id) + return -EINVAL; + } else { ++ /* ++ * Ignore the userspace value when setting APIC state. ++ * KVM's model is that the x2APIC ID is readonly, e.g. ++ * KVM only supports delivering interrupts to KVM's ++ * version of the x2APIC ID. However, for backwards ++ * compatibility, don't reject attempts to set a ++ * mismatched ID for userspace that hasn't opted into ++ * x2apic_format. ++ */ + if (set) +- *id >>= 24; ++ *id = x2apic_id; + else +- *id <<= 24; ++ *id = x2apic_id << 24; + } + + /* +@@ -3007,7 +3015,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, + * split to ICR+ICR2 in userspace for backwards compatibility. 
+ */ + if (set) { +- *ldr = kvm_apic_calc_x2apic_ldr(*id); ++ *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); + + icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | + (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; +-- +2.43.0 + diff --git a/queue-6.10/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch b/queue-6.10/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch new file mode 100644 index 00000000000..882b1667dcb --- /dev/null +++ b/queue-6.10/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch @@ -0,0 +1,160 @@ +From 405f1ac9c78994085351ac64ccab5331d42128af Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 Jul 2024 16:51:00 -0700 +Subject: KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC) + +From: Sean Christopherson + +[ Upstream commit 73b42dc69be8564d4951a14d00f827929fe5ef79 ] + +Re-introduce the "split" x2APIC ICR storage that KVM used prior to Intel's +IPI virtualization support, but only for AMD. While not stated anywhere +in the APM, despite stating the ICR is a single 64-bit register, AMD CPUs +store the 64-bit ICR as two separate 32-bit values in ICR and ICR2. When +IPI virtualization (IPIv on Intel, all AVIC flavors on AMD) is enabled, +KVM needs to match CPU behavior as some ICR ICR writes will be handled by +the CPU, not by KVM. + +Add a kvm_x86_ops knob to control the underlying format used by the CPU to +store the x2APIC ICR, and tune it to AMD vs. Intel regardless of whether +or not x2AVIC is enabled. If KVM is handling all ICR writes, the storage +format for x2APIC mode doesn't matter, and having the behavior follow AMD +versus Intel will provide better test coverage and ease debugging. + +Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") +Cc: stable@vger.kernel.org +Cc: Maxim Levitsky +Cc: Suravee Suthikulpanit +Link: https://lore.kernel.org/r/20240719235107.3023592-4-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/kvm_host.h | 2 ++ + arch/x86/kvm/lapic.c | 42 +++++++++++++++++++++++---------- + arch/x86/kvm/svm/svm.c | 2 ++ + arch/x86/kvm/vmx/main.c | 2 ++ + 4 files changed, 36 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index a571f89db6977..e18399d08fb17 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1708,6 +1708,8 @@ struct kvm_x86_ops { + void (*enable_nmi_window)(struct kvm_vcpu *vcpu); + void (*enable_irq_window)(struct kvm_vcpu *vcpu); + void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); ++ ++ const bool x2apic_icr_is_split; + const unsigned long required_apicv_inhibits; + bool allow_apicv_in_x2apic_without_x2apic_virtualization; + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index 9392d6e3d8e37..523d02c50562f 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -2469,11 +2469,25 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) + data &= ~APIC_ICR_BUSY; + + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); +- kvm_lapic_set_reg64(apic, APIC_ICR, data); ++ if (kvm_x86_ops.x2apic_icr_is_split) { ++ kvm_lapic_set_reg(apic, APIC_ICR, data); ++ kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32); ++ } else { ++ kvm_lapic_set_reg64(apic, APIC_ICR, data); ++ } + trace_kvm_apic_write(APIC_ICR, data); + return 0; + } + ++static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic) ++{ ++ if (kvm_x86_ops.x2apic_icr_is_split) 
++ return (u64)kvm_lapic_get_reg(apic, APIC_ICR) | ++ (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32; ++ ++ return kvm_lapic_get_reg64(apic, APIC_ICR); ++} ++ + /* emulate APIC access in a trap manner */ + void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) + { +@@ -2491,7 +2505,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) + * maybe-unecessary write, and both are in the noise anyways. + */ + if (apic_x2apic_mode(apic) && offset == APIC_ICR) +- WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR))); ++ WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic))); + else + kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); + } +@@ -3011,18 +3025,22 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, + + /* + * In x2APIC mode, the LDR is fixed and based on the id. And +- * ICR is internally a single 64-bit register, but needs to be +- * split to ICR+ICR2 in userspace for backwards compatibility. ++ * if the ICR is _not_ split, ICR is internally a single 64-bit ++ * register, but needs to be split to ICR+ICR2 in userspace for ++ * backwards compatibility. + */ +- if (set) { ++ if (set) + *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); + +- icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | +- (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; +- __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); +- } else { +- icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); +- __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); ++ if (!kvm_x86_ops.x2apic_icr_is_split) { ++ if (set) { ++ icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | ++ (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; ++ __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); ++ } else { ++ icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); ++ __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); ++ } + } + } + +@@ -3219,7 +3237,7 @@ static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) + u32 low; + + if (reg == APIC_ICR) { +- *data = kvm_lapic_get_reg64(apic, APIC_ICR); ++ *data = kvm_x2apic_icr_read(apic); + return 0; + } + +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index 0357f7af55966..6d5da700268a5 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -5051,6 +5051,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { + .enable_nmi_window = svm_enable_nmi_window, + .enable_irq_window = svm_enable_irq_window, + .update_cr8_intercept = svm_update_cr8_intercept, ++ ++ .x2apic_icr_is_split = true, + .set_virtual_apic_mode = avic_refresh_virtual_apic_mode, + .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl, + .apicv_post_state_restore = avic_apicv_post_state_restore, +diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c +index 547fca3709feb..35c2c004dacd2 100644 +--- a/arch/x86/kvm/vmx/main.c ++++ b/arch/x86/kvm/vmx/main.c +@@ -89,6 +89,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { + .enable_nmi_window = vmx_enable_nmi_window, + .enable_irq_window = vmx_enable_irq_window, + .update_cr8_intercept = vmx_update_cr8_intercept, ++ ++ .x2apic_icr_is_split = false, + .set_virtual_apic_mode = vmx_set_virtual_apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, +-- +2.43.0 + diff --git a/queue-6.10/lsm-infrastructure-management-of-the-sock-security.patch b/queue-6.10/lsm-infrastructure-management-of-the-sock-security.patch new file mode 100644 index 00000000000..5d1d5b5cff2 --- /dev/null +++ 
b/queue-6.10/lsm-infrastructure-management-of-the-sock-security.patch @@ -0,0 +1,933 @@ +From d24e5d45e9ad144b89c1f1442d35ada883c2839a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Jul 2024 14:32:25 -0700 +Subject: lsm: infrastructure management of the sock security + +From: Casey Schaufler + +[ Upstream commit 2aff9d20d50ac45dd13a013ef5231f4fb8912356 ] + +Move management of the sock->sk_security blob out +of the individual security modules and into the security +infrastructure. Instead of allocating the blobs from within +the modules the modules tell the infrastructure how much +space is required, and the space is allocated there. + +Acked-by: Paul Moore +Reviewed-by: Kees Cook +Reviewed-by: John Johansen +Acked-by: Stephen Smalley +Signed-off-by: Casey Schaufler +[PM: subject tweak] +Signed-off-by: Paul Moore +Stable-dep-of: 63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook") +Signed-off-by: Sasha Levin +--- + include/linux/lsm_hooks.h | 1 + + security/apparmor/include/net.h | 3 +- + security/apparmor/lsm.c | 17 +------ + security/apparmor/net.c | 2 +- + security/security.c | 36 +++++++++++++- + security/selinux/hooks.c | 80 ++++++++++++++----------------- + security/selinux/include/objsec.h | 5 ++ + security/selinux/netlabel.c | 23 ++++----- + security/smack/smack.h | 5 ++ + security/smack/smack_lsm.c | 70 +++++++++++++-------------- + security/smack/smack_netfilter.c | 4 +- + 11 files changed, 133 insertions(+), 113 deletions(-) + +diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h +index a2ade0ffe9e7d..efd4a0655159c 100644 +--- a/include/linux/lsm_hooks.h ++++ b/include/linux/lsm_hooks.h +@@ -73,6 +73,7 @@ struct lsm_blob_sizes { + int lbs_cred; + int lbs_file; + int lbs_inode; ++ int lbs_sock; + int lbs_superblock; + int lbs_ipc; + int lbs_msg_msg; +diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h +index 67bf888c3bd6b..c42ed8a73f1ce 100644 +--- a/security/apparmor/include/net.h ++++ b/security/apparmor/include/net.h +@@ -51,10 +51,9 @@ struct aa_sk_ctx { + struct aa_label *peer; + }; + +-#define SK_CTX(X) ((X)->sk_security) + static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) + { +- return sk->sk_security; ++ return sk->sk_security + apparmor_blob_sizes.lbs_sock; + } + + #define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \ +diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c +index 4373b914acf20..b8366fca98d23 100644 +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -1057,27 +1057,12 @@ static int apparmor_userns_create(const struct cred *cred) + return error; + } + +-static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags) +-{ +- struct aa_sk_ctx *ctx; +- +- ctx = kzalloc(sizeof(*ctx), flags); +- if (!ctx) +- return -ENOMEM; +- +- sk->sk_security = ctx; +- +- return 0; +-} +- + static void apparmor_sk_free_security(struct sock *sk) + { + struct aa_sk_ctx *ctx = aa_sock(sk); + +- sk->sk_security = NULL; + aa_put_label(ctx->label); + aa_put_label(ctx->peer); +- kfree(ctx); + } + + /** +@@ -1432,6 +1417,7 @@ struct lsm_blob_sizes apparmor_blob_sizes __ro_after_init = { + .lbs_cred = sizeof(struct aa_label *), + .lbs_file = sizeof(struct aa_file_ctx), + .lbs_task = sizeof(struct aa_task_ctx), ++ .lbs_sock = sizeof(struct aa_sk_ctx), + }; + + static const struct lsm_id apparmor_lsmid = { +@@ -1477,7 +1463,6 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { + LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), + 
LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), + +- LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), + LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), + LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), + +diff --git a/security/apparmor/net.c b/security/apparmor/net.c +index 87e934b2b5488..77413a5191179 100644 +--- a/security/apparmor/net.c ++++ b/security/apparmor/net.c +@@ -151,7 +151,7 @@ static int aa_label_sk_perm(const struct cred *subj_cred, + const char *op, u32 request, + struct sock *sk) + { +- struct aa_sk_ctx *ctx = SK_CTX(sk); ++ struct aa_sk_ctx *ctx = aa_sock(sk); + int error = 0; + + AA_BUG(!label); +diff --git a/security/security.c b/security/security.c +index 41ab07eafc7fa..43166e341526c 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + /* How many LSMs were built into the kernel? */ + #define LSM_COUNT (__end_lsm_info - __start_lsm_info) +@@ -227,6 +228,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) + lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); + lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc); + lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg); ++ lsm_set_blob_size(&needed->lbs_sock, &blob_sizes.lbs_sock); + lsm_set_blob_size(&needed->lbs_superblock, &blob_sizes.lbs_superblock); + lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); + lsm_set_blob_size(&needed->lbs_xattr_count, +@@ -401,6 +403,7 @@ static void __init ordered_lsm_init(void) + init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); + init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); + init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg); ++ init_debug("sock blob size = %d\n", blob_sizes.lbs_sock); + init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock); + init_debug("task blob size = %d\n", blob_sizes.lbs_task); + init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count); +@@ -4673,6 +4676,28 @@ int security_socket_getpeersec_dgram(struct socket *sock, + } + EXPORT_SYMBOL(security_socket_getpeersec_dgram); + ++/** ++ * lsm_sock_alloc - allocate a composite sock blob ++ * @sock: the sock that needs a blob ++ * @priority: allocation mode ++ * ++ * Allocate the sock blob for all the modules ++ * ++ * Returns 0, or -ENOMEM if memory can't be allocated. 
++ */ ++static int lsm_sock_alloc(struct sock *sock, gfp_t priority) ++{ ++ if (blob_sizes.lbs_sock == 0) { ++ sock->sk_security = NULL; ++ return 0; ++ } ++ ++ sock->sk_security = kzalloc(blob_sizes.lbs_sock, priority); ++ if (sock->sk_security == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ + /** + * security_sk_alloc() - Allocate and initialize a sock's LSM blob + * @sk: sock +@@ -4686,7 +4711,14 @@ EXPORT_SYMBOL(security_socket_getpeersec_dgram); + */ + int security_sk_alloc(struct sock *sk, int family, gfp_t priority) + { +- return call_int_hook(sk_alloc_security, sk, family, priority); ++ int rc = lsm_sock_alloc(sk, priority); ++ ++ if (unlikely(rc)) ++ return rc; ++ rc = call_int_hook(sk_alloc_security, sk, family, priority); ++ if (unlikely(rc)) ++ security_sk_free(sk); ++ return rc; + } + + /** +@@ -4698,6 +4730,8 @@ int security_sk_alloc(struct sock *sk, int family, gfp_t priority) + void security_sk_free(struct sock *sk) + { + call_void_hook(sk_free_security, sk); ++ kfree(sk->sk_security); ++ sk->sk_security = NULL; + } + + /** +diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c +index 400eca4ad0fb6..c11303d662d80 100644 +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -4594,7 +4594,7 @@ static int socket_sockcreate_sid(const struct task_security_struct *tsec, + + static int sock_has_perm(struct sock *sk, u32 perms) + { +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + struct common_audit_data ad; + struct lsm_network_audit net; + +@@ -4662,7 +4662,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, + isec->initialized = LABEL_INITIALIZED; + + if (sock->sk) { +- sksec = sock->sk->sk_security; ++ sksec = selinux_sock(sock->sk); + sksec->sclass = sclass; + sksec->sid = sid; + /* Allows detection of the first association on this socket */ +@@ -4678,8 +4678,8 @@ static int selinux_socket_post_create(struct socket *sock, int family, + static int selinux_socket_socketpair(struct socket *socka, + struct socket *sockb) + { +- struct sk_security_struct *sksec_a = socka->sk->sk_security; +- struct sk_security_struct *sksec_b = sockb->sk->sk_security; ++ struct sk_security_struct *sksec_a = selinux_sock(socka->sk); ++ struct sk_security_struct *sksec_b = selinux_sock(sockb->sk); + + sksec_a->peer_sid = sksec_b->sid; + sksec_b->peer_sid = sksec_a->sid; +@@ -4694,7 +4694,7 @@ static int selinux_socket_socketpair(struct socket *socka, + static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) + { + struct sock *sk = sock->sk; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + u16 family; + int err; + +@@ -4834,7 +4834,7 @@ static int selinux_socket_connect_helper(struct socket *sock, + struct sockaddr *address, int addrlen) + { + struct sock *sk = sock->sk; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + int err; + + err = sock_has_perm(sk, SOCKET__CONNECT); +@@ -5012,9 +5012,9 @@ static int selinux_socket_unix_stream_connect(struct sock *sock, + struct sock *other, + struct sock *newsk) + { +- struct sk_security_struct *sksec_sock = sock->sk_security; +- struct sk_security_struct *sksec_other = other->sk_security; +- struct sk_security_struct *sksec_new = newsk->sk_security; ++ struct sk_security_struct *sksec_sock = selinux_sock(sock); ++ struct sk_security_struct *sksec_other = selinux_sock(other); ++ struct 
sk_security_struct *sksec_new = selinux_sock(newsk); + struct common_audit_data ad; + struct lsm_network_audit net; + int err; +@@ -5043,8 +5043,8 @@ static int selinux_socket_unix_stream_connect(struct sock *sock, + static int selinux_socket_unix_may_send(struct socket *sock, + struct socket *other) + { +- struct sk_security_struct *ssec = sock->sk->sk_security; +- struct sk_security_struct *osec = other->sk->sk_security; ++ struct sk_security_struct *ssec = selinux_sock(sock->sk); ++ struct sk_security_struct *osec = selinux_sock(other->sk); + struct common_audit_data ad; + struct lsm_network_audit net; + +@@ -5081,7 +5081,7 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, + u16 family) + { + int err = 0; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + u32 sk_sid = sksec->sid; + struct common_audit_data ad; + struct lsm_network_audit net; +@@ -5110,7 +5110,7 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, + static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) + { + int err, peerlbl_active, secmark_active; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + u16 family = sk->sk_family; + u32 sk_sid = sksec->sid; + struct common_audit_data ad; +@@ -5178,7 +5178,7 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, + int err = 0; + char *scontext = NULL; + u32 scontext_len; +- struct sk_security_struct *sksec = sock->sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sock->sk); + u32 peer_sid = SECSID_NULL; + + if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET || +@@ -5238,34 +5238,27 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, + + static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) + { +- struct sk_security_struct *sksec; +- +- sksec = kzalloc(sizeof(*sksec), priority); +- if (!sksec) +- return -ENOMEM; ++ struct sk_security_struct *sksec = selinux_sock(sk); + + sksec->peer_sid = SECINITSID_UNLABELED; + sksec->sid = SECINITSID_UNLABELED; + sksec->sclass = SECCLASS_SOCKET; + selinux_netlbl_sk_security_reset(sksec); +- sk->sk_security = sksec; + + return 0; + } + + static void selinux_sk_free_security(struct sock *sk) + { +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + +- sk->sk_security = NULL; + selinux_netlbl_sk_security_free(sksec); +- kfree(sksec); + } + + static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) + { +- struct sk_security_struct *sksec = sk->sk_security; +- struct sk_security_struct *newsksec = newsk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); ++ struct sk_security_struct *newsksec = selinux_sock(newsk); + + newsksec->sid = sksec->sid; + newsksec->peer_sid = sksec->peer_sid; +@@ -5279,7 +5272,7 @@ static void selinux_sk_getsecid(const struct sock *sk, u32 *secid) + if (!sk) + *secid = SECINITSID_ANY_SOCKET; + else { +- const struct sk_security_struct *sksec = sk->sk_security; ++ const struct sk_security_struct *sksec = selinux_sock(sk); + + *secid = sksec->sid; + } +@@ -5289,7 +5282,7 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) + { + struct inode_security_struct *isec = + inode_security_novalidate(SOCK_INODE(parent)); +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + + if 
(sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || + sk->sk_family == PF_UNIX) +@@ -5306,7 +5299,7 @@ static int selinux_sctp_process_new_assoc(struct sctp_association *asoc, + { + struct sock *sk = asoc->base.sk; + u16 family = sk->sk_family; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + struct common_audit_data ad; + struct lsm_network_audit net; + int err; +@@ -5361,7 +5354,7 @@ static int selinux_sctp_process_new_assoc(struct sctp_association *asoc, + static int selinux_sctp_assoc_request(struct sctp_association *asoc, + struct sk_buff *skb) + { +- struct sk_security_struct *sksec = asoc->base.sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(asoc->base.sk); + u32 conn_sid; + int err; + +@@ -5394,7 +5387,7 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, + static int selinux_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) + { +- struct sk_security_struct *sksec = asoc->base.sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(asoc->base.sk); + + if (!selinux_policycap_extsockclass()) + return 0; +@@ -5493,8 +5486,8 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, + static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, + struct sock *newsk) + { +- struct sk_security_struct *sksec = sk->sk_security; +- struct sk_security_struct *newsksec = newsk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); ++ struct sk_security_struct *newsksec = selinux_sock(newsk); + + /* If policy does not support SECCLASS_SCTP_SOCKET then call + * the non-sctp clone version. +@@ -5510,8 +5503,8 @@ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk + + static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk) + { +- struct sk_security_struct *ssksec = ssk->sk_security; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *ssksec = selinux_sock(ssk); ++ struct sk_security_struct *sksec = selinux_sock(sk); + + ssksec->sclass = sksec->sclass; + ssksec->sid = sksec->sid; +@@ -5526,7 +5519,7 @@ static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk) + static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req) + { +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + int err; + u16 family = req->rsk_ops->family; + u32 connsid; +@@ -5547,7 +5540,7 @@ static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb, + static void selinux_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) + { +- struct sk_security_struct *newsksec = newsk->sk_security; ++ struct sk_security_struct *newsksec = selinux_sock(newsk); + + newsksec->sid = req->secid; + newsksec->peer_sid = req->peer_secid; +@@ -5564,7 +5557,7 @@ static void selinux_inet_csk_clone(struct sock *newsk, + static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) + { + u16 family = sk->sk_family; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + + /* handle mapped IPv4 packets arriving via IPv6 sockets */ + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) +@@ -5639,7 +5632,7 @@ static int selinux_tun_dev_attach_queue(void *security) + static int selinux_tun_dev_attach(struct sock *sk, void *security) + { + struct 
tun_security_struct *tunsec = security; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + + /* we don't currently perform any NetLabel based labeling here and it + * isn't clear that we would want to do so anyway; while we could apply +@@ -5762,7 +5755,7 @@ static unsigned int selinux_ip_output(void *priv, struct sk_buff *skb, + return NF_ACCEPT; + + /* standard practice, label using the parent socket */ +- sksec = sk->sk_security; ++ sksec = selinux_sock(sk); + sid = sksec->sid; + } else + sid = SECINITSID_KERNEL; +@@ -5785,7 +5778,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, + sk = skb_to_full_sk(skb); + if (sk == NULL) + return NF_ACCEPT; +- sksec = sk->sk_security; ++ sksec = selinux_sock(sk); + + ad_net_init_from_iif(&ad, &net, state->out->ifindex, state->pf); + if (selinux_parse_skb(skb, &ad, NULL, 0, &proto)) +@@ -5874,7 +5867,7 @@ static unsigned int selinux_ip_postroute(void *priv, + u32 skb_sid; + struct sk_security_struct *sksec; + +- sksec = sk->sk_security; ++ sksec = selinux_sock(sk); + if (selinux_skb_peerlbl_sid(skb, family, &skb_sid)) + return NF_DROP; + /* At this point, if the returned skb peerlbl is SECSID_NULL +@@ -5903,7 +5896,7 @@ static unsigned int selinux_ip_postroute(void *priv, + } else { + /* Locally generated packet, fetch the security label from the + * associated socket. */ +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + peer_sid = sksec->sid; + secmark_perm = PACKET__SEND; + } +@@ -5946,7 +5939,7 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) + unsigned int data_len = skb->len; + unsigned char *data = skb->data; + struct nlmsghdr *nlh; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + u16 sclass = sksec->sclass; + u32 perm; + +@@ -7004,6 +6997,7 @@ struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = { + .lbs_inode = sizeof(struct inode_security_struct), + .lbs_ipc = sizeof(struct ipc_security_struct), + .lbs_msg_msg = sizeof(struct msg_security_struct), ++ .lbs_sock = sizeof(struct sk_security_struct), + .lbs_superblock = sizeof(struct superblock_security_struct), + .lbs_xattr_count = SELINUX_INODE_INIT_XATTRS, + }; +diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h +index dea1d6f3ed2d3..b074099acbaf7 100644 +--- a/security/selinux/include/objsec.h ++++ b/security/selinux/include/objsec.h +@@ -195,4 +195,9 @@ selinux_superblock(const struct super_block *superblock) + return superblock->s_security + selinux_blob_sizes.lbs_superblock; + } + ++static inline struct sk_security_struct *selinux_sock(const struct sock *sock) ++{ ++ return sock->sk_security + selinux_blob_sizes.lbs_sock; ++} ++ + #endif /* _SELINUX_OBJSEC_H_ */ +diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c +index 55885634e8804..fbe5f8c29f813 100644 +--- a/security/selinux/netlabel.c ++++ b/security/selinux/netlabel.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -68,7 +69,7 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb, + static struct netlbl_lsm_secattr *selinux_netlbl_sock_genattr(struct sock *sk) + { + int rc; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + struct netlbl_lsm_secattr *secattr; + + if (sksec->nlbl_secattr != NULL) +@@ -100,7 +101,7 @@ 
static struct netlbl_lsm_secattr *selinux_netlbl_sock_getattr( + const struct sock *sk, + u32 sid) + { +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + struct netlbl_lsm_secattr *secattr = sksec->nlbl_secattr; + + if (secattr == NULL) +@@ -240,7 +241,7 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, + * being labeled by it's parent socket, if it is just exit */ + sk = skb_to_full_sk(skb); + if (sk != NULL) { +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + + if (sksec->nlbl_state != NLBL_REQSKB) + return 0; +@@ -277,7 +278,7 @@ int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, + { + int rc; + struct netlbl_lsm_secattr secattr; +- struct sk_security_struct *sksec = asoc->base.sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(asoc->base.sk); + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + +@@ -356,7 +357,7 @@ int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family) + */ + void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family) + { +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + + if (family == PF_INET) + sksec->nlbl_state = NLBL_LABELED; +@@ -374,8 +375,8 @@ void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family) + */ + void selinux_netlbl_sctp_sk_clone(struct sock *sk, struct sock *newsk) + { +- struct sk_security_struct *sksec = sk->sk_security; +- struct sk_security_struct *newsksec = newsk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); ++ struct sk_security_struct *newsksec = selinux_sock(newsk); + + newsksec->nlbl_state = sksec->nlbl_state; + } +@@ -393,7 +394,7 @@ void selinux_netlbl_sctp_sk_clone(struct sock *sk, struct sock *newsk) + int selinux_netlbl_socket_post_create(struct sock *sk, u16 family) + { + int rc; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + struct netlbl_lsm_secattr *secattr; + + if (family != PF_INET && family != PF_INET6) +@@ -510,7 +511,7 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock, + { + int rc = 0; + struct sock *sk = sock->sk; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + struct netlbl_lsm_secattr secattr; + + if (selinux_netlbl_option(level, optname) && +@@ -548,7 +549,7 @@ static int selinux_netlbl_socket_connect_helper(struct sock *sk, + struct sockaddr *addr) + { + int rc; +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + struct netlbl_lsm_secattr *secattr; + + /* connected sockets are allowed to disconnect when the address family +@@ -587,7 +588,7 @@ static int selinux_netlbl_socket_connect_helper(struct sock *sk, + int selinux_netlbl_socket_connect_locked(struct sock *sk, + struct sockaddr *addr) + { +- struct sk_security_struct *sksec = sk->sk_security; ++ struct sk_security_struct *sksec = selinux_sock(sk); + + if (sksec->nlbl_state != NLBL_REQSKB && + sksec->nlbl_state != NLBL_CONNLABELED) +diff --git a/security/smack/smack.h b/security/smack/smack.h +index 041688e5a77a3..297f21446f456 100644 +--- a/security/smack/smack.h ++++ b/security/smack/smack.h +@@ -355,6 +355,11 @@ static inline struct superblock_smack *smack_superblock( + return superblock->s_security + smack_blob_sizes.lbs_superblock; + } + ++static inline struct socket_smack 
*smack_sock(const struct sock *sock) ++{ ++ return sock->sk_security + smack_blob_sizes.lbs_sock; ++} ++ + /* + * Is the directory transmuting? + */ +diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c +index 002a1b9ed83a5..6ec9a40f3ec59 100644 +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -1606,7 +1606,7 @@ static int smack_inode_getsecurity(struct mnt_idmap *idmap, + if (sock == NULL || sock->sk == NULL) + return -EOPNOTSUPP; + +- ssp = sock->sk->sk_security; ++ ssp = smack_sock(sock->sk); + + if (strcmp(name, XATTR_SMACK_IPIN) == 0) + isp = ssp->smk_in; +@@ -1994,7 +1994,7 @@ static int smack_file_receive(struct file *file) + + if (inode->i_sb->s_magic == SOCKFS_MAGIC) { + sock = SOCKET_I(inode); +- ssp = sock->sk->sk_security; ++ ssp = smack_sock(sock->sk); + tsp = smack_cred(current_cred()); + /* + * If the receiving process can't write to the +@@ -2409,11 +2409,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) + static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) + { + struct smack_known *skp = smk_of_current(); +- struct socket_smack *ssp; +- +- ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); +- if (ssp == NULL) +- return -ENOMEM; ++ struct socket_smack *ssp = smack_sock(sk); + + /* + * Sockets created by kernel threads receive web label. +@@ -2427,11 +2423,10 @@ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) + } + ssp->smk_packet = NULL; + +- sk->sk_security = ssp; +- + return 0; + } + ++#ifdef SMACK_IPV6_PORT_LABELING + /** + * smack_sk_free_security - Free a socket blob + * @sk: the socket +@@ -2440,7 +2435,6 @@ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) + */ + static void smack_sk_free_security(struct sock *sk) + { +-#ifdef SMACK_IPV6_PORT_LABELING + struct smk_port_label *spp; + + if (sk->sk_family == PF_INET6) { +@@ -2453,9 +2447,8 @@ static void smack_sk_free_security(struct sock *sk) + } + rcu_read_unlock(); + } +-#endif +- kfree(sk->sk_security); + } ++#endif + + /** + * smack_sk_clone_security - Copy security context +@@ -2466,8 +2459,8 @@ static void smack_sk_free_security(struct sock *sk) + */ + static void smack_sk_clone_security(const struct sock *sk, struct sock *newsk) + { +- struct socket_smack *ssp_old = sk->sk_security; +- struct socket_smack *ssp_new = newsk->sk_security; ++ struct socket_smack *ssp_old = smack_sock(sk); ++ struct socket_smack *ssp_new = smack_sock(newsk); + + *ssp_new = *ssp_old; + } +@@ -2583,7 +2576,7 @@ static struct smack_known *smack_ipv6host_label(struct sockaddr_in6 *sip) + */ + static int smack_netlbl_add(struct sock *sk) + { +- struct socket_smack *ssp = sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sk); + struct smack_known *skp = ssp->smk_out; + int rc; + +@@ -2616,7 +2609,7 @@ static int smack_netlbl_add(struct sock *sk) + */ + static void smack_netlbl_delete(struct sock *sk) + { +- struct socket_smack *ssp = sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sk); + + /* + * Take the label off the socket if one is set. 
+@@ -2648,7 +2641,7 @@ static int smk_ipv4_check(struct sock *sk, struct sockaddr_in *sap) + struct smack_known *skp; + int rc = 0; + struct smack_known *hkp; +- struct socket_smack *ssp = sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sk); + struct smk_audit_info ad; + + rcu_read_lock(); +@@ -2721,7 +2714,7 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address) + { + struct sock *sk = sock->sk; + struct sockaddr_in6 *addr6; +- struct socket_smack *ssp = sock->sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sock->sk); + struct smk_port_label *spp; + unsigned short port = 0; + +@@ -2809,7 +2802,7 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address, + int act) + { + struct smk_port_label *spp; +- struct socket_smack *ssp = sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sk); + struct smack_known *skp = NULL; + unsigned short port; + struct smack_known *object; +@@ -2912,7 +2905,7 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, + if (sock == NULL || sock->sk == NULL) + return -EOPNOTSUPP; + +- ssp = sock->sk->sk_security; ++ ssp = smack_sock(sock->sk); + + if (strcmp(name, XATTR_SMACK_IPIN) == 0) + ssp->smk_in = skp; +@@ -2960,7 +2953,7 @@ static int smack_socket_post_create(struct socket *sock, int family, + * Sockets created by kernel threads receive web label. + */ + if (unlikely(current->flags & PF_KTHREAD)) { +- ssp = sock->sk->sk_security; ++ ssp = smack_sock(sock->sk); + ssp->smk_in = &smack_known_web; + ssp->smk_out = &smack_known_web; + } +@@ -2985,8 +2978,8 @@ static int smack_socket_post_create(struct socket *sock, int family, + static int smack_socket_socketpair(struct socket *socka, + struct socket *sockb) + { +- struct socket_smack *asp = socka->sk->sk_security; +- struct socket_smack *bsp = sockb->sk->sk_security; ++ struct socket_smack *asp = smack_sock(socka->sk); ++ struct socket_smack *bsp = smack_sock(sockb->sk); + + asp->smk_packet = bsp->smk_out; + bsp->smk_packet = asp->smk_out; +@@ -3049,7 +3042,7 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, + if (__is_defined(SMACK_IPV6_SECMARK_LABELING)) + rsp = smack_ipv6host_label(sip); + if (rsp != NULL) { +- struct socket_smack *ssp = sock->sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sock->sk); + + rc = smk_ipv6_check(ssp->smk_out, rsp, sip, + SMK_CONNECTING); +@@ -3844,9 +3837,9 @@ static int smack_unix_stream_connect(struct sock *sock, + { + struct smack_known *skp; + struct smack_known *okp; +- struct socket_smack *ssp = sock->sk_security; +- struct socket_smack *osp = other->sk_security; +- struct socket_smack *nsp = newsk->sk_security; ++ struct socket_smack *ssp = smack_sock(sock); ++ struct socket_smack *osp = smack_sock(other); ++ struct socket_smack *nsp = smack_sock(newsk); + struct smk_audit_info ad; + int rc = 0; + #ifdef CONFIG_AUDIT +@@ -3898,8 +3891,8 @@ static int smack_unix_stream_connect(struct sock *sock, + */ + static int smack_unix_may_send(struct socket *sock, struct socket *other) + { +- struct socket_smack *ssp = sock->sk->sk_security; +- struct socket_smack *osp = other->sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sock->sk); ++ struct socket_smack *osp = smack_sock(other->sk); + struct smk_audit_info ad; + int rc; + +@@ -3936,7 +3929,7 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg, + struct sockaddr_in6 *sap = (struct sockaddr_in6 *) msg->msg_name; + #endif + #ifdef SMACK_IPV6_SECMARK_LABELING +- struct 
socket_smack *ssp = sock->sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sock->sk); + struct smack_known *rsp; + #endif + int rc = 0; +@@ -4148,7 +4141,7 @@ static struct smack_known *smack_from_netlbl(const struct sock *sk, u16 family, + netlbl_secattr_init(&secattr); + + if (sk) +- ssp = sk->sk_security; ++ ssp = smack_sock(sk); + + if (netlbl_skbuff_getattr(skb, family, &secattr) == 0) { + skp = smack_from_secattr(&secattr, ssp); +@@ -4170,7 +4163,7 @@ static struct smack_known *smack_from_netlbl(const struct sock *sk, u16 family, + */ + static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) + { +- struct socket_smack *ssp = sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sk); + struct smack_known *skp = NULL; + int rc = 0; + struct smk_audit_info ad; +@@ -4274,7 +4267,7 @@ static int smack_socket_getpeersec_stream(struct socket *sock, + u32 slen = 1; + int rc = 0; + +- ssp = sock->sk->sk_security; ++ ssp = smack_sock(sock->sk); + if (ssp->smk_packet != NULL) { + rcp = ssp->smk_packet->smk_known; + slen = strlen(rcp) + 1; +@@ -4324,7 +4317,7 @@ static int smack_socket_getpeersec_dgram(struct socket *sock, + + switch (family) { + case PF_UNIX: +- ssp = sock->sk->sk_security; ++ ssp = smack_sock(sock->sk); + s = ssp->smk_out->smk_secid; + break; + case PF_INET: +@@ -4373,7 +4366,7 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) + (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)) + return; + +- ssp = sk->sk_security; ++ ssp = smack_sock(sk); + ssp->smk_in = skp; + ssp->smk_out = skp; + /* cssp->smk_packet is already set in smack_inet_csk_clone() */ +@@ -4393,7 +4386,7 @@ static int smack_inet_conn_request(const struct sock *sk, struct sk_buff *skb, + { + u16 family = sk->sk_family; + struct smack_known *skp; +- struct socket_smack *ssp = sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sk); + struct sockaddr_in addr; + struct iphdr *hdr; + struct smack_known *hskp; +@@ -4479,7 +4472,7 @@ static int smack_inet_conn_request(const struct sock *sk, struct sk_buff *skb, + static void smack_inet_csk_clone(struct sock *sk, + const struct request_sock *req) + { +- struct socket_smack *ssp = sk->sk_security; ++ struct socket_smack *ssp = smack_sock(sk); + struct smack_known *skp; + + if (req->peer_secid != 0) { +@@ -5049,6 +5042,7 @@ struct lsm_blob_sizes smack_blob_sizes __ro_after_init = { + .lbs_inode = sizeof(struct inode_smack), + .lbs_ipc = sizeof(struct smack_known *), + .lbs_msg_msg = sizeof(struct smack_known *), ++ .lbs_sock = sizeof(struct socket_smack), + .lbs_superblock = sizeof(struct superblock_smack), + .lbs_xattr_count = SMACK_INODE_INIT_XATTRS, + }; +@@ -5173,7 +5167,9 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { + LSM_HOOK_INIT(socket_getpeersec_stream, smack_socket_getpeersec_stream), + LSM_HOOK_INIT(socket_getpeersec_dgram, smack_socket_getpeersec_dgram), + LSM_HOOK_INIT(sk_alloc_security, smack_sk_alloc_security), ++#ifdef SMACK_IPV6_PORT_LABELING + LSM_HOOK_INIT(sk_free_security, smack_sk_free_security), ++#endif + LSM_HOOK_INIT(sk_clone_security, smack_sk_clone_security), + LSM_HOOK_INIT(sock_graft, smack_sock_graft), + LSM_HOOK_INIT(inet_conn_request, smack_inet_conn_request), +diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c +index b945c1d3a7431..bad71b7e648da 100644 +--- a/security/smack/smack_netfilter.c ++++ b/security/smack/smack_netfilter.c +@@ -26,8 +26,8 @@ static unsigned int smack_ip_output(void *priv, + struct socket_smack 
*ssp; + struct smack_known *skp; + +- if (sk && sk->sk_security) { +- ssp = sk->sk_security; ++ if (sk) { ++ ssp = smack_sock(sk); + skp = ssp->smk_out; + skb->secmark = skp->smk_secid; + } +-- +2.43.0 + diff --git a/queue-6.10/serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch b/queue-6.10/serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch new file mode 100644 index 00000000000..13b8c1589e6 --- /dev/null +++ b/queue-6.10/serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch @@ -0,0 +1,48 @@ +From 18028110d3062573f5df7e1ce692782babda7533 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Sep 2024 15:13:32 +0200 +Subject: serial: qcom-geni: fix arg types for qcom_geni_serial_poll_bit() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Douglas Anderson + +[ Upstream commit c2eaf5e01275ae13f1ec5b1434f6c49cfff57430 ] + +The "offset" passed in should be unsigned since it's always a positive +offset from our memory mapped IO. + +The "field" should be u32 since we're anding it with a 32-bit value +read from the device. + +Suggested-by: Stephen Boyd +Signed-off-by: Douglas Anderson +Reviewed-by: Konrad Dybcio +Link: https://lore.kernel.org/r/20240610152420.v4.4.I24a0de52dd7336908df180fa6b698e001f3aff82@changeid +Tested-by: Nícolas F. R. A. Prado +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20240906131336.23625-5-johan+linaro@kernel.org +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: cc4a0e5754a1 ("serial: qcom-geni: fix console corruption") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/qcom_geni_serial.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c +index b88435c0ea507..54052c68555d7 100644 +--- a/drivers/tty/serial/qcom_geni_serial.c ++++ b/drivers/tty/serial/qcom_geni_serial.c +@@ -266,7 +266,7 @@ static bool qcom_geni_serial_secondary_active(struct uart_port *uport) + } + + static bool qcom_geni_serial_poll_bit(struct uart_port *uport, +- int offset, int field, bool set) ++ unsigned int offset, u32 field, bool set) + { + u32 reg; + struct qcom_geni_serial_port *port; +-- +2.43.0 + diff --git a/queue-6.10/serial-qcom-geni-fix-console-corruption.patch b/queue-6.10/serial-qcom-geni-fix-console-corruption.patch new file mode 100644 index 00000000000..0ab674fa680 --- /dev/null +++ b/queue-6.10/serial-qcom-geni-fix-console-corruption.patch @@ -0,0 +1,169 @@ +From 9fd9b8c3e4ea317dd5ecb85b2d14186818de310b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Sep 2024 15:13:34 +0200 +Subject: serial: qcom-geni: fix console corruption +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Johan Hovold + +[ Upstream commit cc4a0e5754a16bbc1e215c091349a7c83a2c5e14 ] + +The Qualcomm serial console implementation is broken and can lose +characters when the serial port is also used for tty output. + +Specifically, the console code only waits for the current tx command to +complete when all data has already been written to the fifo. When there +are on-going longer transfers this often means that console output is +lost when the console code inadvertently "hijacks" the current tx +command instead of starting a new one. 
+ +This can, for example, be observed during boot when console output that +should have been interspersed with init output is truncated: + + [ 9.462317] qcom-snps-eusb2-hsphy fde000.phy: Registered Qcom-eUSB2 phy + [ OK ] Found device KBG50ZNS256G KIOXIA Wi[ 9.471743ndows. + [ 9.539915] xhci-hcd xhci-hcd.0.auto: xHCI Host Controller + +Add a new state variable to track how much data has been written to the +fifo and use it to determine when the fifo and shift register are both +empty. This is needed since there is currently no other known way to +determine when the shift register is empty. + +This in turn allows the console code to interrupt long transfers without +losing data. + +Note that the oops-in-progress case is similarly broken as it does not +cancel any active command and also waits for the wrong status flag when +attempting to drain the fifo (TX_FIFO_NOT_EMPTY_EN is only set when +cancelling a command leaves data in the fifo). + +Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP") +Fixes: a1fee899e5be ("tty: serial: qcom_geni_serial: Fix softlock") +Fixes: 9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get the port lock") +Cc: stable@vger.kernel.org # 4.17 +Reviewed-by: Douglas Anderson +Tested-by: Nícolas F. R. A. Prado +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20240906131336.23625-7-johan+linaro@kernel.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/qcom_geni_serial.c | 45 +++++++++++++-------------- + 1 file changed, 22 insertions(+), 23 deletions(-) + +diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c +index 7bbd70c306201..f8f6e9466b400 100644 +--- a/drivers/tty/serial/qcom_geni_serial.c ++++ b/drivers/tty/serial/qcom_geni_serial.c +@@ -131,6 +131,7 @@ struct qcom_geni_serial_port { + bool brk; + + unsigned int tx_remaining; ++ unsigned int tx_queued; + int wakeup_irq; + bool rx_tx_swap; + bool cts_rts_swap; +@@ -144,6 +145,8 @@ static const struct uart_ops qcom_geni_uart_pops; + static struct uart_driver qcom_geni_console_driver; + static struct uart_driver qcom_geni_uart_driver; + ++static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport); ++ + static inline struct qcom_geni_serial_port *to_dev_port(struct uart_port *uport) + { + return container_of(uport, struct qcom_geni_serial_port, uport); +@@ -393,6 +396,14 @@ static void qcom_geni_serial_poll_put_char(struct uart_port *uport, + #endif + + #ifdef CONFIG_SERIAL_QCOM_GENI_CONSOLE ++static void qcom_geni_serial_drain_fifo(struct uart_port *uport) ++{ ++ struct qcom_geni_serial_port *port = to_dev_port(uport); ++ ++ qcom_geni_serial_poll_bitfield(uport, SE_GENI_M_GP_LENGTH, GP_LENGTH, ++ port->tx_queued); ++} ++ + static void qcom_geni_serial_wr_char(struct uart_port *uport, unsigned char ch) + { + struct qcom_geni_private_data *private_data = uport->private_data; +@@ -468,7 +479,6 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s, + struct qcom_geni_serial_port *port; + bool locked = true; + unsigned long flags; +- u32 geni_status; + + WARN_ON(co->index < 0 || co->index >= GENI_UART_CONS_PORTS); + +@@ -482,34 +492,20 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s, + else + uart_port_lock_irqsave(uport, &flags); + +- geni_status = readl(uport->membase + SE_GENI_STATUS); ++ if (qcom_geni_serial_main_active(uport)) { ++ /* Wait for completion or drain FIFO */ ++ if (!locked || 
port->tx_remaining == 0) ++ qcom_geni_serial_poll_tx_done(uport); ++ else ++ qcom_geni_serial_drain_fifo(uport); + +- if (!locked) { +- /* +- * We can only get here if an oops is in progress then we were +- * unable to get the lock. This means we can't safely access +- * our state variables like tx_remaining. About the best we +- * can do is wait for the FIFO to be empty before we start our +- * transfer, so we'll do that. +- */ +- qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS, +- M_TX_FIFO_NOT_EMPTY_EN, false); +- } else if ((geni_status & M_GENI_CMD_ACTIVE) && !port->tx_remaining) { +- /* +- * It seems we can't interrupt existing transfers if all data +- * has been sent, in which case we need to look for done first. +- */ +- qcom_geni_serial_poll_tx_done(uport); ++ qcom_geni_serial_cancel_tx_cmd(uport); + } + + __qcom_geni_serial_console_write(uport, s, count); + +- +- if (locked) { +- if (port->tx_remaining) +- qcom_geni_serial_setup_tx(uport, port->tx_remaining); ++ if (locked) + uart_port_unlock_irqrestore(uport, flags); +- } + } + + static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop) +@@ -690,6 +686,7 @@ static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport) + writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR); + + port->tx_remaining = 0; ++ port->tx_queued = 0; + } + + static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop) +@@ -916,6 +913,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport, + if (!port->tx_remaining) { + qcom_geni_serial_setup_tx(uport, pending); + port->tx_remaining = pending; ++ port->tx_queued = 0; + + irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN); + if (!(irq_en & M_TX_FIFO_WATERMARK_EN)) +@@ -924,6 +922,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport, + } + + qcom_geni_serial_send_chunk_fifo(uport, chunk); ++ port->tx_queued += chunk; + + /* + * The tx fifo watermark is level triggered and latched. Though we had +-- +2.43.0 + diff --git a/queue-6.10/serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch b/queue-6.10/serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch new file mode 100644 index 00000000000..3ec8119e340 --- /dev/null +++ b/queue-6.10/serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch @@ -0,0 +1,70 @@ +From c6511f4d88c312a11be28f152c02f6bade9d8e64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Sep 2024 15:13:33 +0200 +Subject: serial: qcom-geni: introduce qcom_geni_serial_poll_bitfield() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Douglas Anderson + +[ Upstream commit b26d1ad1221273c88c2c4f5b4080338b8ca23859 ] + +With a small modification the qcom_geni_serial_poll_bit() function +could be used to poll more than just a single bit. Let's generalize +it. We'll make the qcom_geni_serial_poll_bit() into just a wrapper of +the general function. + +Signed-off-by: Douglas Anderson +Reviewed-by: Konrad Dybcio +Link: https://lore.kernel.org/r/20240610152420.v4.5.Ic6411eab8d9d37acc451705f583fb535cd6dadb2@changeid +Tested-by: Nícolas F. R. A. 
Prado +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20240906131336.23625-6-johan+linaro@kernel.org +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: cc4a0e5754a1 ("serial: qcom-geni: fix console corruption") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/qcom_geni_serial.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c +index 54052c68555d7..7bbd70c306201 100644 +--- a/drivers/tty/serial/qcom_geni_serial.c ++++ b/drivers/tty/serial/qcom_geni_serial.c +@@ -265,8 +265,8 @@ static bool qcom_geni_serial_secondary_active(struct uart_port *uport) + return readl(uport->membase + SE_GENI_STATUS) & S_GENI_CMD_ACTIVE; + } + +-static bool qcom_geni_serial_poll_bit(struct uart_port *uport, +- unsigned int offset, u32 field, bool set) ++static bool qcom_geni_serial_poll_bitfield(struct uart_port *uport, ++ unsigned int offset, u32 field, u32 val) + { + u32 reg; + struct qcom_geni_serial_port *port; +@@ -286,7 +286,7 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport, + timeout_us = DIV_ROUND_UP(timeout_us, 10) * 10; + while (timeout_us) { + reg = readl(uport->membase + offset); +- if ((bool)(reg & field) == set) ++ if ((reg & field) == val) + return true; + udelay(10); + timeout_us -= 10; +@@ -294,6 +294,12 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport, + return false; + } + ++static bool qcom_geni_serial_poll_bit(struct uart_port *uport, ++ unsigned int offset, u32 field, bool set) ++{ ++ return qcom_geni_serial_poll_bitfield(uport, offset, field, set ? field : 0); ++} ++ + static void qcom_geni_serial_setup_tx(struct uart_port *uport, u32 xmit_size) + { + u32 m_cmd; +-- +2.43.0 + diff --git a/queue-6.10/series b/queue-6.10/series index 36de71ca69f..d083557f860 100644 --- a/queue-6.10/series +++ b/queue-6.10/series @@ -596,3 +596,27 @@ dt-bindings-spi-nxp-fspi-add-imx8ulp-support.patch arm-dts-imx6ul-geam-fix-fsl-pins-property-in-tscgrp-pinctrl.patch arm-dts-imx6ull-seeed-npi-fix-fsl-pins-property-in-tscgrp-pinctrl.patch tools-nolibc-include-arch.h-from-string.h.patch +soc-versatile-realview-fix-memory-leak-during-device.patch +soc-versatile-realview-fix-soc_dev-leak-during-devic.patch +kvm-x86-drop-unused-check_apicv_inhibit_reasons-call.patch +kvm-x86-make-x2apic-id-100-readonly.patch +kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2.patch +x86-mm-make-x86_platform.guest.enc_status_change_-re.patch +x86-tdx-account-shared-memory.patch +x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch +x86-tdx-convert-shared-memory-back-to-private-on-kex.patch +x86-tdx-fix-in-kernel-mmio-check.patch +xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch +usb-xhci-fix-loss-of-data-on-cadence-xhc.patch +soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch +serial-qcom-geni-fix-arg-types-for-qcom_geni_serial_.patch +serial-qcom-geni-introduce-qcom_geni_serial_poll_bit.patch +serial-qcom-geni-fix-console-corruption.patch +idpf-stop-using-macros-for-accessing-queue-descripto.patch +idpf-split-idpf_queue-into-4-strictly-typed-queue-st.patch +idpf-merge-singleq-and-splitq-net_device_ops.patch +idpf-fix-netdev-tx-queue-stop-wake.patch +fs_parse-add-uid-gid-option-option-parsing-helpers.patch +debugfs-convert-to-new-uid-gid-option-parsing-helper.patch +debugfs-show-actual-source-in-proc-mounts.patch +lsm-infrastructure-management-of-the-sock-security.patch diff --git 
a/queue-6.10/soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch b/queue-6.10/soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch new file mode 100644 index 00000000000..5211e149f62 --- /dev/null +++ b/queue-6.10/soc-qcom-geni-se-add-gp_length-irq_en_set-irq_en_cle.patch @@ -0,0 +1,75 @@ +From 429c30fd2323f3e1c0e151a27221ed6d24f8b82a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Sep 2024 15:13:31 +0200 +Subject: soc: qcom: geni-se: add GP_LENGTH/IRQ_EN_SET/IRQ_EN_CLEAR registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Douglas Anderson + +[ Upstream commit b03ffc76b83c1a7d058454efbcf1bf0e345ef1c2 ] + +For UART devices the M_GP_LENGTH is the TX word count. For other +devices this is the transaction word count. + +For UART devices the S_GP_LENGTH is the RX word count. + +The IRQ_EN set/clear registers allow you to set or clear bits in the +IRQ_EN register without needing a read-modify-write. + +Acked-by: Bjorn Andersson +Signed-off-by: Douglas Anderson +Link: https://lore.kernel.org/r/20240610152420.v4.1.Ife7ced506aef1be3158712aa3ff34a006b973559@changeid +Tested-by: Nícolas F. R. A. Prado +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20240906131336.23625-4-johan+linaro@kernel.org +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: cc4a0e5754a1 ("serial: qcom-geni: fix console corruption") +Signed-off-by: Sasha Levin +--- + include/linux/soc/qcom/geni-se.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h +index 0f038a1a03309..c3bca9c0bf2cf 100644 +--- a/include/linux/soc/qcom/geni-se.h ++++ b/include/linux/soc/qcom/geni-se.h +@@ -88,11 +88,15 @@ struct geni_se { + #define SE_GENI_M_IRQ_STATUS 0x610 + #define SE_GENI_M_IRQ_EN 0x614 + #define SE_GENI_M_IRQ_CLEAR 0x618 ++#define SE_GENI_M_IRQ_EN_SET 0x61c ++#define SE_GENI_M_IRQ_EN_CLEAR 0x620 + #define SE_GENI_S_CMD0 0x630 + #define SE_GENI_S_CMD_CTRL_REG 0x634 + #define SE_GENI_S_IRQ_STATUS 0x640 + #define SE_GENI_S_IRQ_EN 0x644 + #define SE_GENI_S_IRQ_CLEAR 0x648 ++#define SE_GENI_S_IRQ_EN_SET 0x64c ++#define SE_GENI_S_IRQ_EN_CLEAR 0x650 + #define SE_GENI_TX_FIFOn 0x700 + #define SE_GENI_RX_FIFOn 0x780 + #define SE_GENI_TX_FIFO_STATUS 0x800 +@@ -101,6 +105,8 @@ struct geni_se { + #define SE_GENI_RX_WATERMARK_REG 0x810 + #define SE_GENI_RX_RFR_WATERMARK_REG 0x814 + #define SE_GENI_IOS 0x908 ++#define SE_GENI_M_GP_LENGTH 0x910 ++#define SE_GENI_S_GP_LENGTH 0x914 + #define SE_DMA_TX_IRQ_STAT 0xc40 + #define SE_DMA_TX_IRQ_CLR 0xc44 + #define SE_DMA_TX_FSM_RST 0xc58 +@@ -234,6 +240,9 @@ struct geni_se { + #define IO2_DATA_IN BIT(1) + #define RX_DATA_IN BIT(0) + ++/* SE_GENI_M_GP_LENGTH and SE_GENI_S_GP_LENGTH fields */ ++#define GP_LENGTH GENMASK(31, 0) ++ + /* SE_DMA_TX_IRQ_STAT Register fields */ + #define TX_DMA_DONE BIT(0) + #define TX_EOT BIT(1) +-- +2.43.0 + diff --git a/queue-6.10/soc-versatile-realview-fix-memory-leak-during-device.patch b/queue-6.10/soc-versatile-realview-fix-memory-leak-during-device.patch new file mode 100644 index 00000000000..c8d4ed71ab5 --- /dev/null +++ b/queue-6.10/soc-versatile-realview-fix-memory-leak-during-device.patch @@ -0,0 +1,50 @@ +From 7c463388c4faca9c1d47db60f2888eeef1e00c94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 25 Aug 2024 20:05:23 +0200 +Subject: soc: versatile: realview: fix memory leak during device remove + +From: Krzysztof Kozlowski + +[ Upstream commit 1c4f26a41f9d052f334f6ae629e01f598ed93508 ] + 
+If device is unbound, the memory allocated for soc_dev_attr should be +freed to prevent leaks. + +Signed-off-by: Krzysztof Kozlowski +Link: https://lore.kernel.org/20240825-soc-dev-fixes-v1-2-ff4b35abed83@linaro.org +Signed-off-by: Linus Walleij +Stable-dep-of: c774f2564c00 ("soc: versatile: realview: fix soc_dev leak during device remove") +Signed-off-by: Sasha Levin +--- + drivers/soc/versatile/soc-realview.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/drivers/soc/versatile/soc-realview.c b/drivers/soc/versatile/soc-realview.c +index c6876d232d8fd..d304ee69287af 100644 +--- a/drivers/soc/versatile/soc-realview.c ++++ b/drivers/soc/versatile/soc-realview.c +@@ -93,7 +93,7 @@ static int realview_soc_probe(struct platform_device *pdev) + if (IS_ERR(syscon_regmap)) + return PTR_ERR(syscon_regmap); + +- soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); ++ soc_dev_attr = devm_kzalloc(&pdev->dev, sizeof(*soc_dev_attr), GFP_KERNEL); + if (!soc_dev_attr) + return -ENOMEM; + +@@ -106,10 +106,9 @@ static int realview_soc_probe(struct platform_device *pdev) + soc_dev_attr->family = "Versatile"; + soc_dev_attr->custom_attr_group = realview_groups[0]; + soc_dev = soc_device_register(soc_dev_attr); +- if (IS_ERR(soc_dev)) { +- kfree(soc_dev_attr); ++ if (IS_ERR(soc_dev)) + return -ENODEV; +- } ++ + ret = regmap_read(syscon_regmap, REALVIEW_SYS_ID_OFFSET, + &realview_coreid); + if (ret) +-- +2.43.0 + diff --git a/queue-6.10/soc-versatile-realview-fix-soc_dev-leak-during-devic.patch b/queue-6.10/soc-versatile-realview-fix-soc_dev-leak-during-devic.patch new file mode 100644 index 00000000000..7c730ce7340 --- /dev/null +++ b/queue-6.10/soc-versatile-realview-fix-soc_dev-leak-during-devic.patch @@ -0,0 +1,63 @@ +From 015be69c0db89a0fa48410c1745b32504a95ee02 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 25 Aug 2024 20:05:24 +0200 +Subject: soc: versatile: realview: fix soc_dev leak during device remove + +From: Krzysztof Kozlowski + +[ Upstream commit c774f2564c0086c23f5269fd4691f233756bf075 ] + +If device is unbound, the soc_dev should be unregistered to prevent +memory leak. 
+ +Fixes: a2974c9c1f83 ("soc: add driver for the ARM RealView") +Cc: stable@vger.kernel.org +Signed-off-by: Krzysztof Kozlowski +Link: https://lore.kernel.org/20240825-soc-dev-fixes-v1-3-ff4b35abed83@linaro.org +Signed-off-by: Linus Walleij +Signed-off-by: Sasha Levin +--- + drivers/soc/versatile/soc-realview.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/drivers/soc/versatile/soc-realview.c b/drivers/soc/versatile/soc-realview.c +index d304ee69287af..cf91abe07d38d 100644 +--- a/drivers/soc/versatile/soc-realview.c ++++ b/drivers/soc/versatile/soc-realview.c +@@ -4,6 +4,7 @@ + * + * Author: Linus Walleij + */ ++#include + #include + #include + #include +@@ -81,6 +82,13 @@ static struct attribute *realview_attrs[] = { + + ATTRIBUTE_GROUPS(realview); + ++static void realview_soc_socdev_release(void *data) ++{ ++ struct soc_device *soc_dev = data; ++ ++ soc_device_unregister(soc_dev); ++} ++ + static int realview_soc_probe(struct platform_device *pdev) + { + struct regmap *syscon_regmap; +@@ -109,6 +117,11 @@ static int realview_soc_probe(struct platform_device *pdev) + if (IS_ERR(soc_dev)) + return -ENODEV; + ++ ret = devm_add_action_or_reset(&pdev->dev, realview_soc_socdev_release, ++ soc_dev); ++ if (ret) ++ return ret; ++ + ret = regmap_read(syscon_regmap, REALVIEW_SYS_ID_OFFSET, + &realview_coreid); + if (ret) +-- +2.43.0 + diff --git a/queue-6.10/usb-xhci-fix-loss-of-data-on-cadence-xhc.patch b/queue-6.10/usb-xhci-fix-loss-of-data-on-cadence-xhc.patch new file mode 100644 index 00000000000..c49526a9d05 --- /dev/null +++ b/queue-6.10/usb-xhci-fix-loss-of-data-on-cadence-xhc.patch @@ -0,0 +1,117 @@ +From 65a6a1f627626dee5f356a04001d03c25bfacd7d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Sep 2024 07:03:28 +0000 +Subject: usb: xhci: fix loss of data on Cadence xHC + +From: Pawel Laszczak + +[ Upstream commit e5fa8db0be3e8757e8641600c518425a4589b85c ] + +Streams should flush their TRB cache, re-read TRBs, and start executing +TRBs from the beginning of the new dequeue pointer after a 'Set TR Dequeue +Pointer' command. + +Cadence controllers may fail to start from the beginning of the dequeue +TRB as it doesn't clear the Opaque 'RsvdO' field of the stream context +during 'Set TR Dequeue' command. This stream context area is where xHC +stores information about the last partially executed TD when a stream +is stopped. xHC uses this information to resume the transfer where it left +mid TD, when the stream is restarted. + +Patch fixes this by clearing out all RsvdO fields before initializing new +Stream transfer using a 'Set TR Dequeue Pointer' command. 
+ +Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") +cc: stable@vger.kernel.org +Signed-off-by: Pawel Laszczak +Reviewed-by: Peter Chen +Link: https://lore.kernel.org/r/PH7PR07MB95386A40146E3EC64086F409DD9D2@PH7PR07MB9538.namprd07.prod.outlook.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/cdns3/host.c | 4 +++- + drivers/usb/host/xhci-pci.c | 7 +++++++ + drivers/usb/host/xhci-ring.c | 14 ++++++++++++++ + drivers/usb/host/xhci.h | 1 + + 4 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c +index ceca4d839dfd4..7ba760ee62e33 100644 +--- a/drivers/usb/cdns3/host.c ++++ b/drivers/usb/cdns3/host.c +@@ -62,7 +62,9 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { + .resume_quirk = xhci_cdns3_resume_quirk, + }; + +-static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; ++static const struct xhci_plat_priv xhci_plat_cdnsp_xhci = { ++ .quirks = XHCI_CDNS_SCTX_QUIRK, ++}; + + static int __cdns_host_init(struct cdns *cdns) + { +diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c +index 19527b856c550..994fd8b38bd01 100644 +--- a/drivers/usb/host/xhci-pci.c ++++ b/drivers/usb/host/xhci-pci.c +@@ -81,6 +81,9 @@ + #define PCI_DEVICE_ID_ASMEDIA_2142_XHCI 0x2142 + #define PCI_DEVICE_ID_ASMEDIA_3242_XHCI 0x3242 + ++#define PCI_DEVICE_ID_CADENCE 0x17CD ++#define PCI_DEVICE_ID_CADENCE_SSP 0x0200 ++ + static const char hcd_name[] = "xhci_hcd"; + + static struct hc_driver __read_mostly xhci_pci_hc_driver; +@@ -480,6 +483,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + } + ++ if (pdev->vendor == PCI_DEVICE_ID_CADENCE && ++ pdev->device == PCI_DEVICE_ID_CADENCE_SSP) ++ xhci->quirks |= XHCI_CDNS_SCTX_QUIRK; ++ + /* xHC spec requires PCI devices to support D3hot and D3cold */ + if (xhci->hci_version >= 0x120) + xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index fd0cde3d1569c..0fe6bef6c3980 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -1426,6 +1426,20 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, + struct xhci_stream_ctx *ctx = + &ep->stream_info->stream_ctx_array[stream_id]; + deq = le64_to_cpu(ctx->stream_ring) & SCTX_DEQ_MASK; ++ ++ /* ++ * Cadence xHCI controllers store some endpoint state ++ * information within Rsvd0 fields of Stream Endpoint ++ * context. This field is not cleared during Set TR ++ * Dequeue Pointer command which causes XDMA to skip ++ * over transfer ring and leads to data loss on stream ++ * pipe. ++ * To fix this issue driver must clear Rsvd0 field. 
++ */ ++ if (xhci->quirks & XHCI_CDNS_SCTX_QUIRK) { ++ ctx->reserved[0] = 0; ++ ctx->reserved[1] = 0; ++ } + } else { + deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK; + } +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index 5a8925474176d..ac8da8a7df86b 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -1630,6 +1630,7 @@ struct xhci_hcd { + #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) + #define XHCI_ZHAOXIN_HOST BIT_ULL(46) + #define XHCI_WRITE_64_HI_LO BIT_ULL(47) ++#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) + + unsigned int num_active_eps; + unsigned int limit_active_eps; +-- +2.43.0 + diff --git a/queue-6.10/x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch b/queue-6.10/x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch new file mode 100644 index 00000000000..15dd21c7bbf --- /dev/null +++ b/queue-6.10/x86-mm-add-callbacks-to-prepare-encrypted-memory-for.patch @@ -0,0 +1,155 @@ +From cbe17f625208e4cb8eaa9c9c53ef0a457a559f50 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jun 2024 12:58:55 +0300 +Subject: x86/mm: Add callbacks to prepare encrypted memory for kexec + +From: Kirill A. Shutemov + +[ Upstream commit 22daa42294b419a0d8060a3870285e7a72aa63e4 ] + +AMD SEV and Intel TDX guests allocate shared buffers for performing I/O. +This is done by allocating pages normally from the buddy allocator and +then converting them to shared using set_memory_decrypted(). + +On kexec, the second kernel is unaware of which memory has been +converted in this manner. It only sees E820_TYPE_RAM. Accessing shared +memory as private is fatal. + +Therefore, the memory state must be reset to its original state before +starting the new kernel with kexec. + +The process of converting shared memory back to private occurs in two +steps: + +- enc_kexec_begin() stops new conversions. + +- enc_kexec_finish() unshares all existing shared memory, reverting it + back to private. + +Signed-off-by: Kirill A. Shutemov +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Nikolay Borisov +Reviewed-by: Kai Huang +Tested-by: Tao Liu +Link: https://lore.kernel.org/r/20240614095904.1345461-11-kirill.shutemov@linux.intel.com +Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check") +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/x86_init.h | 10 ++++++++++ + arch/x86/kernel/crash.c | 12 ++++++++++++ + arch/x86/kernel/reboot.c | 12 ++++++++++++ + arch/x86/kernel/x86_init.c | 4 ++++ + 4 files changed, 38 insertions(+) + +diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h +index 28ac3cb9b987b..213cf5379a5a6 100644 +--- a/arch/x86/include/asm/x86_init.h ++++ b/arch/x86/include/asm/x86_init.h +@@ -149,12 +149,22 @@ struct x86_init_acpi { + * @enc_status_change_finish Notify HV after the encryption status of a range is changed + * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status ++ * @enc_kexec_begin Begin the two-step process of converting shared memory back ++ * to private. It stops the new conversions from being started ++ * and waits in-flight conversions to finish, if possible. ++ * @enc_kexec_finish Finish the two-step process of converting shared memory to ++ * private. All memory is private after the call when ++ * the function returns. ++ * It is called on only one CPU while the others are shut down ++ * and with interrupts disabled. 
+ */ + struct x86_guest { + int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); ++ void (*enc_kexec_begin)(void); ++ void (*enc_kexec_finish)(void); + }; + + /** +diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c +index f06501445cd98..340af81556584 100644 +--- a/arch/x86/kernel/crash.c ++++ b/arch/x86/kernel/crash.c +@@ -128,6 +128,18 @@ void native_machine_crash_shutdown(struct pt_regs *regs) + #ifdef CONFIG_HPET_TIMER + hpet_disable(); + #endif ++ ++ /* ++ * Non-crash kexec calls enc_kexec_begin() while scheduling is still ++ * active. This allows the callback to wait until all in-flight ++ * shared<->private conversions are complete. In a crash scenario, ++ * enc_kexec_begin() gets called after all but one CPU have been shut ++ * down and interrupts have been disabled. This allows the callback to ++ * detect a race with the conversion and report it. ++ */ ++ x86_platform.guest.enc_kexec_begin(); ++ x86_platform.guest.enc_kexec_finish(); ++ + crash_save_cpu(regs, safe_smp_processor_id()); + } + +diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c +index f3130f762784a..bb7a44af7efd1 100644 +--- a/arch/x86/kernel/reboot.c ++++ b/arch/x86/kernel/reboot.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -716,6 +717,14 @@ static void native_machine_emergency_restart(void) + + void native_machine_shutdown(void) + { ++ /* ++ * Call enc_kexec_begin() while all CPUs are still active and ++ * interrupts are enabled. This will allow all in-flight memory ++ * conversions to finish cleanly. ++ */ ++ if (kexec_in_progress) ++ x86_platform.guest.enc_kexec_begin(); ++ + /* Stop the cpus and apics */ + #ifdef CONFIG_X86_IO_APIC + /* +@@ -752,6 +761,9 @@ void native_machine_shutdown(void) + #ifdef CONFIG_X86_64 + x86_platform.iommu_shutdown(); + #endif ++ ++ if (kexec_in_progress) ++ x86_platform.guest.enc_kexec_finish(); + } + + static void __machine_emergency_restart(int emergency) +diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c +index 3f95e32dd288e..0a2bbd674a6d9 100644 +--- a/arch/x86/kernel/x86_init.c ++++ b/arch/x86/kernel/x86_init.c +@@ -139,6 +139,8 @@ static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool + static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; } + static bool enc_tlb_flush_required_noop(bool enc) { return false; } + static bool enc_cache_flush_required_noop(void) { return false; } ++static void enc_kexec_begin_noop(void) {} ++static void enc_kexec_finish_noop(void) {} + static bool is_private_mmio_noop(u64 addr) {return false; } + + struct x86_platform_ops x86_platform __ro_after_init = { +@@ -162,6 +164,8 @@ struct x86_platform_ops x86_platform __ro_after_init = { + .enc_status_change_finish = enc_status_change_finish_noop, + .enc_tlb_flush_required = enc_tlb_flush_required_noop, + .enc_cache_flush_required = enc_cache_flush_required_noop, ++ .enc_kexec_begin = enc_kexec_begin_noop, ++ .enc_kexec_finish = enc_kexec_finish_noop, + }, + }; + +-- +2.43.0 + diff --git a/queue-6.10/x86-mm-make-x86_platform.guest.enc_status_change_-re.patch b/queue-6.10/x86-mm-make-x86_platform.guest.enc_status_change_-re.patch new file mode 100644 index 00000000000..03459c6d57e --- /dev/null +++ 
b/queue-6.10/x86-mm-make-x86_platform.guest.enc_status_change_-re.patch @@ -0,0 +1,248 @@ +From dfaedba14744b7e751e19d1cbcf05c623f8e0f17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jun 2024 12:58:52 +0300 +Subject: x86/mm: Make x86_platform.guest.enc_status_change_*() return an error + +From: Kirill A. Shutemov + +[ Upstream commit 99c5c4c60e0db1d2ff58b8a61c93b6851146469f ] + +TDX is going to have more than one reason to fail enc_status_change_prepare(). + +Change the callback to return errno instead of assuming -EIO. Change +enc_status_change_finish() too to keep the interface symmetric. + +Signed-off-by: Kirill A. Shutemov +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Dave Hansen +Reviewed-by: Kai Huang +Reviewed-by: Michael Kelley +Tested-by: Tao Liu +Link: https://lore.kernel.org/r/20240614095904.1345461-8-kirill.shutemov@linux.intel.com +Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check") +Signed-off-by: Sasha Levin +--- + arch/x86/coco/tdx/tdx.c | 20 +++++++++++--------- + arch/x86/hyperv/ivm.c | 22 ++++++++++------------ + arch/x86/include/asm/x86_init.h | 4 ++-- + arch/x86/kernel/x86_init.c | 4 ++-- + arch/x86/mm/mem_encrypt_amd.c | 8 ++++---- + arch/x86/mm/pat/set_memory.c | 12 +++++++----- + 6 files changed, 36 insertions(+), 34 deletions(-) + +diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c +index 8fe4c2b07128e..fdcc081317764 100644 +--- a/arch/x86/coco/tdx/tdx.c ++++ b/arch/x86/coco/tdx/tdx.c +@@ -797,28 +797,30 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) + return true; + } + +-static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, +- bool enc) ++static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, ++ bool enc) + { + /* + * Only handle shared->private conversion here. + * See the comment in tdx_early_init(). + */ +- if (enc) +- return tdx_enc_status_changed(vaddr, numpages, enc); +- return true; ++ if (enc && !tdx_enc_status_changed(vaddr, numpages, enc)) ++ return -EIO; ++ ++ return 0; + } + +-static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, ++static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + bool enc) + { + /* + * Only handle private->shared conversion here. + * See the comment in tdx_early_init(). + */ +- if (!enc) +- return tdx_enc_status_changed(vaddr, numpages, enc); +- return true; ++ if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc)) ++ return -EIO; ++ ++ return 0; + } + + void __init tdx_early_init(void) +diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c +index 768d73de0d098..b4a851d27c7cb 100644 +--- a/arch/x86/hyperv/ivm.c ++++ b/arch/x86/hyperv/ivm.c +@@ -523,9 +523,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], + * transition is complete, hv_vtom_set_host_visibility() marks the pages + * as "present" again. + */ +-static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) ++static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) + { +- return !set_memory_np(kbuffer, pagecount); ++ return set_memory_np(kbuffer, pagecount); + } + + /* +@@ -536,20 +536,19 @@ static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc + * with host. This function works as wrap of hv_mark_gpa_visibility() + * with memory base and size. 
+ */ +-static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) ++static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) + { + enum hv_mem_host_visibility visibility = enc ? + VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; + u64 *pfn_array; + phys_addr_t paddr; ++ int i, pfn, err; + void *vaddr; + int ret = 0; +- bool result = true; +- int i, pfn; + + pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); + if (!pfn_array) { +- result = false; ++ ret = -ENOMEM; + goto err_set_memory_p; + } + +@@ -568,10 +567,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo + if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { + ret = hv_mark_gpa_visibility(pfn, pfn_array, + visibility); +- if (ret) { +- result = false; ++ if (ret) + goto err_free_pfn_array; +- } + pfn = 0; + } + } +@@ -586,10 +583,11 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo + * order to avoid leaving the memory range in a "broken" state. Setting + * the PRESENT bits shouldn't fail, but return an error if it does. + */ +- if (set_memory_p(kbuffer, pagecount)) +- result = false; ++ err = set_memory_p(kbuffer, pagecount); ++ if (err && !ret) ++ ret = err; + +- return result; ++ return ret; + } + + static bool hv_vtom_tlb_flush_required(bool private) +diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h +index 6149eabe200f5..28ac3cb9b987b 100644 +--- a/arch/x86/include/asm/x86_init.h ++++ b/arch/x86/include/asm/x86_init.h +@@ -151,8 +151,8 @@ struct x86_init_acpi { + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + */ + struct x86_guest { +- bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); +- bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); ++ int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); ++ int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); + }; +diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c +index 5474a6fdd6895..3f95e32dd288e 100644 +--- a/arch/x86/kernel/x86_init.c ++++ b/arch/x86/kernel/x86_init.c +@@ -135,8 +135,8 @@ struct x86_cpuinit_ops x86_cpuinit = { + + static void default_nmi_init(void) { }; + +-static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } +-static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; } ++static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return 0; } ++static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; } + static bool enc_tlb_flush_required_noop(bool enc) { return false; } + static bool enc_cache_flush_required_noop(void) { return false; } + static bool is_private_mmio_noop(u64 addr) {return false; } +diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c +index 422602f6039b8..e7b67519ddb5d 100644 +--- a/arch/x86/mm/mem_encrypt_amd.c ++++ b/arch/x86/mm/mem_encrypt_amd.c +@@ -283,7 +283,7 @@ static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) + #endif + } + +-static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) ++static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) + { + 
/* + * To maintain the security guarantees of SEV-SNP guests, make sure +@@ -292,11 +292,11 @@ static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc) + snp_set_memory_shared(vaddr, npages); + +- return true; ++ return 0; + } + + /* Return true unconditionally: return value doesn't matter for the SEV side */ +-static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) ++static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) + { + /* + * After memory is mapped encrypted in the page table, validate it +@@ -308,7 +308,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e + if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc); + +- return true; ++ return 0; + } + + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) +diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c +index 19fdfbb171ed6..498812f067cd5 100644 +--- a/arch/x86/mm/pat/set_memory.c ++++ b/arch/x86/mm/pat/set_memory.c +@@ -2196,7 +2196,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) + cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); + + /* Notify hypervisor that we are about to set/clr encryption attribute. */ +- if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc)) ++ ret = x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); ++ if (ret) + goto vmm_fail; + + ret = __change_page_attr_set_clr(&cpa, 1); +@@ -2214,16 +2215,17 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) + return ret; + + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ +- if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc)) ++ ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc); ++ if (ret) + goto vmm_fail; + + return 0; + + vmm_fail: +- WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s.\n", +- (void *)addr, numpages, enc ? "private" : "shared"); ++ WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s: %d\n", ++ (void *)addr, numpages, enc ? "private" : "shared", ret); + +- return -EIO; ++ return ret; + } + + static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) +-- +2.43.0 + diff --git a/queue-6.10/x86-tdx-account-shared-memory.patch b/queue-6.10/x86-tdx-account-shared-memory.patch new file mode 100644 index 00000000000..e1d0dd15eb2 --- /dev/null +++ b/queue-6.10/x86-tdx-account-shared-memory.patch @@ -0,0 +1,62 @@ +From 8066e22947aa074499e766c86d887e3809015968 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jun 2024 12:58:54 +0300 +Subject: x86/tdx: Account shared memory + +From: Kirill A. Shutemov + +[ Upstream commit c3abbf1376874f0d6eb22859a8655831644efa42 ] + +The kernel will convert all shared memory back to private during kexec. +The direct mapping page tables will provide information on which memory +is shared. + +It is extremely important to convert all shared memory. If a page is +missed, it will cause the second kernel to crash when it accesses it. + +Keep track of the number of shared pages. This will allow for +cross-checking against the shared information in the direct mapping and +reporting if the shared bit is lost. + +Memory conversion is slow and does not happen often. Global atomic is +not going to be a bottleneck. 
+ +Signed-off-by: Kirill A. Shutemov +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Kai Huang +Tested-by: Tao Liu +Link: https://lore.kernel.org/r/20240614095904.1345461-10-kirill.shutemov@linux.intel.com +Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check") +Signed-off-by: Sasha Levin +--- + arch/x86/coco/tdx/tdx.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c +index fdcc081317764..729ef77b65865 100644 +--- a/arch/x86/coco/tdx/tdx.c ++++ b/arch/x86/coco/tdx/tdx.c +@@ -38,6 +38,8 @@ + + #define TDREPORT_SUBTYPE_0 0 + ++static atomic_long_t nr_shared; ++ + /* Called from __tdx_hypercall() for unrecoverable failure */ + noinstr void __noreturn __tdx_hypercall_failed(void) + { +@@ -820,6 +822,11 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc)) + return -EIO; + ++ if (enc) ++ atomic_long_sub(numpages, &nr_shared); ++ else ++ atomic_long_add(numpages, &nr_shared); ++ + return 0; + } + +-- +2.43.0 + diff --git a/queue-6.10/x86-tdx-convert-shared-memory-back-to-private-on-kex.patch b/queue-6.10/x86-tdx-convert-shared-memory-back-to-private-on-kex.patch new file mode 100644 index 00000000000..d83bfce6054 --- /dev/null +++ b/queue-6.10/x86-tdx-convert-shared-memory-back-to-private-on-kex.patch @@ -0,0 +1,261 @@ +From 69bd596b0d0d2d6f12594826ab1ff5d9cb412f10 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jun 2024 12:58:56 +0300 +Subject: x86/tdx: Convert shared memory back to private on kexec + +From: Kirill A. Shutemov + +[ Upstream commit 859e63b789d6b17b3c64e51a0aabdc58752a0254 ] + +TDX guests allocate shared buffers to perform I/O. It is done by allocating +pages normally from the buddy allocator and converting them to shared with +set_memory_decrypted(). + +The second, kexec-ed kernel has no idea what memory is converted this way. It +only sees E820_TYPE_RAM. + +Accessing shared memory via private mapping is fatal. It leads to unrecoverable +TD exit. + +On kexec, walk direct mapping and convert all shared memory back to private. It +makes all RAM private again and second kernel may use it normally. + +The conversion occurs in two steps: stopping new conversions and unsharing all +memory. In the case of normal kexec, the stopping of conversions takes place +while scheduling is still functioning. This allows for waiting until any ongoing +conversions are finished. The second step is carried out when all CPUs except one +are inactive and interrupts are disabled. This prevents any conflicts with code +that may access shared memory. + +Signed-off-by: Kirill A. 
Shutemov +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Rick Edgecombe +Reviewed-by: Kai Huang +Tested-by: Tao Liu +Link: https://lore.kernel.org/r/20240614095904.1345461-12-kirill.shutemov@linux.intel.com +Stable-dep-of: d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check") +Signed-off-by: Sasha Levin +--- + arch/x86/coco/tdx/tdx.c | 94 +++++++++++++++++++++++++++++++ + arch/x86/include/asm/pgtable.h | 5 ++ + arch/x86/include/asm/set_memory.h | 3 + + arch/x86/mm/pat/set_memory.c | 42 +++++++++++++- + 4 files changed, 141 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c +index 729ef77b65865..da8b66dce0da5 100644 +--- a/arch/x86/coco/tdx/tdx.c ++++ b/arch/x86/coco/tdx/tdx.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -14,6 +15,7 @@ + #include + #include + #include ++#include + + /* MMIO direction */ + #define EPT_READ 0 +@@ -830,6 +832,95 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + return 0; + } + ++/* Stop new private<->shared conversions */ ++static void tdx_kexec_begin(void) ++{ ++ if (!IS_ENABLED(CONFIG_KEXEC_CORE)) ++ return; ++ ++ /* ++ * Crash kernel reaches here with interrupts disabled: can't wait for ++ * conversions to finish. ++ * ++ * If race happened, just report and proceed. ++ */ ++ if (!set_memory_enc_stop_conversion()) ++ pr_warn("Failed to stop shared<->private conversions\n"); ++} ++ ++/* Walk direct mapping and convert all shared memory back to private */ ++static void tdx_kexec_finish(void) ++{ ++ unsigned long addr, end; ++ long found = 0, shared; ++ ++ if (!IS_ENABLED(CONFIG_KEXEC_CORE)) ++ return; ++ ++ lockdep_assert_irqs_disabled(); ++ ++ addr = PAGE_OFFSET; ++ end = PAGE_OFFSET + get_max_mapped(); ++ ++ while (addr < end) { ++ unsigned long size; ++ unsigned int level; ++ pte_t *pte; ++ ++ pte = lookup_address(addr, &level); ++ size = page_level_size(level); ++ ++ if (pte && pte_decrypted(*pte)) { ++ int pages = size / PAGE_SIZE; ++ ++ /* ++ * Touching memory with shared bit set triggers implicit ++ * conversion to shared. ++ * ++ * Make sure nobody touches the shared range from ++ * now on. ++ */ ++ set_pte(pte, __pte(0)); ++ ++ /* ++ * Memory encryption state persists across kexec. ++ * If tdx_enc_status_changed() fails in the first ++ * kernel, it leaves memory in an unknown state. ++ * ++ * If that memory remains shared, accessing it in the ++ * *next* kernel through a private mapping will result ++ * in an unrecoverable guest shutdown. ++ * ++ * The kdump kernel boot is not impacted as it uses ++ * a pre-reserved memory range that is always private. ++ * However, gathering crash information could lead to ++ * a crash if it accesses unconverted memory through ++ * a private mapping which is possible when accessing ++ * that memory through /proc/vmcore, for example. ++ * ++ * In all cases, print error info in order to leave ++ * enough bread crumbs for debugging. 
++ */ ++ if (!tdx_enc_status_changed(addr, pages, true)) { ++ pr_err("Failed to unshare range %#lx-%#lx\n", ++ addr, addr + size); ++ } ++ ++ found += pages; ++ } ++ ++ addr += size; ++ } ++ ++ __flush_tlb_all(); ++ ++ shared = atomic_long_read(&nr_shared); ++ if (shared != found) { ++ pr_err("shared page accounting is off\n"); ++ pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found); ++ } ++} ++ + void __init tdx_early_init(void) + { + struct tdx_module_args args = { +@@ -889,6 +980,9 @@ void __init tdx_early_init(void) + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + ++ x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; ++ x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; ++ + /* + * TDX intercepts the RDMSR to read the X2APIC ID in the parallel + * bringup low level code. That raises #VE which cannot be handled +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 65b8e5bb902cc..e39311a89bf47 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte) + return pte_flags(pte) & _PAGE_ACCESSED; + } + ++static inline bool pte_decrypted(pte_t pte) ++{ ++ return cc_mkdec(pte_val(pte)) == pte_val(pte); ++} ++ + #define pmd_dirty pmd_dirty + static inline bool pmd_dirty(pmd_t pmd) + { +diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h +index 9aee31862b4a8..4b2abce2e3e7d 100644 +--- a/arch/x86/include/asm/set_memory.h ++++ b/arch/x86/include/asm/set_memory.h +@@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages); + int set_memory_np(unsigned long addr, int numpages); + int set_memory_p(unsigned long addr, int numpages); + int set_memory_4k(unsigned long addr, int numpages); ++ ++bool set_memory_enc_stop_conversion(void); + int set_memory_encrypted(unsigned long addr, int numpages); + int set_memory_decrypted(unsigned long addr, int numpages); ++ + int set_memory_np_noalias(unsigned long addr, int numpages); + int set_memory_nonglobal(unsigned long addr, int numpages); + int set_memory_global(unsigned long addr, int numpages); +diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c +index 498812f067cd5..1356e25e6d125 100644 +--- a/arch/x86/mm/pat/set_memory.c ++++ b/arch/x86/mm/pat/set_memory.c +@@ -2228,12 +2228,48 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) + return ret; + } + ++/* ++ * The lock serializes conversions between private and shared memory. ++ * ++ * It is taken for read on conversion. A write lock guarantees that no ++ * concurrent conversions are in progress. ++ */ ++static DECLARE_RWSEM(mem_enc_lock); ++ ++/* ++ * Stop new private<->shared conversions. ++ * ++ * Taking the exclusive mem_enc_lock waits for in-flight conversions to complete. ++ * The lock is not released to prevent new conversions from being started. ++ */ ++bool set_memory_enc_stop_conversion(void) ++{ ++ /* ++ * In a crash scenario, sleep is not allowed. Try to take the lock. ++ * Failure indicates that there is a race with the conversion. 
++ */ ++ if (oops_in_progress) ++ return down_write_trylock(&mem_enc_lock); ++ ++ down_write(&mem_enc_lock); ++ ++ return true; ++} ++ + static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) + { +- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) +- return __set_memory_enc_pgtable(addr, numpages, enc); ++ int ret = 0; + +- return 0; ++ if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { ++ if (!down_read_trylock(&mem_enc_lock)) ++ return -EBUSY; ++ ++ ret = __set_memory_enc_pgtable(addr, numpages, enc); ++ ++ up_read(&mem_enc_lock); ++ } ++ ++ return ret; + } + + int set_memory_encrypted(unsigned long addr, int numpages) +-- +2.43.0 + diff --git a/queue-6.10/x86-tdx-fix-in-kernel-mmio-check.patch b/queue-6.10/x86-tdx-fix-in-kernel-mmio-check.patch new file mode 100644 index 00000000000..06aeb597fca --- /dev/null +++ b/queue-6.10/x86-tdx-fix-in-kernel-mmio-check.patch @@ -0,0 +1,60 @@ +From 602d64b8731b4e2df83b0f8416a0539a22775a59 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Sep 2024 19:05:56 +0200 +Subject: x86/tdx: Fix "in-kernel MMIO" check + +From: Alexey Gladkov (Intel) + +[ Upstream commit d4fc4d01471528da8a9797a065982e05090e1d81 ] + +TDX only supports kernel-initiated MMIO operations. The handle_mmio() +function checks if the #VE exception occurred in the kernel and rejects +the operation if it did not. + +However, userspace can deceive the kernel into performing MMIO on its +behalf. For example, if userspace can point a syscall to an MMIO address, +syscall does get_user() or put_user() on it, triggering MMIO #VE. The +kernel will treat the #VE as in-kernel MMIO. + +Ensure that the target MMIO address is within the kernel before decoding +instruction. + +Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO") +Signed-off-by: Alexey Gladkov (Intel) +Signed-off-by: Dave Hansen +Reviewed-by: Kirill A. Shutemov +Acked-by: Dave Hansen +Cc:stable@vger.kernel.org +Link: https://lore.kernel.org/all/565a804b80387970460a4ebc67c88d1380f61ad1.1726237595.git.legion%40kernel.org +Signed-off-by: Sasha Levin +--- + arch/x86/coco/tdx/tdx.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c +index da8b66dce0da5..327c45c5013fe 100644 +--- a/arch/x86/coco/tdx/tdx.c ++++ b/arch/x86/coco/tdx/tdx.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + /* MMIO direction */ + #define EPT_READ 0 +@@ -433,6 +434,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) + return -EINVAL; + } + ++ if (!fault_in_kernel_space(ve->gla)) { ++ WARN_ONCE(1, "Access to userspace address is not supported"); ++ return -EINVAL; ++ } ++ + /* + * Reject EPT violation #VEs that split pages. + * +-- +2.43.0 + diff --git a/queue-6.10/xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch b/queue-6.10/xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch new file mode 100644 index 00000000000..f9692dab424 --- /dev/null +++ b/queue-6.10/xhci-add-a-quirk-for-writing-erst-in-high-low-order.patch @@ -0,0 +1,65 @@ +From 525222814f03c36309bea0992d0e53f045f9031a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jun 2024 20:39:12 +0900 +Subject: xhci: Add a quirk for writing ERST in high-low order + +From: Daehwan Jung + +[ Upstream commit bc162403e33e1d57e40994977acaf19f1434e460 ] + +This quirk is for the controller that has a limitation in supporting +separate ERSTBA_HI and ERSTBA_LO programming. It's supported when +the ERSTBA is programmed ERSTBA_HI before ERSTBA_LO. 
That's because +the internal initialization of event ring fetches the +"Event Ring Segment Table Entry" based on the indication of ERSTBA_LO +written. + +Signed-off-by: Daehwan Jung +Link: https://lore.kernel.org/r/1718019553-111939-3-git-send-email-dh10.jung@samsung.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: e5fa8db0be3e ("usb: xhci: fix loss of data on Cadence xHC") +Signed-off-by: Sasha Levin +--- + drivers/usb/host/xhci-mem.c | 5 ++++- + drivers/usb/host/xhci.h | 2 ++ + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c +index f591ddd086627..fa3ee53df0ecc 100644 +--- a/drivers/usb/host/xhci-mem.c ++++ b/drivers/usb/host/xhci-mem.c +@@ -2325,7 +2325,10 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, + erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); + erst_base &= ERST_BASE_RSVDP; + erst_base |= ir->erst.erst_dma_addr & ~ERST_BASE_RSVDP; +- xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base); ++ if (xhci->quirks & XHCI_WRITE_64_HI_LO) ++ hi_lo_writeq(erst_base, &ir->ir_set->erst_base); ++ else ++ xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base); + + /* Set the event ring dequeue address of this interrupter */ + xhci_set_hc_event_deq(xhci, ir); +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index 78d014c4d884a..5a8925474176d 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + /* Code sharing between pci-quirks and xhci hcd */ + #include "xhci-ext-caps.h" +@@ -1628,6 +1629,7 @@ struct xhci_hcd { + #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) + #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) + #define XHCI_ZHAOXIN_HOST BIT_ULL(46) ++#define XHCI_WRITE_64_HI_LO BIT_ULL(47) + + unsigned int num_active_eps; + unsigned int limit_active_eps; +-- +2.43.0 +