From: Sasha Levin Date: Sun, 12 Nov 2023 02:50:12 +0000 (-0500) Subject: Fixes for 6.1 X-Git-Tag: v4.14.330~61 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5e7d656cd16215c93f569fa6f5684d8dfb0e61ad;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/blk-core-use-pr_warn_ratelimited-in-bio_check_ro.patch b/queue-6.1/blk-core-use-pr_warn_ratelimited-in-bio_check_ro.patch new file mode 100644 index 00000000000..070e9d22935 --- /dev/null +++ b/queue-6.1/blk-core-use-pr_warn_ratelimited-in-bio_check_ro.patch @@ -0,0 +1,43 @@ +From 0542337a6f654f25f6109d7fd0f7d620af585d19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Nov 2023 19:12:47 +0800 +Subject: blk-core: use pr_warn_ratelimited() in bio_check_ro() + +From: Yu Kuai + +[ Upstream commit 1b0a151c10a6d823f033023b9fdd9af72a89591b ] + +If one of the underlying disks of raid or dm is set to read-only, then +each io will generate new log, which will cause message storm. This +environment is indeed problematic, however we can't make sure our +naive custormer won't do this, hence use pr_warn_ratelimited() to +prevent message storm in this case. + +Signed-off-by: Yu Kuai +Fixes: 57e95e4670d1 ("block: fix and cleanup bio_check_ro") +Signed-off-by: Ye Bin +Link: https://lore.kernel.org/r/20231107111247.2157820-1-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index ebb7a1689b261..6eaf2b0ad7cca 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -490,8 +490,8 @@ static inline void bio_check_ro(struct bio *bio) + if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return; +- pr_warn("Trying to write to read-only block-device %pg\n", +- bio->bi_bdev); ++ pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", ++ bio->bi_bdev); + /* Older lvm-tools actually trigger this */ + } + } +-- +2.42.0 + diff --git a/queue-6.1/bpf-check-map-usercnt-after-timer-timer-is-assigned.patch b/queue-6.1/bpf-check-map-usercnt-after-timer-timer-is-assigned.patch new file mode 100644 index 00000000000..03c599e17c1 --- /dev/null +++ b/queue-6.1/bpf-check-map-usercnt-after-timer-timer-is-assigned.patch @@ -0,0 +1,113 @@ +From ef364ef17a004ff8e3a33e4168c585acfdfa4568 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 14:36:16 +0800 +Subject: bpf: Check map->usercnt after timer->timer is assigned + +From: Hou Tao + +[ Upstream commit fd381ce60a2d79cc967506208085336d3d268ae0 ] + +When there are concurrent uref release and bpf timer init operations, +the following sequence diagram is possible. It will break the guarantee +provided by bpf_timer: bpf_timer will still be alive after userspace +application releases or unpins the map. It also will lead to kmemleak +for old kernel version which doesn't release bpf_timer when map is +released. 
+ +bpf program X: + +bpf_timer_init() + lock timer->lock + read timer->timer as NULL + read map->usercnt != 0 + + process Y: + + close(map_fd) + // put last uref + bpf_map_put_uref() + atomic_dec_and_test(map->usercnt) + array_map_free_timers() + bpf_timer_cancel_and_free() + // just return + read timer->timer is NULL + + t = bpf_map_kmalloc_node() + timer->timer = t + unlock timer->lock + +Fix the problem by checking map->usercnt after timer->timer is assigned, +so when there are concurrent uref release and bpf timer init, either +bpf_timer_cancel_and_free() from uref release reads a no-NULL timer +or the newly-added atomic64_read() returns a zero usercnt. + +Because atomic_dec_and_test(map->usercnt) and READ_ONCE(timer->timer) +in bpf_timer_cancel_and_free() are not protected by a lock, so add +a memory barrier to guarantee the order between map->usercnt and +timer->timer. Also use WRITE_ONCE(timer->timer, x) to match the lockless +read of timer->timer in bpf_timer_cancel_and_free(). + +Reported-by: Hsin-Wei Hung +Closes: https://lore.kernel.org/bpf/CABcoxUaT2k9hWsS1tNgXyoU3E-=PuOgMn737qK984fbFmfYixQ@mail.gmail.com +Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") +Signed-off-by: Hou Tao +Link: https://lore.kernel.org/r/20231030063616.1653024-1-houtao@huaweicloud.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/helpers.c | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + +diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c +index a6b04faed282b..6212e4ae084bb 100644 +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -1156,13 +1156,6 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map + ret = -EBUSY; + goto out; + } +- if (!atomic64_read(&map->usercnt)) { +- /* maps with timers must be either held by user space +- * or pinned in bpffs. +- */ +- ret = -EPERM; +- goto out; +- } + /* allocate hrtimer via map_kmalloc to use memcg accounting */ + t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); + if (!t) { +@@ -1175,7 +1168,21 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map + rcu_assign_pointer(t->callback_fn, NULL); + hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); + t->timer.function = bpf_timer_cb; +- timer->timer = t; ++ WRITE_ONCE(timer->timer, t); ++ /* Guarantee the order between timer->timer and map->usercnt. So ++ * when there are concurrent uref release and bpf timer init, either ++ * bpf_timer_cancel_and_free() called by uref release reads a no-NULL ++ * timer or atomic64_read() below returns a zero usercnt. ++ */ ++ smp_mb(); ++ if (!atomic64_read(&map->usercnt)) { ++ /* maps with timers must be either held by user space ++ * or pinned in bpffs. ++ */ ++ WRITE_ONCE(timer->timer, NULL); ++ kfree(t); ++ ret = -EPERM; ++ } + out: + __bpf_spin_unlock_irqrestore(&timer->lock); + return ret; +@@ -1343,7 +1350,7 @@ void bpf_timer_cancel_and_free(void *val) + /* The subsequent bpf_timer_start/cancel() helpers won't be able to use + * this timer, since it won't be initialized. 
+ */ +- timer->timer = NULL; ++ WRITE_ONCE(timer->timer, NULL); + out: + __bpf_spin_unlock_irqrestore(&timer->lock); + if (!t) +-- +2.42.0 + diff --git a/queue-6.1/dccp-call-security_inet_conn_request-after-setting-i.patch b/queue-6.1/dccp-call-security_inet_conn_request-after-setting-i.patch new file mode 100644 index 00000000000..6b935eed89d --- /dev/null +++ b/queue-6.1/dccp-call-security_inet_conn_request-after-setting-i.patch @@ -0,0 +1,59 @@ +From d55df215ba76d0171b2e4892577406bf603ac157 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 13:10:41 -0700 +Subject: dccp: Call security_inet_conn_request() after setting IPv4 addresses. + +From: Kuniyuki Iwashima + +[ Upstream commit fa2df45af13091f76b89adb84a28f13818d5d631 ] + +Initially, commit 4237c75c0a35 ("[MLSXFRM]: Auto-labeling of child +sockets") introduced security_inet_conn_request() in some functions +where reqsk is allocated. The hook is added just after the allocation, +so reqsk's IPv4 remote address was not initialised then. + +However, SELinux/Smack started to read it in netlbl_req_setattr() +after the cited commits. + +This bug was partially fixed by commit 284904aa7946 ("lsm: Relocate +the IPv4 security_inet_conn_request() hooks"). + +This patch fixes the last bug in DCCPv4. + +Fixes: 389fb800ac8b ("netlabel: Label incoming TCP connections correctly in SELinux") +Fixes: 07feee8f812f ("netlabel: Cleanup the Smack/NetLabel code to fix incoming TCP connections") +Signed-off-by: Kuniyuki Iwashima +Acked-by: Paul Moore +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/dccp/ipv4.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c +index 247179d4c8865..9fe6d96797169 100644 +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -628,9 +628,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) + if (dccp_parse_options(sk, dreq, skb)) + goto drop_and_free; + +- if (security_inet_conn_request(sk, skb, req)) +- goto drop_and_free; +- + ireq = inet_rsk(req); + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); +@@ -638,6 +635,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) + ireq->ireq_family = AF_INET; + ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); + ++ if (security_inet_conn_request(sk, skb, req)) ++ goto drop_and_free; ++ + /* + * Step 3: Process LISTEN state + * +-- +2.42.0 + diff --git a/queue-6.1/dccp-tcp-call-security_inet_conn_request-after-setti.patch b/queue-6.1/dccp-tcp-call-security_inet_conn_request-after-setti.patch new file mode 100644 index 00000000000..0bc57bec00e --- /dev/null +++ b/queue-6.1/dccp-tcp-call-security_inet_conn_request-after-setti.patch @@ -0,0 +1,85 @@ +From 41c12a435b1d8d7850233fc50e0898436c63ae67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 13:10:42 -0700 +Subject: dccp/tcp: Call security_inet_conn_request() after setting IPv6 + addresses. + +From: Kuniyuki Iwashima + +[ Upstream commit 23be1e0e2a83a8543214d2599a31d9a2185a796b ] + +Initially, commit 4237c75c0a35 ("[MLSXFRM]: Auto-labeling of child +sockets") introduced security_inet_conn_request() in some functions +where reqsk is allocated. The hook is added just after the allocation, +so reqsk's IPv6 remote address was not initialised then. + +However, SELinux/Smack started to read it in netlbl_req_setattr() +after commit e1adea927080 ("calipso: Allow request sockets to be +relabelled by the lsm."). 
+ +Commit 284904aa7946 ("lsm: Relocate the IPv4 security_inet_conn_request() +hooks") fixed that kind of issue only in TCPv4 because IPv6 labeling was +not supported at that time. Finally, the same issue was introduced again +in IPv6. + +Let's apply the same fix on DCCPv6 and TCPv6. + +Fixes: e1adea927080 ("calipso: Allow request sockets to be relabelled by the lsm.") +Signed-off-by: Kuniyuki Iwashima +Acked-by: Paul Moore +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/dccp/ipv6.c | 6 +++--- + net/ipv6/syncookies.c | 7 ++++--- + 2 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c +index 6fb34eaf1237a..e0b0bf75a46c2 100644 +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -359,15 +359,15 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) + if (dccp_parse_options(sk, dreq, skb)) + goto drop_and_free; + +- if (security_inet_conn_request(sk, skb, req)) +- goto drop_and_free; +- + ireq = inet_rsk(req); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ireq->ireq_family = AF_INET6; + ireq->ir_mark = inet_request_mark(sk, skb); + ++ if (security_inet_conn_request(sk, skb, req)) ++ goto drop_and_free; ++ + if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { +diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c +index 5014aa6634527..8698b49dfc8de 100644 +--- a/net/ipv6/syncookies.c ++++ b/net/ipv6/syncookies.c +@@ -180,14 +180,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) + treq = tcp_rsk(req); + treq->tfo_listener = false; + +- if (security_inet_conn_request(sk, skb, req)) +- goto out_free; +- + req->mss = mss; + ireq->ir_rmt_port = th->source; + ireq->ir_num = ntohs(th->dest); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ++ ++ if (security_inet_conn_request(sk, skb, req)) ++ goto out_free; ++ + if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { +-- +2.42.0 + diff --git a/queue-6.1/fix-termination-state-for-idr_for_each_entry_ul.patch b/queue-6.1/fix-termination-state-for-idr_for_each_entry_ul.patch new file mode 100644 index 00000000000..b8398c8b19a --- /dev/null +++ b/queue-6.1/fix-termination-state-for-idr_for_each_entry_ul.patch @@ -0,0 +1,64 @@ +From 3be08cb79ea133375cf3b3ab3bd02118f119bb6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 Oct 2023 09:53:33 +1100 +Subject: Fix termination state for idr_for_each_entry_ul() + +From: NeilBrown + +[ Upstream commit e8ae8ad479e2d037daa33756e5e72850a7bd37a9 ] + +The comment for idr_for_each_entry_ul() states + + after normal termination @entry is left with the value NULL + +This is not correct in the case where UINT_MAX has an entry in the idr. +In that case @entry will be non-NULL after termination. +No current code depends on the documentation being correct, but to +save future code we should fix it. + +Also fix idr_for_each_entry_continue_ul(). While this is not documented +as leaving @entry as NULL, the mellanox driver appears to depend on +it doing so. So make that explicit in the documentation as well as in +the code. + +Fixes: e33d2b74d805 ("idr: fix overflow case for idr_for_each_entry_ul()") +Cc: Matthew Wilcox +Cc: Chris Mi +Cc: Cong Wang +Signed-off-by: NeilBrown +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + include/linux/idr.h | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/include/linux/idr.h b/include/linux/idr.h +index a0dce14090a9e..da5f5fa4a3a6a 100644 +--- a/include/linux/idr.h ++++ b/include/linux/idr.h +@@ -200,7 +200,7 @@ static inline void idr_preload_end(void) + */ + #define idr_for_each_entry_ul(idr, entry, tmp, id) \ + for (tmp = 0, id = 0; \ +- tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ ++ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ + tmp = id, ++id) + + /** +@@ -224,10 +224,12 @@ static inline void idr_preload_end(void) + * @id: Entry ID. + * + * Continue to iterate over entries, continuing after the current position. ++ * After normal termination @entry is left with the value NULL. This ++ * is convenient for a "not found" value. + */ + #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ + for (tmp = id; \ +- tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ ++ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ + tmp = id, ++id) + + /* +-- +2.42.0 + diff --git a/queue-6.1/hsr-prevent-use-after-free-in-prp_create_tagged_fram.patch b/queue-6.1/hsr-prevent-use-after-free-in-prp_create_tagged_fram.patch new file mode 100644 index 00000000000..12aa62e5d8e --- /dev/null +++ b/queue-6.1/hsr-prevent-use-after-free-in-prp_create_tagged_fram.patch @@ -0,0 +1,42 @@ +From 9e7063b06e42ddfb71d39500f0b6e465401d09d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Oct 2023 15:19:01 +0300 +Subject: hsr: Prevent use after free in prp_create_tagged_frame() + +From: Dan Carpenter + +[ Upstream commit 876f8ab52363f649bcc74072157dfd7adfbabc0d ] + +The prp_fill_rct() function can fail. In that situation, it frees the +skb and returns NULL. Meanwhile on the success path, it returns the +original skb. So it's straight forward to fix bug by using the returned +value. 
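+
+As a rough sketch of the fixed shape (fill_trailer() and tag_frame()
+are hypothetical names used for illustration, not the actual hsr code):
+
+	static struct sk_buff *tag_frame(struct sk_buff *skb)
+	{
+		/* fill_trailer() frees skb and returns NULL on failure,
+		 * or returns skb on success.  Returning the local "skb"
+		 * after the call would hand a freed pointer back on the
+		 * error path, so propagate the helper's result instead.
+		 */
+		return fill_trailer(skb);
+	}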
+ +Fixes: 451d8123f897 ("net: prp: add packet handling support") +Signed-off-by: Dan Carpenter +Acked-by: Paolo Abeni +Link: https://lore.kernel.org/r/57af1f28-7f57-4a96-bcd3-b7a0f2340845@moroto.mountain +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/hsr/hsr_forward.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c +index b71dab630a873..80cdc6f6b34c9 100644 +--- a/net/hsr/hsr_forward.c ++++ b/net/hsr/hsr_forward.c +@@ -342,9 +342,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, + skb = skb_copy_expand(frame->skb_std, 0, + skb_tailroom(frame->skb_std) + HSR_HLEN, + GFP_ATOMIC); +- prp_fill_rct(skb, frame, port); +- +- return skb; ++ return prp_fill_rct(skb, frame, port); + } + + static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, +-- +2.42.0 + diff --git a/queue-6.1/i2c-iproc-handle-invalid-slave-state.patch b/queue-6.1/i2c-iproc-handle-invalid-slave-state.patch new file mode 100644 index 00000000000..d34e57aa197 --- /dev/null +++ b/queue-6.1/i2c-iproc-handle-invalid-slave-state.patch @@ -0,0 +1,200 @@ +From ea9042fe39247c12add88da66f6ccda2b3b6f98f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Aug 2023 14:23:51 -0700 +Subject: i2c: iproc: handle invalid slave state + +From: Roman Bacik + +[ Upstream commit ba15a14399c262f91ce30c19fcbdc952262dd1be ] + +Add the code to handle an invalid state when both bits S_RX_EVENT +(indicating a transaction) and S_START_BUSY (indicating the end +of transaction - transition of START_BUSY from 1 to 0) are set in +the interrupt status register during a slave read. + +Signed-off-by: Roman Bacik +Fixes: 1ca1b4516088 ("i2c: iproc: handle Master aborted error") +Acked-by: Ray Jui +Signed-off-by: Wolfram Sang +Signed-off-by: Sasha Levin +--- + drivers/i2c/busses/i2c-bcm-iproc.c | 133 ++++++++++++++++------------- + 1 file changed, 75 insertions(+), 58 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c +index 30a2a3200bed9..86a080f24d8a2 100644 +--- a/drivers/i2c/busses/i2c-bcm-iproc.c ++++ b/drivers/i2c/busses/i2c-bcm-iproc.c +@@ -316,26 +316,44 @@ static void bcm_iproc_i2c_slave_init( + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); + } + +-static void bcm_iproc_i2c_check_slave_status( +- struct bcm_iproc_i2c_dev *iproc_i2c) ++static bool bcm_iproc_i2c_check_slave_status ++ (struct bcm_iproc_i2c_dev *iproc_i2c, u32 status) + { + u32 val; ++ bool recover = false; + +- val = iproc_i2c_rd_reg(iproc_i2c, S_CMD_OFFSET); +- /* status is valid only when START_BUSY is cleared after it was set */ +- if (val & BIT(S_CMD_START_BUSY_SHIFT)) +- return; ++ /* check slave transmit status only if slave is transmitting */ ++ if (!iproc_i2c->slave_rx_only) { ++ val = iproc_i2c_rd_reg(iproc_i2c, S_CMD_OFFSET); ++ /* status is valid only when START_BUSY is cleared */ ++ if (!(val & BIT(S_CMD_START_BUSY_SHIFT))) { ++ val = (val >> S_CMD_STATUS_SHIFT) & S_CMD_STATUS_MASK; ++ if (val == S_CMD_STATUS_TIMEOUT || ++ val == S_CMD_STATUS_MASTER_ABORT) { ++ dev_warn(iproc_i2c->device, ++ (val == S_CMD_STATUS_TIMEOUT) ? 
++ "slave random stretch time timeout\n" : ++ "Master aborted read transaction\n"); ++ recover = true; ++ } ++ } ++ } ++ ++ /* RX_EVENT is not valid when START_BUSY is set */ ++ if ((status & BIT(IS_S_RX_EVENT_SHIFT)) && ++ (status & BIT(IS_S_START_BUSY_SHIFT))) { ++ dev_warn(iproc_i2c->device, "Slave aborted read transaction\n"); ++ recover = true; ++ } + +- val = (val >> S_CMD_STATUS_SHIFT) & S_CMD_STATUS_MASK; +- if (val == S_CMD_STATUS_TIMEOUT || val == S_CMD_STATUS_MASTER_ABORT) { +- dev_err(iproc_i2c->device, (val == S_CMD_STATUS_TIMEOUT) ? +- "slave random stretch time timeout\n" : +- "Master aborted read transaction\n"); ++ if (recover) { + /* re-initialize i2c for recovery */ + bcm_iproc_i2c_enable_disable(iproc_i2c, false); + bcm_iproc_i2c_slave_init(iproc_i2c, true); + bcm_iproc_i2c_enable_disable(iproc_i2c, true); + } ++ ++ return recover; + } + + static void bcm_iproc_i2c_slave_read(struct bcm_iproc_i2c_dev *iproc_i2c) +@@ -420,48 +438,6 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, + u32 val; + u8 value; + +- /* +- * Slave events in case of master-write, master-write-read and, +- * master-read +- * +- * Master-write : only IS_S_RX_EVENT_SHIFT event +- * Master-write-read: both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT +- * events +- * Master-read : both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT +- * events or only IS_S_RD_EVENT_SHIFT +- * +- * iproc has a slave rx fifo size of 64 bytes. Rx fifo full interrupt +- * (IS_S_RX_FIFO_FULL_SHIFT) will be generated when RX fifo becomes +- * full. This can happen if Master issues write requests of more than +- * 64 bytes. +- */ +- if (status & BIT(IS_S_RX_EVENT_SHIFT) || +- status & BIT(IS_S_RD_EVENT_SHIFT) || +- status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { +- /* disable slave interrupts */ +- val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); +- val &= ~iproc_i2c->slave_int_mask; +- iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); +- +- if (status & BIT(IS_S_RD_EVENT_SHIFT)) +- /* Master-write-read request */ +- iproc_i2c->slave_rx_only = false; +- else +- /* Master-write request only */ +- iproc_i2c->slave_rx_only = true; +- +- /* schedule tasklet to read data later */ +- tasklet_schedule(&iproc_i2c->slave_rx_tasklet); +- +- /* +- * clear only IS_S_RX_EVENT_SHIFT and +- * IS_S_RX_FIFO_FULL_SHIFT interrupt. 
+- */ +- val = BIT(IS_S_RX_EVENT_SHIFT); +- if (status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) +- val |= BIT(IS_S_RX_FIFO_FULL_SHIFT); +- iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, val); +- } + + if (status & BIT(IS_S_TX_UNDERRUN_SHIFT)) { + iproc_i2c->tx_underrun++; +@@ -493,8 +469,9 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, + * less than PKT_LENGTH bytes were output on the SMBUS + */ + iproc_i2c->slave_int_mask &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); +- iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, +- iproc_i2c->slave_int_mask); ++ val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); ++ val &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); ++ iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); + + /* End of SMBUS for Master Read */ + val = BIT(S_TX_WR_STATUS_SHIFT); +@@ -515,9 +492,49 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, + BIT(IS_S_START_BUSY_SHIFT)); + } + +- /* check slave transmit status only if slave is transmitting */ +- if (!iproc_i2c->slave_rx_only) +- bcm_iproc_i2c_check_slave_status(iproc_i2c); ++ /* if the controller has been reset, immediately return from the ISR */ ++ if (bcm_iproc_i2c_check_slave_status(iproc_i2c, status)) ++ return true; ++ ++ /* ++ * Slave events in case of master-write, master-write-read and, ++ * master-read ++ * ++ * Master-write : only IS_S_RX_EVENT_SHIFT event ++ * Master-write-read: both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT ++ * events ++ * Master-read : both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT ++ * events or only IS_S_RD_EVENT_SHIFT ++ * ++ * iproc has a slave rx fifo size of 64 bytes. Rx fifo full interrupt ++ * (IS_S_RX_FIFO_FULL_SHIFT) will be generated when RX fifo becomes ++ * full. This can happen if Master issues write requests of more than ++ * 64 bytes. ++ */ ++ if (status & BIT(IS_S_RX_EVENT_SHIFT) || ++ status & BIT(IS_S_RD_EVENT_SHIFT) || ++ status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { ++ /* disable slave interrupts */ ++ val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); ++ val &= ~iproc_i2c->slave_int_mask; ++ iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); ++ ++ if (status & BIT(IS_S_RD_EVENT_SHIFT)) ++ /* Master-write-read request */ ++ iproc_i2c->slave_rx_only = false; ++ else ++ /* Master-write request only */ ++ iproc_i2c->slave_rx_only = true; ++ ++ /* schedule tasklet to read data later */ ++ tasklet_schedule(&iproc_i2c->slave_rx_tasklet); ++ ++ /* clear IS_S_RX_FIFO_FULL_SHIFT interrupt */ ++ if (status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { ++ val = BIT(IS_S_RX_FIFO_FULL_SHIFT); ++ iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, val); ++ } ++ } + + return true; + } +-- +2.42.0 + diff --git a/queue-6.1/inet-shrink-struct-flowi_common.patch b/queue-6.1/inet-shrink-struct-flowi_common.patch new file mode 100644 index 00000000000..ca831d23b4d --- /dev/null +++ b/queue-6.1/inet-shrink-struct-flowi_common.patch @@ -0,0 +1,44 @@ +From 54f549733fa56fa6f5de1e4198c516777a13b2da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Oct 2023 14:10:37 +0000 +Subject: inet: shrink struct flowi_common + +From: Eric Dumazet + +[ Upstream commit 1726483b79a72e0150734d5367e4a0238bf8fcff ] + +I am looking at syzbot reports triggering kernel stack overflows +involving a cascade of ipvlan devices. + +We can save 8 bytes in struct flowi_common. + +This patch alone will not fix the issue, but is a start. 
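+
+The saving comes from alignment padding: flowic_tun_key holds a 64-bit
+tunnel id and is therefore 8-byte aligned, so the placement of the lone
+__u32 hash around it determines how much padding the compiler inserts.
+A minimal userspace sketch of the effect (simplified stand-in fields,
+not the exact flowi layout):
+
+	#include <stdint.h>
+	#include <stdio.h>
+
+	struct tun { uint64_t id; };	/* 8-byte aligned, like flowi_tunnel */
+
+	struct before { uint32_t uid; struct tun t; uint32_t hash; };
+	struct after  { uint32_t uid; uint32_t hash; struct tun t; };
+
+	int main(void)
+	{
+		/* on a typical 64-bit ABI this prints 24 and 16 */
+		printf("%zu %zu\n", sizeof(struct before),
+		       sizeof(struct after));
+		return 0;
+	}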
+ +Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash") +Signed-off-by: Eric Dumazet +Cc: wenxu +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20231025141037.3448203-1-edumazet@google.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/flow.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/net/flow.h b/include/net/flow.h +index 2f0da4f0318b5..079cc493fe67d 100644 +--- a/include/net/flow.h ++++ b/include/net/flow.h +@@ -39,8 +39,8 @@ struct flowi_common { + #define FLOWI_FLAG_KNOWN_NH 0x02 + __u32 flowic_secid; + kuid_t flowic_uid; +- struct flowi_tunnel flowic_tun_key; + __u32 flowic_multipath_hash; ++ struct flowi_tunnel flowic_tun_key; + }; + + union flowi_uli { +-- +2.42.0 + diff --git a/queue-6.1/input-synaptics-rmi4-fix-use-after-free-in-rmi_unreg.patch b/queue-6.1/input-synaptics-rmi4-fix-use-after-free-in-rmi_unreg.patch new file mode 100644 index 00000000000..77c203d8be0 --- /dev/null +++ b/queue-6.1/input-synaptics-rmi4-fix-use-after-free-in-rmi_unreg.patch @@ -0,0 +1,43 @@ +From 52f66df7f9d6a80f301b583c80168ff716396f9b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 29 Oct 2023 02:53:36 +0000 +Subject: Input: synaptics-rmi4 - fix use after free in + rmi_unregister_function() + +From: Dan Carpenter + +[ Upstream commit eb988e46da2e4eae89f5337e047ce372fe33d5b1 ] + +The put_device() calls rmi_release_function() which frees "fn" so the +dereference on the next line "fn->num_of_irqs" is a use after free. +Move the put_device() to the end to fix this. + +Fixes: 24d28e4f1271 ("Input: synaptics-rmi4 - convert irq distribution to irq_domain") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/706efd36-7561-42f3-adfa-dd1d0bd4f5a1@moroto.mountain +Signed-off-by: Dmitry Torokhov +Signed-off-by: Sasha Levin +--- + drivers/input/rmi4/rmi_bus.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c +index 50a0134b6901b..e6557d5f50ce5 100644 +--- a/drivers/input/rmi4/rmi_bus.c ++++ b/drivers/input/rmi4/rmi_bus.c +@@ -277,11 +277,11 @@ void rmi_unregister_function(struct rmi_function *fn) + + device_del(&fn->dev); + of_node_put(fn->dev.of_node); +- put_device(&fn->dev); + + for (i = 0; i < fn->num_of_irqs; i++) + irq_dispose_mapping(fn->irq[i]); + ++ put_device(&fn->dev); + } + + /** +-- +2.42.0 + diff --git a/queue-6.1/llc-verify-mac-len-before-reading-mac-header.patch b/queue-6.1/llc-verify-mac-len-before-reading-mac-header.patch new file mode 100644 index 00000000000..aef9eb41f5b --- /dev/null +++ b/queue-6.1/llc-verify-mac-len-before-reading-mac-header.patch @@ -0,0 +1,113 @@ +From e59a5f2fb7ab319630f42646ebae0d856244f914 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Oct 2023 19:42:38 -0400 +Subject: llc: verify mac len before reading mac header + +From: Willem de Bruijn + +[ Upstream commit 7b3ba18703a63f6fd487183b9262b08e5632da1b ] + +LLC reads the mac header with eth_hdr without verifying that the skb +has an Ethernet header. + +Syzbot was able to enter llc_rcv on a tun device. Tun can insert +packets without mac len and with user configurable skb->protocol +(passing a tun_pi header when not configuring IFF_NO_PI). 
+ + BUG: KMSAN: uninit-value in llc_station_ac_send_test_r net/llc/llc_station.c:81 [inline] + BUG: KMSAN: uninit-value in llc_station_rcv+0x6fb/0x1290 net/llc/llc_station.c:111 + llc_station_ac_send_test_r net/llc/llc_station.c:81 [inline] + llc_station_rcv+0x6fb/0x1290 net/llc/llc_station.c:111 + llc_rcv+0xc5d/0x14a0 net/llc/llc_input.c:218 + __netif_receive_skb_one_core net/core/dev.c:5523 [inline] + __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5637 + netif_receive_skb_internal net/core/dev.c:5723 [inline] + netif_receive_skb+0x58/0x660 net/core/dev.c:5782 + tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 + tun_get_user+0x54c5/0x69c0 drivers/net/tun.c:2002 + +Add a mac_len test before all three eth_hdr(skb) calls under net/llc. + +There are further uses in include/net/llc_pdu.h. All these are +protected by a test skb->protocol == ETH_P_802_2. Which does not +protect against this tun scenario. + +But the mac_len test added in this patch in llc_fixup_skb will +indirectly protect those too. That is called from llc_rcv before any +other LLC code. + +It is tempting to just add a blanket mac_len check in llc_rcv, but +not sure whether that could break valid LLC paths that do not assume +an Ethernet header. 802.2 LLC may be used on top of non-802.3 +protocols in principle. The below referenced commit shows that used +to, on top of Token Ring. + +At least one of the three eth_hdr uses goes back to before the start +of git history. But the one that syzbot exercises is introduced in +this commit. That commit is old enough (2008), that effectively all +stable kernels should receive this. + +Fixes: f83f1768f833 ("[LLC]: skb allocation size for responses") +Reported-by: syzbot+a8c7be6dee0de1b669cc@syzkaller.appspotmail.com +Signed-off-by: Willem de Bruijn +Link: https://lore.kernel.org/r/20231025234251.3796495-1-willemdebruijn.kernel@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/llc/llc_input.c | 10 ++++++++-- + net/llc/llc_s_ac.c | 3 +++ + net/llc/llc_station.c | 3 +++ + 3 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c +index 7cac441862e21..51bccfb00a9cd 100644 +--- a/net/llc/llc_input.c ++++ b/net/llc/llc_input.c +@@ -127,8 +127,14 @@ static inline int llc_fixup_skb(struct sk_buff *skb) + skb->transport_header += llc_len; + skb_pull(skb, llc_len); + if (skb->protocol == htons(ETH_P_802_2)) { +- __be16 pdulen = eth_hdr(skb)->h_proto; +- s32 data_size = ntohs(pdulen) - llc_len; ++ __be16 pdulen; ++ s32 data_size; ++ ++ if (skb->mac_len < ETH_HLEN) ++ return 0; ++ ++ pdulen = eth_hdr(skb)->h_proto; ++ data_size = ntohs(pdulen) - llc_len; + + if (data_size < 0 || + !pskb_may_pull(skb, data_size)) +diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c +index 79d1cef8f15a9..06fb8e6944b06 100644 +--- a/net/llc/llc_s_ac.c ++++ b/net/llc/llc_s_ac.c +@@ -153,6 +153,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) + int rc = 1; + u32 data_size; + ++ if (skb->mac_len < ETH_HLEN) ++ return 1; ++ + llc_pdu_decode_sa(skb, mac_da); + llc_pdu_decode_da(skb, mac_sa); + llc_pdu_decode_ssap(skb, &dsap); +diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c +index 05c6ae0920534..f506542925109 100644 +--- a/net/llc/llc_station.c ++++ b/net/llc/llc_station.c +@@ -76,6 +76,9 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) + u32 data_size; + struct sk_buff *nskb; + ++ if (skb->mac_len < ETH_HLEN) ++ goto out; ++ + /* The test request command is type U (llc_len = 3) */ + data_size 
= ntohs(eth_hdr(skb)->h_proto) - 3; + nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); +-- +2.42.0 + diff --git a/queue-6.1/nbd-fix-uaf-in-nbd_open.patch b/queue-6.1/nbd-fix-uaf-in-nbd_open.patch new file mode 100644 index 00000000000..981ba86998f --- /dev/null +++ b/queue-6.1/nbd-fix-uaf-in-nbd_open.patch @@ -0,0 +1,73 @@ +From 77347a8505e3a84797ef1b8474922e2feefe48bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Nov 2023 18:34:35 +0800 +Subject: nbd: fix uaf in nbd_open + +From: Li Lingfeng + +[ Upstream commit 327462725b0f759f093788dfbcb2f1fd132f956b ] + +Commit 4af5f2e03013 ("nbd: use blk_mq_alloc_disk and +blk_cleanup_disk") cleans up disk by blk_cleanup_disk() and it won't set +disk->private_data as NULL as before. UAF may be triggered in nbd_open() +if someone tries to open nbd device right after nbd_put() since nbd has +been free in nbd_dev_remove(). + +Fix this by implementing ->free_disk and free private data in it. + +Fixes: 4af5f2e03013 ("nbd: use blk_mq_alloc_disk and blk_cleanup_disk") +Signed-off-by: Li Lingfeng +Reviewed-by: Josef Bacik +Link: https://lore.kernel.org/r/20231107103435.2074904-1-lilingfeng@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/nbd.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index 7718c81e1dba8..e94d2ff6b1223 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -250,7 +250,6 @@ static void nbd_dev_remove(struct nbd_device *nbd) + struct gendisk *disk = nbd->disk; + + del_gendisk(disk); +- put_disk(disk); + blk_mq_free_tag_set(&nbd->tag_set); + + /* +@@ -261,7 +260,7 @@ static void nbd_dev_remove(struct nbd_device *nbd) + idr_remove(&nbd_index_idr, nbd->index); + mutex_unlock(&nbd_index_mutex); + destroy_workqueue(nbd->recv_workq); +- kfree(nbd); ++ put_disk(disk); + } + + static void nbd_dev_remove_work(struct work_struct *work) +@@ -1608,6 +1607,13 @@ static void nbd_release(struct gendisk *disk, fmode_t mode) + nbd_put(nbd); + } + ++static void nbd_free_disk(struct gendisk *disk) ++{ ++ struct nbd_device *nbd = disk->private_data; ++ ++ kfree(nbd); ++} ++ + static const struct block_device_operations nbd_fops = + { + .owner = THIS_MODULE, +@@ -1615,6 +1621,7 @@ static const struct block_device_operations nbd_fops = + .release = nbd_release, + .ioctl = nbd_ioctl, + .compat_ioctl = nbd_ioctl, ++ .free_disk = nbd_free_disk, + }; + + #if IS_ENABLED(CONFIG_DEBUG_FS) +-- +2.42.0 + diff --git a/queue-6.1/net-page_pool-add-missing-free_percpu-when-page_pool.patch b/queue-6.1/net-page_pool-add-missing-free_percpu-when-page_pool.patch new file mode 100644 index 00000000000..e9fd73636a5 --- /dev/null +++ b/queue-6.1/net-page_pool-add-missing-free_percpu-when-page_pool.patch @@ -0,0 +1,48 @@ +From f6f08cbd9ad20a06e17adec789b23e8478d73984 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 17:12:56 +0800 +Subject: net: page_pool: add missing free_percpu when page_pool_init fail + +From: Jian Shen + +[ Upstream commit 8ffbd1669ed1d58939d6e878dffaa2f60bf961a4 ] + +When ptr_ring_init() returns failure in page_pool_init(), free_percpu() +is not called to free pool->recycle_stats, which may cause memory +leak. 
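+
+The usual idiom, sketched here with a made-up foo driver rather than
+the actual page_pool code: every allocation made before a failing
+initialization step has to be unwound before the error is returned.
+
+	static int foo_init(struct foo *f)
+	{
+		/* illustration only: foo, foo_stats and ring_init() are
+		 * hypothetical; alloc_percpu()/free_percpu() are the
+		 * real percpu allocation helpers
+		 */
+		f->stats = alloc_percpu(struct foo_stats);
+		if (!f->stats)
+			return -ENOMEM;
+
+		if (ring_init(&f->ring) < 0) {
+			/* undo the earlier allocation before bailing out */
+			free_percpu(f->stats);
+			return -ENOMEM;
+		}
+
+		return 0;
+	}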
+ +Fixes: ad6fa1e1ab1b ("page_pool: Add recycle stats") +Signed-off-by: Jian Shen +Signed-off-by: Jijie Shao +Reviewed-by: Yunsheng Lin +Reviewed-by: Jiri Pirko +Reviewed-by: Somnath Kotur +Reviewed-by: Ilias Apalodimas +Link: https://lore.kernel.org/r/20231030091256.2915394-1-shaojijie@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/core/page_pool.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/core/page_pool.c b/net/core/page_pool.c +index 2396c99bedeaa..caf6d950d54ad 100644 +--- a/net/core/page_pool.c ++++ b/net/core/page_pool.c +@@ -209,8 +209,12 @@ static int page_pool_init(struct page_pool *pool, + return -ENOMEM; + #endif + +- if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) ++ if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { ++#ifdef CONFIG_PAGE_POOL_STATS ++ free_percpu(pool->recycle_stats); ++#endif + return -ENOMEM; ++ } + + atomic_set(&pool->pages_state_release_cnt, 0); + +-- +2.42.0 + diff --git a/queue-6.1/net-r8169-disable-multicast-filter-for-rtl8168h-and-.patch b/queue-6.1/net-r8169-disable-multicast-filter-for-rtl8168h-and-.patch new file mode 100644 index 00000000000..488cf72443d --- /dev/null +++ b/queue-6.1/net-r8169-disable-multicast-filter-for-rtl8168h-and-.patch @@ -0,0 +1,43 @@ +From 86470605f1916f8312661e33a42580217c7c6cb2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 16:50:14 -0400 +Subject: net: r8169: Disable multicast filter for RTL8168H and RTL8107E + +From: Patrick Thompson + +[ Upstream commit efa5f1311c4998e9e6317c52bc5ee93b3a0f36df ] + +RTL8168H and RTL8107E ethernet adapters erroneously filter unicast +eapol packets unless allmulti is enabled. These devices correspond to +RTL_GIGA_MAC_VER_46 and VER_48. Add an exception for VER_46 and VER_48 +in the same way that VER_35 has an exception. + +Fixes: 6e1d0b898818 ("r8169:add support for RTL8168H and RTL8107E") +Signed-off-by: Patrick Thompson +Reviewed-by: Jacob Keller +Reviewed-by: Heiner Kallweit +Link: https://lore.kernel.org/r/20231030205031.177855-1-ptf@google.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/r8169_main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index 94f902d8e975f..c56d3538889b6 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -2514,7 +2514,9 @@ static void rtl_set_rx_mode(struct net_device *dev) + rx_mode |= AcceptAllPhys; + } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || + dev->flags & IFF_ALLMULTI || +- tp->mac_version == RTL_GIGA_MAC_VER_35) { ++ tp->mac_version == RTL_GIGA_MAC_VER_35 || ++ tp->mac_version == RTL_GIGA_MAC_VER_46 || ++ tp->mac_version == RTL_GIGA_MAC_VER_48) { + /* accept all multicasts */ + } else if (netdev_mc_empty(dev)) { + rx_mode &= ~AcceptMulticast; +-- +2.42.0 + diff --git a/queue-6.1/net-smc-allow-cdc-msg-send-rather-than-drop-it-with-.patch b/queue-6.1/net-smc-allow-cdc-msg-send-rather-than-drop-it-with-.patch new file mode 100644 index 00000000000..fcd3c595621 --- /dev/null +++ b/queue-6.1/net-smc-allow-cdc-msg-send-rather-than-drop-it-with-.patch @@ -0,0 +1,64 @@ +From 7853abecf41949469a9d1cbeff72cfbb11a80a67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 14:07:39 +0800 +Subject: net/smc: allow cdc msg send rather than drop it with NULL sndbuf_desc + +From: D. 
Wythe + +[ Upstream commit c5bf605ba4f9d6fbbb120595ab95002f4716edcb ] + +This patch re-fix the issues mentioned by commit 22a825c541d7 +("net/smc: fix NULL sndbuf_desc in smc_cdc_tx_handler()"). + +Blocking sending message do solve the issues though, but it also +prevents the peer to receive the final message. Besides, in logic, +whether the sndbuf_desc is NULL or not have no impact on the processing +of cdc message sending. + +Hence that, this patch allows the cdc message sending but to check the +sndbuf_desc with care in smc_cdc_tx_handler(). + +Fixes: 22a825c541d7 ("net/smc: fix NULL sndbuf_desc in smc_cdc_tx_handler()") +Signed-off-by: D. Wythe +Reviewed-by: Dust Li +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/smc/smc_cdc.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c +index 01bdb7909a14b..3c06625ceb200 100644 +--- a/net/smc/smc_cdc.c ++++ b/net/smc/smc_cdc.c +@@ -28,13 +28,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, + { + struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; + struct smc_connection *conn = cdcpend->conn; ++ struct smc_buf_desc *sndbuf_desc; + struct smc_sock *smc; + int diff; + ++ sndbuf_desc = conn->sndbuf_desc; + smc = container_of(conn, struct smc_sock, conn); + bh_lock_sock(&smc->sk); +- if (!wc_status) { +- diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, ++ if (!wc_status && sndbuf_desc) { ++ diff = smc_curs_diff(sndbuf_desc->len, + &cdcpend->conn->tx_curs_fin, + &cdcpend->cursor); + /* sndbuf_space is decreased in smc_sendmsg */ +@@ -114,9 +116,6 @@ int smc_cdc_msg_send(struct smc_connection *conn, + union smc_host_cursor cfed; + int rc; + +- if (unlikely(!READ_ONCE(conn->sndbuf_desc))) +- return -ENOBUFS; +- + smc_cdc_add_pending_send(conn, pend); + + conn->tx_cdc_seq++; +-- +2.42.0 + diff --git a/queue-6.1/net-smc-fix-dangling-sock-under-state-smc_appfinclos.patch b/queue-6.1/net-smc-fix-dangling-sock-under-state-smc_appfinclos.patch new file mode 100644 index 00000000000..fdb706b5d88 --- /dev/null +++ b/queue-6.1/net-smc-fix-dangling-sock-under-state-smc_appfinclos.patch @@ -0,0 +1,111 @@ +From 5f8336187cd90bc38a9010e71ebfce68e99810aa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 14:07:38 +0800 +Subject: net/smc: fix dangling sock under state SMC_APPFINCLOSEWAIT + +From: D. Wythe + +[ Upstream commit 5211c9729484c923f8d2e06bd29f9322cc42bb8f ] + +Considering scenario: + + smc_cdc_rx_handler +__smc_release + sock_set_flag +smc_close_active() +sock_set_flag + +__set_bit(DEAD) __set_bit(DONE) + +Dues to __set_bit is not atomic, the DEAD or DONE might be lost. +if the DEAD flag lost, the state SMC_CLOSED will be never be reached +in smc_close_passive_work: + +if (sock_flag(sk, SOCK_DEAD) && + smc_close_sent_any_close(conn)) { + sk->sk_state = SMC_CLOSED; +} else { + /* just shutdown, but not yet closed locally */ + sk->sk_state = SMC_APPFINCLOSEWAIT; +} + +Replace sock_set_flags or __set_bit to set_bit will fix this problem. +Since set_bit is atomic. + +Fixes: b38d732477e4 ("smc: socket closing and linkgroup cleanup") +Signed-off-by: D. Wythe +Reviewed-by: Dust Li +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/smc/af_smc.c | 4 ++-- + net/smc/smc.h | 5 +++++ + net/smc/smc_cdc.c | 2 +- + net/smc/smc_close.c | 2 +- + 4 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index 4ea41d6e36969..d676119984c09 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -274,7 +274,7 @@ static int __smc_release(struct smc_sock *smc) + + if (!smc->use_fallback) { + rc = smc_close_active(smc); +- sock_set_flag(sk, SOCK_DEAD); ++ smc_sock_set_flag(sk, SOCK_DEAD); + sk->sk_shutdown |= SHUTDOWN_MASK; + } else { + if (sk->sk_state != SMC_CLOSED) { +@@ -1710,7 +1710,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) + if (new_clcsock) + sock_release(new_clcsock); + new_sk->sk_state = SMC_CLOSED; +- sock_set_flag(new_sk, SOCK_DEAD); ++ smc_sock_set_flag(new_sk, SOCK_DEAD); + sock_put(new_sk); /* final */ + *new_smc = NULL; + goto out; +diff --git a/net/smc/smc.h b/net/smc/smc.h +index 1d36720fc019c..bcb57e60b2155 100644 +--- a/net/smc/smc.h ++++ b/net/smc/smc.h +@@ -377,4 +377,9 @@ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb); + int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info); + int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info); + ++static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) ++{ ++ set_bit(flag, &sk->sk_flags); ++} ++ + #endif /* __SMC_H */ +diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c +index 89105e95b4523..01bdb7909a14b 100644 +--- a/net/smc/smc_cdc.c ++++ b/net/smc/smc_cdc.c +@@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, + smc->sk.sk_shutdown |= RCV_SHUTDOWN; + if (smc->clcsock && smc->clcsock->sk) + smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; +- sock_set_flag(&smc->sk, SOCK_DONE); ++ smc_sock_set_flag(&smc->sk, SOCK_DONE); + sock_hold(&smc->sk); /* sock_put in close_work */ + if (!queue_work(smc_close_wq, &conn->close_work)) + sock_put(&smc->sk); +diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c +index dbdf03e8aa5b5..449ef454b53be 100644 +--- a/net/smc/smc_close.c ++++ b/net/smc/smc_close.c +@@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc) + break; + } + +- sock_set_flag(sk, SOCK_DEAD); ++ smc_sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); + + if (release_clcsock) { +-- +2.42.0 + diff --git a/queue-6.1/net-smc-put-sk-reference-if-close-work-was-canceled.patch b/queue-6.1/net-smc-put-sk-reference-if-close-work-was-canceled.patch new file mode 100644 index 00000000000..cab4fd7b22c --- /dev/null +++ b/queue-6.1/net-smc-put-sk-reference-if-close-work-was-canceled.patch @@ -0,0 +1,40 @@ +From 2daffade9ae01e94c5c2521448a0a5f202d39365 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 14:07:40 +0800 +Subject: net/smc: put sk reference if close work was canceled + +From: D. Wythe + +[ Upstream commit aa96fbd6d78d9770323b21e2c92bd38821be8852 ] + +Note that we always hold a reference to sock when attempting +to submit close_work. Therefore, if we have successfully +canceled close_work from pending, we MUST release that reference +to avoid potential leaks. + +Fixes: 42bfba9eaa33 ("net/smc: immediate termination for SMCD link groups") +Signed-off-by: D. Wythe +Reviewed-by: Dust Li +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/smc/smc_close.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c +index 449ef454b53be..10219f55aad14 100644 +--- a/net/smc/smc_close.c ++++ b/net/smc/smc_close.c +@@ -116,7 +116,8 @@ static void smc_close_cancel_work(struct smc_sock *smc) + struct sock *sk = &smc->sk; + + release_sock(sk); +- cancel_work_sync(&smc->conn.close_work); ++ if (cancel_work_sync(&smc->conn.close_work)) ++ sock_put(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); + } +-- +2.42.0 + diff --git a/queue-6.1/net-stmmac-xgmac-enable-support-for-multiple-flexibl.patch b/queue-6.1/net-stmmac-xgmac-enable-support-for-multiple-flexibl.patch new file mode 100644 index 00000000000..b931db77663 --- /dev/null +++ b/queue-6.1/net-stmmac-xgmac-enable-support-for-multiple-flexibl.patch @@ -0,0 +1,68 @@ +From e4eaab234e08ae1b7f732d7d07c38d25ed439eca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 31 Oct 2023 10:27:29 +0800 +Subject: net: stmmac: xgmac: Enable support for multiple Flexible PPS outputs + +From: Furong Xu <0x1207@gmail.com> + +[ Upstream commit db456d90a4c1b43b6251fa4348c8adc59b583274 ] + +From XGMAC Core 3.20 and later, each Flexible PPS has individual PPSEN bit +to select Fixed mode or Flexible mode. The PPSEN must be set, or it stays +in Fixed PPS mode by default. +XGMAC Core prior 3.20, only PPSEN0(bit 4) is writable. PPSEN{1,2,3} are +read-only reserved, and they are already in Flexible mode by default, our +new code always set PPSEN{1,2,3} do not make things worse ;-) + +Fixes: 95eaf3cd0a90 ("net: stmmac: dwxgmac: Add Flexible PPS support") +Reviewed-by: Serge Semin +Reviewed-by: Jacob Keller +Signed-off-by: Furong Xu <0x1207@gmail.com> +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 2 +- + .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 14 +++++++++++++- + 2 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +index 1913385df6856..880a75bf2eb1f 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h ++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +@@ -222,7 +222,7 @@ + ((val) << XGMAC_PPS_MINIDX(x)) + #define XGMAC_PPSCMD_START 0x2 + #define XGMAC_PPSCMD_STOP 0x5 +-#define XGMAC_PPSEN0 BIT(4) ++#define XGMAC_PPSENx(x) BIT(4 + (x) * 8) + #define XGMAC_PPSx_TARGET_TIME_SEC(x) (0x00000d80 + (x) * 0x10) + #define XGMAC_PPSx_TARGET_TIME_NSEC(x) (0x00000d84 + (x) * 0x10) + #define XGMAC_TRGTBUSY0 BIT(31) +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +index c6c4d7948fe5f..f30e08a106cbe 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +@@ -1135,7 +1135,19 @@ static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index, + + val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_START); + val |= XGMAC_TRGTMODSELx(index, XGMAC_PPSCMD_START); +- val |= XGMAC_PPSEN0; ++ ++ /* XGMAC Core has 4 PPS outputs at most. ++ * ++ * Prior XGMAC Core 3.20, Fixed mode or Flexible mode are selectable for ++ * PPS0 only via PPSEN0. PPS{1,2,3} are in Flexible mode by default, ++ * and can not be switched to Fixed mode, since PPSEN{1,2,3} are ++ * read-only reserved to 0. 
++ * But we always set PPSEN{1,2,3} do not make things worse ;-) ++ * ++ * From XGMAC Core 3.20 and later, PPSEN{0,1,2,3} are writable and must ++ * be set, or the PPS outputs stay in Fixed PPS mode by default. ++ */ ++ val |= XGMAC_PPSENx(index); + + writel(cfg->start.tv_sec, ioaddr + XGMAC_PPSx_TARGET_TIME_SEC(index)); + +-- +2.42.0 + diff --git a/queue-6.1/netfilter-nat-fix-ipv6-nat-redirect-with-mapped-and-.patch b/queue-6.1/netfilter-nat-fix-ipv6-nat-redirect-with-mapped-and-.patch new file mode 100644 index 00000000000..4fb459c07e9 --- /dev/null +++ b/queue-6.1/netfilter-nat-fix-ipv6-nat-redirect-with-mapped-and-.patch @@ -0,0 +1,97 @@ +From 7f92464ecb7569ffd1203fc661d6017b7e2e85b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Nov 2023 13:18:53 +0100 +Subject: netfilter: nat: fix ipv6 nat redirect with mapped and scoped + addresses + +From: Florian Westphal + +[ Upstream commit 80abbe8a8263106fe45a4f293b92b5c74cc9cc8a ] + +The ipv6 redirect target was derived from the ipv4 one, i.e. its +identical to a 'dnat' with the first (primary) address assigned to the +network interface. The code has been moved around to make it usable +from nf_tables too, but its still the same as it was back when this +was added in 2012. + +IPv6, however, has different types of addresses, if the 'wrong' address +comes first the redirection does not work. + +In Daniels case, the addresses are: + inet6 ::ffff:192 ... + inet6 2a01: ... + +... so the function attempts to redirect to the mapped address. + +Add more checks before the address is deemed correct: +1. If the packets' daddr is scoped, search for a scoped address too +2. skip tentative addresses +3. skip mapped addresses + +Use the first address that appears to match our needs. + +Reported-by: Daniel Huhardeaux +Closes: https://lore.kernel.org/netfilter/71be06b8-6aa0-4cf9-9e0b-e2839b01b22f@tootai.net/ +Fixes: 115e23ac78f8 ("netfilter: ip6tables: add REDIRECT target") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_nat_redirect.c | 27 ++++++++++++++++++++++++++- + 1 file changed, 26 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c +index 6616ba5d0b049..5b37487d9d11f 100644 +--- a/net/netfilter/nf_nat_redirect.c ++++ b/net/netfilter/nf_nat_redirect.c +@@ -80,6 +80,26 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); + + static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + ++static bool nf_nat_redirect_ipv6_usable(const struct inet6_ifaddr *ifa, unsigned int scope) ++{ ++ unsigned int ifa_addr_type = ipv6_addr_type(&ifa->addr); ++ ++ if (ifa_addr_type & IPV6_ADDR_MAPPED) ++ return false; ++ ++ if ((ifa->flags & IFA_F_TENTATIVE) && (!(ifa->flags & IFA_F_OPTIMISTIC))) ++ return false; ++ ++ if (scope) { ++ unsigned int ifa_scope = ifa_addr_type & IPV6_ADDR_SCOPE_MASK; ++ ++ if (!(scope & ifa_scope)) ++ return false; ++ } ++ ++ return true; ++} ++ + unsigned int + nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + unsigned int hooknum) +@@ -89,14 +109,19 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + if (hooknum == NF_INET_LOCAL_OUT) { + newdst.in6 = loopback_addr; + } else { ++ unsigned int scope = ipv6_addr_scope(&ipv6_hdr(skb)->daddr); + struct inet6_dev *idev; +- struct inet6_ifaddr *ifa; + bool addr = false; + + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { ++ const struct inet6_ifaddr *ifa; ++ + read_lock_bh(&idev->lock); + 
list_for_each_entry(ifa, &idev->addr_list, if_list) { ++ if (!nf_nat_redirect_ipv6_usable(ifa, scope)) ++ continue; ++ + newdst.in6 = ifa->addr; + addr = true; + break; +-- +2.42.0 + diff --git a/queue-6.1/netfilter-nft_redir-use-struct-nf_nat_range2-through.patch b/queue-6.1/netfilter-nft_redir-use-struct-nf_nat_range2-through.patch new file mode 100644 index 00000000000..8e9c4e2b733 --- /dev/null +++ b/queue-6.1/netfilter-nft_redir-use-struct-nf_nat_range2-through.patch @@ -0,0 +1,372 @@ +From 8d64f2d44d2141b8cbca5ef6876f6e12553d3dfb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Mar 2023 21:48:01 +0000 +Subject: netfilter: nft_redir: use `struct nf_nat_range2` throughout and + deduplicate eval call-backs + +From: Jeremy Sowden + +[ Upstream commit 6f56ad1b92328997e1b1792047099df6f8d7acb5 ] + +`nf_nat_redirect_ipv4` takes a `struct nf_nat_ipv4_multi_range_compat`, +but converts it internally to a `struct nf_nat_range2`. Change the +function to take the latter, factor out the code now shared with +`nf_nat_redirect_ipv6`, move the conversion to the xt_REDIRECT module, +and update the ipv4 range initialization in the nft_redir module. + +Replace a bare hex constant for 127.0.0.1 with a macro. + +Remove `WARN_ON`. `nf_nat_setup_info` calls `nf_ct_is_confirmed`: + + /* Can't setup nat info for confirmed ct. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + +This means that `ct` cannot be null or the kernel will crash, and +implies that `ctinfo` is `IP_CT_NEW` or `IP_CT_RELATED`. + +nft_redir has separate ipv4 and ipv6 call-backs which share much of +their code, and an inet one switch containing a switch that calls one of +the others based on the family of the packet. Merge the ipv4 and ipv6 +ones into the inet one in order to get rid of the duplicate code. + +Const-qualify the `priv` pointer since we don't need to write through +it. + +Assign `priv->flags` to the range instead of OR-ing it in. + +Set the `NF_NAT_RANGE_PROTO_SPECIFIED` flag once during init, rather +than on every eval. 
+ +Signed-off-by: Jeremy Sowden +Signed-off-by: Florian Westphal +Stable-dep-of: 80abbe8a8263 ("netfilter: nat: fix ipv6 nat redirect with mapped and scoped addresses") +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_nat_redirect.h | 3 +- + net/netfilter/nf_nat_redirect.c | 71 ++++++++++----------- + net/netfilter/nft_redir.c | 84 +++++++++---------------- + net/netfilter/xt_REDIRECT.c | 10 ++- + 4 files changed, 72 insertions(+), 96 deletions(-) + +diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h +index 2418653a66db1..279380de904c8 100644 +--- a/include/net/netfilter/nf_nat_redirect.h ++++ b/include/net/netfilter/nf_nat_redirect.h +@@ -6,8 +6,7 @@ + #include + + unsigned int +-nf_nat_redirect_ipv4(struct sk_buff *skb, +- const struct nf_nat_ipv4_multi_range_compat *mr, ++nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range, + unsigned int hooknum); + unsigned int + nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, +diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c +index f91579c821e9a..6616ba5d0b049 100644 +--- a/net/netfilter/nf_nat_redirect.c ++++ b/net/netfilter/nf_nat_redirect.c +@@ -10,6 +10,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -24,54 +25,56 @@ + #include + #include + ++static unsigned int ++nf_nat_redirect(struct sk_buff *skb, const struct nf_nat_range2 *range, ++ const union nf_inet_addr *newdst) ++{ ++ struct nf_nat_range2 newrange; ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct; ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ ++ memset(&newrange, 0, sizeof(newrange)); ++ ++ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; ++ newrange.min_addr = *newdst; ++ newrange.max_addr = *newdst; ++ newrange.min_proto = range->min_proto; ++ newrange.max_proto = range->max_proto; ++ ++ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); ++} ++ + unsigned int +-nf_nat_redirect_ipv4(struct sk_buff *skb, +- const struct nf_nat_ipv4_multi_range_compat *mr, ++nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range, + unsigned int hooknum) + { +- struct nf_conn *ct; +- enum ip_conntrack_info ctinfo; +- __be32 newdst; +- struct nf_nat_range2 newrange; ++ union nf_inet_addr newdst = {}; + + WARN_ON(hooknum != NF_INET_PRE_ROUTING && + hooknum != NF_INET_LOCAL_OUT); + +- ct = nf_ct_get(skb, &ctinfo); +- WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); +- + /* Local packets: make them go to loopback */ + if (hooknum == NF_INET_LOCAL_OUT) { +- newdst = htonl(0x7F000001); ++ newdst.ip = htonl(INADDR_LOOPBACK); + } else { + const struct in_device *indev; + +- newdst = 0; +- + indev = __in_dev_get_rcu(skb->dev); + if (indev) { + const struct in_ifaddr *ifa; + + ifa = rcu_dereference(indev->ifa_list); + if (ifa) +- newdst = ifa->ifa_local; ++ newdst.ip = ifa->ifa_local; + } + +- if (!newdst) ++ if (!newdst.ip) + return NF_DROP; + } + +- /* Transfer from original range. */ +- memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); +- memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); +- newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; +- newrange.min_addr.ip = newdst; +- newrange.max_addr.ip = newdst; +- newrange.min_proto = mr->range[0].min; +- newrange.max_proto = mr->range[0].max; +- +- /* Hand modified range to generic setup. 
*/ +- return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); ++ return nf_nat_redirect(skb, range, &newdst); + } + EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); + +@@ -81,14 +84,10 @@ unsigned int + nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + unsigned int hooknum) + { +- struct nf_nat_range2 newrange; +- struct in6_addr newdst; +- enum ip_conntrack_info ctinfo; +- struct nf_conn *ct; ++ union nf_inet_addr newdst = {}; + +- ct = nf_ct_get(skb, &ctinfo); + if (hooknum == NF_INET_LOCAL_OUT) { +- newdst = loopback_addr; ++ newdst.in6 = loopback_addr; + } else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; +@@ -98,7 +97,7 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + if (idev != NULL) { + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { +- newdst = ifa->addr; ++ newdst.in6 = ifa->addr; + addr = true; + break; + } +@@ -109,12 +108,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + return NF_DROP; + } + +- newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; +- newrange.min_addr.in6 = newdst; +- newrange.max_addr.in6 = newdst; +- newrange.min_proto = range->min_proto; +- newrange.max_proto = range->max_proto; +- +- return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); ++ return nf_nat_redirect(skb, range, &newdst); + } + EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); +diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c +index 5ed64b2bd15e8..08b408d3e113d 100644 +--- a/net/netfilter/nft_redir.c ++++ b/net/netfilter/nft_redir.c +@@ -64,6 +64,8 @@ static int nft_redir_init(const struct nft_ctx *ctx, + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } ++ ++ priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + if (tb[NFTA_REDIR_FLAGS]) { +@@ -98,25 +100,37 @@ static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) + return -1; + } + +-static void nft_redir_ipv4_eval(const struct nft_expr *expr, +- struct nft_regs *regs, +- const struct nft_pktinfo *pkt) ++static void nft_redir_eval(const struct nft_expr *expr, ++ struct nft_regs *regs, ++ const struct nft_pktinfo *pkt) + { +- struct nft_redir *priv = nft_expr_priv(expr); +- struct nf_nat_ipv4_multi_range_compat mr; ++ const struct nft_redir *priv = nft_expr_priv(expr); ++ struct nf_nat_range2 range; + +- memset(&mr, 0, sizeof(mr)); ++ memset(&range, 0, sizeof(range)); ++ range.flags = priv->flags; + if (priv->sreg_proto_min) { +- mr.range[0].min.all = (__force __be16)nft_reg_load16( +- ®s->data[priv->sreg_proto_min]); +- mr.range[0].max.all = (__force __be16)nft_reg_load16( +- ®s->data[priv->sreg_proto_max]); +- mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; ++ range.min_proto.all = (__force __be16) ++ nft_reg_load16(®s->data[priv->sreg_proto_min]); ++ range.max_proto.all = (__force __be16) ++ nft_reg_load16(®s->data[priv->sreg_proto_max]); + } + +- mr.range[0].flags |= priv->flags; +- +- regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); ++ switch (nft_pf(pkt)) { ++ case NFPROTO_IPV4: ++ regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &range, ++ nft_hook(pkt)); ++ break; ++#ifdef CONFIG_NF_TABLES_IPV6 ++ case NFPROTO_IPV6: ++ regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, ++ nft_hook(pkt)); ++ break; ++#endif ++ default: ++ WARN_ON_ONCE(1); ++ break; ++ } + } + + static void +@@ -129,7 +143,7 @@ static struct nft_expr_type nft_redir_ipv4_type; + static const struct nft_expr_ops nft_redir_ipv4_ops = { + .type = 
&nft_redir_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), +- .eval = nft_redir_ipv4_eval, ++ .eval = nft_redir_eval, + .init = nft_redir_init, + .destroy = nft_redir_ipv4_destroy, + .dump = nft_redir_dump, +@@ -147,28 +161,6 @@ static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { + }; + + #ifdef CONFIG_NF_TABLES_IPV6 +-static void nft_redir_ipv6_eval(const struct nft_expr *expr, +- struct nft_regs *regs, +- const struct nft_pktinfo *pkt) +-{ +- struct nft_redir *priv = nft_expr_priv(expr); +- struct nf_nat_range2 range; +- +- memset(&range, 0, sizeof(range)); +- if (priv->sreg_proto_min) { +- range.min_proto.all = (__force __be16)nft_reg_load16( +- ®s->data[priv->sreg_proto_min]); +- range.max_proto.all = (__force __be16)nft_reg_load16( +- ®s->data[priv->sreg_proto_max]); +- range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; +- } +- +- range.flags |= priv->flags; +- +- regs->verdict.code = +- nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); +-} +- + static void + nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) + { +@@ -179,7 +171,7 @@ static struct nft_expr_type nft_redir_ipv6_type; + static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), +- .eval = nft_redir_ipv6_eval, ++ .eval = nft_redir_eval, + .init = nft_redir_init, + .destroy = nft_redir_ipv6_destroy, + .dump = nft_redir_dump, +@@ -198,20 +190,6 @@ static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { + #endif + + #ifdef CONFIG_NF_TABLES_INET +-static void nft_redir_inet_eval(const struct nft_expr *expr, +- struct nft_regs *regs, +- const struct nft_pktinfo *pkt) +-{ +- switch (nft_pf(pkt)) { +- case NFPROTO_IPV4: +- return nft_redir_ipv4_eval(expr, regs, pkt); +- case NFPROTO_IPV6: +- return nft_redir_ipv6_eval(expr, regs, pkt); +- } +- +- WARN_ON_ONCE(1); +-} +- + static void + nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) + { +@@ -222,7 +200,7 @@ static struct nft_expr_type nft_redir_inet_type; + static const struct nft_expr_ops nft_redir_inet_ops = { + .type = &nft_redir_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), +- .eval = nft_redir_inet_eval, ++ .eval = nft_redir_eval, + .init = nft_redir_init, + .destroy = nft_redir_inet_destroy, + .dump = nft_redir_dump, +diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c +index 353ca7801251a..ff66b56a3f97d 100644 +--- a/net/netfilter/xt_REDIRECT.c ++++ b/net/netfilter/xt_REDIRECT.c +@@ -46,7 +46,6 @@ static void redirect_tg_destroy(const struct xt_tgdtor_param *par) + nf_ct_netns_put(par->net, par->family); + } + +-/* FIXME: Take multiple ranges --RR */ + static int redirect_tg4_check(const struct xt_tgchk_param *par) + { + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; +@@ -65,7 +64,14 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) + static unsigned int + redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) + { +- return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); ++ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; ++ struct nf_nat_range2 range = { ++ .flags = mr->range[0].flags, ++ .min_proto = mr->range[0].min, ++ .max_proto = mr->range[0].max, ++ }; ++ ++ return nf_nat_redirect_ipv4(skb, &range, xt_hooknum(par)); + } + + static struct xt_target redirect_tg_reg[] __read_mostly = { +-- +2.42.0 + diff --git a/queue-6.1/netfilter-xt_recent-fix-increase-ipv6-literal-buffer.patch 
b/queue-6.1/netfilter-xt_recent-fix-increase-ipv6-literal-buffer.patch new file mode 100644 index 00000000000..69dfab985ac --- /dev/null +++ b/queue-6.1/netfilter-xt_recent-fix-increase-ipv6-literal-buffer.patch @@ -0,0 +1,49 @@ +From 128703facde1966f223048219a98e742fabbc063 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 5 Nov 2023 11:56:00 -0800 +Subject: netfilter: xt_recent: fix (increase) ipv6 literal buffer length +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maciej Å»enczykowski + +[ Upstream commit 7b308feb4fd2d1c06919445c65c8fbf8e9fd1781 ] + +in6_pton() supports 'low-32-bit dot-decimal representation' +(this is useful with DNS64/NAT64 networks for example): + + # echo +aaaa:bbbb:cccc:dddd:eeee:ffff:1.2.3.4 > /proc/self/net/xt_recent/DEFAULT + # cat /proc/self/net/xt_recent/DEFAULT + src=aaaa:bbbb:cccc:dddd:eeee:ffff:0102:0304 ttl: 0 last_seen: 9733848829 oldest_pkt: 1 9733848829 + +but the provided buffer is too short: + + # echo +aaaa:bbbb:cccc:dddd:eeee:ffff:255.255.255.255 > /proc/self/net/xt_recent/DEFAULT + -bash: echo: write error: Invalid argument + +Fixes: 079aa88fe717 ("netfilter: xt_recent: IPv6 support") +Signed-off-by: Maciej Å»enczykowski +Reviewed-by: Simon Horman +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/xt_recent.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c +index 7ddb9a78e3fc8..ef93e0d3bee04 100644 +--- a/net/netfilter/xt_recent.c ++++ b/net/netfilter/xt_recent.c +@@ -561,7 +561,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, + { + struct recent_table *t = pde_data(file_inode(file)); + struct recent_entry *e; +- char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; ++ char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; + const char *c = buf; + union nf_inet_addr addr = {}; + u_int16_t family; +-- +2.42.0 + diff --git a/queue-6.1/nvme-fix-error-handling-for-io_uring-nvme-passthroug.patch b/queue-6.1/nvme-fix-error-handling-for-io_uring-nvme-passthroug.patch new file mode 100644 index 00000000000..0e531487ade --- /dev/null +++ b/queue-6.1/nvme-fix-error-handling-for-io_uring-nvme-passthroug.patch @@ -0,0 +1,46 @@ +From 939f52cda3d6f05a0dcbfbbdfa7f6378eb95d8e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Oct 2023 00:54:30 +0530 +Subject: nvme: fix error-handling for io_uring nvme-passthrough + +From: Anuj Gupta + +[ Upstream commit 1147dd0503564fa0e03489a039f9e0c748a03db4 ] + +Driver may return an error before submitting the command to the device. +Ensure that such error is propagated up. 
+ +Fixes: 456cba386e94 ("nvme: wire-up uring-cmd support for io-passthru on char-device.") +Signed-off-by: Anuj Gupta +Signed-off-by: Kanchan Joshi +Reviewed-by: Niklas Cassel +Reviewed-by: Christoph Hellwig +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/ioctl.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c +index b33004a4bcb5a..91e6d03475798 100644 +--- a/drivers/nvme/host/ioctl.c ++++ b/drivers/nvme/host/ioctl.c +@@ -435,10 +435,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, + void *cookie = READ_ONCE(ioucmd->cookie); + + req->bio = pdu->bio; +- if (nvme_req(req)->flags & NVME_REQ_CANCELLED) ++ if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { + pdu->nvme_status = -EINTR; +- else ++ } else { + pdu->nvme_status = nvme_req(req)->status; ++ if (!pdu->nvme_status) ++ pdu->nvme_status = blk_status_to_errno(err); ++ } + pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); + + /* +-- +2.42.0 + diff --git a/queue-6.1/octeontx2-pf-fix-error-codes.patch b/queue-6.1/octeontx2-pf-fix-error-codes.patch new file mode 100644 index 00000000000..569d6c9787c --- /dev/null +++ b/queue-6.1/octeontx2-pf-fix-error-codes.patch @@ -0,0 +1,69 @@ +From 2e089a867a9f6ed8df0c8b4385a9e45f09c3cc30 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Oct 2023 07:49:52 +0530 +Subject: octeontx2-pf: Fix error codes + +From: Ratheesh Kannoth + +[ Upstream commit 96b9a68d1a6e4f889d453874c9e359aa720b520f ] + +Some of error codes were wrong. Fix the same. + +Fixes: 51afe9026d0c ("octeontx2-pf: NIX TX overwrites SQ_CTX_HW_S[SQ_INT]") +Signed-off-by: Ratheesh Kannoth +Reviewed-by: Wojciech Drewek +Link: https://lore.kernel.org/r/20231027021953.1819959-1-rkannoth@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../marvell/octeontx2/nic/otx2_struct.h | 34 +++++++++---------- + 1 file changed, 17 insertions(+), 17 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +index fa37b9f312cae..4e5899d8fa2e6 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +@@ -318,23 +318,23 @@ enum nix_snd_status_e { + NIX_SND_STATUS_EXT_ERR = 0x6, + NIX_SND_STATUS_JUMP_FAULT = 0x7, + NIX_SND_STATUS_JUMP_POISON = 0x8, +- NIX_SND_STATUS_CRC_ERR = 0x9, +- NIX_SND_STATUS_IMM_ERR = 0x10, +- NIX_SND_STATUS_SG_ERR = 0x11, +- NIX_SND_STATUS_MEM_ERR = 0x12, +- NIX_SND_STATUS_INVALID_SUBDC = 0x13, +- NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x14, +- NIX_SND_STATUS_DATA_FAULT = 0x15, +- NIX_SND_STATUS_DATA_POISON = 0x16, +- NIX_SND_STATUS_NPC_DROP_ACTION = 0x17, +- NIX_SND_STATUS_LOCK_VIOL = 0x18, +- NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x19, +- NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x20, +- NIX_SND_STATUS_NPC_MCAST_ABORT = 0x21, +- NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x22, +- NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x23, +- NIX_SND_STATUS_SEND_MEM_FAULT = 0x24, +- NIX_SND_STATUS_SEND_STATS_ERR = 0x25, ++ NIX_SND_STATUS_CRC_ERR = 0x10, ++ NIX_SND_STATUS_IMM_ERR = 0x11, ++ NIX_SND_STATUS_SG_ERR = 0x12, ++ NIX_SND_STATUS_MEM_ERR = 0x13, ++ NIX_SND_STATUS_INVALID_SUBDC = 0x14, ++ NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x15, ++ NIX_SND_STATUS_DATA_FAULT = 0x16, ++ NIX_SND_STATUS_DATA_POISON = 0x17, ++ NIX_SND_STATUS_NPC_DROP_ACTION = 0x20, ++ NIX_SND_STATUS_LOCK_VIOL = 0x21, ++ NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x22, ++ 
NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x23, ++ NIX_SND_STATUS_NPC_MCAST_ABORT = 0x24, ++ NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x25, ++ NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x26, ++ NIX_SND_STATUS_SEND_MEM_FAULT = 0x27, ++ NIX_SND_STATUS_SEND_STATS_ERR = 0x28, + NIX_SND_STATUS_MAX, + }; + +-- +2.42.0 + diff --git a/queue-6.1/octeontx2-pf-fix-holes-in-error-code.patch b/queue-6.1/octeontx2-pf-fix-holes-in-error-code.patch new file mode 100644 index 00000000000..df4f3e7f1ea --- /dev/null +++ b/queue-6.1/octeontx2-pf-fix-holes-in-error-code.patch @@ -0,0 +1,156 @@ +From 5a8654a938e41485de1b43de81286f0f4a47f6ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Oct 2023 07:49:53 +0530 +Subject: octeontx2-pf: Fix holes in error code + +From: Ratheesh Kannoth + +[ Upstream commit 7aeeb2cb7a2570bb69a87ad14018b03e06ce5be5 ] + +Error code strings are not getting printed properly +due to holes. Print error code as well. + +Fixes: 51afe9026d0c ("octeontx2-pf: NIX TX overwrites SQ_CTX_HW_S[SQ_INT]") +Signed-off-by: Ratheesh Kannoth +Reviewed-by: Wojciech Drewek +Link: https://lore.kernel.org/r/20231027021953.1819959-2-rkannoth@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 80 +++++++++++-------- + 1 file changed, 46 insertions(+), 34 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index 17e546d0d7e55..101d79a0bb436 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1194,31 +1194,32 @@ static char *nix_mnqerr_e_str[NIX_MNQERR_MAX] = { + }; + + static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { +- "NIX_SND_STATUS_GOOD", +- "NIX_SND_STATUS_SQ_CTX_FAULT", +- "NIX_SND_STATUS_SQ_CTX_POISON", +- "NIX_SND_STATUS_SQB_FAULT", +- "NIX_SND_STATUS_SQB_POISON", +- "NIX_SND_STATUS_HDR_ERR", +- "NIX_SND_STATUS_EXT_ERR", +- "NIX_SND_STATUS_JUMP_FAULT", +- "NIX_SND_STATUS_JUMP_POISON", +- "NIX_SND_STATUS_CRC_ERR", +- "NIX_SND_STATUS_IMM_ERR", +- "NIX_SND_STATUS_SG_ERR", +- "NIX_SND_STATUS_MEM_ERR", +- "NIX_SND_STATUS_INVALID_SUBDC", +- "NIX_SND_STATUS_SUBDC_ORDER_ERR", +- "NIX_SND_STATUS_DATA_FAULT", +- "NIX_SND_STATUS_DATA_POISON", +- "NIX_SND_STATUS_NPC_DROP_ACTION", +- "NIX_SND_STATUS_LOCK_VIOL", +- "NIX_SND_STATUS_NPC_UCAST_CHAN_ERR", +- "NIX_SND_STATUS_NPC_MCAST_CHAN_ERR", +- "NIX_SND_STATUS_NPC_MCAST_ABORT", +- "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", +- "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", +- "NIX_SND_STATUS_SEND_STATS_ERR", ++ [NIX_SND_STATUS_GOOD] = "NIX_SND_STATUS_GOOD", ++ [NIX_SND_STATUS_SQ_CTX_FAULT] = "NIX_SND_STATUS_SQ_CTX_FAULT", ++ [NIX_SND_STATUS_SQ_CTX_POISON] = "NIX_SND_STATUS_SQ_CTX_POISON", ++ [NIX_SND_STATUS_SQB_FAULT] = "NIX_SND_STATUS_SQB_FAULT", ++ [NIX_SND_STATUS_SQB_POISON] = "NIX_SND_STATUS_SQB_POISON", ++ [NIX_SND_STATUS_HDR_ERR] = "NIX_SND_STATUS_HDR_ERR", ++ [NIX_SND_STATUS_EXT_ERR] = "NIX_SND_STATUS_EXT_ERR", ++ [NIX_SND_STATUS_JUMP_FAULT] = "NIX_SND_STATUS_JUMP_FAULT", ++ [NIX_SND_STATUS_JUMP_POISON] = "NIX_SND_STATUS_JUMP_POISON", ++ [NIX_SND_STATUS_CRC_ERR] = "NIX_SND_STATUS_CRC_ERR", ++ [NIX_SND_STATUS_IMM_ERR] = "NIX_SND_STATUS_IMM_ERR", ++ [NIX_SND_STATUS_SG_ERR] = "NIX_SND_STATUS_SG_ERR", ++ [NIX_SND_STATUS_MEM_ERR] = "NIX_SND_STATUS_MEM_ERR", ++ [NIX_SND_STATUS_INVALID_SUBDC] = "NIX_SND_STATUS_INVALID_SUBDC", ++ [NIX_SND_STATUS_SUBDC_ORDER_ERR] = "NIX_SND_STATUS_SUBDC_ORDER_ERR", ++ [NIX_SND_STATUS_DATA_FAULT] = 
"NIX_SND_STATUS_DATA_FAULT", ++ [NIX_SND_STATUS_DATA_POISON] = "NIX_SND_STATUS_DATA_POISON", ++ [NIX_SND_STATUS_NPC_DROP_ACTION] = "NIX_SND_STATUS_NPC_DROP_ACTION", ++ [NIX_SND_STATUS_LOCK_VIOL] = "NIX_SND_STATUS_LOCK_VIOL", ++ [NIX_SND_STATUS_NPC_UCAST_CHAN_ERR] = "NIX_SND_STAT_NPC_UCAST_CHAN_ERR", ++ [NIX_SND_STATUS_NPC_MCAST_CHAN_ERR] = "NIX_SND_STAT_NPC_MCAST_CHAN_ERR", ++ [NIX_SND_STATUS_NPC_MCAST_ABORT] = "NIX_SND_STATUS_NPC_MCAST_ABORT", ++ [NIX_SND_STATUS_NPC_VTAG_PTR_ERR] = "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", ++ [NIX_SND_STATUS_NPC_VTAG_SIZE_ERR] = "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", ++ [NIX_SND_STATUS_SEND_MEM_FAULT] = "NIX_SND_STATUS_SEND_MEM_FAULT", ++ [NIX_SND_STATUS_SEND_STATS_ERR] = "NIX_SND_STATUS_SEND_STATS_ERR", + }; + + static irqreturn_t otx2_q_intr_handler(int irq, void *data) +@@ -1238,14 +1239,16 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) + continue; + + if (val & BIT_ULL(42)) { +- netdev_err(pf->netdev, "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", ++ netdev_err(pf->netdev, ++ "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + qidx, otx2_read64(pf, NIX_LF_ERR_INT)); + } else { + if (val & BIT_ULL(NIX_CQERRINT_DOOR_ERR)) + netdev_err(pf->netdev, "CQ%lld: Doorbell error", + qidx); + if (val & BIT_ULL(NIX_CQERRINT_CQE_FAULT)) +- netdev_err(pf->netdev, "CQ%lld: Memory fault on CQE write to LLC/DRAM", ++ netdev_err(pf->netdev, ++ "CQ%lld: Memory fault on CQE write to LLC/DRAM", + qidx); + } + +@@ -1268,7 +1271,8 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) + (val & NIX_SQINT_BITS)); + + if (val & BIT_ULL(42)) { +- netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", ++ netdev_err(pf->netdev, ++ "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + qidx, otx2_read64(pf, NIX_LF_ERR_INT)); + goto done; + } +@@ -1278,8 +1282,11 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) + goto chk_mnq_err_dbg; + + sq_op_err_code = FIELD_GET(GENMASK(7, 0), sq_op_err_dbg); +- netdev_err(pf->netdev, "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(%llx) err=%s\n", +- qidx, sq_op_err_dbg, nix_sqoperr_e_str[sq_op_err_code]); ++ netdev_err(pf->netdev, ++ "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(0x%llx) err=%s(%#x)\n", ++ qidx, sq_op_err_dbg, ++ nix_sqoperr_e_str[sq_op_err_code], ++ sq_op_err_code); + + otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, BIT_ULL(44)); + +@@ -1296,16 +1303,21 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) + goto chk_snd_err_dbg; + + mnq_err_code = FIELD_GET(GENMASK(7, 0), mnq_err_dbg); +- netdev_err(pf->netdev, "SQ%lld: NIX_LF_MNQ_ERR_DBG(%llx) err=%s\n", +- qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code]); ++ netdev_err(pf->netdev, ++ "SQ%lld: NIX_LF_MNQ_ERR_DBG(0x%llx) err=%s(%#x)\n", ++ qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code], ++ mnq_err_code); + otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, BIT_ULL(44)); + + chk_snd_err_dbg: + snd_err_dbg = otx2_read64(pf, NIX_LF_SEND_ERR_DBG); + if (snd_err_dbg & BIT(44)) { + snd_err_code = FIELD_GET(GENMASK(7, 0), snd_err_dbg); +- netdev_err(pf->netdev, "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s\n", +- qidx, snd_err_dbg, nix_snd_status_e_str[snd_err_code]); ++ netdev_err(pf->netdev, ++ "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s(%#x)\n", ++ qidx, snd_err_dbg, ++ nix_snd_status_e_str[snd_err_code], ++ snd_err_code); + otx2_write64(pf, NIX_LF_SEND_ERR_DBG, BIT_ULL(44)); + } + +-- +2.42.0 + diff --git a/queue-6.1/octeontx2-pf-free-pending-and-dropped-sqes.patch 
b/queue-6.1/octeontx2-pf-free-pending-and-dropped-sqes.patch new file mode 100644 index 00000000000..5c9a481fc26 --- /dev/null +++ b/queue-6.1/octeontx2-pf-free-pending-and-dropped-sqes.patch @@ -0,0 +1,162 @@ +From e11cc39744ac02b64560bce825fdfe5810ae0645 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 31 Oct 2023 16:53:45 +0530 +Subject: octeontx2-pf: Free pending and dropped SQEs + +From: Geetha sowjanya + +[ Upstream commit 3423ca23e08bf285a324237abe88e7e7d9becfe6 ] + +On interface down, the pending SQEs in the NIX get dropped +or drained out during SMQ flush. But skb's pointed by these +SQEs never get free or updated to the stack as respective CQE +never get added. +This patch fixes the issue by freeing all valid skb's in SQ SG list. + +Fixes: b1bc8457e9d0 ("octeontx2-pf: Cleanup all receive buffers in SG descriptor") +Signed-off-by: Geetha sowjanya +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../marvell/octeontx2/nic/otx2_common.c | 15 +++---- + .../marvell/octeontx2/nic/otx2_common.h | 1 + + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 1 + + .../marvell/octeontx2/nic/otx2_txrx.c | 42 +++++++++++++++++++ + 4 files changed, 49 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +index c76dad78c26eb..0f896f606c3e6 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -797,7 +797,6 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) + int qidx, sqe_tail, sqe_head; + struct otx2_snd_queue *sq; + u64 incr, *ptr, val; +- int timeout = 1000; + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { +@@ -806,15 +805,11 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) + continue; + + incr = (u64)qidx << 32; +- while (timeout) { +- val = otx2_atomic64_add(incr, ptr); +- sqe_head = (val >> 20) & 0x3F; +- sqe_tail = (val >> 28) & 0x3F; +- if (sqe_head == sqe_tail) +- break; +- usleep_range(1, 3); +- timeout--; +- } ++ val = otx2_atomic64_add(incr, ptr); ++ sqe_head = (val >> 20) & 0x3F; ++ sqe_tail = (val >> 28) & 0x3F; ++ if (sqe_head != sqe_tail) ++ usleep_range(50, 60); + } + } + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +index 876a7b51b8e51..efd66224b3dbf 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +@@ -933,6 +933,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en); + int otx2_txsch_alloc(struct otx2_nic *pfvf); + void otx2_txschq_stop(struct otx2_nic *pfvf); + void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq); ++void otx2_free_pending_sqe(struct otx2_nic *pfvf); + void otx2_sqb_flush(struct otx2_nic *pfvf); + int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma); +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index c558c9b64f5be..c724131172f3f 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1596,6 +1596,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) + else + otx2_cleanup_tx_cqes(pf, cq); + } ++ otx2_free_pending_sqe(pf); + + otx2_free_sq_res(pf); + +diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +index d005434e1e037..20d801d30c732 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +@@ -1224,9 +1224,11 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + + void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + { ++ int tx_pkts = 0, tx_bytes = 0; + struct sk_buff *skb = NULL; + struct otx2_snd_queue *sq; + struct nix_cqe_tx_s *cqe; ++ struct netdev_queue *txq; + int processed_cqe = 0; + struct sg_list *sg; + int qidx; +@@ -1247,12 +1249,20 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + sg = &sq->sg[cqe->comp.sqe_id]; + skb = (struct sk_buff *)sg->skb; + if (skb) { ++ tx_bytes += skb->len; ++ tx_pkts++; + otx2_dma_unmap_skb_frags(pfvf, sg); + dev_kfree_skb_any(skb); + sg->skb = (u64)NULL; + } + } + ++ if (likely(tx_pkts)) { ++ if (qidx >= pfvf->hw.tx_queues) ++ qidx -= pfvf->hw.xdp_queues; ++ txq = netdev_get_tx_queue(pfvf->netdev, qidx); ++ netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); ++ } + /* Free CQEs to HW */ + otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, + ((u64)cq->cq_idx << 32) | processed_cqe); +@@ -1279,6 +1289,38 @@ int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable) + return err; + } + ++void otx2_free_pending_sqe(struct otx2_nic *pfvf) ++{ ++ int tx_pkts = 0, tx_bytes = 0; ++ struct sk_buff *skb = NULL; ++ struct otx2_snd_queue *sq; ++ struct netdev_queue *txq; ++ struct sg_list *sg; ++ int sq_idx, sqe; ++ ++ for (sq_idx = 0; sq_idx < pfvf->hw.tx_queues; sq_idx++) { ++ sq = &pfvf->qset.sq[sq_idx]; ++ for (sqe = 0; sqe < sq->sqe_cnt; sqe++) { ++ sg = &sq->sg[sqe]; ++ skb = (struct sk_buff *)sg->skb; ++ if (skb) { ++ tx_bytes += skb->len; ++ tx_pkts++; ++ otx2_dma_unmap_skb_frags(pfvf, sg); ++ dev_kfree_skb_any(skb); ++ sg->skb = (u64)NULL; ++ } ++ } ++ ++ if (!tx_pkts) ++ continue; ++ txq = netdev_get_tx_queue(pfvf->netdev, sq_idx); ++ netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); ++ tx_pkts = 0; ++ tx_bytes = 0; ++ } ++} ++ + static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, u64 dma_addr, + int len, int *offset) + { +-- +2.42.0 + diff --git a/queue-6.1/octeontx2-pf-qos-send-queues-management.patch b/queue-6.1/octeontx2-pf-qos-send-queues-management.patch new file mode 100644 index 00000000000..da2f5e68082 --- /dev/null +++ b/queue-6.1/octeontx2-pf-qos-send-queues-management.patch @@ -0,0 +1,874 @@ +From f6a2d4a39e969d2c49f8cceb5825a7a2d740ea15 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 13 May 2023 14:21:38 +0530 +Subject: octeontx2-pf: qos send queues management + +From: Subbaraya Sundeep + +[ Upstream commit ab6dddd2a669a0ecc2ce07485c7a15fadbb5a0aa ] + +Current implementation is such that the number of Send queues (SQs) +are decided on the device probe which is equal to the number of online +cpus. These SQs are allocated and deallocated in interface open and c +lose calls respectively. + +This patch defines new APIs for initializing and deinitializing Send +queues dynamically and allocates more number of transmit queues for +QOS feature. + +Signed-off-by: Subbaraya Sundeep +Signed-off-by: Hariprasad Kelam +Signed-off-by: Sunil Kovvuri Goutham +Reviewed-by: Simon Horman +Reviewed-by: Jacob Keller +Signed-off-by: David S. 
Miller +Stable-dep-of: 3423ca23e08b ("octeontx2-pf: Free pending and dropped SQEs") +Signed-off-by: Sasha Levin +--- + .../marvell/octeontx2/af/rvu_debugfs.c | 5 + + .../ethernet/marvell/octeontx2/nic/Makefile | 2 +- + .../marvell/octeontx2/nic/otx2_common.c | 43 ++- + .../marvell/octeontx2/nic/otx2_common.h | 39 ++- + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 44 ++- + .../marvell/octeontx2/nic/otx2_txrx.c | 24 +- + .../marvell/octeontx2/nic/otx2_txrx.h | 3 +- + .../ethernet/marvell/octeontx2/nic/otx2_vf.c | 7 +- + .../net/ethernet/marvell/octeontx2/nic/qos.h | 19 ++ + .../ethernet/marvell/octeontx2/nic/qos_sq.c | 282 ++++++++++++++++++ + 10 files changed, 426 insertions(+), 42 deletions(-) + create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/qos.h + create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +index aadc352c2ffbd..5c9dc3f9262f5 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +@@ -1222,6 +1222,11 @@ static int rvu_dbg_npa_ctx_display(struct seq_file *m, void *unused, int ctype) + + for (aura = id; aura < max_id; aura++) { + aq_req.aura_id = aura; ++ ++ /* Skip if queue is uninitialized */ ++ if (ctype == NPA_AQ_CTYPE_POOL && !test_bit(aura, pfvf->pool_bmap)) ++ continue; ++ + seq_printf(m, "======%s : %d=======\n", + (ctype == NPA_AQ_CTYPE_AURA) ? "AURA" : "POOL", + aq_req.aura_id); +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +index 73fdb87986148..3d31ddf7c652e 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +@@ -8,7 +8,7 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o + + rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ + otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ +- otx2_devlink.o ++ otx2_devlink.o qos_sq.o + rvu_nicvf-y := otx2_vf.o otx2_devlink.o + + rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +index 2575c207150e1..c76dad78c26eb 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -513,8 +513,8 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx) + (pfvf->hw.cq_ecount_wait - 1)); + } + +-int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, +- dma_addr_t *dma) ++static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, ++ dma_addr_t *dma) + { + u8 *buf; + +@@ -532,8 +532,8 @@ int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + return 0; + } + +-static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, +- dma_addr_t *dma) ++int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, ++ dma_addr_t *dma) + { + int ret; + +@@ -795,11 +795,16 @@ void otx2_txschq_stop(struct otx2_nic *pfvf) + void otx2_sqb_flush(struct otx2_nic *pfvf) + { + int qidx, sqe_tail, sqe_head; ++ struct otx2_snd_queue *sq; + u64 incr, *ptr, val; + int timeout = 1000; + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); +- for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) { ++ for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { ++ sq = &pfvf->qset.sq[qidx]; 
++ if (!sq->sqb_ptrs) ++ continue; ++ + incr = (u64)qidx << 32; + while (timeout) { + val = otx2_atomic64_add(incr, ptr); +@@ -899,7 +904,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) + return otx2_sync_mbox_msg(&pfvf->mbox); + } + +-static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) ++int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) + { + struct otx2_qset *qset = &pfvf->qset; + struct otx2_snd_queue *sq; +@@ -972,9 +977,17 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) + cq->cint_idx = qidx - pfvf->hw.rx_queues; + cq->cqe_cnt = qset->sqe_cnt; + } else { +- cq->cq_type = CQ_XDP; +- cq->cint_idx = qidx - non_xdp_queues; +- cq->cqe_cnt = qset->sqe_cnt; ++ if (pfvf->hw.xdp_queues && ++ qidx < non_xdp_queues + pfvf->hw.xdp_queues) { ++ cq->cq_type = CQ_XDP; ++ cq->cint_idx = qidx - non_xdp_queues; ++ cq->cqe_cnt = qset->sqe_cnt; ++ } else { ++ cq->cq_type = CQ_QOS; ++ cq->cint_idx = qidx - non_xdp_queues - ++ pfvf->hw.xdp_queues; ++ cq->cqe_cnt = qset->sqe_cnt; ++ } + } + cq->cqe_size = pfvf->qset.xqe_size; + +@@ -1132,7 +1145,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) + + /* Set RQ/SQ/CQ counts */ + nixlf->rq_cnt = pfvf->hw.rx_queues; +- nixlf->sq_cnt = pfvf->hw.non_qos_queues; ++ nixlf->sq_cnt = otx2_get_total_tx_queues(pfvf); + nixlf->cq_cnt = pfvf->qset.cq_cnt; + nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; + nixlf->rss_grps = MAX_RSS_GROUPS; +@@ -1170,7 +1183,7 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf) + int sqb, qidx; + u64 iova, pa; + +- for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { ++ for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { + sq = &qset->sq[qidx]; + if (!sq->sqb_ptrs) + continue; +@@ -1238,8 +1251,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) + pfvf->qset.pool = NULL; + } + +-static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, +- int pool_id, int numptrs) ++int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, ++ int pool_id, int numptrs) + { + struct npa_aq_enq_req *aq; + struct otx2_pool *pool; +@@ -1315,8 +1328,8 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + return 0; + } + +-static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, +- int stack_pages, int numptrs, int buf_size) ++int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, ++ int stack_pages, int numptrs, int buf_size) + { + struct npa_aq_enq_req *aq; + struct otx2_pool *pool; +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +index 6c81d09798914..876a7b51b8e51 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +@@ -27,6 +27,7 @@ + #include "otx2_txrx.h" + #include "otx2_devlink.h" + #include ++#include "qos.h" + + /* PCI device IDs */ + #define PCI_DEVID_OCTEONTX2_RVU_PF 0xA063 +@@ -186,6 +187,7 @@ struct otx2_hw { + u16 rx_queues; + u16 tx_queues; + u16 xdp_queues; ++ u16 tc_tx_queues; + u16 non_qos_queues; /* tx queues plus xdp queues */ + u16 max_queues; + u16 pool_cnt; +@@ -498,6 +500,8 @@ struct otx2_nic { + u16 pfc_schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; + bool pfc_alloc_status[NIX_PF_PFC_PRIO_MAX]; + #endif ++ /* qos */ ++ struct otx2_qos qos; + + /* napi event count. It is needed for adaptive irq coalescing. 
*/ + u32 napi_events; +@@ -742,8 +746,7 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) + /* Alloc pointer from pool/aura */ + static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) + { +- u64 *ptr = (u64 *)otx2_get_regaddr(pfvf, +- NPA_LF_AURA_OP_ALLOCX(0)); ++ u64 *ptr = (__force u64 *)otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0)); + u64 incr = (u64)aura | BIT_ULL(63); + + return otx2_atomic64_add(incr, ptr); +@@ -885,12 +888,23 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, + + static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) + { ++ u16 smq; + #ifdef CONFIG_DCB + if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) + return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; + #endif ++ /* check if qidx falls under QOS queues */ ++ if (qidx >= pfvf->hw.non_qos_queues) ++ smq = pfvf->qos.qid_to_sqmap[qidx - pfvf->hw.non_qos_queues]; ++ else ++ smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + +- return pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; ++ return smq; ++} ++ ++static inline u16 otx2_get_total_tx_queues(struct otx2_nic *pfvf) ++{ ++ return pfvf->hw.non_qos_queues + pfvf->hw.tc_tx_queues; + } + + /* MSI-X APIs */ +@@ -920,17 +934,22 @@ int otx2_txsch_alloc(struct otx2_nic *pfvf); + void otx2_txschq_stop(struct otx2_nic *pfvf); + void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq); + void otx2_sqb_flush(struct otx2_nic *pfvf); +-int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, +- dma_addr_t *dma); ++int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, ++ dma_addr_t *dma); + int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); + void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); + int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); + void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); + void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); ++int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura); + int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); + int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); + int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, + dma_addr_t *dma); ++int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, ++ int stack_pages, int numptrs, int buf_size); ++int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, ++ int pool_id, int numptrs); + + /* RSS configuration APIs*/ + int otx2_rss_init(struct otx2_nic *pfvf); +@@ -1038,4 +1057,14 @@ static inline void cn10k_handle_mcs_event(struct otx2_nic *pfvf, + {} + #endif /* CONFIG_MACSEC */ + ++/* qos support */ ++static inline void otx2_qos_init(struct otx2_nic *pfvf, int qos_txqs) ++{ ++ struct otx2_hw *hw = &pfvf->hw; ++ ++ hw->tc_tx_queues = qos_txqs; ++} ++ ++u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, ++ struct net_device *sb_dev); + #endif /* OTX2_COMMON_H */ +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index 545984a86f235..c558c9b64f5be 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -23,6 +23,7 @@ + #include "otx2_struct.h" + #include "otx2_ptp.h" + #include "cn10k.h" ++#include "qos.h" + #include + + #define DRV_NAME "rvu_nicpf" +@@ -1225,6 +1226,7 @@ static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { + static irqreturn_t otx2_q_intr_handler(int irq, void *data) + { + struct 
otx2_nic *pf = data; ++ struct otx2_snd_queue *sq; + u64 val, *ptr; + u64 qidx = 0; + +@@ -1256,10 +1258,14 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) + } + + /* SQ */ +- for (qidx = 0; qidx < pf->hw.non_qos_queues; qidx++) { ++ for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) { + u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg; + u8 sq_op_err_code, mnq_err_code, snd_err_code; + ++ sq = &pf->qset.sq[qidx]; ++ if (!sq->sqb_ptrs) ++ continue; ++ + /* Below debug registers captures first errors corresponding to + * those registers. We don't have to check against SQ qid as + * these are fatal errors. +@@ -1391,7 +1397,7 @@ static void otx2_free_sq_res(struct otx2_nic *pf) + otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false); + /* Free SQB pointers */ + otx2_sq_free_sqbs(pf); +- for (qidx = 0; qidx < pf->hw.non_qos_queues; qidx++) { ++ for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) { + sq = &qset->sq[qidx]; + qmem_free(pf->dev, sq->sqe); + qmem_free(pf->dev, sq->tso_hdrs); +@@ -1441,7 +1447,7 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) + * so, aura count = pool count. + */ + hw->rqpool_cnt = hw->rx_queues; +- hw->sqpool_cnt = hw->non_qos_queues; ++ hw->sqpool_cnt = otx2_get_total_tx_queues(pf); + hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; + + /* Maximum hardware supported transmit length */ +@@ -1694,11 +1700,14 @@ int otx2_open(struct net_device *netdev) + + netif_carrier_off(netdev); + +- pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.non_qos_queues; + /* RQ and SQs are mapped to different CQs, + * so find out max CQ IRQs (i.e CINTs) needed. + */ +- pf->hw.cint_cnt = max(pf->hw.rx_queues, pf->hw.tx_queues); ++ pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues, ++ pf->hw.tc_tx_queues); ++ ++ pf->qset.cq_cnt = pf->hw.rx_queues + otx2_get_total_tx_queues(pf); ++ + qset->napi = kcalloc(pf->hw.cint_cnt, sizeof(*cq_poll), GFP_KERNEL); + if (!qset->napi) + return -ENOMEM; +@@ -1749,6 +1758,11 @@ int otx2_open(struct net_device *netdev) + else + cq_poll->cq_ids[CQ_XDP] = CINT_INVALID_CQ; + ++ cq_poll->cq_ids[CQ_QOS] = (qidx < pf->hw.tc_tx_queues) ? ++ (qidx + pf->hw.rx_queues + ++ pf->hw.non_qos_queues) : ++ CINT_INVALID_CQ; ++ + cq_poll->dev = (void *)pf; + cq_poll->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; + INIT_WORK(&cq_poll->dim.work, otx2_dim_work); +@@ -1953,6 +1967,12 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) + int qidx = skb_get_queue_mapping(skb); + struct otx2_snd_queue *sq; + struct netdev_queue *txq; ++ int sq_idx; ++ ++ /* XDP SQs are not mapped with TXQs ++ * advance qid to derive correct sq mapped with QOS ++ */ ++ sq_idx = (qidx >= pf->hw.tx_queues) ? 
(qidx + pf->hw.xdp_queues) : qidx; + + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || +@@ -1961,7 +1981,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) + return NETDEV_TX_OK; + } + +- sq = &pf->qset.sq[qidx]; ++ sq = &pf->qset.sq[sq_idx]; + txq = netdev_get_tx_queue(netdev, qidx); + + if (!otx2_sq_append_skb(netdev, sq, skb, qidx)) { +@@ -1979,8 +1999,8 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) + return NETDEV_TX_OK; + } + +-static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, +- struct net_device *sb_dev) ++u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, ++ struct net_device *sb_dev) + { + #ifdef CONFIG_DCB + struct otx2_nic *pf = netdev_priv(netdev); +@@ -2002,6 +2022,7 @@ static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, + #endif + return netdev_pick_tx(netdev, skb, NULL); + } ++EXPORT_SYMBOL(otx2_select_queue); + + static netdev_features_t otx2_fix_features(struct net_device *dev, + netdev_features_t features) +@@ -2715,10 +2736,10 @@ static void otx2_sriov_vfcfg_cleanup(struct otx2_nic *pf) + static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) + { + struct device *dev = &pdev->dev; ++ int err, qcount, qos_txqs; + struct net_device *netdev; + struct otx2_nic *pf; + struct otx2_hw *hw; +- int err, qcount; + int num_vec; + + err = pcim_enable_device(pdev); +@@ -2743,8 +2764,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) + + /* Set number of queues */ + qcount = min_t(int, num_online_cpus(), OTX2_MAX_CQ_CNT); ++ qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); + +- netdev = alloc_etherdev_mqs(sizeof(*pf), qcount, qcount); ++ netdev = alloc_etherdev_mqs(sizeof(*pf), qcount + qos_txqs, qcount); + if (!netdev) { + err = -ENOMEM; + goto err_release_regions; +@@ -2931,6 +2953,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) + goto err_pf_sriov_init; + #endif + ++ otx2_qos_init(pf, qos_txqs); ++ + return 0; + + err_pf_sriov_init: +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +index 5704fb75fa477..d005434e1e037 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +@@ -468,12 +468,13 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf, + break; + } + +- if (cq->cq_type == CQ_XDP) { ++ qidx = cq->cq_idx - pfvf->hw.rx_queues; ++ ++ if (cq->cq_type == CQ_XDP) + otx2_xdp_snd_pkt_handler(pfvf, sq, cqe); +- } else { +- otx2_snd_pkt_handler(pfvf, cq, sq, cqe, budget, +- &tx_pkts, &tx_bytes); +- } ++ else ++ otx2_snd_pkt_handler(pfvf, cq, &pfvf->qset.sq[qidx], ++ cqe, budget, &tx_pkts, &tx_bytes); + + cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; + processed_cqe++; +@@ -490,7 +491,11 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf, + if (likely(tx_pkts)) { + struct netdev_queue *txq; + +- txq = netdev_get_tx_queue(pfvf->netdev, cq->cint_idx); ++ qidx = cq->cq_idx - pfvf->hw.rx_queues; ++ ++ if (qidx >= pfvf->hw.tx_queues) ++ qidx -= pfvf->hw.xdp_queues; ++ txq = netdev_get_tx_queue(pfvf->netdev, qidx); + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + /* Check if queue was stopped earlier due to ring full */ + smp_mb(); +@@ -738,7 +743,8 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + sqe_hdr->aura = sq->aura_id; + /* Post a CQE 
Tx after pkt transmission */ + sqe_hdr->pnc = 1; +- sqe_hdr->sq = qidx; ++ sqe_hdr->sq = (qidx >= pfvf->hw.tx_queues) ? ++ qidx + pfvf->hw.xdp_queues : qidx; + } + sqe_hdr->total = skb->len; + /* Set SQE identifier which will be used later for freeing SKB */ +@@ -1223,8 +1229,10 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + struct nix_cqe_tx_s *cqe; + int processed_cqe = 0; + struct sg_list *sg; ++ int qidx; + +- sq = &pfvf->qset.sq[cq->cint_idx]; ++ qidx = cq->cq_idx - pfvf->hw.rx_queues; ++ sq = &pfvf->qset.sq[qidx]; + + if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) + return; +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +index 93cac2c2664c2..7ab6db9a986fa 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +@@ -102,7 +102,8 @@ enum cq_type { + CQ_RX, + CQ_TX, + CQ_XDP, +- CQS_PER_CINT = 3, /* RQ + SQ + XDP */ ++ CQ_QOS, ++ CQS_PER_CINT = 4, /* RQ + SQ + XDP + QOS_SQ */ + }; + + struct otx2_cq_poll { +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +index ad90f8f2aad1f..404855bccb4b6 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +@@ -475,6 +475,7 @@ static const struct net_device_ops otx2vf_netdev_ops = { + .ndo_open = otx2vf_open, + .ndo_stop = otx2vf_stop, + .ndo_start_xmit = otx2vf_xmit, ++ .ndo_select_queue = otx2_select_queue, + .ndo_set_rx_mode = otx2vf_set_rx_mode, + .ndo_set_mac_address = otx2_set_mac_address, + .ndo_change_mtu = otx2vf_change_mtu, +@@ -520,10 +521,10 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) + { + int num_vec = pci_msix_vec_count(pdev); + struct device *dev = &pdev->dev; ++ int err, qcount, qos_txqs; + struct net_device *netdev; + struct otx2_nic *vf; + struct otx2_hw *hw; +- int err, qcount; + + err = pcim_enable_device(pdev); + if (err) { +@@ -546,7 +547,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) + pci_set_master(pdev); + + qcount = num_online_cpus(); +- netdev = alloc_etherdev_mqs(sizeof(*vf), qcount, qcount); ++ qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); ++ netdev = alloc_etherdev_mqs(sizeof(*vf), qcount + qos_txqs, qcount); + if (!netdev) { + err = -ENOMEM; + goto err_release_regions; +@@ -695,6 +697,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) + if (err) + goto err_shutdown_tc; + #endif ++ otx2_qos_init(vf, qos_txqs); + + return 0; + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h +new file mode 100644 +index 0000000000000..73a62d092e99a +--- /dev/null ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h +@@ -0,0 +1,19 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Marvell RVU Ethernet driver ++ * ++ * Copyright (C) 2023 Marvell. 
++ * ++ */ ++#ifndef OTX2_QOS_H ++#define OTX2_QOS_H ++ ++#define OTX2_QOS_MAX_LEAF_NODES 16 ++ ++int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx, u16 smq); ++void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx, u16 mdq); ++ ++struct otx2_qos { ++ u16 qid_to_sqmap[OTX2_QOS_MAX_LEAF_NODES]; ++ }; ++ ++#endif +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +new file mode 100644 +index 0000000000000..e142d43f5a62c +--- /dev/null ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +@@ -0,0 +1,282 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Marvell RVU Physical Function ethernet driver ++ * ++ * Copyright (C) 2023 Marvell. ++ * ++ */ ++ ++#include ++#include ++ ++#include "cn10k.h" ++#include "otx2_reg.h" ++#include "otx2_common.h" ++#include "otx2_txrx.h" ++#include "otx2_struct.h" ++ ++#define OTX2_QOS_MAX_LEAF_NODES 16 ++ ++static void otx2_qos_aura_pool_free(struct otx2_nic *pfvf, int pool_id) ++{ ++ struct otx2_pool *pool; ++ ++ if (!pfvf->qset.pool) ++ return; ++ ++ pool = &pfvf->qset.pool[pool_id]; ++ qmem_free(pfvf->dev, pool->stack); ++ qmem_free(pfvf->dev, pool->fc_addr); ++ pool->stack = NULL; ++ pool->fc_addr = NULL; ++} ++ ++static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx) ++{ ++ struct otx2_qset *qset = &pfvf->qset; ++ int pool_id, stack_pages, num_sqbs; ++ struct otx2_hw *hw = &pfvf->hw; ++ struct otx2_snd_queue *sq; ++ struct otx2_pool *pool; ++ dma_addr_t bufptr; ++ int err, ptr; ++ u64 iova, pa; ++ ++ /* Calculate number of SQBs needed. ++ * ++ * For a 128byte SQE, and 4K size SQB, 31 SQEs will fit in one SQB. ++ * Last SQE is used for pointing to next SQB. ++ */ ++ num_sqbs = (hw->sqb_size / 128) - 1; ++ num_sqbs = (qset->sqe_cnt + num_sqbs) / num_sqbs; ++ ++ /* Get no of stack pages needed */ ++ stack_pages = ++ (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; ++ ++ pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); ++ pool = &pfvf->qset.pool[pool_id]; ++ ++ /* Initialize aura context */ ++ err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs); ++ if (err) ++ return err; ++ ++ /* Initialize pool context */ ++ err = otx2_pool_init(pfvf, pool_id, stack_pages, ++ num_sqbs, hw->sqb_size); ++ if (err) ++ goto aura_free; ++ ++ /* Flush accumulated messages */ ++ err = otx2_sync_mbox_msg(&pfvf->mbox); ++ if (err) ++ goto pool_free; ++ ++ /* Allocate pointers and free them to aura/pool */ ++ sq = &qset->sq[qidx]; ++ sq->sqb_count = 0; ++ sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); ++ if (!sq->sqb_ptrs) { ++ err = -ENOMEM; ++ goto pool_free; ++ } ++ ++ for (ptr = 0; ptr < num_sqbs; ptr++) { ++ err = otx2_alloc_rbuf(pfvf, pool, &bufptr); ++ if (err) ++ goto sqb_free; ++ pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); ++ sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; ++ } ++ ++ return 0; ++ ++sqb_free: ++ while (ptr--) { ++ if (!sq->sqb_ptrs[ptr]) ++ continue; ++ iova = sq->sqb_ptrs[ptr]; ++ pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); ++ dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size, ++ DMA_FROM_DEVICE, ++ DMA_ATTR_SKIP_CPU_SYNC); ++ put_page(virt_to_page(phys_to_virt(pa))); ++ otx2_aura_allocptr(pfvf, pool_id); ++ } ++ sq->sqb_count = 0; ++ kfree(sq->sqb_ptrs); ++pool_free: ++ qmem_free(pfvf->dev, pool->stack); ++aura_free: ++ qmem_free(pfvf->dev, pool->fc_addr); ++ otx2_mbox_reset(&pfvf->mbox.mbox, 0); ++ return err; ++} ++ ++static void otx2_qos_sq_free_sqbs(struct otx2_nic *pfvf, int qidx) ++{ ++ struct otx2_qset *qset = 
&pfvf->qset; ++ struct otx2_hw *hw = &pfvf->hw; ++ struct otx2_snd_queue *sq; ++ u64 iova, pa; ++ int sqb; ++ ++ sq = &qset->sq[qidx]; ++ if (!sq->sqb_ptrs) ++ return; ++ for (sqb = 0; sqb < sq->sqb_count; sqb++) { ++ if (!sq->sqb_ptrs[sqb]) ++ continue; ++ iova = sq->sqb_ptrs[sqb]; ++ pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); ++ dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size, ++ DMA_FROM_DEVICE, ++ DMA_ATTR_SKIP_CPU_SYNC); ++ put_page(virt_to_page(phys_to_virt(pa))); ++ } ++ ++ sq->sqb_count = 0; ++ ++ sq = &qset->sq[qidx]; ++ qmem_free(pfvf->dev, sq->sqe); ++ qmem_free(pfvf->dev, sq->tso_hdrs); ++ kfree(sq->sg); ++ kfree(sq->sqb_ptrs); ++ qmem_free(pfvf->dev, sq->timestamps); ++ ++ memset((void *)sq, 0, sizeof(*sq)); ++} ++ ++/* send queue id */ ++static void otx2_qos_sqb_flush(struct otx2_nic *pfvf, int qidx) ++{ ++ int sqe_tail, sqe_head; ++ u64 incr, *ptr, val; ++ ++ ptr = (__force u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); ++ incr = (u64)qidx << 32; ++ val = otx2_atomic64_add(incr, ptr); ++ sqe_head = (val >> 20) & 0x3F; ++ sqe_tail = (val >> 28) & 0x3F; ++ if (sqe_head != sqe_tail) ++ usleep_range(50, 60); ++} ++ ++static int otx2_qos_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id) ++{ ++ struct nix_cn10k_aq_enq_req *cn10k_sq_aq; ++ struct npa_aq_enq_req *aura_aq; ++ struct npa_aq_enq_req *pool_aq; ++ struct nix_aq_enq_req *sq_aq; ++ ++ if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) { ++ cn10k_sq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); ++ if (!cn10k_sq_aq) ++ return -ENOMEM; ++ cn10k_sq_aq->qidx = qidx; ++ cn10k_sq_aq->sq.ena = 0; ++ cn10k_sq_aq->sq_mask.ena = 1; ++ cn10k_sq_aq->ctype = NIX_AQ_CTYPE_SQ; ++ cn10k_sq_aq->op = NIX_AQ_INSTOP_WRITE; ++ } else { ++ sq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); ++ if (!sq_aq) ++ return -ENOMEM; ++ sq_aq->qidx = qidx; ++ sq_aq->sq.ena = 0; ++ sq_aq->sq_mask.ena = 1; ++ sq_aq->ctype = NIX_AQ_CTYPE_SQ; ++ sq_aq->op = NIX_AQ_INSTOP_WRITE; ++ } ++ ++ aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); ++ if (!aura_aq) { ++ otx2_mbox_reset(&pfvf->mbox.mbox, 0); ++ return -ENOMEM; ++ } ++ ++ aura_aq->aura_id = aura_id; ++ aura_aq->aura.ena = 0; ++ aura_aq->aura_mask.ena = 1; ++ aura_aq->ctype = NPA_AQ_CTYPE_AURA; ++ aura_aq->op = NPA_AQ_INSTOP_WRITE; ++ ++ pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); ++ if (!pool_aq) { ++ otx2_mbox_reset(&pfvf->mbox.mbox, 0); ++ return -ENOMEM; ++ } ++ ++ pool_aq->aura_id = aura_id; ++ pool_aq->pool.ena = 0; ++ pool_aq->pool_mask.ena = 1; ++ ++ pool_aq->ctype = NPA_AQ_CTYPE_POOL; ++ pool_aq->op = NPA_AQ_INSTOP_WRITE; ++ ++ return otx2_sync_mbox_msg(&pfvf->mbox); ++} ++ ++int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx, u16 smq) ++{ ++ struct otx2_hw *hw = &pfvf->hw; ++ int pool_id, sq_idx, err; ++ ++ if (pfvf->flags & OTX2_FLAG_INTF_DOWN) ++ return -EPERM; ++ ++ sq_idx = hw->non_qos_queues + qidx; ++ ++ mutex_lock(&pfvf->mbox.lock); ++ err = otx2_qos_sq_aura_pool_init(pfvf, sq_idx); ++ if (err) ++ goto out; ++ ++ pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx); ++ pfvf->qos.qid_to_sqmap[qidx] = smq; ++ err = otx2_sq_init(pfvf, sq_idx, pool_id); ++ if (err) ++ goto out; ++out: ++ mutex_unlock(&pfvf->mbox.lock); ++ return err; ++} ++ ++void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx, u16 mdq) ++{ ++ struct otx2_qset *qset = &pfvf->qset; ++ struct otx2_hw *hw = &pfvf->hw; ++ struct otx2_snd_queue *sq; ++ struct otx2_cq_queue *cq; ++ int pool_id, sq_idx; ++ ++ sq_idx = hw->non_qos_queues + qidx; ++ ++ /* If the DOWN flag is set 
SQs are already freed */ ++ if (pfvf->flags & OTX2_FLAG_INTF_DOWN) ++ return; ++ ++ sq = &pfvf->qset.sq[sq_idx]; ++ if (!sq->sqb_ptrs) ++ return; ++ ++ if (sq_idx < hw->non_qos_queues || ++ sq_idx >= otx2_get_total_tx_queues(pfvf)) { ++ netdev_err(pfvf->netdev, "Send Queue is not a QoS queue\n"); ++ return; ++ } ++ ++ cq = &qset->cq[pfvf->hw.rx_queues + sq_idx]; ++ pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx); ++ ++ otx2_qos_sqb_flush(pfvf, sq_idx); ++ otx2_smq_flush(pfvf, otx2_get_smq_idx(pfvf, sq_idx)); ++ otx2_cleanup_tx_cqes(pfvf, cq); ++ ++ mutex_lock(&pfvf->mbox.lock); ++ otx2_qos_ctx_disable(pfvf, sq_idx, pool_id); ++ mutex_unlock(&pfvf->mbox.lock); ++ ++ otx2_qos_sq_free_sqbs(pfvf, sq_idx); ++ otx2_qos_aura_pool_free(pfvf, pool_id); ++} +-- +2.42.0 + diff --git a/queue-6.1/octeontx2-pf-rename-tot_tx_queues-to-non_qos_queues.patch b/queue-6.1/octeontx2-pf-rename-tot_tx_queues-to-non_qos_queues.patch new file mode 100644 index 00000000000..40ba57237a6 --- /dev/null +++ b/queue-6.1/octeontx2-pf-rename-tot_tx_queues-to-non_qos_queues.patch @@ -0,0 +1,184 @@ +From e00182d3cffef2a3d2f81c12a80094332e4d9a8b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 13 May 2023 14:21:37 +0530 +Subject: octeontx2-pf: Rename tot_tx_queues to non_qos_queues + +From: Hariprasad Kelam + +[ Upstream commit 508c58f76ca510956625c945f9b8eb104f2c8208 ] + +current implementation is such that tot_tx_queues contains both +xdp queues and normal tx queues. which will be allocated in interface +open calls and deallocated on interface down calls respectively. + +With addition of QOS, where send quees are allocated/deallacated upon +user request Qos send queues won't be part of tot_tx_queues. So this +patch renames tot_tx_queues to non_qos_queues. + +Signed-off-by: Hariprasad Kelam +Reviewed-by: Simon Horman +Reviewed-by: Jacob Keller +Signed-off-by: David S. 
Miller +Stable-dep-of: 3423ca23e08b ("octeontx2-pf: Free pending and dropped SQEs") +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/nic/otx2_common.c | 12 ++++++------ + .../ethernet/marvell/octeontx2/nic/otx2_common.h | 2 +- + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 14 +++++++------- + .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 +- + 4 files changed, 15 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +index 011355e73696e..2575c207150e1 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -799,7 +799,7 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) + int timeout = 1000; + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); +- for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) { ++ for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) { + incr = (u64)qidx << 32; + while (timeout) { + val = otx2_atomic64_add(incr, ptr); +@@ -1085,7 +1085,7 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf) + } + + /* Initialize TX queues */ +- for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) { ++ for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) { + u16 sqb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + + err = otx2_sq_init(pfvf, qidx, sqb_aura); +@@ -1132,7 +1132,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) + + /* Set RQ/SQ/CQ counts */ + nixlf->rq_cnt = pfvf->hw.rx_queues; +- nixlf->sq_cnt = pfvf->hw.tot_tx_queues; ++ nixlf->sq_cnt = pfvf->hw.non_qos_queues; + nixlf->cq_cnt = pfvf->qset.cq_cnt; + nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; + nixlf->rss_grps = MAX_RSS_GROUPS; +@@ -1170,7 +1170,7 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf) + int sqb, qidx; + u64 iova, pa; + +- for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { ++ for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { + sq = &qset->sq[qidx]; + if (!sq->sqb_ptrs) + continue; +@@ -1386,7 +1386,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) + stack_pages = + (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; + +- for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { ++ for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + /* Initialize aura context */ + err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs); +@@ -1406,7 +1406,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) + goto fail; + + /* Allocate pointers and free them to aura/pool */ +- for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { ++ for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + pool = &pfvf->qset.pool[pool_id]; + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +index 8a9793b06769f..6c81d09798914 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +@@ -186,7 +186,7 @@ struct otx2_hw { + u16 rx_queues; + u16 tx_queues; + u16 xdp_queues; +- u16 tot_tx_queues; ++ u16 non_qos_queues; /* tx queues plus xdp queues */ + u16 max_queues; + u16 pool_cnt; + u16 rqpool_cnt; +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index 101d79a0bb436..545984a86f235 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ 
b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1256,7 +1256,7 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) + } + + /* SQ */ +- for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) { ++ for (qidx = 0; qidx < pf->hw.non_qos_queues; qidx++) { + u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg; + u8 sq_op_err_code, mnq_err_code, snd_err_code; + +@@ -1391,7 +1391,7 @@ static void otx2_free_sq_res(struct otx2_nic *pf) + otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false); + /* Free SQB pointers */ + otx2_sq_free_sqbs(pf); +- for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) { ++ for (qidx = 0; qidx < pf->hw.non_qos_queues; qidx++) { + sq = &qset->sq[qidx]; + qmem_free(pf->dev, sq->sqe); + qmem_free(pf->dev, sq->tso_hdrs); +@@ -1441,7 +1441,7 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) + * so, aura count = pool count. + */ + hw->rqpool_cnt = hw->rx_queues; +- hw->sqpool_cnt = hw->tot_tx_queues; ++ hw->sqpool_cnt = hw->non_qos_queues; + hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; + + /* Maximum hardware supported transmit length */ +@@ -1694,7 +1694,7 @@ int otx2_open(struct net_device *netdev) + + netif_carrier_off(netdev); + +- pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.tot_tx_queues; ++ pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.non_qos_queues; + /* RQ and SQs are mapped to different CQs, + * so find out max CQ IRQs (i.e CINTs) needed. + */ +@@ -1714,7 +1714,7 @@ int otx2_open(struct net_device *netdev) + if (!qset->cq) + goto err_free_mem; + +- qset->sq = kcalloc(pf->hw.tot_tx_queues, ++ qset->sq = kcalloc(pf->hw.non_qos_queues, + sizeof(struct otx2_snd_queue), GFP_KERNEL); + if (!qset->sq) + goto err_free_mem; +@@ -2532,7 +2532,7 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) + else + pf->hw.xdp_queues = 0; + +- pf->hw.tot_tx_queues += pf->hw.xdp_queues; ++ pf->hw.non_qos_queues += pf->hw.xdp_queues; + + if (if_up) + otx2_open(pf->netdev); +@@ -2763,7 +2763,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) + hw->pdev = pdev; + hw->rx_queues = qcount; + hw->tx_queues = qcount; +- hw->tot_tx_queues = qcount; ++ hw->non_qos_queues = qcount; + hw->max_queues = qcount; + hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; + /* Use CQE of 128 byte descriptor size by default */ +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +index f8f0c01f62a14..ad90f8f2aad1f 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +@@ -566,7 +566,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) + hw->rx_queues = qcount; + hw->tx_queues = qcount; + hw->max_queues = qcount; +- hw->tot_tx_queues = qcount; ++ hw->non_qos_queues = qcount; + hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; + /* Use CQE of 128 byte descriptor size by default */ + hw->xqe_size = 128; +-- +2.42.0 + diff --git a/queue-6.1/pwm-brcmstb-utilize-appropriate-clock-apis-in-suspen.patch b/queue-6.1/pwm-brcmstb-utilize-appropriate-clock-apis-in-suspen.patch new file mode 100644 index 00000000000..1d397d70b07 --- /dev/null +++ b/queue-6.1/pwm-brcmstb-utilize-appropriate-clock-apis-in-suspen.patch @@ -0,0 +1,51 @@ +From 4034c635c55102451912c2907d82937d7443bef6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Oct 2023 10:54:14 -0700 +Subject: pwm: brcmstb: Utilize appropriate clock APIs in suspend/resume +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +From: Florian Fainelli + +[ Upstream commit e9bc4411548aaa738905d37851a0146c16b3bb21 ] + +The suspend/resume functions currently utilize +clk_disable()/clk_enable() respectively which may be no-ops with certain +clock providers such as SCMI. Fix this to use clk_disable_unprepare() +and clk_prepare_enable() respectively as we should. + +Fixes: 3a9f5957020f ("pwm: Add Broadcom BCM7038 PWM controller support") +Signed-off-by: Florian Fainelli +Acked-by: Uwe Kleine-König +Signed-off-by: Thierry Reding +Signed-off-by: Sasha Levin +--- + drivers/pwm/pwm-brcmstb.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c +index 3db3f96edf78d..6afd34d651c77 100644 +--- a/drivers/pwm/pwm-brcmstb.c ++++ b/drivers/pwm/pwm-brcmstb.c +@@ -290,7 +290,7 @@ static int brcmstb_pwm_suspend(struct device *dev) + { + struct brcmstb_pwm *p = dev_get_drvdata(dev); + +- clk_disable(p->clk); ++ clk_disable_unprepare(p->clk); + + return 0; + } +@@ -299,7 +299,7 @@ static int brcmstb_pwm_resume(struct device *dev) + { + struct brcmstb_pwm *p = dev_get_drvdata(dev); + +- clk_enable(p->clk); ++ clk_prepare_enable(p->clk); + + return 0; + } +-- +2.42.0 + diff --git a/queue-6.1/pwm-sti-reduce-number-of-allocations-and-drop-usage-.patch b/queue-6.1/pwm-sti-reduce-number-of-allocations-and-drop-usage-.patch new file mode 100644 index 00000000000..ab1ae5f76a8 --- /dev/null +++ b/queue-6.1/pwm-sti-reduce-number-of-allocations-and-drop-usage-.patch @@ -0,0 +1,115 @@ +From f21ab36aee4e32227e5dc76c74ef752c2193b133 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 10:06:48 +0200 +Subject: pwm: sti: Reduce number of allocations and drop usage of chip_data +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Uwe Kleine-König + +[ Upstream commit 2d6812b41e0d832919d72c72ebddf361df53ba1b ] + +Instead of using one allocation per capture channel, use a single one. Also +store it in driver data instead of chip data. + +This has several advantages: + + - driver data isn't cleared when pwm_put() is called + - Reduces memory fragmentation + +Also register the pwm chip only after the per capture channel data is +initialized as the capture callback relies on this initialization and it +might be called even before pwmchip_add() returns. + +It would be still better to have struct sti_pwm_compat_data and the +per-channel data struct sti_cpt_ddata in a single memory chunk, but that's +not easily possible because the number of capture channels isn't known yet +when the driver data struct is allocated. 
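[ Editor's illustration, not part of the patch: a minimal standalone C sketch of the ordering pattern described above - allocate all per-channel state in a single block and initialize it before the chip is published to code that may use it immediately. All names here (cpt_channel, chip, register_chip, probe) are hypothetical stand-ins, not the driver's actual API. ]

  #include <stdio.h>
  #include <stdlib.h>

  /* Hypothetical per-capture-channel state, in the spirit of sti_cpt_ddata. */
  struct cpt_channel {
          int gpio;
          unsigned int last_period;
  };

  struct chip {
          unsigned int num_channels;
          struct cpt_channel *channels;   /* one allocation for all channels */
  };

  static int register_chip(struct chip *c)
  {
          /* Stand-in for pwmchip_add(): callbacks may fire as soon as this
           * returns, so per-channel state must already be initialized. */
          printf("chip registered with %u channels\n", c->num_channels);
          return 0;
  }

  static int probe(struct chip *c, unsigned int num_channels)
  {
          unsigned int i;

          c->num_channels = num_channels;
          /* One allocation for the whole array instead of one per channel. */
          c->channels = calloc(num_channels, sizeof(*c->channels));
          if (!c->channels)
                  return -1;

          /* Initialize every channel before the chip becomes visible. */
          for (i = 0; i < num_channels; i++)
                  c->channels[i].gpio = -1;

          /* Only now publish the chip. */
          return register_chip(c);
  }

  int main(void)
  {
          struct chip c;

          if (probe(&c, 4))
                  return 1;
          free(c.channels);
          return 0;
  }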
+ +Fixes: e926b12c611c ("pwm: Clear chip_data in pwm_put()") +Reported-by: George Stark +Fixes: c97267ae831d ("pwm: sti: Add PWM capture callback") +Link: https://lore.kernel.org/r/20230705080650.2353391-7-u.kleine-koenig@pengutronix.de +Signed-off-by: Uwe Kleine-König +Signed-off-by: Thierry Reding +Signed-off-by: Sasha Levin +--- + drivers/pwm/pwm-sti.c | 29 ++++++++++++++--------------- + 1 file changed, 14 insertions(+), 15 deletions(-) + +diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c +index 44b1f93256b36..652fdb8dc7bfa 100644 +--- a/drivers/pwm/pwm-sti.c ++++ b/drivers/pwm/pwm-sti.c +@@ -79,6 +79,7 @@ struct sti_pwm_compat_data { + unsigned int cpt_num_devs; + unsigned int max_pwm_cnt; + unsigned int max_prescale; ++ struct sti_cpt_ddata *ddata; + }; + + struct sti_pwm_chip { +@@ -314,7 +315,7 @@ static int sti_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, + { + struct sti_pwm_chip *pc = to_sti_pwmchip(chip); + struct sti_pwm_compat_data *cdata = pc->cdata; +- struct sti_cpt_ddata *ddata = pwm_get_chip_data(pwm); ++ struct sti_cpt_ddata *ddata = &cdata->ddata[pwm->hwpwm]; + struct device *dev = pc->dev; + unsigned int effective_ticks; + unsigned long long high, low; +@@ -440,7 +441,7 @@ static irqreturn_t sti_pwm_interrupt(int irq, void *data) + while (cpt_int_stat) { + devicenum = ffs(cpt_int_stat) - 1; + +- ddata = pwm_get_chip_data(&pc->chip.pwms[devicenum]); ++ ddata = &pc->cdata->ddata[devicenum]; + + /* + * Capture input: +@@ -638,30 +639,28 @@ static int sti_pwm_probe(struct platform_device *pdev) + dev_err(dev, "failed to prepare clock\n"); + return ret; + } ++ ++ cdata->ddata = devm_kzalloc(dev, cdata->cpt_num_devs * sizeof(*cdata->ddata), GFP_KERNEL); ++ if (!cdata->ddata) ++ return -ENOMEM; + } + + pc->chip.dev = dev; + pc->chip.ops = &sti_pwm_ops; + pc->chip.npwm = pc->cdata->pwm_num_devs; + +- ret = pwmchip_add(&pc->chip); +- if (ret < 0) { +- clk_unprepare(pc->pwm_clk); +- clk_unprepare(pc->cpt_clk); +- return ret; +- } +- + for (i = 0; i < cdata->cpt_num_devs; i++) { +- struct sti_cpt_ddata *ddata; +- +- ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); +- if (!ddata) +- return -ENOMEM; ++ struct sti_cpt_ddata *ddata = &cdata->ddata[i]; + + init_waitqueue_head(&ddata->wait); + mutex_init(&ddata->lock); ++ } + +- pwm_set_chip_data(&pc->chip.pwms[i], ddata); ++ ret = pwmchip_add(&pc->chip); ++ if (ret < 0) { ++ clk_unprepare(pc->pwm_clk); ++ clk_unprepare(pc->cpt_clk); ++ return ret; + } + + platform_set_drvdata(pdev, pc); +-- +2.42.0 + diff --git a/queue-6.1/r8169-respect-userspace-disabling-iff_multicast.patch b/queue-6.1/r8169-respect-userspace-disabling-iff_multicast.patch new file mode 100644 index 00000000000..64cec14a252 --- /dev/null +++ b/queue-6.1/r8169-respect-userspace-disabling-iff_multicast.patch @@ -0,0 +1,42 @@ +From 43453f1f795cd8c5e12ff2302a4d1e1c177f1f55 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 5 Nov 2023 23:43:36 +0100 +Subject: r8169: respect userspace disabling IFF_MULTICAST + +From: Heiner Kallweit + +[ Upstream commit 8999ce4cfc87e61b4143ec2e7b93d8e92e11fa7f ] + +So far we ignore the setting of IFF_MULTICAST. Fix this and clear bit +AcceptMulticast if IFF_MULTICAST isn't set. + +Note: Based on the implementations I've seen it doesn't seem to be 100% clear +what a driver is supposed to do if IFF_ALLMULTI is set but IFF_MULTICAST +is not. This patch is based on the understanding that IFF_MULTICAST has +precedence. 
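[ Editor's illustration, not part of the patch: a small self-contained C sketch of the precedence the note describes - promiscuous mode first, then a cleared IFF_MULTICAST (drop all multicast), then IFF_ALLMULTI or an oversized list, then the per-address filter. The flag names echo the kernel's, but the function and constants are hypothetical. ]

  #include <stdio.h>

  #define IFF_PROMISC    0x1
  #define IFF_ALLMULTI   0x2
  #define IFF_MULTICAST  0x4

  #define ACCEPT_ALL_PHYS   0x1
  #define ACCEPT_MULTICAST  0x2
  #define ACCEPT_MY_PHYS    0x4
  #define ACCEPT_BROADCAST  0x8

  #define MC_FILTER_LIMIT 32

  /* Illustrative only: derive receive-mode bits from interface flags. */
  static unsigned int rx_mode_from_flags(unsigned int flags, int mc_count)
  {
          unsigned int rx_mode = ACCEPT_BROADCAST | ACCEPT_MY_PHYS | ACCEPT_MULTICAST;

          if (flags & IFF_PROMISC) {
                  rx_mode |= ACCEPT_ALL_PHYS;       /* accept everything */
          } else if (!(flags & IFF_MULTICAST)) {
                  rx_mode &= ~ACCEPT_MULTICAST;     /* userspace disabled multicast */
          } else if ((flags & IFF_ALLMULTI) || mc_count > MC_FILTER_LIMIT) {
                  /* accept all multicast: leave ACCEPT_MULTICAST set, no filtering */
          } else {
                  /* program the multicast hash filter for mc_count addresses */
          }

          return rx_mode;
  }

  int main(void)
  {
          printf("allmulti without IFF_MULTICAST: %#x\n",
                 rx_mode_from_flags(IFF_ALLMULTI, 0));
          printf("multicast enabled:              %#x\n",
                 rx_mode_from_flags(IFF_MULTICAST, 3));
          return 0;
  }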
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Heiner Kallweit +Link: https://lore.kernel.org/r/4a57ba02-d52d-4369-9f14-3565e6c1f7dc@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/r8169_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index c56d3538889b6..d14706265d9cb 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -2512,6 +2512,8 @@ static void rtl_set_rx_mode(struct net_device *dev) + + if (dev->flags & IFF_PROMISC) { + rx_mode |= AcceptAllPhys; ++ } else if (!(dev->flags & IFF_MULTICAST)) { ++ rx_mode &= ~AcceptMulticast; + } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || + dev->flags & IFF_ALLMULTI || + tp->mac_version == RTL_GIGA_MAC_VER_35 || +-- +2.42.0 + diff --git a/queue-6.1/risc-v-don-t-fail-in-riscv_of_parent_hartid-for-disa.patch b/queue-6.1/risc-v-don-t-fail-in-riscv_of_parent_hartid-for-disa.patch new file mode 100644 index 00000000000..0e9d021b00e --- /dev/null +++ b/queue-6.1/risc-v-don-t-fail-in-riscv_of_parent_hartid-for-disa.patch @@ -0,0 +1,56 @@ +From fe1e50b0024a7bd3424894e00fc66fd5271b2ec1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Oct 2023 21:12:53 +0530 +Subject: RISC-V: Don't fail in riscv_of_parent_hartid() for disabled HARTs + +From: Anup Patel + +[ Upstream commit c4676f8dc1e12e68d6511f9ed89707fdad4c962c ] + +The riscv_of_processor_hartid() used by riscv_of_parent_hartid() fails +for HARTs disabled in the DT. This results in the following warning +thrown by the RISC-V INTC driver for the E-core on SiFive boards: + +[ 0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller + +The riscv_of_parent_hartid() is only expected to read the hartid +from the DT so we directly call of_get_cpu_hwid() instead of calling +riscv_of_processor_hartid(). 
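[ Editor's illustration, not part of the patch: a toy C sketch of the approach described above - walk the parent chain and read the hart ID straight from the node, treating an all-ones value as "no ID", instead of going through a helper that also rejects disabled CPUs. struct node, parent_hartid() and INVALID_HWID are hypothetical stand-ins for DT nodes and of_get_cpu_hwid(). ]

  #include <stdio.h>
  #include <string.h>

  #define INVALID_HWID (~0UL)

  struct node {
          const char *compatible;
          unsigned long hwid;          /* INVALID_HWID when no "reg" property */
          struct node *parent;
  };

  static int parent_hartid(struct node *n, unsigned long *hartid)
  {
          for (; n; n = n->parent) {
                  if (n->compatible && !strcmp(n->compatible, "riscv")) {
                          *hartid = n->hwid;
                          if (*hartid == INVALID_HWID)
                                  return -1;   /* CPU node without hart ID */
                          return 0;            /* works even for disabled CPUs */
                  }
          }
          return -1;
  }

  int main(void)
  {
          struct node cpu  = { .compatible = "riscv", .hwid = 2, .parent = NULL };
          struct node intc = { .compatible = "riscv,cpu-intc",
                               .hwid = INVALID_HWID, .parent = &cpu };
          unsigned long id;

          if (!parent_hartid(&intc, &id))
                  printf("hartid %lu\n", id);
          return 0;
  }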
+ +Fixes: ad635e723e17 ("riscv: cpu: Add 64bit hartid support on RV64") +Signed-off-by: Anup Patel +Reviewed-by: Atish Patra +Link: https://lore.kernel.org/r/20231027154254.355853-2-apatel@ventanamicro.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/cpu.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c +index 852ecccd8920f..0f76181dc634d 100644 +--- a/arch/riscv/kernel/cpu.c ++++ b/arch/riscv/kernel/cpu.c +@@ -57,13 +57,14 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart) + */ + int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) + { +- int rc; +- + for (; node; node = node->parent) { + if (of_device_is_compatible(node, "riscv")) { +- rc = riscv_of_processor_hartid(node, hartid); +- if (!rc) +- return 0; ++ *hartid = (unsigned long)of_get_cpu_hwid(node, 0); ++ if (*hartid == ~0UL) { ++ pr_warn("Found CPU without hart ID\n"); ++ return -ENODEV; ++ } ++ return 0; + } + } + +-- +2.42.0 + diff --git a/queue-6.1/selftests-pmtu.sh-fix-result-checking.patch b/queue-6.1/selftests-pmtu.sh-fix-result-checking.patch new file mode 100644 index 00000000000..bca9c5eec34 --- /dev/null +++ b/queue-6.1/selftests-pmtu.sh-fix-result-checking.patch @@ -0,0 +1,41 @@ +From 17a251fbf65b4aaa37dadd8958676d8a677fde8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 31 Oct 2023 11:47:32 +0800 +Subject: selftests: pmtu.sh: fix result checking + +From: Hangbin Liu + +[ Upstream commit 63e201916b27260218e528a2f8758be47f99bbf4 ] + +In the PMTU test, when all previous tests are skipped and the new test +passes, the exit code is set to 0. However, the current check mistakenly +treats this as an assignment, causing the check to pass every time. + +Consequently, regardless of how many tests have failed, if the latest test +passes, the PMTU test will report a pass. + +Fixes: 2a9d3716b810 ("selftests: pmtu.sh: improve the test result processing") +Signed-off-by: Hangbin Liu +Acked-by: Po-Hsu Lin +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/pmtu.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh +index dfe3d287f01d2..0d705fdcf3b76 100755 +--- a/tools/testing/selftests/net/pmtu.sh ++++ b/tools/testing/selftests/net/pmtu.sh +@@ -2013,7 +2013,7 @@ run_test() { + case $ret in + 0) + all_skipped=false +- [ $exitcode=$ksft_skip ] && exitcode=0 ++ [ $exitcode -eq $ksft_skip ] && exitcode=0 + ;; + $ksft_skip) + [ $all_skipped = true ] && exitcode=$ksft_skip +-- +2.42.0 + diff --git a/queue-6.1/series b/queue-6.1/series index 36c9103aa8a..fd0c2c18325 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -329,3 +329,39 @@ media-cadence-csi2rx-unregister-v4l2-async-notifier.patch media-dvb-usb-v2-af9035-fix-missing-unlock.patch media-cec-meson-always-include-meson-sub-directory-i.patch regmap-prevent-noinc-writes-from-clobbering-cache.patch +pwm-sti-reduce-number-of-allocations-and-drop-usage-.patch +pwm-brcmstb-utilize-appropriate-clock-apis-in-suspen.patch +input-synaptics-rmi4-fix-use-after-free-in-rmi_unreg.patch +watchdog-ixp4xx-make-sure-restart-always-works.patch +llc-verify-mac-len-before-reading-mac-header.patch +hsr-prevent-use-after-free-in-prp_create_tagged_fram.patch +tipc-change-nla_policy-for-bearer-related-names-to-n.patch +bpf-check-map-usercnt-after-timer-timer-is-assigned.patch +inet-shrink-struct-flowi_common.patch +octeontx2-pf-fix-error-codes.patch +octeontx2-pf-fix-holes-in-error-code.patch +net-page_pool-add-missing-free_percpu-when-page_pool.patch +dccp-call-security_inet_conn_request-after-setting-i.patch +dccp-tcp-call-security_inet_conn_request-after-setti.patch +net-r8169-disable-multicast-filter-for-rtl8168h-and-.patch +fix-termination-state-for-idr_for_each_entry_ul.patch +net-stmmac-xgmac-enable-support-for-multiple-flexibl.patch +selftests-pmtu.sh-fix-result-checking.patch +octeontx2-pf-rename-tot_tx_queues-to-non_qos_queues.patch +octeontx2-pf-qos-send-queues-management.patch +octeontx2-pf-free-pending-and-dropped-sqes.patch +net-smc-fix-dangling-sock-under-state-smc_appfinclos.patch +net-smc-allow-cdc-msg-send-rather-than-drop-it-with-.patch +net-smc-put-sk-reference-if-close-work-was-canceled.patch +nvme-fix-error-handling-for-io_uring-nvme-passthroug.patch +tg3-power-down-device-only-on-system_power_off.patch +nbd-fix-uaf-in-nbd_open.patch +blk-core-use-pr_warn_ratelimited-in-bio_check_ro.patch +virtio-vsock-replace-virtio_vsock_pkt-with-sk_buff.patch +vsock-virtio-remove-socket-from-connected-bound-list.patch +r8169-respect-userspace-disabling-iff_multicast.patch +i2c-iproc-handle-invalid-slave-state.patch +netfilter-xt_recent-fix-increase-ipv6-literal-buffer.patch +netfilter-nft_redir-use-struct-nf_nat_range2-through.patch +netfilter-nat-fix-ipv6-nat-redirect-with-mapped-and-.patch +risc-v-don-t-fail-in-riscv_of_parent_hartid-for-disa.patch diff --git a/queue-6.1/tg3-power-down-device-only-on-system_power_off.patch b/queue-6.1/tg3-power-down-device-only-on-system_power_off.patch new file mode 100644 index 00000000000..f6369cab16e --- /dev/null +++ b/queue-6.1/tg3-power-down-device-only-on-system_power_off.patch @@ -0,0 +1,46 @@ +From bb2266f47afc23ff00d5cf74034978d21d28dfd0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 13:50:29 +0200 +Subject: tg3: power down device only on SYSTEM_POWER_OFF + +From: George Shuklin + +[ Upstream commit 9fc3bc7643341dc5be7d269f3d3dbe441d8d7ac3 ] + +Dell R650xs servers hangs on 
reboot if tg3 driver calls +tg3_power_down. + +This happens only if network adapters (BCM5720 for R650xs) were +initialized using SNP (e.g. by booting ipxe.efi). + +The actual problem is on Dell side, but this fix allows servers +to come back alive after reboot. + +Signed-off-by: George Shuklin +Fixes: 2ca1c94ce0b6 ("tg3: Disable tg3 device on system reboot to avoid triggering AER") +Reviewed-by: Pavan Chebbi +Reviewed-by: Michael Chan +Link: https://lore.kernel.org/r/20231103115029.83273-1-george.shuklin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/tg3.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 9609041016776..85570e40c8e9b 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -18086,7 +18086,8 @@ static void tg3_shutdown(struct pci_dev *pdev) + if (netif_running(dev)) + dev_close(dev); + +- tg3_power_down(tp); ++ if (system_state == SYSTEM_POWER_OFF) ++ tg3_power_down(tp); + + rtnl_unlock(); + +-- +2.42.0 + diff --git a/queue-6.1/tipc-change-nla_policy-for-bearer-related-names-to-n.patch b/queue-6.1/tipc-change-nla_policy-for-bearer-related-names-to-n.patch new file mode 100644 index 00000000000..db0295a9c70 --- /dev/null +++ b/queue-6.1/tipc-change-nla_policy-for-bearer-related-names-to-n.patch @@ -0,0 +1,111 @@ +From 8159bf4cc90607234a192c9a6170a78a50801d0d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 16:55:40 +0900 +Subject: tipc: Change nla_policy for bearer-related names to NLA_NUL_STRING + +From: Shigeru Yoshida + +[ Upstream commit 19b3f72a41a8751e26bffc093bb7e1cef29ad579 ] + +syzbot reported the following uninit-value access issue [1]: + +===================================================== +BUG: KMSAN: uninit-value in strlen lib/string.c:418 [inline] +BUG: KMSAN: uninit-value in strstr+0xb8/0x2f0 lib/string.c:756 + strlen lib/string.c:418 [inline] + strstr+0xb8/0x2f0 lib/string.c:756 + tipc_nl_node_reset_link_stats+0x3ea/0xb50 net/tipc/node.c:2595 + genl_family_rcv_msg_doit net/netlink/genetlink.c:971 [inline] + genl_family_rcv_msg net/netlink/genetlink.c:1051 [inline] + genl_rcv_msg+0x11ec/0x1290 net/netlink/genetlink.c:1066 + netlink_rcv_skb+0x371/0x650 net/netlink/af_netlink.c:2545 + genl_rcv+0x40/0x60 net/netlink/genetlink.c:1075 + netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] + netlink_unicast+0xf47/0x1250 net/netlink/af_netlink.c:1368 + netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910 + sock_sendmsg_nosec net/socket.c:730 [inline] + sock_sendmsg net/socket.c:753 [inline] + ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2541 + ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2595 + __sys_sendmsg net/socket.c:2624 [inline] + __do_sys_sendmsg net/socket.c:2633 [inline] + __se_sys_sendmsg net/socket.c:2631 [inline] + __x64_sys_sendmsg+0x307/0x490 net/socket.c:2631 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Uninit was created at: + slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767 + slab_alloc_node mm/slub.c:3478 [inline] + kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523 + kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:559 + __alloc_skb+0x318/0x740 net/core/skbuff.c:650 + alloc_skb include/linux/skbuff.h:1286 [inline] + netlink_alloc_large_skb net/netlink/af_netlink.c:1214 [inline] + netlink_sendmsg+0xb34/0x13d0 
net/netlink/af_netlink.c:1885 + sock_sendmsg_nosec net/socket.c:730 [inline] + sock_sendmsg net/socket.c:753 [inline] + ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2541 + ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2595 + __sys_sendmsg net/socket.c:2624 [inline] + __do_sys_sendmsg net/socket.c:2633 [inline] + __se_sys_sendmsg net/socket.c:2631 [inline] + __x64_sys_sendmsg+0x307/0x490 net/socket.c:2631 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +TIPC bearer-related names including link names must be null-terminated +strings. If a link name which is not null-terminated is passed through +netlink, strstr() and similar functions can cause buffer overrun. This +causes the above issue. + +This patch changes the nla_policy for bearer-related names from NLA_STRING +to NLA_NUL_STRING. This resolves the issue by ensuring that only +null-terminated strings are accepted as bearer-related names. + +syzbot reported similar uninit-value issue related to bearer names [2]. The +root cause of this issue is that a non-null-terminated bearer name was +passed. This patch also resolved this issue. + +Fixes: 7be57fc69184 ("tipc: add link get/dump to new netlink api") +Fixes: 0655f6a8635b ("tipc: add bearer disable/enable to new netlink api") +Reported-and-tested-by: syzbot+5138ca807af9d2b42574@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=5138ca807af9d2b42574 [1] +Reported-and-tested-by: syzbot+9425c47dccbcb4c17d51@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=9425c47dccbcb4c17d51 [2] +Signed-off-by: Shigeru Yoshida +Reviewed-by: Jiri Pirko +Link: https://lore.kernel.org/r/20231030075540.3784537-1-syoshida@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/tipc/netlink.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c +index e8fd257c0e688..1a9a5bdaccf4f 100644 +--- a/net/tipc/netlink.c ++++ b/net/tipc/netlink.c +@@ -88,7 +88,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { + + const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, +- [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING, ++ [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING, + .len = TIPC_MAX_LINK_NAME }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, +@@ -125,7 +125,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { + + const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { + [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, +- [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING, ++ [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING, + .len = TIPC_MAX_BEARER_NAME }, + [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } +-- +2.42.0 + diff --git a/queue-6.1/virtio-vsock-replace-virtio_vsock_pkt-with-sk_buff.patch b/queue-6.1/virtio-vsock-replace-virtio_vsock_pkt-with-sk_buff.patch new file mode 100644 index 00000000000..82faff26c09 --- /dev/null +++ b/queue-6.1/virtio-vsock-replace-virtio_vsock_pkt-with-sk_buff.patch @@ -0,0 +1,1983 @@ +From dc5f3dc5e6910cd026685601ab84ffd77ceafc09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 22:21:37 +0000 +Subject: virtio/vsock: replace virtio_vsock_pkt with sk_buff + +From: Bobby Eshleman + +[ Upstream commit 
71dc9ec9ac7d3eee785cdc986c3daeb821381e20 ] + +This commit changes virtio/vsock to use sk_buff instead of +virtio_vsock_pkt. Beyond better conforming to other net code, using +sk_buff allows vsock to use sk_buff-dependent features in the future +(such as sockmap) and improves throughput. + +This patch introduces the following performance changes: + +Tool: Uperf +Env: Phys Host + L1 Guest +Payload: 64k +Threads: 16 +Test Runs: 10 +Type: SOCK_STREAM +Before: commit b7bfaa761d760 ("Linux 6.2-rc3") + +Before +------ +g2h: 16.77Gb/s +h2g: 10.56Gb/s + +After +----- +g2h: 21.04Gb/s +h2g: 10.76Gb/s + +Signed-off-by: Bobby Eshleman +Reviewed-by: Stefano Garzarella +Signed-off-by: David S. Miller +Stable-dep-of: 3a5cc90a4d17 ("vsock/virtio: remove socket from connected/bound list on shutdown") +Signed-off-by: Sasha Levin +--- + drivers/vhost/vsock.c | 214 +++++------- + include/linux/virtio_vsock.h | 129 ++++++-- + net/vmw_vsock/virtio_transport.c | 149 +++------ + net/vmw_vsock/virtio_transport_common.c | 422 +++++++++++++----------- + net/vmw_vsock/vsock_loopback.c | 51 +-- + 5 files changed, 498 insertions(+), 467 deletions(-) + +diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c +index a2b3743723639..1f3b89c885cca 100644 +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -51,8 +51,7 @@ struct vhost_vsock { + struct hlist_node hash; + + struct vhost_work send_pkt_work; +- spinlock_t send_pkt_list_lock; +- struct list_head send_pkt_list; /* host->guest pending packets */ ++ struct sk_buff_head send_pkt_queue; /* host->guest pending packets */ + + atomic_t queued_replies; + +@@ -108,40 +107,31 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + vhost_disable_notify(&vsock->dev, vq); + + do { +- struct virtio_vsock_pkt *pkt; ++ struct virtio_vsock_hdr *hdr; ++ size_t iov_len, payload_len; + struct iov_iter iov_iter; ++ u32 flags_to_restore = 0; ++ struct sk_buff *skb; + unsigned out, in; + size_t nbytes; +- size_t iov_len, payload_len; + int head; +- u32 flags_to_restore = 0; + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- if (list_empty(&vsock->send_pkt_list)) { +- spin_unlock_bh(&vsock->send_pkt_list_lock); ++ skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); ++ ++ if (!skb) { + vhost_enable_notify(&vsock->dev, vq); + break; + } + +- pkt = list_first_entry(&vsock->send_pkt_list, +- struct virtio_vsock_pkt, list); +- list_del_init(&pkt->list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); +- + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) { +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_add(&pkt->list, &vsock->send_pkt_list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); ++ virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); + break; + } + + if (head == vq->num) { +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_add(&pkt->list, &vsock->send_pkt_list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); +- ++ virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); + /* We cannot finish yet if more buffers snuck in while + * re-enabling notify. 
+ */ +@@ -153,26 +143,27 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + } + + if (out) { +- virtio_transport_free_pkt(pkt); ++ kfree_skb(skb); + vq_err(vq, "Expected 0 output buffers, got %u\n", out); + break; + } + + iov_len = iov_length(&vq->iov[out], in); +- if (iov_len < sizeof(pkt->hdr)) { +- virtio_transport_free_pkt(pkt); ++ if (iov_len < sizeof(*hdr)) { ++ kfree_skb(skb); + vq_err(vq, "Buffer len [%zu] too small\n", iov_len); + break; + } + + iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len); +- payload_len = pkt->len - pkt->off; ++ payload_len = skb->len; ++ hdr = virtio_vsock_hdr(skb); + + /* If the packet is greater than the space available in the + * buffer, we split it using multiple buffers. + */ +- if (payload_len > iov_len - sizeof(pkt->hdr)) { +- payload_len = iov_len - sizeof(pkt->hdr); ++ if (payload_len > iov_len - sizeof(*hdr)) { ++ payload_len = iov_len - sizeof(*hdr); + + /* As we are copying pieces of large packet's buffer to + * small rx buffers, headers of packets in rx queue are +@@ -185,31 +176,30 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + * bits set. After initialized header will be copied to + * rx buffer, these required bits will be restored. + */ +- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) { +- pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); ++ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { ++ hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM; + +- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) { +- pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); ++ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) { ++ hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR; + } + } + } + + /* Set the correct length in the header */ +- pkt->hdr.len = cpu_to_le32(payload_len); ++ hdr->len = cpu_to_le32(payload_len); + +- nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); +- if (nbytes != sizeof(pkt->hdr)) { +- virtio_transport_free_pkt(pkt); ++ nbytes = copy_to_iter(hdr, sizeof(*hdr), &iov_iter); ++ if (nbytes != sizeof(*hdr)) { ++ kfree_skb(skb); + vq_err(vq, "Faulted on copying pkt hdr\n"); + break; + } + +- nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len, +- &iov_iter); ++ nbytes = copy_to_iter(skb->data, payload_len, &iov_iter); + if (nbytes != payload_len) { +- virtio_transport_free_pkt(pkt); ++ kfree_skb(skb); + vq_err(vq, "Faulted on copying pkt buf\n"); + break; + } +@@ -217,31 +207,28 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + /* Deliver to monitoring devices all packets that we + * will transmit. + */ +- virtio_transport_deliver_tap_pkt(pkt); ++ virtio_transport_deliver_tap_pkt(skb); + +- vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); ++ vhost_add_used(vq, head, sizeof(*hdr) + payload_len); + added = true; + +- pkt->off += payload_len; ++ skb_pull(skb, payload_len); + total_len += payload_len; + + /* If we didn't send all the payload we can requeue the packet + * to send it with the next available buffer. + */ +- if (pkt->off < pkt->len) { +- pkt->hdr.flags |= cpu_to_le32(flags_to_restore); ++ if (skb->len > 0) { ++ hdr->flags |= cpu_to_le32(flags_to_restore); + +- /* We are queueing the same virtio_vsock_pkt to handle ++ /* We are queueing the same skb to handle + * the remaining bytes, and we want to deliver it + * to monitoring devices in the next iteration. 
+ */ +- pkt->tap_delivered = false; +- +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_add(&pkt->list, &vsock->send_pkt_list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); ++ virtio_vsock_skb_clear_tap_delivered(skb); ++ virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); + } else { +- if (pkt->reply) { ++ if (virtio_vsock_skb_reply(skb)) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); +@@ -253,7 +240,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + restart_tx = true; + } + +- virtio_transport_free_pkt(pkt); ++ consume_skb(skb); + } + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); + if (added) +@@ -278,28 +265,26 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) + } + + static int +-vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) ++vhost_transport_send_pkt(struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct vhost_vsock *vsock; +- int len = pkt->len; ++ int len = skb->len; + + rcu_read_lock(); + + /* Find the vhost_vsock according to guest context id */ +- vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); ++ vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid)); + if (!vsock) { + rcu_read_unlock(); +- virtio_transport_free_pkt(pkt); ++ kfree_skb(skb); + return -ENODEV; + } + +- if (pkt->reply) ++ if (virtio_vsock_skb_reply(skb)) + atomic_inc(&vsock->queued_replies); + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_add_tail(&pkt->list, &vsock->send_pkt_list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); +- ++ virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + + rcu_read_unlock(); +@@ -310,10 +295,8 @@ static int + vhost_transport_cancel_pkt(struct vsock_sock *vsk) + { + struct vhost_vsock *vsock; +- struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + int ret = -ENODEV; +- LIST_HEAD(freeme); + + rcu_read_lock(); + +@@ -322,20 +305,7 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) + if (!vsock) + goto out; + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { +- if (pkt->vsk != vsk) +- continue; +- list_move(&pkt->list, &freeme); +- } +- spin_unlock_bh(&vsock->send_pkt_list_lock); +- +- list_for_each_entry_safe(pkt, n, &freeme, list) { +- if (pkt->reply) +- cnt++; +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); +- } ++ cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue); + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; +@@ -352,12 +322,14 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) + return ret; + } + +-static struct virtio_vsock_pkt * +-vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, ++static struct sk_buff * ++vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, + unsigned int out, unsigned int in) + { +- struct virtio_vsock_pkt *pkt; ++ struct virtio_vsock_hdr *hdr; + struct iov_iter iov_iter; ++ struct sk_buff *skb; ++ size_t payload_len; + size_t nbytes; + size_t len; + +@@ -366,50 +338,48 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, + return NULL; + } + +- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); +- if (!pkt) ++ len = iov_length(vq->iov, out); ++ ++ /* len contains both payload and hdr */ ++ skb = virtio_vsock_alloc_skb(len, GFP_KERNEL); ++ if (!skb) + return NULL; + +- len = iov_length(vq->iov, out); + iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len); + +- nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); +- if (nbytes != 
sizeof(pkt->hdr)) { ++ hdr = virtio_vsock_hdr(skb); ++ nbytes = copy_from_iter(hdr, sizeof(*hdr), &iov_iter); ++ if (nbytes != sizeof(*hdr)) { + vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", +- sizeof(pkt->hdr), nbytes); +- kfree(pkt); ++ sizeof(*hdr), nbytes); ++ kfree_skb(skb); + return NULL; + } + +- pkt->len = le32_to_cpu(pkt->hdr.len); ++ payload_len = le32_to_cpu(hdr->len); + + /* No payload */ +- if (!pkt->len) +- return pkt; ++ if (!payload_len) ++ return skb; + +- /* The pkt is too big */ +- if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { +- kfree(pkt); ++ /* The pkt is too big or the length in the header is invalid */ ++ if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || ++ payload_len + sizeof(*hdr) > len) { ++ kfree_skb(skb); + return NULL; + } + +- pkt->buf = kvmalloc(pkt->len, GFP_KERNEL); +- if (!pkt->buf) { +- kfree(pkt); +- return NULL; +- } ++ virtio_vsock_skb_rx_put(skb); + +- pkt->buf_len = pkt->len; +- +- nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); +- if (nbytes != pkt->len) { +- vq_err(vq, "Expected %u byte payload, got %zu bytes\n", +- pkt->len, nbytes); +- virtio_transport_free_pkt(pkt); ++ nbytes = copy_from_iter(skb->data, payload_len, &iov_iter); ++ if (nbytes != payload_len) { ++ vq_err(vq, "Expected %zu byte payload, got %zu bytes\n", ++ payload_len, nbytes); ++ kfree_skb(skb); + return NULL; + } + +- return pkt; ++ return skb; + } + + /* Is there space left for replies to rx packets? */ +@@ -496,9 +466,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); +- struct virtio_vsock_pkt *pkt; + int head, pkts = 0, total_len = 0; + unsigned int out, in; ++ struct sk_buff *skb; + bool added = false; + + mutex_lock(&vq->mutex); +@@ -511,6 +481,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) + + vhost_disable_notify(&vsock->dev, vq); + do { ++ struct virtio_vsock_hdr *hdr; ++ + if (!vhost_vsock_more_replies(vsock)) { + /* Stop tx until the device processes already + * pending replies. 
Leave tx virtqueue +@@ -532,24 +504,26 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) + break; + } + +- pkt = vhost_vsock_alloc_pkt(vq, out, in); +- if (!pkt) { ++ skb = vhost_vsock_alloc_skb(vq, out, in); ++ if (!skb) { + vq_err(vq, "Faulted on pkt\n"); + continue; + } + +- total_len += sizeof(pkt->hdr) + pkt->len; ++ total_len += sizeof(*hdr) + skb->len; + + /* Deliver to monitoring devices all received packets */ +- virtio_transport_deliver_tap_pkt(pkt); ++ virtio_transport_deliver_tap_pkt(skb); ++ ++ hdr = virtio_vsock_hdr(skb); + + /* Only accept correctly addressed packets */ +- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && +- le64_to_cpu(pkt->hdr.dst_cid) == ++ if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid && ++ le64_to_cpu(hdr->dst_cid) == + vhost_transport_get_local_cid()) +- virtio_transport_recv_pkt(&vhost_transport, pkt); ++ virtio_transport_recv_pkt(&vhost_transport, skb); + else +- virtio_transport_free_pkt(pkt); ++ kfree_skb(skb); + + vhost_add_used(vq, head, 0); + added = true; +@@ -693,8 +667,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) + VHOST_VSOCK_WEIGHT, true, NULL); + + file->private_data = vsock; +- spin_lock_init(&vsock->send_pkt_list_lock); +- INIT_LIST_HEAD(&vsock->send_pkt_list); ++ skb_queue_head_init(&vsock->send_pkt_queue); + vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); + return 0; + +@@ -760,16 +733,7 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) + vhost_vsock_flush(vsock); + vhost_dev_stop(&vsock->dev); + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- while (!list_empty(&vsock->send_pkt_list)) { +- struct virtio_vsock_pkt *pkt; +- +- pkt = list_first_entry(&vsock->send_pkt_list, +- struct virtio_vsock_pkt, list); +- list_del_init(&pkt->list); +- virtio_transport_free_pkt(pkt); +- } +- spin_unlock_bh(&vsock->send_pkt_list_lock); ++ virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue); + + vhost_dev_cleanup(&vsock->dev); + kfree(vsock->dev.vqs); +diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h +index 35d7eedb5e8e4..3f9c166113063 100644 +--- a/include/linux/virtio_vsock.h ++++ b/include/linux/virtio_vsock.h +@@ -7,6 +7,109 @@ + #include + #include + ++#define VIRTIO_VSOCK_SKB_HEADROOM (sizeof(struct virtio_vsock_hdr)) ++ ++struct virtio_vsock_skb_cb { ++ bool reply; ++ bool tap_delivered; ++}; ++ ++#define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) ++ ++static inline struct virtio_vsock_hdr *virtio_vsock_hdr(struct sk_buff *skb) ++{ ++ return (struct virtio_vsock_hdr *)skb->head; ++} ++ ++static inline bool virtio_vsock_skb_reply(struct sk_buff *skb) ++{ ++ return VIRTIO_VSOCK_SKB_CB(skb)->reply; ++} ++ ++static inline void virtio_vsock_skb_set_reply(struct sk_buff *skb) ++{ ++ VIRTIO_VSOCK_SKB_CB(skb)->reply = true; ++} ++ ++static inline bool virtio_vsock_skb_tap_delivered(struct sk_buff *skb) ++{ ++ return VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered; ++} ++ ++static inline void virtio_vsock_skb_set_tap_delivered(struct sk_buff *skb) ++{ ++ VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = true; ++} ++ ++static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) ++{ ++ VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; ++} ++ ++static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb) ++{ ++ u32 len; ++ ++ len = le32_to_cpu(virtio_vsock_hdr(skb)->len); ++ ++ if (len > 0) ++ skb_put(skb, len); ++} ++ ++static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, 
gfp_t mask) ++{ ++ struct sk_buff *skb; ++ ++ if (size < VIRTIO_VSOCK_SKB_HEADROOM) ++ return NULL; ++ ++ skb = alloc_skb(size, mask); ++ if (!skb) ++ return NULL; ++ ++ skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); ++ return skb; ++} ++ ++static inline void ++virtio_vsock_skb_queue_head(struct sk_buff_head *list, struct sk_buff *skb) ++{ ++ spin_lock_bh(&list->lock); ++ __skb_queue_head(list, skb); ++ spin_unlock_bh(&list->lock); ++} ++ ++static inline void ++virtio_vsock_skb_queue_tail(struct sk_buff_head *list, struct sk_buff *skb) ++{ ++ spin_lock_bh(&list->lock); ++ __skb_queue_tail(list, skb); ++ spin_unlock_bh(&list->lock); ++} ++ ++static inline struct sk_buff *virtio_vsock_skb_dequeue(struct sk_buff_head *list) ++{ ++ struct sk_buff *skb; ++ ++ spin_lock_bh(&list->lock); ++ skb = __skb_dequeue(list); ++ spin_unlock_bh(&list->lock); ++ ++ return skb; ++} ++ ++static inline void virtio_vsock_skb_queue_purge(struct sk_buff_head *list) ++{ ++ spin_lock_bh(&list->lock); ++ __skb_queue_purge(list); ++ spin_unlock_bh(&list->lock); ++} ++ ++static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) ++{ ++ return (size_t)(skb_end_pointer(skb) - skb->head); ++} ++ + #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) + #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL + #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) +@@ -35,23 +138,10 @@ struct virtio_vsock_sock { + u32 last_fwd_cnt; + u32 rx_bytes; + u32 buf_alloc; +- struct list_head rx_queue; ++ struct sk_buff_head rx_queue; + u32 msg_count; + }; + +-struct virtio_vsock_pkt { +- struct virtio_vsock_hdr hdr; +- struct list_head list; +- /* socket refcnt not held, only use for cancellation */ +- struct vsock_sock *vsk; +- void *buf; +- u32 buf_len; +- u32 len; +- u32 off; +- bool reply; +- bool tap_delivered; +-}; +- + struct virtio_vsock_pkt_info { + u32 remote_cid, remote_port; + struct vsock_sock *vsk; +@@ -68,7 +158,7 @@ struct virtio_transport { + struct vsock_transport transport; + + /* Takes ownership of the packet */ +- int (*send_pkt)(struct virtio_vsock_pkt *pkt); ++ int (*send_pkt)(struct sk_buff *skb); + }; + + ssize_t +@@ -149,11 +239,10 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + void virtio_transport_destruct(struct vsock_sock *vsk); + + void virtio_transport_recv_pkt(struct virtio_transport *t, +- struct virtio_vsock_pkt *pkt); +-void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); +-void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); ++ struct sk_buff *skb); ++void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb); + u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); + void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); +-void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt); +- ++void virtio_transport_deliver_tap_pkt(struct sk_buff *skb); ++int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list); + #endif /* _LINUX_VIRTIO_VSOCK_H */ +diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c +index 460e7fbb42da3..16575ea836590 100644 +--- a/net/vmw_vsock/virtio_transport.c ++++ b/net/vmw_vsock/virtio_transport.c +@@ -42,8 +42,7 @@ struct virtio_vsock { + bool tx_run; + + struct work_struct send_pkt_work; +- spinlock_t send_pkt_list_lock; +- struct list_head send_pkt_list; ++ struct sk_buff_head send_pkt_queue; + + atomic_t queued_replies; + +@@ -101,41 +100,31 @@ virtio_transport_send_pkt_work(struct work_struct *work) + vq 
= vsock->vqs[VSOCK_VQ_TX]; + + for (;;) { +- struct virtio_vsock_pkt *pkt; + struct scatterlist hdr, buf, *sgs[2]; + int ret, in_sg = 0, out_sg = 0; ++ struct sk_buff *skb; + bool reply; + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- if (list_empty(&vsock->send_pkt_list)) { +- spin_unlock_bh(&vsock->send_pkt_list_lock); ++ skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); ++ if (!skb) + break; +- } +- +- pkt = list_first_entry(&vsock->send_pkt_list, +- struct virtio_vsock_pkt, list); +- list_del_init(&pkt->list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); + +- virtio_transport_deliver_tap_pkt(pkt); ++ virtio_transport_deliver_tap_pkt(skb); ++ reply = virtio_vsock_skb_reply(skb); + +- reply = pkt->reply; +- +- sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); ++ sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); + sgs[out_sg++] = &hdr; +- if (pkt->buf) { +- sg_init_one(&buf, pkt->buf, pkt->len); ++ if (skb->len > 0) { ++ sg_init_one(&buf, skb->data, skb->len); + sgs[out_sg++] = &buf; + } + +- ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); ++ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); + /* Usually this means that there is no more space available in + * the vq + */ + if (ret < 0) { +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_add(&pkt->list, &vsock->send_pkt_list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); ++ virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); + break; + } + +@@ -164,32 +153,32 @@ virtio_transport_send_pkt_work(struct work_struct *work) + } + + static int +-virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) ++virtio_transport_send_pkt(struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr; + struct virtio_vsock *vsock; +- int len = pkt->len; ++ int len = skb->len; ++ ++ hdr = virtio_vsock_hdr(skb); + + rcu_read_lock(); + vsock = rcu_dereference(the_virtio_vsock); + if (!vsock) { +- virtio_transport_free_pkt(pkt); ++ kfree_skb(skb); + len = -ENODEV; + goto out_rcu; + } + +- if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) { +- virtio_transport_free_pkt(pkt); ++ if (le64_to_cpu(hdr->dst_cid) == vsock->guest_cid) { ++ kfree_skb(skb); + len = -ENODEV; + goto out_rcu; + } + +- if (pkt->reply) ++ if (virtio_vsock_skb_reply(skb)) + atomic_inc(&vsock->queued_replies); + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_add_tail(&pkt->list, &vsock->send_pkt_list); +- spin_unlock_bh(&vsock->send_pkt_list_lock); +- ++ virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); + + out_rcu: +@@ -201,9 +190,7 @@ static int + virtio_transport_cancel_pkt(struct vsock_sock *vsk) + { + struct virtio_vsock *vsock; +- struct virtio_vsock_pkt *pkt, *n; + int cnt = 0, ret; +- LIST_HEAD(freeme); + + rcu_read_lock(); + vsock = rcu_dereference(the_virtio_vsock); +@@ -212,20 +199,7 @@ virtio_transport_cancel_pkt(struct vsock_sock *vsk) + goto out_rcu; + } + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { +- if (pkt->vsk != vsk) +- continue; +- list_move(&pkt->list, &freeme); +- } +- spin_unlock_bh(&vsock->send_pkt_list_lock); +- +- list_for_each_entry_safe(pkt, n, &freeme, list) { +- if (pkt->reply) +- cnt++; +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); +- } ++ cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue); + + if (cnt) { + struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; +@@ -246,38 +220,28 @@ virtio_transport_cancel_pkt(struct 
vsock_sock *vsk) + + static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) + { +- int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; +- struct virtio_vsock_pkt *pkt; +- struct scatterlist hdr, buf, *sgs[2]; ++ int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM; ++ struct scatterlist pkt, *p; + struct virtqueue *vq; ++ struct sk_buff *skb; + int ret; + + vq = vsock->vqs[VSOCK_VQ_RX]; + + do { +- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); +- if (!pkt) ++ skb = virtio_vsock_alloc_skb(total_len, GFP_KERNEL); ++ if (!skb) + break; + +- pkt->buf = kmalloc(buf_len, GFP_KERNEL); +- if (!pkt->buf) { +- virtio_transport_free_pkt(pkt); ++ memset(skb->head, 0, VIRTIO_VSOCK_SKB_HEADROOM); ++ sg_init_one(&pkt, virtio_vsock_hdr(skb), total_len); ++ p = &pkt; ++ ret = virtqueue_add_sgs(vq, &p, 0, 1, skb, GFP_KERNEL); ++ if (ret < 0) { ++ kfree_skb(skb); + break; + } + +- pkt->buf_len = buf_len; +- pkt->len = buf_len; +- +- sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); +- sgs[0] = &hdr; +- +- sg_init_one(&buf, pkt->buf, buf_len); +- sgs[1] = &buf; +- ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); +- if (ret) { +- virtio_transport_free_pkt(pkt); +- break; +- } + vsock->rx_buf_nr++; + } while (vq->num_free); + if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) +@@ -299,12 +263,12 @@ static void virtio_transport_tx_work(struct work_struct *work) + goto out; + + do { +- struct virtio_vsock_pkt *pkt; ++ struct sk_buff *skb; + unsigned int len; + + virtqueue_disable_cb(vq); +- while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { +- virtio_transport_free_pkt(pkt); ++ while ((skb = virtqueue_get_buf(vq, &len)) != NULL) { ++ consume_skb(skb); + added = true; + } + } while (!virtqueue_enable_cb(vq)); +@@ -529,7 +493,7 @@ static void virtio_transport_rx_work(struct work_struct *work) + do { + virtqueue_disable_cb(vq); + for (;;) { +- struct virtio_vsock_pkt *pkt; ++ struct sk_buff *skb; + unsigned int len; + + if (!virtio_transport_more_replies(vsock)) { +@@ -540,23 +504,22 @@ static void virtio_transport_rx_work(struct work_struct *work) + goto out; + } + +- pkt = virtqueue_get_buf(vq, &len); +- if (!pkt) { ++ skb = virtqueue_get_buf(vq, &len); ++ if (!skb) + break; +- } + + vsock->rx_buf_nr--; + + /* Drop short/long packets */ +- if (unlikely(len < sizeof(pkt->hdr) || +- len > sizeof(pkt->hdr) + pkt->len)) { +- virtio_transport_free_pkt(pkt); ++ if (unlikely(len < sizeof(struct virtio_vsock_hdr) || ++ len > virtio_vsock_skb_len(skb))) { ++ kfree_skb(skb); + continue; + } + +- pkt->len = len - sizeof(pkt->hdr); +- virtio_transport_deliver_tap_pkt(pkt); +- virtio_transport_recv_pkt(&virtio_transport, pkt); ++ virtio_vsock_skb_rx_put(skb); ++ virtio_transport_deliver_tap_pkt(skb); ++ virtio_transport_recv_pkt(&virtio_transport, skb); + } + } while (!virtqueue_enable_cb(vq)); + +@@ -624,7 +587,7 @@ static void virtio_vsock_vqs_start(struct virtio_vsock *vsock) + static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) + { + struct virtio_device *vdev = vsock->vdev; +- struct virtio_vsock_pkt *pkt; ++ struct sk_buff *skb; + + /* Reset all connected sockets when the VQs disappear */ + vsock_for_each_connected_socket(&virtio_transport.transport, +@@ -651,23 +614,16 @@ static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) + virtio_reset_device(vdev); + + mutex_lock(&vsock->rx_lock); +- while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) +- virtio_transport_free_pkt(pkt); ++ while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) ++ kfree_skb(skb); 
+ mutex_unlock(&vsock->rx_lock); + + mutex_lock(&vsock->tx_lock); +- while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) +- virtio_transport_free_pkt(pkt); ++ while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) ++ kfree_skb(skb); + mutex_unlock(&vsock->tx_lock); + +- spin_lock_bh(&vsock->send_pkt_list_lock); +- while (!list_empty(&vsock->send_pkt_list)) { +- pkt = list_first_entry(&vsock->send_pkt_list, +- struct virtio_vsock_pkt, list); +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); +- } +- spin_unlock_bh(&vsock->send_pkt_list_lock); ++ virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue); + + /* Delete virtqueues and flush outstanding callbacks if any */ + vdev->config->del_vqs(vdev); +@@ -704,8 +660,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev) + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); + mutex_init(&vsock->event_lock); +- spin_lock_init(&vsock->send_pkt_list_lock); +- INIT_LIST_HEAD(&vsock->send_pkt_list); ++ skb_queue_head_init(&vsock->send_pkt_queue); + INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); + INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); + INIT_WORK(&vsock->event_work, virtio_transport_event_work); +diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c +index a9980e9b93040..a1581c77cf84a 100644 +--- a/net/vmw_vsock/virtio_transport_common.c ++++ b/net/vmw_vsock/virtio_transport_common.c +@@ -37,53 +37,56 @@ virtio_transport_get_ops(struct vsock_sock *vsk) + return container_of(t, struct virtio_transport, transport); + } + +-static struct virtio_vsock_pkt * +-virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, ++/* Returns a new packet on success, otherwise returns NULL. ++ * ++ * If NULL is returned, errp is set to a negative errno. 
++ */ ++static struct sk_buff * ++virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, + size_t len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) + { +- struct virtio_vsock_pkt *pkt; ++ const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len; ++ struct virtio_vsock_hdr *hdr; ++ struct sk_buff *skb; ++ void *payload; + int err; + +- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); +- if (!pkt) ++ skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); ++ if (!skb) + return NULL; + +- pkt->hdr.type = cpu_to_le16(info->type); +- pkt->hdr.op = cpu_to_le16(info->op); +- pkt->hdr.src_cid = cpu_to_le64(src_cid); +- pkt->hdr.dst_cid = cpu_to_le64(dst_cid); +- pkt->hdr.src_port = cpu_to_le32(src_port); +- pkt->hdr.dst_port = cpu_to_le32(dst_port); +- pkt->hdr.flags = cpu_to_le32(info->flags); +- pkt->len = len; +- pkt->hdr.len = cpu_to_le32(len); +- pkt->reply = info->reply; +- pkt->vsk = info->vsk; ++ hdr = virtio_vsock_hdr(skb); ++ hdr->type = cpu_to_le16(info->type); ++ hdr->op = cpu_to_le16(info->op); ++ hdr->src_cid = cpu_to_le64(src_cid); ++ hdr->dst_cid = cpu_to_le64(dst_cid); ++ hdr->src_port = cpu_to_le32(src_port); ++ hdr->dst_port = cpu_to_le32(dst_port); ++ hdr->flags = cpu_to_le32(info->flags); ++ hdr->len = cpu_to_le32(len); + + if (info->msg && len > 0) { +- pkt->buf = kmalloc(len, GFP_KERNEL); +- if (!pkt->buf) +- goto out_pkt; +- +- pkt->buf_len = len; +- +- err = memcpy_from_msg(pkt->buf, info->msg, len); ++ payload = skb_put(skb, len); ++ err = memcpy_from_msg(payload, info->msg, len); + if (err) + goto out; + + if (msg_data_left(info->msg) == 0 && + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { +- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); ++ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) +- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); ++ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } + } + ++ if (info->reply) ++ virtio_vsock_skb_set_reply(skb); ++ + trace_virtio_transport_alloc_pkt(src_cid, src_port, + dst_cid, dst_port, + len, +@@ -91,19 +94,18 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, + info->op, + info->flags); + +- return pkt; ++ return skb; + + out: +- kfree(pkt->buf); +-out_pkt: +- kfree(pkt); ++ kfree_skb(skb); + return NULL; + } + + /* Packet capture */ + static struct sk_buff *virtio_transport_build_skb(void *opaque) + { +- struct virtio_vsock_pkt *pkt = opaque; ++ struct virtio_vsock_hdr *pkt_hdr; ++ struct sk_buff *pkt = opaque; + struct af_vsockmon_hdr *hdr; + struct sk_buff *skb; + size_t payload_len; +@@ -113,10 +115,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. 
+ */ +- payload_len = le32_to_cpu(pkt->hdr.len); +- payload_buf = pkt->buf + pkt->off; ++ pkt_hdr = virtio_vsock_hdr(pkt); ++ payload_len = pkt->len; ++ payload_buf = pkt->data; + +- skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, ++ skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, + GFP_ATOMIC); + if (!skb) + return NULL; +@@ -124,16 +127,16 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) + hdr = skb_put(skb, sizeof(*hdr)); + + /* pkt->hdr is little-endian so no need to byteswap here */ +- hdr->src_cid = pkt->hdr.src_cid; +- hdr->src_port = pkt->hdr.src_port; +- hdr->dst_cid = pkt->hdr.dst_cid; +- hdr->dst_port = pkt->hdr.dst_port; ++ hdr->src_cid = pkt_hdr->src_cid; ++ hdr->src_port = pkt_hdr->src_port; ++ hdr->dst_cid = pkt_hdr->dst_cid; ++ hdr->dst_port = pkt_hdr->dst_port; + + hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO); +- hdr->len = cpu_to_le16(sizeof(pkt->hdr)); ++ hdr->len = cpu_to_le16(sizeof(*pkt_hdr)); + memset(hdr->reserved, 0, sizeof(hdr->reserved)); + +- switch (le16_to_cpu(pkt->hdr.op)) { ++ switch (le16_to_cpu(pkt_hdr->op)) { + case VIRTIO_VSOCK_OP_REQUEST: + case VIRTIO_VSOCK_OP_RESPONSE: + hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT); +@@ -154,7 +157,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) + break; + } + +- skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); ++ skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); + + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); +@@ -163,13 +166,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) + return skb; + } + +-void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) ++void virtio_transport_deliver_tap_pkt(struct sk_buff *skb) + { +- if (pkt->tap_delivered) ++ if (virtio_vsock_skb_tap_delivered(skb)) + return; + +- vsock_deliver_tap(virtio_transport_build_skb, pkt); +- pkt->tap_delivered = true; ++ vsock_deliver_tap(virtio_transport_build_skb, skb); ++ virtio_vsock_skb_set_tap_delivered(skb); + } + EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); + +@@ -192,8 +195,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, + u32 src_cid, src_port, dst_cid, dst_port; + const struct virtio_transport *t_ops; + struct virtio_vsock_sock *vvs; +- struct virtio_vsock_pkt *pkt; + u32 pkt_len = info->pkt_len; ++ struct sk_buff *skb; + + info->type = virtio_transport_get_type(sk_vsock(vsk)); + +@@ -224,42 +227,47 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) + return pkt_len; + +- pkt = virtio_transport_alloc_pkt(info, pkt_len, ++ skb = virtio_transport_alloc_skb(info, pkt_len, + src_cid, src_port, + dst_cid, dst_port); +- if (!pkt) { ++ if (!skb) { + virtio_transport_put_credit(vvs, pkt_len); + return -ENOMEM; + } + +- virtio_transport_inc_tx_pkt(vvs, pkt); ++ virtio_transport_inc_tx_pkt(vvs, skb); + +- return t_ops->send_pkt(pkt); ++ return t_ops->send_pkt(skb); + } + + static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { +- if (vvs->rx_bytes + pkt->len > vvs->buf_alloc) ++ if (vvs->rx_bytes + skb->len > vvs->buf_alloc) + return false; + +- vvs->rx_bytes += pkt->len; ++ vvs->rx_bytes += skb->len; + return true; + } + + static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { +- vvs->rx_bytes -= pkt->len; +- vvs->fwd_cnt += pkt->len; ++ int len; ++ ++ len = skb_headroom(skb) 
- sizeof(struct virtio_vsock_hdr) - skb->len; ++ vvs->rx_bytes -= len; ++ vvs->fwd_cnt += len; + } + +-void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) ++void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); ++ + spin_lock_bh(&vvs->rx_lock); + vvs->last_fwd_cnt = vvs->fwd_cnt; +- pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); +- pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); ++ hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt); ++ hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc); + spin_unlock_bh(&vvs->rx_lock); + } + EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); +@@ -303,29 +311,29 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk, + size_t len) + { + struct virtio_vsock_sock *vvs = vsk->trans; +- struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0, off; ++ struct sk_buff *skb, *tmp; + int err = -EFAULT; + + spin_lock_bh(&vvs->rx_lock); + +- list_for_each_entry(pkt, &vvs->rx_queue, list) { +- off = pkt->off; ++ skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) { ++ off = 0; + + if (total == len) + break; + +- while (total < len && off < pkt->len) { ++ while (total < len && off < skb->len) { + bytes = len - total; +- if (bytes > pkt->len - off) +- bytes = pkt->len - off; ++ if (bytes > skb->len - off) ++ bytes = skb->len - off; + + /* sk_lock is held by caller so no one else can dequeue. + * Unlock rx_lock since memcpy_to_msg() may sleep. + */ + spin_unlock_bh(&vvs->rx_lock); + +- err = memcpy_to_msg(msg, pkt->buf + off, bytes); ++ err = memcpy_to_msg(msg, skb->data + off, bytes); + if (err) + goto out; + +@@ -352,37 +360,38 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, + size_t len) + { + struct virtio_vsock_sock *vvs = vsk->trans; +- struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0; +- u32 free_space; ++ struct sk_buff *skb; + int err = -EFAULT; ++ u32 free_space; + + spin_lock_bh(&vvs->rx_lock); +- while (total < len && !list_empty(&vvs->rx_queue)) { +- pkt = list_first_entry(&vvs->rx_queue, +- struct virtio_vsock_pkt, list); ++ while (total < len && !skb_queue_empty(&vvs->rx_queue)) { ++ skb = __skb_dequeue(&vvs->rx_queue); + + bytes = len - total; +- if (bytes > pkt->len - pkt->off) +- bytes = pkt->len - pkt->off; ++ if (bytes > skb->len) ++ bytes = skb->len; + + /* sk_lock is held by caller so no one else can dequeue. + * Unlock rx_lock since memcpy_to_msg() may sleep. 
+ */ + spin_unlock_bh(&vvs->rx_lock); + +- err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); ++ err = memcpy_to_msg(msg, skb->data, bytes); + if (err) + goto out; + + spin_lock_bh(&vvs->rx_lock); + + total += bytes; +- pkt->off += bytes; +- if (pkt->off == pkt->len) { +- virtio_transport_dec_rx_pkt(vvs, pkt); +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); ++ skb_pull(skb, bytes); ++ ++ if (skb->len == 0) { ++ virtio_transport_dec_rx_pkt(vvs, skb); ++ consume_skb(skb); ++ } else { ++ __skb_queue_head(&vvs->rx_queue, skb); + } + } + +@@ -414,10 +423,10 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, + int flags) + { + struct virtio_vsock_sock *vvs = vsk->trans; +- struct virtio_vsock_pkt *pkt; + int dequeued_len = 0; + size_t user_buf_len = msg_data_left(msg); + bool msg_ready = false; ++ struct sk_buff *skb; + + spin_lock_bh(&vvs->rx_lock); + +@@ -427,13 +436,18 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, + } + + while (!msg_ready) { +- pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list); ++ struct virtio_vsock_hdr *hdr; ++ ++ skb = __skb_dequeue(&vvs->rx_queue); ++ if (!skb) ++ break; ++ hdr = virtio_vsock_hdr(skb); + + if (dequeued_len >= 0) { + size_t pkt_len; + size_t bytes_to_copy; + +- pkt_len = (size_t)le32_to_cpu(pkt->hdr.len); ++ pkt_len = (size_t)le32_to_cpu(hdr->len); + bytes_to_copy = min(user_buf_len, pkt_len); + + if (bytes_to_copy) { +@@ -444,7 +458,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, + */ + spin_unlock_bh(&vvs->rx_lock); + +- err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy); ++ err = memcpy_to_msg(msg, skb->data, bytes_to_copy); + if (err) { + /* Copy of message failed. Rest of + * fragments will be freed without copy. 
+@@ -452,6 +466,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, + dequeued_len = err; + } else { + user_buf_len -= bytes_to_copy; ++ skb_pull(skb, bytes_to_copy); + } + + spin_lock_bh(&vvs->rx_lock); +@@ -461,17 +476,16 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, + dequeued_len += pkt_len; + } + +- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) { ++ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { + msg_ready = true; + vvs->msg_count--; + +- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) ++ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) + msg->msg_flags |= MSG_EOR; + } + +- virtio_transport_dec_rx_pkt(vvs, pkt); +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); ++ virtio_transport_dec_rx_pkt(vvs, skb); ++ kfree_skb(skb); + } + + spin_unlock_bh(&vvs->rx_lock); +@@ -609,7 +623,7 @@ int virtio_transport_do_socket_init(struct vsock_sock *vsk, + + spin_lock_init(&vvs->rx_lock); + spin_lock_init(&vvs->tx_lock); +- INIT_LIST_HEAD(&vvs->rx_queue); ++ skb_queue_head_init(&vvs->rx_queue); + + return 0; + } +@@ -806,16 +820,16 @@ void virtio_transport_destruct(struct vsock_sock *vsk) + EXPORT_SYMBOL_GPL(virtio_transport_destruct); + + static int virtio_transport_reset(struct vsock_sock *vsk, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RST, +- .reply = !!pkt, ++ .reply = !!skb, + .vsk = vsk, + }; + + /* Send RST only if the original pkt is not a RST pkt */ +- if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) ++ if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST) + return 0; + + return virtio_transport_send_pkt_info(vsk, &info); +@@ -825,29 +839,30 @@ static int virtio_transport_reset(struct vsock_sock *vsk, + * attempt was made to connect to a socket that does not exist. + */ + static int virtio_transport_reset_no_sock(const struct virtio_transport *t, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { +- struct virtio_vsock_pkt *reply; ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RST, +- .type = le16_to_cpu(pkt->hdr.type), ++ .type = le16_to_cpu(hdr->type), + .reply = true, + }; ++ struct sk_buff *reply; + + /* Send RST only if the original pkt is not a RST pkt */ +- if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) ++ if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST) + return 0; + +- reply = virtio_transport_alloc_pkt(&info, 0, +- le64_to_cpu(pkt->hdr.dst_cid), +- le32_to_cpu(pkt->hdr.dst_port), +- le64_to_cpu(pkt->hdr.src_cid), +- le32_to_cpu(pkt->hdr.src_port)); ++ reply = virtio_transport_alloc_skb(&info, 0, ++ le64_to_cpu(hdr->dst_cid), ++ le32_to_cpu(hdr->dst_port), ++ le64_to_cpu(hdr->src_cid), ++ le32_to_cpu(hdr->src_port)); + if (!reply) + return -ENOMEM; + + if (!t) { +- virtio_transport_free_pkt(reply); ++ kfree_skb(reply); + return -ENOTCONN; + } + +@@ -858,16 +873,11 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, + static void virtio_transport_remove_sock(struct vsock_sock *vsk) + { + struct virtio_vsock_sock *vvs = vsk->trans; +- struct virtio_vsock_pkt *pkt, *tmp; + + /* We don't need to take rx_lock, as the socket is closing and we are + * removing it. 
+ */ +- list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) { +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); +- } +- ++ __skb_queue_purge(&vvs->rx_queue); + vsock_remove_sock(vsk); + } + +@@ -981,13 +991,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_release); + + static int + virtio_transport_recv_connecting(struct sock *sk, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct vsock_sock *vsk = vsock_sk(sk); +- int err; + int skerr; ++ int err; + +- switch (le16_to_cpu(pkt->hdr.op)) { ++ switch (le16_to_cpu(hdr->op)) { + case VIRTIO_VSOCK_OP_RESPONSE: + sk->sk_state = TCP_ESTABLISHED; + sk->sk_socket->state = SS_CONNECTED; +@@ -1008,7 +1019,7 @@ virtio_transport_recv_connecting(struct sock *sk, + return 0; + + destroy: +- virtio_transport_reset(vsk, pkt); ++ virtio_transport_reset(vsk, skb); + sk->sk_state = TCP_CLOSE; + sk->sk_err = skerr; + sk_error_report(sk); +@@ -1017,34 +1028,37 @@ virtio_transport_recv_connecting(struct sock *sk, + + static void + virtio_transport_recv_enqueue(struct vsock_sock *vsk, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { + struct virtio_vsock_sock *vvs = vsk->trans; + bool can_enqueue, free_pkt = false; ++ struct virtio_vsock_hdr *hdr; ++ u32 len; + +- pkt->len = le32_to_cpu(pkt->hdr.len); +- pkt->off = 0; ++ hdr = virtio_vsock_hdr(skb); ++ len = le32_to_cpu(hdr->len); + + spin_lock_bh(&vvs->rx_lock); + +- can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt); ++ can_enqueue = virtio_transport_inc_rx_pkt(vvs, skb); + if (!can_enqueue) { + free_pkt = true; + goto out; + } + +- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) ++ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) + vvs->msg_count++; + + /* Try to copy small packets into the buffer of last packet queued, + * to avoid wasting memory queueing the entire buffer with a small + * payload. + */ +- if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) { +- struct virtio_vsock_pkt *last_pkt; ++ if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) { ++ struct virtio_vsock_hdr *last_hdr; ++ struct sk_buff *last_skb; + +- last_pkt = list_last_entry(&vvs->rx_queue, +- struct virtio_vsock_pkt, list); ++ last_skb = skb_peek_tail(&vvs->rx_queue); ++ last_hdr = virtio_vsock_hdr(last_skb); + + /* If there is space in the last packet queued, we copy the + * new packet in its buffer. We avoid this if the last packet +@@ -1052,35 +1066,35 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, + * delimiter of SEQPACKET message, so 'pkt' is the first packet + * of a new message. 
+ */ +- if ((pkt->len <= last_pkt->buf_len - last_pkt->len) && +- !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) { +- memcpy(last_pkt->buf + last_pkt->len, pkt->buf, +- pkt->len); +- last_pkt->len += pkt->len; ++ if (skb->len < skb_tailroom(last_skb) && ++ !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) { ++ memcpy(skb_put(last_skb, skb->len), skb->data, skb->len); + free_pkt = true; +- last_pkt->hdr.flags |= pkt->hdr.flags; ++ last_hdr->flags |= hdr->flags; ++ last_hdr->len = cpu_to_le32(last_skb->len); + goto out; + } + } + +- list_add_tail(&pkt->list, &vvs->rx_queue); ++ __skb_queue_tail(&vvs->rx_queue, skb); + + out: + spin_unlock_bh(&vvs->rx_lock); + if (free_pkt) +- virtio_transport_free_pkt(pkt); ++ kfree_skb(skb); + } + + static int + virtio_transport_recv_connected(struct sock *sk, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct vsock_sock *vsk = vsock_sk(sk); + int err = 0; + +- switch (le16_to_cpu(pkt->hdr.op)) { ++ switch (le16_to_cpu(hdr->op)) { + case VIRTIO_VSOCK_OP_RW: +- virtio_transport_recv_enqueue(vsk, pkt); ++ virtio_transport_recv_enqueue(vsk, skb); + vsock_data_ready(sk); + return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: +@@ -1090,18 +1104,17 @@ virtio_transport_recv_connected(struct sock *sk, + sk->sk_write_space(sk); + break; + case VIRTIO_VSOCK_OP_SHUTDOWN: +- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) ++ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) + vsk->peer_shutdown |= RCV_SHUTDOWN; +- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) ++ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) + vsk->peer_shutdown |= SEND_SHUTDOWN; + if (vsk->peer_shutdown == SHUTDOWN_MASK && + vsock_stream_has_data(vsk) <= 0 && + !sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset(vsk, NULL); +- + virtio_transport_do_close(vsk, true); + } +- if (le32_to_cpu(pkt->hdr.flags)) ++ if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_RST: +@@ -1112,28 +1125,30 @@ virtio_transport_recv_connected(struct sock *sk, + break; + } + +- virtio_transport_free_pkt(pkt); ++ kfree_skb(skb); + return err; + } + + static void + virtio_transport_recv_disconnecting(struct sock *sk, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct vsock_sock *vsk = vsock_sk(sk); + +- if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) ++ if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST) + virtio_transport_do_close(vsk, true); + } + + static int + virtio_transport_send_response(struct vsock_sock *vsk, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RESPONSE, +- .remote_cid = le64_to_cpu(pkt->hdr.src_cid), +- .remote_port = le32_to_cpu(pkt->hdr.src_port), ++ .remote_cid = le64_to_cpu(hdr->src_cid), ++ .remote_port = le32_to_cpu(hdr->src_port), + .reply = true, + .vsk = vsk, + }; +@@ -1142,8 +1157,9 @@ virtio_transport_send_response(struct vsock_sock *vsk, + } + + static bool virtio_transport_space_update(struct sock *sk, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_vsock_sock *vvs = vsk->trans; + bool space_available; +@@ -1158,8 +1174,8 @@ static bool 
virtio_transport_space_update(struct sock *sk, + + /* buf_alloc and fwd_cnt is always included in the hdr */ + spin_lock_bh(&vvs->tx_lock); +- vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); +- vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); ++ vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc); ++ vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt); + space_available = virtio_transport_has_space(vsk); + spin_unlock_bh(&vvs->tx_lock); + return space_available; +@@ -1167,27 +1183,28 @@ static bool virtio_transport_space_update(struct sock *sk, + + /* Handle server socket */ + static int +-virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, ++virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, + struct virtio_transport *t) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct vsock_sock *vsk = vsock_sk(sk); + struct vsock_sock *vchild; + struct sock *child; + int ret; + +- if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { +- virtio_transport_reset_no_sock(t, pkt); ++ if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) { ++ virtio_transport_reset_no_sock(t, skb); + return -EINVAL; + } + + if (sk_acceptq_is_full(sk)) { +- virtio_transport_reset_no_sock(t, pkt); ++ virtio_transport_reset_no_sock(t, skb); + return -ENOMEM; + } + + child = vsock_create_connected(sk); + if (!child) { +- virtio_transport_reset_no_sock(t, pkt); ++ virtio_transport_reset_no_sock(t, skb); + return -ENOMEM; + } + +@@ -1198,10 +1215,10 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, + child->sk_state = TCP_ESTABLISHED; + + vchild = vsock_sk(child); +- vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), +- le32_to_cpu(pkt->hdr.dst_port)); +- vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid), +- le32_to_cpu(pkt->hdr.src_port)); ++ vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid), ++ le32_to_cpu(hdr->dst_port)); ++ vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid), ++ le32_to_cpu(hdr->src_port)); + + ret = vsock_assign_transport(vchild, vsk); + /* Transport assigned (looking at remote_addr) must be the same +@@ -1209,17 +1226,17 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, + */ + if (ret || vchild->transport != &t->transport) { + release_sock(child); +- virtio_transport_reset_no_sock(t, pkt); ++ virtio_transport_reset_no_sock(t, skb); + sock_put(child); + return ret; + } + +- if (virtio_transport_space_update(child, pkt)) ++ if (virtio_transport_space_update(child, skb)) + child->sk_write_space(child); + + vsock_insert_connected(vchild); + vsock_enqueue_accept(sk, child); +- virtio_transport_send_response(vchild, pkt); ++ virtio_transport_send_response(vchild, skb); + + release_sock(child); + +@@ -1237,29 +1254,30 @@ static bool virtio_transport_valid_type(u16 type) + * lock. 
+ */ + void virtio_transport_recv_pkt(struct virtio_transport *t, +- struct virtio_vsock_pkt *pkt) ++ struct sk_buff *skb) + { ++ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct sockaddr_vm src, dst; + struct vsock_sock *vsk; + struct sock *sk; + bool space_available; + +- vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid), +- le32_to_cpu(pkt->hdr.src_port)); +- vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid), +- le32_to_cpu(pkt->hdr.dst_port)); ++ vsock_addr_init(&src, le64_to_cpu(hdr->src_cid), ++ le32_to_cpu(hdr->src_port)); ++ vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid), ++ le32_to_cpu(hdr->dst_port)); + + trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, + dst.svm_cid, dst.svm_port, +- le32_to_cpu(pkt->hdr.len), +- le16_to_cpu(pkt->hdr.type), +- le16_to_cpu(pkt->hdr.op), +- le32_to_cpu(pkt->hdr.flags), +- le32_to_cpu(pkt->hdr.buf_alloc), +- le32_to_cpu(pkt->hdr.fwd_cnt)); +- +- if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) { +- (void)virtio_transport_reset_no_sock(t, pkt); ++ le32_to_cpu(hdr->len), ++ le16_to_cpu(hdr->type), ++ le16_to_cpu(hdr->op), ++ le32_to_cpu(hdr->flags), ++ le32_to_cpu(hdr->buf_alloc), ++ le32_to_cpu(hdr->fwd_cnt)); ++ ++ if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) { ++ (void)virtio_transport_reset_no_sock(t, skb); + goto free_pkt; + } + +@@ -1270,13 +1288,13 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { +- (void)virtio_transport_reset_no_sock(t, pkt); ++ (void)virtio_transport_reset_no_sock(t, skb); + goto free_pkt; + } + } + +- if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) { +- (void)virtio_transport_reset_no_sock(t, pkt); ++ if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) { ++ (void)virtio_transport_reset_no_sock(t, skb); + sock_put(sk); + goto free_pkt; + } +@@ -1287,13 +1305,13 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, + + /* Check if sk has been closed before lock_sock */ + if (sock_flag(sk, SOCK_DONE)) { +- (void)virtio_transport_reset_no_sock(t, pkt); ++ (void)virtio_transport_reset_no_sock(t, skb); + release_sock(sk); + sock_put(sk); + goto free_pkt; + } + +- space_available = virtio_transport_space_update(sk, pkt); ++ space_available = virtio_transport_space_update(sk, skb); + + /* Update CID in case it has changed after a transport reset event */ + if (vsk->local_addr.svm_cid != VMADDR_CID_ANY) +@@ -1304,23 +1322,23 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, + + switch (sk->sk_state) { + case TCP_LISTEN: +- virtio_transport_recv_listen(sk, pkt, t); +- virtio_transport_free_pkt(pkt); ++ virtio_transport_recv_listen(sk, skb, t); ++ kfree_skb(skb); + break; + case TCP_SYN_SENT: +- virtio_transport_recv_connecting(sk, pkt); +- virtio_transport_free_pkt(pkt); ++ virtio_transport_recv_connecting(sk, skb); ++ kfree_skb(skb); + break; + case TCP_ESTABLISHED: +- virtio_transport_recv_connected(sk, pkt); ++ virtio_transport_recv_connected(sk, skb); + break; + case TCP_CLOSING: +- virtio_transport_recv_disconnecting(sk, pkt); +- virtio_transport_free_pkt(pkt); ++ virtio_transport_recv_disconnecting(sk, skb); ++ kfree_skb(skb); + break; + default: +- (void)virtio_transport_reset_no_sock(t, pkt); +- virtio_transport_free_pkt(pkt); ++ (void)virtio_transport_reset_no_sock(t, skb); ++ kfree_skb(skb); + break; + } + +@@ -1333,16 +1351,42 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, + return; + + free_pkt: +- virtio_transport_free_pkt(pkt); 
++ kfree_skb(skb); + } + EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); + +-void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) ++/* Remove skbs found in a queue that have a vsk that matches. ++ * ++ * Each skb is freed. ++ * ++ * Returns the count of skbs that were reply packets. ++ */ ++int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue) + { +- kvfree(pkt->buf); +- kfree(pkt); ++ struct sk_buff_head freeme; ++ struct sk_buff *skb, *tmp; ++ int cnt = 0; ++ ++ skb_queue_head_init(&freeme); ++ ++ spin_lock_bh(&queue->lock); ++ skb_queue_walk_safe(queue, skb, tmp) { ++ if (vsock_sk(skb->sk) != vsk) ++ continue; ++ ++ __skb_unlink(skb, queue); ++ __skb_queue_tail(&freeme, skb); ++ ++ if (virtio_vsock_skb_reply(skb)) ++ cnt++; ++ } ++ spin_unlock_bh(&queue->lock); ++ ++ __skb_queue_purge(&freeme); ++ ++ return cnt; + } +-EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); ++EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs); + + MODULE_LICENSE("GPL v2"); + MODULE_AUTHOR("Asias He"); +diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c +index 169a8cf65b390..671e03240fc52 100644 +--- a/net/vmw_vsock/vsock_loopback.c ++++ b/net/vmw_vsock/vsock_loopback.c +@@ -16,7 +16,7 @@ struct vsock_loopback { + struct workqueue_struct *workqueue; + + spinlock_t pkt_list_lock; /* protects pkt_list */ +- struct list_head pkt_list; ++ struct sk_buff_head pkt_queue; + struct work_struct pkt_work; + }; + +@@ -27,13 +27,13 @@ static u32 vsock_loopback_get_local_cid(void) + return VMADDR_CID_LOCAL; + } + +-static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt) ++static int vsock_loopback_send_pkt(struct sk_buff *skb) + { + struct vsock_loopback *vsock = &the_vsock_loopback; +- int len = pkt->len; ++ int len = skb->len; + + spin_lock_bh(&vsock->pkt_list_lock); +- list_add_tail(&pkt->list, &vsock->pkt_list); ++ skb_queue_tail(&vsock->pkt_queue, skb); + spin_unlock_bh(&vsock->pkt_list_lock); + + queue_work(vsock->workqueue, &vsock->pkt_work); +@@ -44,21 +44,8 @@ static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt) + static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk) + { + struct vsock_loopback *vsock = &the_vsock_loopback; +- struct virtio_vsock_pkt *pkt, *n; +- LIST_HEAD(freeme); + +- spin_lock_bh(&vsock->pkt_list_lock); +- list_for_each_entry_safe(pkt, n, &vsock->pkt_list, list) { +- if (pkt->vsk != vsk) +- continue; +- list_move(&pkt->list, &freeme); +- } +- spin_unlock_bh(&vsock->pkt_list_lock); +- +- list_for_each_entry_safe(pkt, n, &freeme, list) { +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); +- } ++ virtio_transport_purge_skbs(vsk, &vsock->pkt_queue); + + return 0; + } +@@ -121,20 +108,18 @@ static void vsock_loopback_work(struct work_struct *work) + { + struct vsock_loopback *vsock = + container_of(work, struct vsock_loopback, pkt_work); +- LIST_HEAD(pkts); ++ struct sk_buff_head pkts; ++ struct sk_buff *skb; ++ ++ skb_queue_head_init(&pkts); + + spin_lock_bh(&vsock->pkt_list_lock); +- list_splice_init(&vsock->pkt_list, &pkts); ++ skb_queue_splice_init(&vsock->pkt_queue, &pkts); + spin_unlock_bh(&vsock->pkt_list_lock); + +- while (!list_empty(&pkts)) { +- struct virtio_vsock_pkt *pkt; +- +- pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list); +- list_del_init(&pkt->list); +- +- virtio_transport_deliver_tap_pkt(pkt); +- virtio_transport_recv_pkt(&loopback_transport, pkt); ++ while ((skb = __skb_dequeue(&pkts))) { ++ virtio_transport_deliver_tap_pkt(skb); ++ virtio_transport_recv_pkt(&loopback_transport, skb); + } + 
} + +@@ -148,7 +133,7 @@ static int __init vsock_loopback_init(void) + return -ENOMEM; + + spin_lock_init(&vsock->pkt_list_lock); +- INIT_LIST_HEAD(&vsock->pkt_list); ++ skb_queue_head_init(&vsock->pkt_queue); + INIT_WORK(&vsock->pkt_work, vsock_loopback_work); + + ret = vsock_core_register(&loopback_transport.transport, +@@ -166,19 +151,13 @@ static int __init vsock_loopback_init(void) + static void __exit vsock_loopback_exit(void) + { + struct vsock_loopback *vsock = &the_vsock_loopback; +- struct virtio_vsock_pkt *pkt; + + vsock_core_unregister(&loopback_transport.transport); + + flush_work(&vsock->pkt_work); + + spin_lock_bh(&vsock->pkt_list_lock); +- while (!list_empty(&vsock->pkt_list)) { +- pkt = list_first_entry(&vsock->pkt_list, +- struct virtio_vsock_pkt, list); +- list_del(&pkt->list); +- virtio_transport_free_pkt(pkt); +- } ++ virtio_vsock_skb_queue_purge(&vsock->pkt_queue); + spin_unlock_bh(&vsock->pkt_list_lock); + + destroy_workqueue(vsock->workqueue); +-- +2.42.0 + diff --git a/queue-6.1/vsock-virtio-remove-socket-from-connected-bound-list.patch b/queue-6.1/vsock-virtio-remove-socket-from-connected-bound-list.patch new file mode 100644 index 00000000000..012b78e0d13 --- /dev/null +++ b/queue-6.1/vsock-virtio-remove-socket-from-connected-bound-list.patch @@ -0,0 +1,75 @@ +From 1ee503bd1f558d4497498a5f553aa2e0961bfa18 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 18:55:48 +0100 +Subject: vsock/virtio: remove socket from connected/bound list on shutdown + +From: Filippo Storniolo + +[ Upstream commit 3a5cc90a4d1756072619fe511d07621bdef7f120 ] + +If the same remote peer, using the same port, tries to connect +to a server on a listening port more than once, the server will +reject the connection, causing a "connection reset by peer" +error on the remote peer. This is due to the presence of a +dangling socket from a previous connection in both the connected +and bound socket lists. +The inconsistency of the above lists only occurs when the remote +peer disconnects and the server remains active. + +This bug does not occur when the server socket is closed: +virtio_transport_release() will eventually schedule a call to +virtio_transport_do_close() and the latter will remove the socket +from the bound and connected socket lists and clear the sk_buff. + +However, virtio_transport_do_close() will only perform the above +actions if it has been scheduled, and this will not happen +if the server is processing the shutdown message from a remote peer. + +To fix this, introduce a call to vsock_remove_sock() +when the server is handling a client disconnect. +This is to remove the socket from the bound and connected socket +lists without clearing the sk_buff. + +Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") +Reported-by: Daan De Meyer +Tested-by: Daan De Meyer +Co-developed-by: Luigi Leonardi +Signed-off-by: Luigi Leonardi +Signed-off-by: Filippo Storniolo +Reviewed-by: Stefano Garzarella +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/vmw_vsock/virtio_transport_common.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c +index a1581c77cf84a..73e5093928325 100644 +--- a/net/vmw_vsock/virtio_transport_common.c ++++ b/net/vmw_vsock/virtio_transport_common.c +@@ -1108,11 +1108,17 @@ virtio_transport_recv_connected(struct sock *sk, + vsk->peer_shutdown |= RCV_SHUTDOWN; + if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) + vsk->peer_shutdown |= SEND_SHUTDOWN; +- if (vsk->peer_shutdown == SHUTDOWN_MASK && +- vsock_stream_has_data(vsk) <= 0 && +- !sock_flag(sk, SOCK_DONE)) { +- (void)virtio_transport_reset(vsk, NULL); +- virtio_transport_do_close(vsk, true); ++ if (vsk->peer_shutdown == SHUTDOWN_MASK) { ++ if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) { ++ (void)virtio_transport_reset(vsk, NULL); ++ virtio_transport_do_close(vsk, true); ++ } ++ /* Remove this socket anyway because the remote peer sent ++ * the shutdown. This way a new connection will succeed ++ * if the remote peer uses the same source port, ++ * even if the old socket is still unreleased, but now disconnected. ++ */ ++ vsock_remove_sock(vsk); + } + if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) + sk->sk_state_change(sk); +-- +2.42.0 + diff --git a/queue-6.1/watchdog-ixp4xx-make-sure-restart-always-works.patch b/queue-6.1/watchdog-ixp4xx-make-sure-restart-always-works.patch new file mode 100644 index 00000000000..20d7e627cc3 --- /dev/null +++ b/queue-6.1/watchdog-ixp4xx-make-sure-restart-always-works.patch @@ -0,0 +1,88 @@ +From 9156bf060d4bf888e7dae1dfad5025795047337f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Sep 2023 11:13:44 +0200 +Subject: watchdog: ixp4xx: Make sure restart always works + +From: Linus Walleij + +[ Upstream commit b4075ecfe348a44209534c75ad72392c63a489a6 ] + +The IXP4xx watchdog in early "A0" silicon is unreliable and +cannot be registered, however for some systems such as the +USRobotics USR8200 the watchdog is the only restart option, +so implement a "dummy" watchdog that can only support restart +in this case. + +Fixes: 1aea522809e6 ("watchdog: ixp4xx: Implement restart") +Signed-off-by: Linus Walleij +Reviewed-by: Guenter Roeck +Link: https://lore.kernel.org/r/20230926-ixp4xx-wdt-restart-v2-1-15cf4639b423@linaro.org +Signed-off-by: Guenter Roeck +Signed-off-by: Wim Van Sebroeck +Signed-off-by: Sasha Levin +--- + drivers/watchdog/ixp4xx_wdt.c | 28 +++++++++++++++++++++++++--- + 1 file changed, 25 insertions(+), 3 deletions(-) + +diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c +index 281a48d9889fc..0fc91e9c4a773 100644 +--- a/drivers/watchdog/ixp4xx_wdt.c ++++ b/drivers/watchdog/ixp4xx_wdt.c +@@ -105,6 +105,25 @@ static const struct watchdog_ops ixp4xx_wdt_ops = { + .owner = THIS_MODULE, + }; + ++/* ++ * The A0 version of the IXP422 had a bug in the watchdog making ++ * is useless, but we still need to use it to restart the system ++ * as it is the only way, so in this special case we register a ++ * "dummy" watchdog that doesn't really work, but will support ++ * the restart operation. 
++ */ ++static int ixp4xx_wdt_dummy(struct watchdog_device *wdd) ++{ ++ return 0; ++} ++ ++static const struct watchdog_ops ixp4xx_wdt_restart_only_ops = { ++ .start = ixp4xx_wdt_dummy, ++ .stop = ixp4xx_wdt_dummy, ++ .restart = ixp4xx_wdt_restart, ++ .owner = THIS_MODULE, ++}; ++ + static const struct watchdog_info ixp4xx_wdt_info = { + .options = WDIOF_KEEPALIVEPING + | WDIOF_MAGICCLOSE +@@ -120,14 +139,17 @@ static void ixp4xx_clock_action(void *d) + + static int ixp4xx_wdt_probe(struct platform_device *pdev) + { ++ static const struct watchdog_ops *iwdt_ops; + struct device *dev = &pdev->dev; + struct ixp4xx_wdt *iwdt; + struct clk *clk; + int ret; + + if (!(read_cpuid_id() & 0xf) && !cpu_is_ixp46x()) { +- dev_err(dev, "Rev. A0 IXP42x CPU detected - watchdog disabled\n"); +- return -ENODEV; ++ dev_info(dev, "Rev. A0 IXP42x CPU detected - only restart supported\n"); ++ iwdt_ops = &ixp4xx_wdt_restart_only_ops; ++ } else { ++ iwdt_ops = &ixp4xx_wdt_ops; + } + + iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL); +@@ -153,7 +175,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev) + iwdt->rate = IXP4XX_TIMER_FREQ; + + iwdt->wdd.info = &ixp4xx_wdt_info; +- iwdt->wdd.ops = &ixp4xx_wdt_ops; ++ iwdt->wdd.ops = iwdt_ops; + iwdt->wdd.min_timeout = 1; + iwdt->wdd.max_timeout = U32_MAX / iwdt->rate; + iwdt->wdd.parent = dev; +-- +2.42.0 +