From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 1 May 2022 18:30:39 +0000 (+0200)
Subject: 5.17-stable patches
X-Git-Tag: v5.4.192~51
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2dd7422a8b97ce38a7e6a1a57e838239d26d1673;p=thirdparty%2Fkernel%2Fstable-queue.git

5.17-stable patches

added patches:
	bus-fsl-mc-msi-fix-msi-descriptor-mutex-lock-for-msi_first_desc.patch
	iocost-don-t-reset-the-inuse-weight-of-under-weighted-debtors.patch
	riscv-patch_text-fixup-last-cpu-should-be-master.patch
	virtio_net-fix-wrong-buf-address-calculation-when-using-xdp.patch
	x86-cpu-load-microcode-during-restore_processor_state.patch
	x86-pci-xen-disable-pci-msi-masking-for-xen_hvm-guests.patch
---

diff --git a/queue-5.17/bus-fsl-mc-msi-fix-msi-descriptor-mutex-lock-for-msi_first_desc.patch b/queue-5.17/bus-fsl-mc-msi-fix-msi-descriptor-mutex-lock-for-msi_first_desc.patch
new file mode 100644
index 00000000000..85360a1b4f7
--- /dev/null
+++ b/queue-5.17/bus-fsl-mc-msi-fix-msi-descriptor-mutex-lock-for-msi_first_desc.patch
@@ -0,0 +1,53 @@
+From c7d2f89fea26c84d5accc55d9976dd7e5305e63a Mon Sep 17 00:00:00 2001
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Date: Tue, 12 Apr 2022 16:56:36 +0900
+Subject: bus: fsl-mc-msi: Fix MSI descriptor mutex lock for msi_first_desc()
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+commit c7d2f89fea26c84d5accc55d9976dd7e5305e63a upstream.
+
+Commit e8604b1447b4 introduced a call to the helper function
+msi_first_desc(), which needs MSI descriptor mutex lock before
+call. However, the required mutex lock was not added. This results in
+lockdep assertion:
+
+ WARNING: CPU: 4 PID: 119 at kernel/irq/msi.c:274 msi_first_desc+0xd0/0x10c
+  msi_first_desc+0xd0/0x10c
+  fsl_mc_msi_domain_alloc_irqs+0x7c/0xc0
+  fsl_mc_populate_irq_pool+0x80/0x3cc
+
+Fix this by adding the mutex lock and unlock around the function call.
+
+Fixes: e8604b1447b4 ("bus: fsl-mc-msi: Simplify MSI descriptor handling")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220412075636.755454-1-shinichiro.kawasaki@wdc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/fsl-mc/fsl-mc-msi.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/bus/fsl-mc/fsl-mc-msi.c b/drivers/bus/fsl-mc/fsl-mc-msi.c
+index 5e0e4393ce4d..0cfe859a4ac4 100644
+--- a/drivers/bus/fsl-mc/fsl-mc-msi.c
++++ b/drivers/bus/fsl-mc/fsl-mc-msi.c
+@@ -224,8 +224,12 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev,  unsigned int irq_count)
+ 	if (error)
+ 		return error;
+ 
++	msi_lock_descs(dev);
+ 	if (msi_first_desc(dev, MSI_DESC_ALL))
+-		return -EINVAL;
++		error = -EINVAL;
++	msi_unlock_descs(dev);
++	if (error)
++		return error;
+ 
+ 	/*
+ 	 * NOTE: Calling this function will trigger the invocation of the
+-- 
+2.36.0
+
diff --git a/queue-5.17/iocost-don-t-reset-the-inuse-weight-of-under-weighted-debtors.patch b/queue-5.17/iocost-don-t-reset-the-inuse-weight-of-under-weighted-debtors.patch
new file mode 100644
index 00000000000..ba1baae7420
--- /dev/null
+++ b/queue-5.17/iocost-don-t-reset-the-inuse-weight-of-under-weighted-debtors.patch
@@ -0,0 +1,63 @@
+From 8c936f9ea11ec4e35e288810a7503b5c841a355f Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 26 Apr 2022 19:01:01 -1000
+Subject: iocost: don't reset the inuse weight of under-weighted debtors
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 8c936f9ea11ec4e35e288810a7503b5c841a355f upstream.
+
+When an iocg is in debt, its inuse weight is owned by debt handling and
+should stay at 1. This invariant was broken when determining the amount of
+surpluses at the beginning of donation calculation - when an iocg's
+hierarchical weight is too low, the iocg is excluded from donation
+calculation and its inuse is reset to its active regardless of its
+indebtedness, triggering warnings like the following:
+
+ WARNING: CPU: 5 PID: 0 at block/blk-iocost.c:1416 iocg_kick_waitq+0x392/0x3a0
+ ...
+ RIP: 0010:iocg_kick_waitq+0x392/0x3a0
+ Code: 00 00 be ff ff ff ff 48 89 4d a8 e8 98 b2 70 00 48 8b 4d a8 85 c0 0f 85 4a fe ff ff 0f 0b e9 43 fe ff ff 0f 0b e9 4d fe ff ff <0f> 0b e9 50 fe ff ff e8 a2 ae 70 00 66 90 0f 1f 44 00 00 55 48 89
+ RSP: 0018:ffffc90000200d08 EFLAGS: 00010016
+ ...
+  <IRQ>
+  ioc_timer_fn+0x2e0/0x1470
+  call_timer_fn+0xa1/0x2c0
+ ...
+
+As this happens only when an iocg's hierarchical weight is negligible, its
+impact likely is limited to triggering the warnings. Fix it by skipping
+resetting inuse of under-weighted debtors.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Rik van Riel <riel@surriel.com>
+Fixes: c421a3eb2e27 ("blk-iocost: revamp debt handling")
+Cc: stable@vger.kernel.org # v5.10+
+Link: https://lore.kernel.org/r/YmjODd4aif9BzFuO@slm.duckdns.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-iocost.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -2322,7 +2322,17 @@ static void ioc_timer_fn(struct timer_li
+ 				iocg->hweight_donating = hwa;
+ 				iocg->hweight_after_donation = new_hwi;
+ 				list_add(&iocg->surplus_list, &surpluses);
+-			} else {
++			} else if (!iocg->abs_vdebt) {
++				/*
++				 * @iocg doesn't have enough to donate. Reset
++				 * its inuse to active.
++				 *
++				 * Don't reset debtors as their inuse's are
++				 * owned by debt handling. This shouldn't affect
++				 * donation calculation in any meaningful way
++				 * as @iocg doesn't have a meaningful amount of
++				 * share anyway.
++				 */
+ 				TRACE_IOCG_PATH(inuse_shortage, iocg, &now,
+ 						iocg->inuse, iocg->active,
+ 						iocg->hweight_inuse, new_hwi);
diff --git a/queue-5.17/riscv-patch_text-fixup-last-cpu-should-be-master.patch b/queue-5.17/riscv-patch_text-fixup-last-cpu-should-be-master.patch
new file mode 100644
index 00000000000..f420067b995
--- /dev/null
+++ b/queue-5.17/riscv-patch_text-fixup-last-cpu-should-be-master.patch
@@ -0,0 +1,39 @@
+From 8ec1442953c66a1d8462cccd8c20b7ba561f5915 Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Wed, 6 Apr 2022 22:16:49 +0800
+Subject: riscv: patch_text: Fixup last cpu should be master
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 8ec1442953c66a1d8462cccd8c20b7ba561f5915 upstream.
+
+These patch_text implementations are using stop_machine_cpuslocked
+infrastructure with atomic cpu_count. The original idea: when the
+master CPU runs patch_text, the others should wait for it. But the current
+implementation uses the first CPU as master, which cannot
+guarantee the remaining CPUs are waiting. This patch makes the
+last CPU the master to remove that potential risk.
+
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
+Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
+Fixes: 043cb41a85de ("riscv: introduce interfaces to patch kernel code")
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/kernel/patch.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/kernel/patch.c
++++ b/arch/riscv/kernel/patch.c
+@@ -104,7 +104,7 @@ static int patch_text_cb(void *data)
+ 	struct patch_insn *patch = data;
+ 	int ret = 0;
+ 
+-	if (atomic_inc_return(&patch->cpu_count) == 1) {
++	if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
+ 		ret =
+ 		    patch_text_nosync(patch->addr, &patch->insn,
+ 					    GET_INSN_LENGTH(patch->insn));
diff --git a/queue-5.17/series b/queue-5.17/series
index 72f12ac7dc4..913d08db92e 100644
--- a/queue-5.17/series
+++ b/queue-5.17/series
@@ -46,3 +46,9 @@ pinctrl-samsung-fix-missing-gpiolib-on-arm64-exynos-config.patch
 f2fs-should-not-truncate-blocks-during-roll-forward-recovery.patch
 hex2bin-make-the-function-hex_to_bin-constant-time.patch
 hex2bin-fix-access-beyond-string-end.patch
+bus-fsl-mc-msi-fix-msi-descriptor-mutex-lock-for-msi_first_desc.patch
+riscv-patch_text-fixup-last-cpu-should-be-master.patch
+x86-cpu-load-microcode-during-restore_processor_state.patch
+x86-pci-xen-disable-pci-msi-masking-for-xen_hvm-guests.patch
+iocost-don-t-reset-the-inuse-weight-of-under-weighted-debtors.patch
+virtio_net-fix-wrong-buf-address-calculation-when-using-xdp.patch
diff --git a/queue-5.17/virtio_net-fix-wrong-buf-address-calculation-when-using-xdp.patch b/queue-5.17/virtio_net-fix-wrong-buf-address-calculation-when-using-xdp.patch
new file mode 100644
index 00000000000..b27e026b5f7
--- /dev/null
+++ b/queue-5.17/virtio_net-fix-wrong-buf-address-calculation-when-using-xdp.patch
@@ -0,0 +1,205 @@
+From acb16b395c3f3d7502443e0c799c2b42df645642 Mon Sep 17 00:00:00 2001
+From: Nikolay Aleksandrov <razor@blackwall.org>
+Date: Mon, 25 Apr 2022 13:37:03 +0300
+Subject: virtio_net: fix wrong buf address calculation when using xdp
+
+From: Nikolay Aleksandrov <razor@blackwall.org>
+
+commit acb16b395c3f3d7502443e0c799c2b42df645642 upstream.
+
+We received a report[1] of kernel crashes when Cilium is used in XDP
+mode with virtio_net after updating to newer kernels. After
+investigating the reason it turned out that when using mergeable bufs
+with an XDP program which adjusts xdp.data or xdp.data_meta, page_to_skb()
+calculates the build_skb address wrong because the offset can become less
+than the headroom so it gets the address of the previous page (-X bytes
+depending on how much lower the offset is):
+ page_to_skb: page addr ffff9eb2923e2000 buf ffff9eb2923e1ffc offset 252 headroom 256
+
+This is a pr_err() I added in the beginning of page_to_skb which clearly
+shows offset that is less than headroom by adding 4 bytes of metadata
+via an xdp prog. The calculations done are:
+ receive_mergeable():
+ headroom = VIRTIO_XDP_HEADROOM; // VIRTIO_XDP_HEADROOM == 256 bytes
+ offset = xdp.data - page_address(xdp_page) -
+          vi->hdr_len - metasize;
+
+ page_to_skb():
+ p = page_address(page) + offset;
+ ...
+ buf = p - headroom;
+
+Now buf goes -4 bytes from the page's starting address as can be seen
+above which is set as skb->head and skb->data by build_skb later. Depending
+on what's done with the skb (when it's freed most often) we get all kinds
+of corruptions and BUG_ON() triggers in mm[2]. We have to recalculate
+the new headroom after the xdp program has run, similar to how offset
+and len are recalculated. Headroom is directly related to
+data_hard_start, data and data_meta, so we use them to get the new size.
+The result is correct (similar pr_err() in page_to_skb, one case of
+xdp_page and one case of virtnet buf):
+ a) Case with 4 bytes of metadata
+ [  115.949641] page_to_skb: page addr ffff8b4dcfad2000 offset 252 headroom 252
+ [  121.084105] page_to_skb: page addr ffff8b4dcf018000 offset 20732 headroom 252
+ b) Case of pushing data +32 bytes
+ [  153.181401] page_to_skb: page addr ffff8b4dd0c4d000 offset 288 headroom 288
+ [  158.480421] page_to_skb: page addr ffff8b4dd00b0000 offset 24864 headroom 288
+ c) Case of pushing data -33 bytes
+ [  835.906830] page_to_skb: page addr ffff8b4dd3270000 offset 223 headroom 223
+ [  840.839910] page_to_skb: page addr ffff8b4dcdd68000 offset 12511 headroom 223
+
+Offset and headroom are equal because offset points to the start of
+reserved bytes for the virtio_net header which are at buf start +
+headroom, while data points at buf start + vnet hdr size + headroom so
+when data or data_meta are adjusted by the xdp prog both the headroom size
+and the offset change equally. We can use data_hard_start to compute the
+new headroom after the xdp prog (linearized / page start case, the
+virtnet buf case is similar just with bigger base offset):
+ xdp.data_hard_start = page_address + vnet_hdr
+ xdp.data = page_address + vnet_hdr + headroom
+ new headroom after xdp prog = xdp.data - xdp.data_hard_start - metasize
+
+An example reproducer xdp prog[3] is below.
+
+[1] https://github.com/cilium/cilium/issues/19453
+
+[2] Two of the many traces:
+ [   40.437400] BUG: Bad page state in process swapper/0  pfn:14940
+ [   40.916726] BUG: Bad page state in process systemd-resolve  pfn:053b7
+ [   41.300891] kernel BUG at include/linux/mm.h:720!
+ [   41.301801] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+ [   41.302784] CPU: 1 PID: 1181 Comm: kubelet Kdump: loaded Tainted: G    B   W         5.18.0-rc1+ #37
+ [   41.304458] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014
+ [   41.306018] RIP: 0010:page_frag_free+0x79/0xe0
+ [   41.306836] Code: 00 00 75 ea 48 8b 07 a9 00 00 01 00 74 e0 48 8b 47 48 48 8d 50 ff a8 01 48 0f 45 fa eb d0 48 c7 c6 18 b8 30 a6 e8 d7 f8 fc ff <0f> 0b 48 8d 78 ff eb bc 48 8b 07 a9 00 00 01 00 74 3a 66 90 0f b6
+ [   41.310235] RSP: 0018:ffffac05c2a6bc78 EFLAGS: 00010292
+ [   41.311201] RAX: 000000000000003e RBX: 0000000000000000 RCX: 0000000000000000
+ [   41.312502] RDX: 0000000000000001 RSI: ffffffffa6423004 RDI: 00000000ffffffff
+ [   41.313794] RBP: ffff993c98823600 R08: 0000000000000000 R09: 00000000ffffdfff
+ [   41.315089] R10: ffffac05c2a6ba68 R11: ffffffffa698ca28 R12: ffff993c98823600
+ [   41.316398] R13: ffff993c86311ebc R14: 0000000000000000 R15: 000000000000005c
+ [   41.317700] FS:  00007fe13fc56740(0000) GS:ffff993cdd900000(0000) knlGS:0000000000000000
+ [   41.319150] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [   41.320152] CR2: 000000c00008a000 CR3: 0000000014908000 CR4: 0000000000350ee0
+ [   41.321387] Call Trace:
+ [   41.321819]  <TASK>
+ [   41.322193]  skb_release_data+0x13f/0x1c0
+ [   41.322902]  __kfree_skb+0x20/0x30
+ [   41.343870]  tcp_recvmsg_locked+0x671/0x880
+ [   41.363764]  tcp_recvmsg+0x5e/0x1c0
+ [   41.384102]  inet_recvmsg+0x42/0x100
+ [   41.406783]  ? sock_recvmsg+0x1d/0x70
+ [   41.428201]  sock_read_iter+0x84/0xd0
+ [   41.445592]  ? 0xffffffffa3000000
+ [   41.462442]  new_sync_read+0x148/0x160
+ [   41.479314]  ? 0xffffffffa3000000
+ [   41.496937]  vfs_read+0x138/0x190
+ [   41.517198]  ksys_read+0x87/0xc0
+ [   41.535336]  do_syscall_64+0x3b/0x90
+ [   41.551637]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [   41.568050] RIP: 0033:0x48765b
+ [   41.583955] Code: e8 4a 35 fe ff eb 88 cc cc cc cc cc cc cc cc e8 fb 7a fe ff 48 8b 7c 24 10 48 8b 74 24 18 48 8b 54 24 20 48 8b 44 24 08 0f 05 <48> 3d 01 f0 ff ff 76 20 48 c7 44 24 28 ff ff ff ff 48 c7 44 24 30
+ [   41.632818] RSP: 002b:000000c000a2f5b8 EFLAGS: 00000212 ORIG_RAX: 0000000000000000
+ [   41.664588] RAX: ffffffffffffffda RBX: 000000c000062000 RCX: 000000000048765b
+ [   41.681205] RDX: 0000000000005e54 RSI: 000000c000e66000 RDI: 0000000000000016
+ [   41.697164] RBP: 000000c000a2f608 R08: 0000000000000001 R09: 00000000000001b4
+ [   41.713034] R10: 00000000000000b6 R11: 0000000000000212 R12: 00000000000000e9
+ [   41.728755] R13: 0000000000000001 R14: 000000c000a92000 R15: ffffffffffffffff
+ [   41.744254]  </TASK>
+ [   41.758585] Modules linked in: br_netfilter bridge veth netconsole virtio_net
+
+ and
+
+ [   33.524802] BUG: Bad page state in process systemd-network  pfn:11e60
+ [   33.528617] page ffffe05dc0147b00 ffffe05dc04e7a00 ffff8ae9851ec000 (1) len 82 offset 252 metasize 4 hroom 0 hdr_len 12 data ffff8ae9851ec10c data_meta ffff8ae9851ec108 data_end ffff8ae9851ec14e
+ [   33.529764] page:000000003792b5ba refcount:0 mapcount:-512 mapping:0000000000000000 index:0x0 pfn:0x11e60
+ [   33.532463] flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff)
+ [   33.532468] raw: 000fffffc0000000 0000000000000000 dead000000000122 0000000000000000
+ [   33.532470] raw: 0000000000000000 0000000000000000 00000000fffffdff 0000000000000000
+ [   33.532471] page dumped because: nonzero mapcount
+ [   33.532472] Modules linked in: br_netfilter bridge veth netconsole virtio_net
+ [   33.532479] CPU: 0 PID: 791 Comm: systemd-network Kdump: loaded Not tainted 5.18.0-rc1+ #37
+ [   33.532482] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014
+ [   33.532484] Call Trace:
+ [   33.532496]  <TASK>
+ [   33.532500]  dump_stack_lvl+0x45/0x5a
+ [   33.532506]  bad_page.cold+0x63/0x94
+ [   33.532510]  free_pcp_prepare+0x290/0x420
+ [   33.532515]  free_unref_page+0x1b/0x100
+ [   33.532518]  skb_release_data+0x13f/0x1c0
+ [   33.532524]  kfree_skb_reason+0x3e/0xc0
+ [   33.532527]  ip6_mc_input+0x23c/0x2b0
+ [   33.532531]  ip6_sublist_rcv_finish+0x83/0x90
+ [   33.532534]  ip6_sublist_rcv+0x22b/0x2b0
+
+[3] XDP program to reproduce(xdp_pass.c):
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+ SEC("xdp_pass")
+ int xdp_pkt_pass(struct xdp_md *ctx)
+ {
+          bpf_xdp_adjust_head(ctx, -(int)32);
+          return XDP_PASS;
+ }
+
+ char _license[] SEC("license") = "GPL";
+
+ compile: clang -O2 -g -Wall -target bpf -c xdp_pass.c -o xdp_pass.o
+ load on virtio_net: ip link set enp1s0 xdpdrv obj xdp_pass.o sec xdp_pass
+
+CC: stable@vger.kernel.org
+CC: Jason Wang <jasowang@redhat.com>
+CC: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+CC: Daniel Borkmann <daniel@iogearbox.net>
+CC: "Michael S. Tsirkin" <mst@redhat.com>
+CC: virtualization@lists.linux-foundation.org
+Fixes: 8fb7da9e9907 ("virtio_net: get build_skb() buf by data ptr")
+Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Link: https://lore.kernel.org/r/20220425103703.3067292-1-razor@blackwall.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |   20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -978,6 +978,24 @@ static struct sk_buff *receive_mergeable
+ 			 * xdp.data_meta were adjusted
+ 			 */
+ 			len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
++
++			/* recalculate headroom if xdp.data or xdp.data_meta
++			 * were adjusted, note that offset should always point
++			 * to the start of the reserved bytes for virtio_net
++			 * header which are followed by xdp.data, that means
++			 * that offset is equal to the headroom (when buf is
++			 * starting at the beginning of the page, otherwise
++			 * there is a base offset inside the page) but it's used
++			 * with a different starting point (buf start) than
++			 * xdp.data (buf start + vnet hdr size). If xdp.data or
++			 * data_meta were adjusted by the xdp prog then the
++			 * headroom size has changed and so has the offset, we
++			 * can use data_hard_start, which points at buf start +
++			 * vnet hdr size, to calculate the new headroom and use
++			 * it later to compute buf start in page_to_skb()
++			 */
++			headroom = xdp.data - xdp.data_hard_start - metasize;
++
+ 			/* We can only create skb based on xdp_page. */
+ 			if (unlikely(xdp_page != page)) {
+ 				rcu_read_unlock();
+@@ -985,7 +1003,7 @@ static struct sk_buff *receive_mergeable
+ 				head_skb = page_to_skb(vi, rq, xdp_page, offset,
+ 						       len, PAGE_SIZE, false,
+ 						       metasize,
+-						       VIRTIO_XDP_HEADROOM);
++						       headroom);
+ 				return head_skb;
+ 			}
+ 			break;
diff --git a/queue-5.17/x86-cpu-load-microcode-during-restore_processor_state.patch b/queue-5.17/x86-cpu-load-microcode-during-restore_processor_state.patch
new file mode 100644
index 00000000000..b50bf50bcab
--- /dev/null
+++ b/queue-5.17/x86-cpu-load-microcode-during-restore_processor_state.patch
@@ -0,0 +1,124 @@
+From f9e14dbbd454581061c736bf70bf5cbb15ac927c Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 19 Apr 2022 09:52:41 -0700
+Subject: x86/cpu: Load microcode during restore_processor_state()
+
+From: Borislav Petkov <bp@suse.de>
+
+commit f9e14dbbd454581061c736bf70bf5cbb15ac927c upstream.
+
+When resuming from system sleep state, restore_processor_state()
+restores the boot CPU MSRs. These MSRs could be emulated by microcode.
+If microcode is not loaded yet, writing to emulated MSRs leads to
+unchecked MSR access error:
+
+  ...
+  PM: Calling lapic_suspend+0x0/0x210
+  unchecked MSR access error: WRMSR to 0x10f (tried to write 0x0...0) at rIP: ... (native_write_msr)
+  Call Trace:
+    <TASK>
+    ? restore_processor_state
+    x86_acpi_suspend_lowlevel
+    acpi_suspend_enter
+    suspend_devices_and_enter
+    pm_suspend.cold
+    state_store
+    kobj_attr_store
+    sysfs_kf_write
+    kernfs_fop_write_iter
+    new_sync_write
+    vfs_write
+    ksys_write
+    __x64_sys_write
+    do_syscall_64
+    entry_SYSCALL_64_after_hwframe
+   RIP: 0033:0x7fda13c260a7
+
+To ensure microcode emulated MSRs are available for restoration, load
+the microcode on the boot CPU before restoring these MSRs.
+
+  [ Pawan: write commit message and productize it. ]
+
+Fixes: e2a1256b17b1 ("x86/speculation: Restore speculation related MSRs during S3 resume")
+Reported-by: Kyle D. Pelton <kyle.d.pelton@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Tested-by: Kyle D. Pelton <kyle.d.pelton@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215841
+Link: https://lore.kernel.org/r/4350dfbf785cd482d3fafa72b2b49c83102df3ce.1650386317.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/microcode.h     |    2 ++
+ arch/x86/kernel/cpu/microcode/core.c |    6 +++---
+ arch/x86/power/cpu.c                 |   10 +++++++++-
+ 3 files changed, 14 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -131,10 +131,12 @@ extern void __init load_ucode_bsp(void);
+ extern void load_ucode_ap(void);
+ void reload_early_microcode(void);
+ extern bool initrd_gone;
++void microcode_bsp_resume(void);
+ #else
+ static inline void __init load_ucode_bsp(void)			{ }
+ static inline void load_ucode_ap(void)				{ }
+ static inline void reload_early_microcode(void)			{ }
++static inline void microcode_bsp_resume(void)			{ }
+ #endif
+ 
+ #endif /* _ASM_X86_MICROCODE_H */
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -758,9 +758,9 @@ static struct subsys_interface mc_cpu_in
+ };
+ 
+ /**
+- * mc_bp_resume - Update boot CPU microcode during resume.
++ * microcode_bsp_resume - Update boot CPU microcode during resume.
+  */
+-static void mc_bp_resume(void)
++void microcode_bsp_resume(void)
+ {
+ 	int cpu = smp_processor_id();
+ 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+@@ -772,7 +772,7 @@ static void mc_bp_resume(void)
+ }
+ 
+ static struct syscore_ops mc_syscore_ops = {
+-	.resume			= mc_bp_resume,
++	.resume			= microcode_bsp_resume,
+ };
+ 
+ static int mc_cpu_starting(unsigned int cpu)
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -25,6 +25,7 @@
+ #include <asm/cpu.h>
+ #include <asm/mmu_context.h>
+ #include <asm/cpu_device_id.h>
++#include <asm/microcode.h>
+ 
+ #ifdef CONFIG_X86_32
+ __visible unsigned long saved_context_ebx;
+@@ -262,11 +263,18 @@ static void notrace __restore_processor_
+ 	x86_platform.restore_sched_clock_state();
+ 	mtrr_bp_restore();
+ 	perf_restore_debug_store();
+-	msr_restore_context(ctxt);
+ 
+ 	c = &cpu_data(smp_processor_id());
+ 	if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
+ 		init_ia32_feat_ctl(c);
++
++	microcode_bsp_resume();
++
++	/*
++	 * This needs to happen after the microcode has been updated upon resume
++	 * because some of the MSRs are "emulated" in microcode.
++	 */
++	msr_restore_context(ctxt);
+ }
+ 
+ /* Needed by apm.c */
diff --git a/queue-5.17/x86-pci-xen-disable-pci-msi-masking-for-xen_hvm-guests.patch b/queue-5.17/x86-pci-xen-disable-pci-msi-masking-for-xen_hvm-guests.patch
new file mode 100644
index 00000000000..e7186a45858
--- /dev/null
+++ b/queue-5.17/x86-pci-xen-disable-pci-msi-masking-for-xen_hvm-guests.patch
@@ -0,0 +1,57 @@
+From 7e0815b3e09986d2fe651199363e135b9358132a Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 28 Apr 2022 15:50:54 +0200
+Subject: x86/pci/xen: Disable PCI/MSI[-X] masking for XEN_HVM guests
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 7e0815b3e09986d2fe651199363e135b9358132a upstream.
+
+When a XEN_HVM guest uses the XEN PIRQ/Eventchannel mechanism, then
+PCI/MSI[-X] masking is solely controlled by the hypervisor, but contrary to
+XEN_PV guests this does not disable PCI/MSI[-X] masking in the PCI/MSI
+layer.
+
+This can lead to a situation where the PCI/MSI layer masks an MSI[-X]
+interrupt and the hypervisor grants the write despite the fact that it
+already requested the interrupt. As a consequence interrupt delivery on the
+affected device is not happening ever.
+
+Set pci_msi_ignore_mask to prevent that like it's done for XEN_PV guests
+already.
+
+Fixes: 809f9267bbab ("xen: map MSIs into pirqs")
+Reported-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
+Reported-by: Dusty Mabe <dustymabe@redhat.com>
+Reported-by: Salvatore Bonaccorso <carnil@debian.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Noah Meyerhans <noahm@debian.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/87tuaduxj5.ffs@tglx
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/pci/xen.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -467,7 +467,6 @@ static __init void xen_setup_pci_msi(voi
+ 		else
+ 			xen_msi_ops.setup_msi_irqs = xen_setup_msi_irqs;
+ 		xen_msi_ops.teardown_msi_irqs = xen_pv_teardown_msi_irqs;
+-		pci_msi_ignore_mask = 1;
+ 	} else if (xen_hvm_domain()) {
+ 		xen_msi_ops.setup_msi_irqs = xen_hvm_setup_msi_irqs;
+ 		xen_msi_ops.teardown_msi_irqs = xen_teardown_msi_irqs;
+@@ -481,6 +480,11 @@ static __init void xen_setup_pci_msi(voi
+ 	 * in allocating the native domain and never use it.
+ 	 */
+ 	x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain;
++	/*
++	 * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
++	 * controlled by the hypervisor.
++	 */
++	pci_msi_ignore_mask = 1;
+ }
+ 
+ #else /* CONFIG_PCI_MSI */