--- /dev/null
+From b3a9cae09836af27192786123314c3eb72ec592f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Oct 2022 13:34:23 -0700
+Subject: ACPI: extlog: Handle multiple records
+
+From: Tony Luck <tony.luck@intel.com>
+
+[ Upstream commit f6ec01da40e4139b41179f046044ee7c4f6370dc ]
+
+If there is no user space consumer of extlog_mem trace records, then
+Linux properly handles multiple error records in an ELOG block
+
+ extlog_print()
+ print_extlog_rcd()
+ __print_extlog_rcd()
+ cper_estatus_print()
+ apei_estatus_for_each_section()
+
+But the other code path hard codes looking for a single record to
+output a trace record.
+
+Fix by using the same apei_estatus_for_each_section() iterator
+to step over all records.
+
+Fixes: 2dfb7d51a61d ("trace, RAS: Add eMCA trace event interface")
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/acpi_extlog.c | 33 ++++++++++++++++++++-------------
+ 1 file changed, 20 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
+index 72f1fb77abcd..e648158368a7 100644
+--- a/drivers/acpi/acpi_extlog.c
++++ b/drivers/acpi/acpi_extlog.c
+@@ -12,6 +12,7 @@
+ #include <linux/ratelimit.h>
+ #include <linux/edac.h>
+ #include <linux/ras.h>
++#include <acpi/ghes.h>
+ #include <asm/cpu.h>
+ #include <asm/mce.h>
+
+@@ -138,8 +139,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
+ int cpu = mce->extcpu;
+ struct acpi_hest_generic_status *estatus, *tmp;
+ struct acpi_hest_generic_data *gdata;
+- const guid_t *fru_id = &guid_null;
+- char *fru_text = "";
++ const guid_t *fru_id;
++ char *fru_text;
+ guid_t *sec_type;
+ static u32 err_seq;
+
+@@ -160,17 +161,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
+
+ /* log event via trace */
+ err_seq++;
+- gdata = (struct acpi_hest_generic_data *)(tmp + 1);
+- if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+- fru_id = (guid_t *)gdata->fru_id;
+- if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+- fru_text = gdata->fru_text;
+- sec_type = (guid_t *)gdata->section_type;
+- if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+- struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+- if (gdata->error_data_length >= sizeof(*mem))
+- trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+- (u8)gdata->error_severity);
++ apei_estatus_for_each_section(tmp, gdata) {
++ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
++ fru_id = (guid_t *)gdata->fru_id;
++ else
++ fru_id = &guid_null;
++ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
++ fru_text = gdata->fru_text;
++ else
++ fru_text = "";
++ sec_type = (guid_t *)gdata->section_type;
++ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
++ struct cper_sec_mem_err *mem = (void *)(gdata + 1);
++
++ if (gdata->error_data_length >= sizeof(*mem))
++ trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
++ (u8)gdata->error_severity);
++ }
+ }
+
+ out:
+--
+2.35.1
+
--- /dev/null
+From ee4183a3c57cf5703185c99b9f359b40e74b09ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jan 2022 19:50:50 +0300
+Subject: ALSA: hda/realtek: fix speakers and micmute on HP 855 G8
+
+From: Alexander Sergeyev <sergeev917@gmail.com>
+
+[ Upstream commit 91502a9a0b0d5252cf3f32ebd898823c2f5aadab ]
+
+There are several PCI ids associated with HP EliteBook 855 G8 Notebook
+PC. Commit 0e68c4b11f1e6 ("ALSA: hda/realtek: fix mute/micmute LEDs for
+HP 855 G8") covers 0x103c:0x8896, while this commit covers 0x103c:0x8895
+which needs some additional work on top of the quirk from 0e68c4b11f1e6.
+
+Note that the device can boot up with working speakers and micmute LED
+without this patch, but the success rate would be quite low (order of
+16 working boots across 709 boots) at least for the built-in drivers
+scenario. This also means that there are some timing issues during early
+boot and this patch is a workaround.
+
+With this patch applied speakers and headphones are consistently working,
+as well as mute/micmute LEDs and the internal microphone.
+
+Signed-off-by: Alexander Sergeyev <sergeev917@gmail.com>
+Link: https://lore.kernel.org/r/20220114165050.ouw2nknuspclynro@localhost.localdomain
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Stable-dep-of: 225f6e1bc151 ("ALSA: hda/realtek: Add quirk for HP Zbook Firefly 14 G9 model")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 60e3bc124836..5dd786321a18 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -6883,6 +6883,7 @@ enum {
+ ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME,
+ ALC285_FIXUP_LEGION_Y9000X_SPEAKERS,
+ ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
++ ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
+ };
+
+ /* A special fixup for Lenovo C940 and Yoga Duet 7;
+@@ -8693,6 +8694,16 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+ },
++ [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = {
++ .type = HDA_FIXUP_VERBS,
++ .v.verbs = (const struct hda_verb[]) {
++ { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 },
++ { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 },
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC285_FIXUP_HP_MUTE_LED,
++ },
+ };
+
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -8915,6 +8926,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+--
+2.35.1
+
--- /dev/null
+From 991194e6de9c7646460c3e847e3bada2f5a08b5b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Oct 2022 13:16:51 +0100
+Subject: btrfs: fix processing of delayed data refs during backref walking
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 4fc7b57228243d09c0d878873bf24fa64a90fa01 ]
+
+When processing delayed data references during backref walking and we are
+using a share context (we are being called through fiemap), whenever we
+find a delayed data reference for an inode different from the one we are
+interested in, then we immediately exit and consider the data extent as
+shared. This is wrong, because:
+
+1) This might be a DROP reference that will cancel out a reference in the
+ extent tree;
+
+2) Even if it's an ADD reference, it may be followed by a DROP reference
+ that cancels it out.
+
+In either case we should not exit immediately.
+
+Fix this by never exiting when we find a delayed data reference for
+another inode - instead add the reference and if it does not cancel out
+other delayed references, we will exit early when we call
+extent_is_shared() after processing all delayed references. If we find
+a drop reference, then signal the code that processes references from
+the extent tree (add_inline_refs() and add_keyed_refs()) to not exit
+immediately if it finds there a reference for another inode, since we
+have delayed drop references that may cancel it out. In this later case
+we exit once we don't have references in the rb trees that cancel out
+each other and have two references for different inodes.
+
+Example reproducer for case 1):
+
+ $ cat test-1.sh
+ #!/bin/bash
+
+ DEV=/dev/sdj
+ MNT=/mnt/sdj
+
+ mkfs.btrfs -f $DEV
+ mount $DEV $MNT
+
+ xfs_io -f -c "pwrite 0 64K" $MNT/foo
+ cp --reflink=always $MNT/foo $MNT/bar
+
+ echo
+ echo "fiemap after cloning:"
+ xfs_io -c "fiemap -v" $MNT/foo
+
+ rm -f $MNT/bar
+ echo
+ echo "fiemap after removing file bar:"
+ xfs_io -c "fiemap -v" $MNT/foo
+
+ umount $MNT
+
+Running it before this patch, the extent is still listed as shared, it has
+the flag 0x2000 (FIEMAP_EXTENT_SHARED) set:
+
+ $ ./test-1.sh
+ fiemap after cloning:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x2001
+
+ fiemap after removing file bar:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x2001
+
+Example reproducer for case 2):
+
+ $ cat test-2.sh
+ #!/bin/bash
+
+ DEV=/dev/sdj
+ MNT=/mnt/sdj
+
+ mkfs.btrfs -f $DEV
+ mount $DEV $MNT
+
+ xfs_io -f -c "pwrite 0 64K" $MNT/foo
+ cp --reflink=always $MNT/foo $MNT/bar
+
+ # Flush delayed references to the extent tree and commit current
+ # transaction.
+ sync
+
+ echo
+ echo "fiemap after cloning:"
+ xfs_io -c "fiemap -v" $MNT/foo
+
+ rm -f $MNT/bar
+ echo
+ echo "fiemap after removing file bar:"
+ xfs_io -c "fiemap -v" $MNT/foo
+
+ umount $MNT
+
+Running it before this patch, the extent is still listed as shared, it has
+the flag 0x2000 (FIEMAP_EXTENT_SHARED) set:
+
+ $ ./test-2.sh
+ fiemap after cloning:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x2001
+
+ fiemap after removing file bar:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x2001
+
+After this patch, after deleting bar in both tests, the extent is not
+reported with the 0x2000 flag anymore, it gets only the flag 0x1
+(which is FIEMAP_EXTENT_LAST):
+
+ $ ./test-1.sh
+ fiemap after cloning:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x2001
+
+ fiemap after removing file bar:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x1
+
+ $ ./test-2.sh
+ fiemap after cloning:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x2001
+
+ fiemap after removing file bar:
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..127]: 26624..26751 128 0x1
+
+These tests will later be converted to a test case for fstests.
+
+Fixes: dc046b10c8b7d4 ("Btrfs: make fiemap not blow when you have lots of snapshots")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/backref.c | 33 ++++++++++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 9 deletions(-)
+
+diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
+index baff31a147e7..7e8fac12f3f8 100644
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -137,6 +137,7 @@ struct share_check {
+ u64 root_objectid;
+ u64 inum;
+ int share_count;
++ bool have_delayed_delete_refs;
+ };
+
+ static inline int extent_is_shared(struct share_check *sc)
+@@ -881,13 +882,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ key.offset = ref->offset;
+
+ /*
+- * Found a inum that doesn't match our known inum, we
+- * know it's shared.
++ * If we have a share check context and a reference for
++ * another inode, we can't exit immediately. This is
++ * because even if this is a BTRFS_ADD_DELAYED_REF
++ * reference we may find next a BTRFS_DROP_DELAYED_REF
++ * which cancels out this ADD reference.
++ *
++ * If this is a DROP reference and there was no previous
++ * ADD reference, then we need to signal that when we
++ * process references from the extent tree (through
++ * add_inline_refs() and add_keyed_refs()), we should
++ * not exit early if we find a reference for another
++ * inode, because one of the delayed DROP references
++ * may cancel that reference in the extent tree.
+ */
+- if (sc && sc->inum && ref->objectid != sc->inum) {
+- ret = BACKREF_FOUND_SHARED;
+- goto out;
+- }
++ if (sc && count < 0)
++ sc->have_delayed_delete_refs = true;
+
+ ret = add_indirect_ref(fs_info, preftrees, ref->root,
+ &key, 0, node->bytenr, count, sc,
+@@ -917,7 +927,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ }
+ if (!ret)
+ ret = extent_is_shared(sc);
+-out:
++
+ spin_unlock(&head->lock);
+ return ret;
+ }
+@@ -1020,7 +1030,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+- if (sc && sc->inum && key.objectid != sc->inum) {
++ if (sc && sc->inum && key.objectid != sc->inum &&
++ !sc->have_delayed_delete_refs) {
+ ret = BACKREF_FOUND_SHARED;
+ break;
+ }
+@@ -1030,6 +1041,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
+ ret = add_indirect_ref(fs_info, preftrees, root,
+ &key, 0, bytenr, count,
+ sc, GFP_NOFS);
++
+ break;
+ }
+ default:
+@@ -1119,7 +1131,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+- if (sc && sc->inum && key.objectid != sc->inum) {
++ if (sc && sc->inum && key.objectid != sc->inum &&
++ !sc->have_delayed_delete_refs) {
+ ret = BACKREF_FOUND_SHARED;
+ break;
+ }
+@@ -1542,6 +1555,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+ .root_objectid = root->root_key.objectid,
+ .inum = inum,
+ .share_count = 0,
++ .have_delayed_delete_refs = false,
+ };
+
+ ulist_init(roots);
+@@ -1576,6 +1590,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+ break;
+ bytenr = node->val;
+ shared.share_count = 0;
++ shared.have_delayed_delete_refs = false;
+ cond_resched();
+ }
+
+--
+2.35.1
+
--- /dev/null
+From a463ffcdc7131fe14012064f23ce2c0495108b07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Oct 2022 13:16:52 +0100
+Subject: btrfs: fix processing of delayed tree block refs during backref
+ walking
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 943553ef9b51db303ab2b955c1025261abfdf6fb ]
+
+During backref walking, when processing a delayed reference with a type of
+BTRFS_TREE_BLOCK_REF_KEY, we have two bugs there:
+
+1) We are accessing the delayed references extent_op, and its key, without
+ the protection of the delayed ref head's lock;
+
+2) If there's no extent op for the delayed ref head, we end up with an
+ uninitialized key in the stack, variable 'tmp_op_key', and then pass
+ it to add_indirect_ref(), which adds the reference to the indirect
+ refs rb tree.
+
+ This is wrong, because indirect references should have a NULL key
+ when we don't have access to the key, and in that case they should be
+ added to the indirect_missing_keys rb tree and not to the indirect rb
+ tree.
+
+   This means that if we have a BTRFS_TREE_BLOCK_REF_KEY delayed ref resulting
+ from freeing an extent buffer, therefore with a count of -1, it will
+ not cancel out the corresponding reference we have in the extent tree
+ (with a count of 1), since both references end up in different rb
+ trees.
+
+ When using fiemap, where we often need to check if extents are shared
+ through shared subtrees resulting from snapshots, it means we can
+ incorrectly report an extent as shared when it's no longer shared.
+ However this is temporary because after the transaction is committed
+ the extent is no longer reported as shared, as running the delayed
+ reference results in deleting the tree block reference from the extent
+ tree.
+
+ Outside the fiemap context, the result is unpredictable, as the key was
+ not initialized but it's used when navigating the rb trees to insert
+ and search for references (prelim_ref_compare()), and we expect all
+ references in the indirect rb tree to have valid keys.
+
+The following reproducer triggers the second bug:
+
+ $ cat test.sh
+ #!/bin/bash
+
+ DEV=/dev/sdj
+ MNT=/mnt/sdj
+
+ mkfs.btrfs -f $DEV
+ mount -o compress $DEV $MNT
+
+ # With a compressed 128M file we get a tree height of 2 (level 1 root).
+ xfs_io -f -c "pwrite -b 1M 0 128M" $MNT/foo
+
+ btrfs subvolume snapshot $MNT $MNT/snap
+
+ # Fiemap should output 0x2008 in the flags column.
+ # 0x2000 means shared extent
+ # 0x8 means encoded extent (because it's compressed)
+ echo
+ echo "fiemap after snapshot, range [120M, 120M + 128K):"
+ xfs_io -c "fiemap -v 120M 128K" $MNT/foo
+ echo
+
+ # Overwrite one extent and fsync to flush delalloc and COW a new path
+ # in the snapshot's tree.
+ #
+ # After this we have a BTRFS_DROP_DELAYED_REF delayed ref of type
+ # BTRFS_TREE_BLOCK_REF_KEY with a count of -1 for every COWed extent
+ # buffer in the path.
+ #
+ # In the extent tree we have inline references of type
+ # BTRFS_TREE_BLOCK_REF_KEY, with a count of 1, for the same extent
+ # buffers, so they should cancel each other, and the extent buffers in
+ # the fs tree should no longer be considered as shared.
+ #
+ echo "Overwriting file range [120M, 120M + 128K)..."
+ xfs_io -c "pwrite -b 128K 120M 128K" $MNT/snap/foo
+ xfs_io -c "fsync" $MNT/snap/foo
+
+ # Fiemap should output 0x8 in the flags column. The extent in the range
+ # [120M, 120M + 128K) is no longer shared, it's now exclusive to the fs
+ # tree.
+ echo
+ echo "fiemap after overwrite range [120M, 120M + 128K):"
+ xfs_io -c "fiemap -v 120M 128K" $MNT/foo
+ echo
+
+ umount $MNT
+
+Running it before this patch:
+
+ $ ./test.sh
+ (...)
+ wrote 134217728/134217728 bytes at offset 0
+ 128 MiB, 128 ops; 0.1152 sec (1.085 GiB/sec and 1110.5809 ops/sec)
+ Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap'
+
+ fiemap after snapshot, range [120M, 120M + 128K):
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [245760..246015]: 34304..34559 256 0x2008
+
+ Overwriting file range [120M, 120M + 128K)...
+ wrote 131072/131072 bytes at offset 125829120
+ 128 KiB, 1 ops; 0.0001 sec (683.060 MiB/sec and 5464.4809 ops/sec)
+
+ fiemap after overwrite range [120M, 120M + 128K):
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [245760..246015]: 34304..34559 256 0x2008
+
+The extent in the range [120M, 120M + 128K) is still reported as shared
+(0x2000 bit set) after overwriting that range and flushing delalloc, which
+is not correct - an entire path was COWed in the snapshot's tree and the
+extent is now only referenced by the original fs tree.
+
+Running it after this patch:
+
+ $ ./test.sh
+ (...)
+ wrote 134217728/134217728 bytes at offset 0
+ 128 MiB, 128 ops; 0.1198 sec (1.043 GiB/sec and 1068.2067 ops/sec)
+ Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap'
+
+ fiemap after snapshot, range [120M, 120M + 128K):
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [245760..246015]: 34304..34559 256 0x2008
+
+ Overwriting file range [120M, 120M + 128K)...
+ wrote 131072/131072 bytes at offset 125829120
+ 128 KiB, 1 ops; 0.0001 sec (694.444 MiB/sec and 5555.5556 ops/sec)
+
+ fiemap after overwrite range [120M, 120M + 128K):
+ /mnt/sdj/foo:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [245760..246015]: 34304..34559 256 0x8
+
+Now the extent is not reported as shared anymore.
+
+So fix this by passing a NULL key pointer to add_indirect_ref() when
+processing a delayed reference for a tree block if there's no extent op
+for our delayed ref head with a defined key. Also access the extent op
+only after locking the delayed ref head's lock.
+
+The reproducer will be converted later to a test case for fstests.
+
+Fixes: 86d5f994425252 ("btrfs: convert prelimary reference tracking to use rbtrees")
+Fixes: a6dbceafb915e8 ("btrfs: Remove unused op_key var from add_delayed_refs")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/backref.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
+index 7e8fac12f3f8..92cb16c0e5ee 100644
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -818,16 +818,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ struct preftrees *preftrees, struct share_check *sc)
+ {
+ struct btrfs_delayed_ref_node *node;
+- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
+ struct btrfs_key key;
+- struct btrfs_key tmp_op_key;
+ struct rb_node *n;
+ int count;
+ int ret = 0;
+
+- if (extent_op && extent_op->update_key)
+- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
+-
+ spin_lock(&head->lock);
+ for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
+ node = rb_entry(n, struct btrfs_delayed_ref_node,
+@@ -853,10 +848,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ case BTRFS_TREE_BLOCK_REF_KEY: {
+ /* NORMAL INDIRECT METADATA backref */
+ struct btrfs_delayed_tree_ref *ref;
++ struct btrfs_key *key_ptr = NULL;
++
++ if (head->extent_op && head->extent_op->update_key) {
++ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
++ key_ptr = &key;
++ }
+
+ ref = btrfs_delayed_node_to_tree_ref(node);
+ ret = add_indirect_ref(fs_info, preftrees, ref->root,
+- &tmp_op_key, ref->level + 1,
++ key_ptr, ref->level + 1,
+ node->bytenr, count, sc,
+ GFP_ATOMIC);
+ break;
+--
+2.35.1
+
--- /dev/null
+From fca36c2f98ba5c022fc1395d6617efa11e21eae9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Oct 2022 22:45:22 +0800
+Subject: cifs: Fix xid leak in cifs_copy_file_range()
+
+From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+
+[ Upstream commit 9a97df404a402fe1174d2d1119f87ff2a0ca2fe9 ]
+
+If the file is used by swap, before return -EOPNOTSUPP, should
+free the xid, otherwise, the xid will be leaked.
+
+Fixes: 4e8aea30f775 ("smb3: enable swap on SMB3 mounts")
+Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cifs/cifsfs.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
+index bc957e6ca48b..f442ef8b65da 100644
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -1221,8 +1221,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
+ ssize_t rc;
+ struct cifsFileInfo *cfile = dst_file->private_data;
+
+- if (cfile->swapfile)
+- return -EOPNOTSUPP;
++ if (cfile->swapfile) {
++ rc = -EOPNOTSUPP;
++ free_xid(xid);
++ return rc;
++ }
+
+ rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
+ len, flags);
+--
+2.35.1
+
--- /dev/null
+From c842c72c0cbcba31bbc69070e6b8f64337019b6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Oct 2022 22:45:23 +0800
+Subject: cifs: Fix xid leak in cifs_flock()
+
+From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+
+[ Upstream commit 575e079c782b9862ec2626403922d041a42e6ed6 ]
+
+If not flock, before return -ENOLCK, should free the xid,
+otherwise, the xid will be leaked.
+
+Fixes: d0677992d2af ("cifs: add support for flock")
+Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cifs/file.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/fs/cifs/file.c b/fs/cifs/file.c
+index a648146e49cf..144064dc0d38 100644
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -1735,11 +1735,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
+ struct cifsFileInfo *cfile;
+ __u32 type;
+
+- rc = -EACCES;
+ xid = get_xid();
+
+- if (!(fl->fl_flags & FL_FLOCK))
+- return -ENOLCK;
++ if (!(fl->fl_flags & FL_FLOCK)) {
++ rc = -ENOLCK;
++ free_xid(xid);
++ return rc;
++ }
+
+ cfile = (struct cifsFileInfo *)file->private_data;
+ tcon = tlink_tcon(cfile->tlink);
+@@ -1758,8 +1760,9 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
+ * if no lock or unlock then nothing to do since we do not
+ * know what it is
+ */
++ rc = -EOPNOTSUPP;
+ free_xid(xid);
+- return -EOPNOTSUPP;
++ return rc;
+ }
+
+ rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
+--
+2.35.1
+
--- /dev/null
+From 2399563eeac3c1dd136518d78e4e08bfced93409 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Oct 2022 22:45:24 +0800
+Subject: cifs: Fix xid leak in cifs_ses_add_channel()
+
+From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+
+[ Upstream commit e909d054bdea75ef1ec48c18c5936affdaecbb2c ]
+
+Before return, should free the xid, otherwise, the
+xid will be leaked.
+
+Fixes: d70e9fa55884 ("cifs: try opening channels after mounting")
+Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cifs/sess.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
+index d58c5ffeca0d..cf6fd138d8d5 100644
+--- a/fs/cifs/sess.c
++++ b/fs/cifs/sess.c
+@@ -306,6 +306,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface)
+ cifs_put_tcp_session(chan->server, 0);
+ unload_nls(vol.local_nls);
+
++ free_xid(xid);
+ return rc;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From ac67c1dc42ffdc167ecc29029b496394be4b6c39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jan 2021 17:21:59 +0000
+Subject: crypto: qat - reduce size of mapped region
+
+From: Adam Guerin <adam.guerin@intel.com>
+
+[ Upstream commit e48767c17718067ba21fb2ef461779ec2506f845 ]
+
+Restrict size of field to what is required by the operation.
+
+This issue was detected by smatch:
+
+ drivers/crypto/qat/qat_common/qat_asym_algs.c:328 qat_dh_compute_value() error: dma_map_single_attrs() '&qat_req->in.dh.in.b' too small (8 vs 64)
+
+Signed-off-by: Adam Guerin <adam.guerin@intel.com>
+Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Stable-dep-of: 9c5f21b198d2 ("Revert "crypto: qat - reduce size of mapped region"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/crypto/qat/qat_common/qat_asym_algs.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
+index 846569ec9066..2b1aca487fc3 100644
+--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
++++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
+@@ -326,13 +326,13 @@ static int qat_dh_compute_value(struct kpp_request *req)
+ qat_req->out.dh.out_tab[1] = 0;
+ /* Mapping in.in.b or in.in_g2.xa is the same */
+ qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b,
+- sizeof(struct qat_dh_input_params),
++ sizeof(qat_req->in.dh.in.b),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+ qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r,
+- sizeof(struct qat_dh_output_params),
++ sizeof(qat_req->out.dh.r),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+@@ -721,13 +721,13 @@ static int qat_rsa_enc(struct akcipher_request *req)
+ qat_req->in.rsa.in_tab[3] = 0;
+ qat_req->out.rsa.out_tab[1] = 0;
+ qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m,
+- sizeof(struct qat_rsa_input_params),
++ sizeof(qat_req->in.rsa.enc.m),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+ qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c,
+- sizeof(struct qat_rsa_output_params),
++ sizeof(qat_req->out.rsa.enc.c),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+@@ -869,13 +869,13 @@ static int qat_rsa_dec(struct akcipher_request *req)
+ qat_req->in.rsa.in_tab[3] = 0;
+ qat_req->out.rsa.out_tab[1] = 0;
+ qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c,
+- sizeof(struct qat_rsa_input_params),
++ sizeof(qat_req->in.rsa.dec.c),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+ qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m,
+- sizeof(struct qat_rsa_output_params),
++ sizeof(qat_req->out.rsa.dec.m),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+--
+2.35.1
+
--- /dev/null
+From 900dd401d452f295b4999cdf68d03de7a81dce68 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Nov 2020 16:30:51 -0300
+Subject: dmaengine: mxs-dma: Remove the unused .id_table
+
+From: Fabio Estevam <festevam@gmail.com>
+
+[ Upstream commit cc2afb0d4c7cbba6743ed6d9564f0883cab6bae1 ]
+
+The mxs-dma driver is only used by DT platforms and the .id_table
+is unused.
+
+Get rid of it to simplify the code.
+
+Signed-off-by: Fabio Estevam <festevam@gmail.com>
+Link: https://lore.kernel.org/r/20201123193051.17285-1-festevam@gmail.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Stable-dep-of: 26696d465716 ("dmaengine: mxs: use platform_driver_register")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/mxs-dma.c | 37 +++++--------------------------------
+ 1 file changed, 5 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
+index 65f816b40c32..994fc4d2aca4 100644
+--- a/drivers/dma/mxs-dma.c
++++ b/drivers/dma/mxs-dma.c
+@@ -167,29 +167,11 @@ static struct mxs_dma_type mxs_dma_types[] = {
+ }
+ };
+
+-static const struct platform_device_id mxs_dma_ids[] = {
+- {
+- .name = "imx23-dma-apbh",
+- .driver_data = (kernel_ulong_t) &mxs_dma_types[0],
+- }, {
+- .name = "imx23-dma-apbx",
+- .driver_data = (kernel_ulong_t) &mxs_dma_types[1],
+- }, {
+- .name = "imx28-dma-apbh",
+- .driver_data = (kernel_ulong_t) &mxs_dma_types[2],
+- }, {
+- .name = "imx28-dma-apbx",
+- .driver_data = (kernel_ulong_t) &mxs_dma_types[3],
+- }, {
+- /* end of list */
+- }
+-};
+-
+ static const struct of_device_id mxs_dma_dt_ids[] = {
+- { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_ids[0], },
+- { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_ids[1], },
+- { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_ids[2], },
+- { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_ids[3], },
++ { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_types[0], },
++ { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_types[1], },
++ { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_types[2], },
++ { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_types[3], },
+ { /* sentinel */ }
+ };
+ MODULE_DEVICE_TABLE(of, mxs_dma_dt_ids);
+@@ -762,8 +744,6 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec,
+ static int __init mxs_dma_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
+- const struct platform_device_id *id_entry;
+- const struct of_device_id *of_id;
+ const struct mxs_dma_type *dma_type;
+ struct mxs_dma_engine *mxs_dma;
+ struct resource *iores;
+@@ -779,13 +759,7 @@ static int __init mxs_dma_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- of_id = of_match_device(mxs_dma_dt_ids, &pdev->dev);
+- if (of_id)
+- id_entry = of_id->data;
+- else
+- id_entry = platform_get_device_id(pdev);
+-
+- dma_type = (struct mxs_dma_type *)id_entry->driver_data;
++ dma_type = (struct mxs_dma_type *)of_device_get_match_data(&pdev->dev);
+ mxs_dma->type = dma_type->type;
+ mxs_dma->dev_id = dma_type->id;
+
+@@ -865,7 +839,6 @@ static struct platform_driver mxs_dma_driver = {
+ .name = "mxs-dma",
+ .of_match_table = mxs_dma_dt_ids,
+ },
+- .id_table = mxs_dma_ids,
+ };
+
+ static int __init mxs_dma_module_init(void)
+--
+2.35.1
+
--- /dev/null
+From 4a0f6b816ed3c2c75d8e78027b0fc26f93ebae8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Sep 2022 19:05:56 +0200
+Subject: dmaengine: mxs: use platform_driver_register
+
+From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
+
+[ Upstream commit 26696d4657167112a1079f86cba1739765c1360e ]
+
+Driver registration fails on SOC imx8mn as its supplier, the clock
+control module, is probed later than subsys initcall level. This driver
+uses platform_driver_probe which is not compatible with deferred probing
+and won't be probed again later if probe function fails due to clock not
+being available at that time.
+
+This patch replaces the use of platform_driver_probe with
+platform_driver_register which will allow probing the driver later again
+when the clock control module will be available.
+
+The __init annotation has been dropped because it is not compatible with
+deferred probing. The code is not executed once and its memory cannot be
+freed.
+
+Fixes: a580b8c5429a ("dmaengine: mxs-dma: add dma support for i.MX23/28")
+Co-developed-by: Michael Trimarchi <michael@amarulasolutions.com>
+Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
+Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
+Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
+Cc: stable@vger.kernel.org
+
+Link: https://lore.kernel.org/r/20220921170556.1055962-1-dario.binacchi@amarulasolutions.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/mxs-dma.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
+index 994fc4d2aca4..dc147cc2436e 100644
+--- a/drivers/dma/mxs-dma.c
++++ b/drivers/dma/mxs-dma.c
+@@ -670,7 +670,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
+ return mxs_chan->status;
+ }
+
+-static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
++static int mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+ {
+ int ret;
+
+@@ -741,7 +741,7 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec,
+ ofdma->of_node);
+ }
+
+-static int __init mxs_dma_probe(struct platform_device *pdev)
++static int mxs_dma_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
+ const struct mxs_dma_type *dma_type;
+@@ -839,10 +839,7 @@ static struct platform_driver mxs_dma_driver = {
+ .name = "mxs-dma",
+ .of_match_table = mxs_dma_dt_ids,
+ },
++ .probe = mxs_dma_probe,
+ };
+
+-static int __init mxs_dma_module_init(void)
+-{
+- return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe);
+-}
+-subsys_initcall(mxs_dma_module_init);
++builtin_platform_driver(mxs_dma_driver);
+--
+2.35.1
+
--- /dev/null
+From 050b21df2c16abb2618671ae082b258d89b1c88f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Jun 2022 23:07:22 +0300
+Subject: drm/virtio: Use appropriate atomic state in
+ virtio_gpu_plane_cleanup_fb()
+
+From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
+
+[ Upstream commit 4656b3a26a9e9fe5f04bfd2ab55b066266ba7f4d ]
+
+Make virtio_gpu_plane_cleanup_fb() to clean the state which DRM core
+wants to clean up and not the current plane's state. Normally the older
+atomic state is cleaned up, but the newer state could also be cleaned up
+in case of aborted commits.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/20220630200726.1884320-6-dmitry.osipenko@collabora.com
+Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/virtio/virtgpu_plane.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
+index 6a311cd93440..e6de62734269 100644
+--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
++++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
+@@ -213,14 +213,14 @@ static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane,
+ }
+
+ static void virtio_gpu_cursor_cleanup_fb(struct drm_plane *plane,
+- struct drm_plane_state *old_state)
++ struct drm_plane_state *state)
+ {
+ struct virtio_gpu_framebuffer *vgfb;
+
+- if (!plane->state->fb)
++ if (!state->fb)
+ return;
+
+- vgfb = to_virtio_gpu_framebuffer(plane->state->fb);
++ vgfb = to_virtio_gpu_framebuffer(state->fb);
+ if (vgfb->fence) {
+ dma_fence_put(&vgfb->fence->f);
+ vgfb->fence = NULL;
+--
+2.35.1
+
--- /dev/null
+From b0757e907af9c9647f6dec361f254ac6b2553e40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Jul 2021 17:18:30 +0800
+Subject: fcntl: fix potential deadlocks for &fown_struct.lock
+
+From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
+
+[ Upstream commit f671a691e299f58835d4660d642582bf0e8f6fda ]
+
+Syzbot reports a potential deadlock in do_fcntl:
+
+========================================================
+WARNING: possible irq lock inversion dependency detected
+5.12.0-syzkaller #0 Not tainted
+--------------------------------------------------------
+syz-executor132/8391 just changed the state of lock:
+ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: f_getown_ex fs/fcntl.c:211 [inline]
+ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: do_fcntl+0x8b4/0x1200 fs/fcntl.c:395
+but this lock was taken by another, HARDIRQ-safe lock in the past:
+ (&dev->event_lock){-...}-{2:2}
+
+and interrupts could create inverse lock ordering between them.
+
+other info that might help us debug this:
+Chain exists of:
+ &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock
+
+ Possible interrupt unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(&f->f_owner.lock);
+ local_irq_disable();
+ lock(&dev->event_lock);
+ lock(&new->fa_lock);
+ <Interrupt>
+ lock(&dev->event_lock);
+
+ *** DEADLOCK ***
+
+This happens because there is a lock hierarchy of
+&dev->event_lock --> &new->fa_lock --> &f->f_owner.lock
+from the following call chain:
+
+ input_inject_event():
+ spin_lock_irqsave(&dev->event_lock,...);
+ input_handle_event():
+ input_pass_values():
+ input_to_handler():
+ evdev_events():
+ evdev_pass_values():
+ spin_lock(&client->buffer_lock);
+ __pass_event():
+ kill_fasync():
+ kill_fasync_rcu():
+ read_lock(&fa->fa_lock);
+ send_sigio():
+ read_lock_irqsave(&fown->lock,...);
+
+However, since &dev->event_lock is HARDIRQ-safe, interrupts have to be
+disabled while grabbing &f->f_owner.lock, otherwise we invert the lock
+hierarchy.
+
+Hence, we replace calls to read_lock/read_unlock on &f->f_owner.lock,
+with read_lock_irq/read_unlock_irq.
+
+Reported-and-tested-by: syzbot+e6d5398a02c516ce5e70@syzkaller.appspotmail.com
+Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fcntl.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/fs/fcntl.c b/fs/fcntl.c
+index 5a56351f1fc3..fcf34f83bf6a 100644
+--- a/fs/fcntl.c
++++ b/fs/fcntl.c
+@@ -149,7 +149,8 @@ void f_delown(struct file *filp)
+ pid_t f_getown(struct file *filp)
+ {
+ pid_t pid = 0;
+- read_lock(&filp->f_owner.lock);
++
++ read_lock_irq(&filp->f_owner.lock);
+ rcu_read_lock();
+ if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
+ pid = pid_vnr(filp->f_owner.pid);
+@@ -157,7 +158,7 @@ pid_t f_getown(struct file *filp)
+ pid = -pid;
+ }
+ rcu_read_unlock();
+- read_unlock(&filp->f_owner.lock);
++ read_unlock_irq(&filp->f_owner.lock);
+ return pid;
+ }
+
+@@ -207,7 +208,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
+ struct f_owner_ex owner = {};
+ int ret = 0;
+
+- read_lock(&filp->f_owner.lock);
++ read_lock_irq(&filp->f_owner.lock);
+ rcu_read_lock();
+ if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
+ owner.pid = pid_vnr(filp->f_owner.pid);
+@@ -230,7 +231,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
+ ret = -EINVAL;
+ break;
+ }
+- read_unlock(&filp->f_owner.lock);
++ read_unlock_irq(&filp->f_owner.lock);
+
+ if (!ret) {
+ ret = copy_to_user(owner_p, &owner, sizeof(owner));
+@@ -248,10 +249,10 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
+ uid_t src[2];
+ int err;
+
+- read_lock(&filp->f_owner.lock);
++ read_lock_irq(&filp->f_owner.lock);
+ src[0] = from_kuid(user_ns, filp->f_owner.uid);
+ src[1] = from_kuid(user_ns, filp->f_owner.euid);
+- read_unlock(&filp->f_owner.lock);
++ read_unlock_irq(&filp->f_owner.lock);
+
+ err = put_user(src[0], &dst[0]);
+ err |= put_user(src[1], &dst[1]);
+--
+2.35.1
+
--- /dev/null
+From 40f5b617ba64bfbd50b469718fb24c2b4557230e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Feb 2021 15:41:56 +0300
+Subject: fcntl: make F_GETOWN(EX) return 0 on dead owner task
+
+From: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+
+[ Upstream commit cc4a3f885e8f2bc3c86a265972e94fef32d68f67 ]
+
+Currently there is no way to differentiate the file with alive owner
+from the file with dead owner but pid of the owner reused. That's why
+CRIU can't actually know if it needs to restore file owner or not,
+because if it restores owner but actual owner was dead, this can
+introduce unexpected signals to the "false"-owner (which reused the
+pid).
+
+Let's change the api, so that F_GETOWN(EX) returns 0 in case actual
+owner is dead already. This comports with the POSIX spec, which
+states that a PID of 0 indicates that no signal will be sent.
+
+Cc: Jeff Layton <jlayton@kernel.org>
+Cc: "J. Bruce Fields" <bfields@fieldses.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: linux-fsdevel@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Cc: Cyrill Gorcunov <gorcunov@gmail.com>
+Cc: Andrei Vagin <avagin@gmail.com>
+Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Stable-dep-of: f671a691e299 ("fcntl: fix potential deadlocks for &fown_struct.lock")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fcntl.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/fs/fcntl.c b/fs/fcntl.c
+index 71b43538fa44..5a56351f1fc3 100644
+--- a/fs/fcntl.c
++++ b/fs/fcntl.c
+@@ -148,11 +148,15 @@ void f_delown(struct file *filp)
+
+ pid_t f_getown(struct file *filp)
+ {
+- pid_t pid;
++ pid_t pid = 0;
+ read_lock(&filp->f_owner.lock);
+- pid = pid_vnr(filp->f_owner.pid);
+- if (filp->f_owner.pid_type == PIDTYPE_PGID)
+- pid = -pid;
++ rcu_read_lock();
++ if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
++ pid = pid_vnr(filp->f_owner.pid);
++ if (filp->f_owner.pid_type == PIDTYPE_PGID)
++ pid = -pid;
++ }
++ rcu_read_unlock();
+ read_unlock(&filp->f_owner.lock);
+ return pid;
+ }
+@@ -200,11 +204,14 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
+ static int f_getown_ex(struct file *filp, unsigned long arg)
+ {
+ struct f_owner_ex __user *owner_p = (void __user *)arg;
+- struct f_owner_ex owner;
++ struct f_owner_ex owner = {};
+ int ret = 0;
+
+ read_lock(&filp->f_owner.lock);
+- owner.pid = pid_vnr(filp->f_owner.pid);
++ rcu_read_lock();
++ if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
++ owner.pid = pid_vnr(filp->f_owner.pid);
++ rcu_read_unlock();
+ switch (filp->f_owner.pid_type) {
+ case PIDTYPE_PID:
+ owner.type = F_OWNER_TID;
+--
+2.35.1
+
--- /dev/null
+From 14e2488e7853fd96ed92fd5443a4e3a5e6cf2174 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 11:02:49 -0800
+Subject: fs: clean up __mark_inode_dirty() a bit
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit 35d14f278e530ecb635ab00de984065ed90ee12f ]
+
+Improve some comments, and don't bother checking for the I_DIRTY_TIME
+flag in the case where we just cleared it.
+
+Also, warn if I_DIRTY_TIME and I_DIRTY_PAGES are passed to
+__mark_inode_dirty() at the same time, as this case isn't handled.
+
+Link: https://lore.kernel.org/r/20210112190253.64307-8-ebiggers@kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 49 +++++++++++++++++++++++++++++------------------
+ 1 file changed, 30 insertions(+), 19 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index b6d572a519fa..71043e847e7c 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -2206,23 +2206,24 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
+ }
+
+ /**
+- * __mark_inode_dirty - internal function
++ * __mark_inode_dirty - internal function to mark an inode dirty
+ *
+ * @inode: inode to mark
+- * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
++ * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of
++ * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined
++ * with I_DIRTY_PAGES.
+ *
+- * Mark an inode as dirty. Callers should use mark_inode_dirty or
+- * mark_inode_dirty_sync.
++ * Mark an inode as dirty. We notify the filesystem, then update the inode's
++ * dirty flags. Then, if needed we add the inode to the appropriate dirty list.
+ *
+- * Put the inode on the super block's dirty list.
++ * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync()
++ * instead of calling this directly.
+ *
+- * CAREFUL! We mark it dirty unconditionally, but move it onto the
+- * dirty list only if it is hashed or if it refers to a blockdev.
+- * If it was not hashed, it will never be added to the dirty list
+- * even if it is later hashed, as it will have been marked dirty already.
++ * CAREFUL! We only add the inode to the dirty list if it is hashed or if it
++ * refers to a blockdev. Unhashed inodes will never be added to the dirty list
++ * even if they are later hashed, as they will have been marked dirty already.
+ *
+- * In short, make sure you hash any inodes _before_ you start marking
+- * them dirty.
++ * In short, ensure you hash any inodes _before_ you start marking them dirty.
+ *
+ * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
+ * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
+@@ -2234,25 +2235,34 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
+ void __mark_inode_dirty(struct inode *inode, int flags)
+ {
+ struct super_block *sb = inode->i_sb;
+- int dirtytime;
++ int dirtytime = 0;
+
+ trace_writeback_mark_inode_dirty(inode, flags);
+
+- /*
+- * Don't do this for I_DIRTY_PAGES - that doesn't actually
+- * dirty the inode itself
+- */
+ if (flags & I_DIRTY_INODE) {
++ /*
++ * Notify the filesystem about the inode being dirtied, so that
++ * (if needed) it can update on-disk fields and journal the
++ * inode. This is only needed when the inode itself is being
++ * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not
++ * for just I_DIRTY_PAGES or I_DIRTY_TIME.
++ */
+ trace_writeback_dirty_inode_start(inode, flags);
+-
+ if (sb->s_op->dirty_inode)
+ sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
+-
+ trace_writeback_dirty_inode(inode, flags);
+
++ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
+ flags &= ~I_DIRTY_TIME;
++ } else {
++ /*
++ * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing.
++ * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME
++ * in one call to __mark_inode_dirty().)
++ */
++ dirtytime = flags & I_DIRTY_TIME;
++ WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
+ }
+- dirtytime = flags & I_DIRTY_TIME;
+
+ /*
+ * Paired with smp_mb() in __writeback_single_inode() for the
+@@ -2272,6 +2282,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+
+ inode_attach_wb(inode, NULL);
+
++ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
+ if (flags & I_DIRTY_INODE)
+ inode->i_state &= ~I_DIRTY_TIME;
+ inode->i_state |= flags;
+--
+2.35.1
+
--- /dev/null
+From 5aa02147cd3d8e2a637fb16a091ff7e784f74ecd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 11:02:44 -0800
+Subject: fs: correctly document the inode dirty flags
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit 1e9d63331f8fa556f31e1406ab12f2a1e5cdb495 ]
+
+The documentation for I_DIRTY_SYNC and I_DIRTY_DATASYNC is a bit
+misleading, and I_DIRTY_TIME isn't documented at all. Fix this.
+
+Link: https://lore.kernel.org/r/20210112190253.64307-3-ebiggers@kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/fs.h | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index c8f887641878..8ee26322a527 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2087,8 +2087,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+ /*
+ * Inode state bits. Protected by inode->i_lock
+ *
+- * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
+- * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
++ * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
++ * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
+ *
+ * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
+ * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
+@@ -2097,12 +2097,20 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+ * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
+ *
+ * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
+- * fdatasync(). i_atime is the usual cause.
+- * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
++ * fdatasync() (unless I_DIRTY_DATASYNC is also set).
++ * Timestamp updates are the usual cause.
++ * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
+ * these changes separately from I_DIRTY_SYNC so that we
+ * don't have to write inode on fdatasync() when only
+- * mtime has changed in it.
++ * e.g. the timestamps have changed.
+ * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
++ * I_DIRTY_TIME The inode itself only has dirty timestamps, and the
++ * lazytime mount option is enabled. We keep track of this
++ * separately from I_DIRTY_SYNC in order to implement
++ * lazytime. This gets cleared if I_DIRTY_INODE
++ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
++ * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
++ * i_state, but not both. I_DIRTY_PAGES may still be set.
+ * I_NEW Serves as both a mutex and completion notification.
+ * New inodes set I_NEW. If two processes both create
+ * the same inode, one of them will release its inode and
+--
+2.35.1
+
--- /dev/null
+From 81815952d81efbb44aab514f0eaa765a0abf9744 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 11:02:47 -0800
+Subject: fs: don't call ->dirty_inode for lazytime timestamp updates
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit e2728c5621fd9c68c65a6647875a1d1c67b9f257 ]
+
+There is no need to call ->dirty_inode for lazytime timestamp updates
+(i.e. for __mark_inode_dirty(I_DIRTY_TIME)), since by the definition of
+lazytime, filesystems must ignore these updates. Filesystems only need
+to care about the updated timestamps when they expire.
+
+Therefore, only call ->dirty_inode when I_DIRTY_INODE is set.
+
+Based on a patch from Christoph Hellwig:
+https://lore.kernel.org/r/20200325122825.1086872-4-hch@lst.de
+
+Link: https://lore.kernel.org/r/20210112190253.64307-6-ebiggers@kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 12 +-----------
+ fs/f2fs/super.c | 3 ---
+ fs/fs-writeback.c | 6 +++---
+ fs/gfs2/super.c | 2 --
+ 4 files changed, 4 insertions(+), 19 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 45f31dc1e66f..2a9ce6826d6b 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -6003,26 +6003,16 @@ int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode,
+ * If the inode is marked synchronous, we don't honour that here - doing
+ * so would cause a commit on atime updates, which we don't bother doing.
+ * We handle synchronous inodes at the highest possible level.
+- *
+- * If only the I_DIRTY_TIME flag is set, we can skip everything. If
+- * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need
+- * to copy into the on-disk inode structure are the timestamp files.
+ */
+ void ext4_dirty_inode(struct inode *inode, int flags)
+ {
+ handle_t *handle;
+
+- if (flags == I_DIRTY_TIME)
+- return;
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle))
+- goto out;
+-
++ return;
+ ext4_mark_inode_dirty(handle, inode);
+-
+ ext4_journal_stop(handle);
+-out:
+- return;
+ }
+
+ int ext4_change_inode_journal_flag(struct inode *inode, int val)
+diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
+index fba413ced982..b6a33935528c 100644
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1213,9 +1213,6 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
+ inode->i_ino == F2FS_META_INO(sbi))
+ return;
+
+- if (flags == I_DIRTY_TIME)
+- return;
+-
+ if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
+ clear_inode_flag(inode, FI_AUTO_RECOVER);
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 2011199476ea..2088046de4ef 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -2242,16 +2242,16 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ * Don't do this for I_DIRTY_PAGES - that doesn't actually
+ * dirty the inode itself
+ */
+- if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
++ if (flags & I_DIRTY_INODE) {
+ trace_writeback_dirty_inode_start(inode, flags);
+
+ if (sb->s_op->dirty_inode)
+ sb->s_op->dirty_inode(inode, flags);
+
+ trace_writeback_dirty_inode(inode, flags);
+- }
+- if (flags & I_DIRTY_INODE)
++
+ flags &= ~I_DIRTY_TIME;
++ }
+ dirtytime = flags & I_DIRTY_TIME;
+
+ /*
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index d14b98aa1c3e..21bb02dc3aed 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -506,8 +506,6 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
+ int need_endtrans = 0;
+ int ret;
+
+- if (!(flags & I_DIRTY_INODE))
+- return;
+ if (unlikely(gfs2_withdrawn(sdp)))
+ return;
+ if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
+--
+2.35.1
+
--- /dev/null
+From a0b3b58097f66aa6b33340b0962ff0ee41da9c60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 11:02:48 -0800
+Subject: fs: pass only I_DIRTY_INODE flags to ->dirty_inode
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit a38ed483a72672ee6bdb5d8cf17fc0838377baa0 ]
+
+->dirty_inode is now only called when I_DIRTY_INODE (I_DIRTY_SYNC and/or
+I_DIRTY_DATASYNC) is set. However it may still be passed other dirty
+flags at the same time, provided that these other flags happened to be
+passed to __mark_inode_dirty() at the same time as I_DIRTY_INODE.
+
+This doesn't make sense because there is no reason for filesystems to
+care about these extra flags. Nor are filesystems notified about all
+updates to these other flags.
+
+Therefore, mask the flags before passing them to ->dirty_inode.
+
+Also properly document ->dirty_inode in vfs.rst.
+
+Link: https://lore.kernel.org/r/20210112190253.64307-7-ebiggers@kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/filesystems/vfs.rst | 5 ++++-
+ fs/fs-writeback.c | 2 +-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
+index ca52c82e5bb5..287b80948a40 100644
+--- a/Documentation/filesystems/vfs.rst
++++ b/Documentation/filesystems/vfs.rst
+@@ -270,7 +270,10 @@ or bottom half).
+ ->alloc_inode.
+
+ ``dirty_inode``
+- this method is called by the VFS to mark an inode dirty.
++ this method is called by the VFS when an inode is marked dirty.
++ This is specifically for the inode itself being marked dirty,
++ not its data. If the update needs to be persisted by fdatasync(),
++ then I_DIRTY_DATASYNC will be set in the flags argument.
+
+ ``write_inode``
+ this method is called when the VFS needs to write an inode to
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 2088046de4ef..b6d572a519fa 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -2246,7 +2246,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ trace_writeback_dirty_inode_start(inode, flags);
+
+ if (sb->s_op->dirty_inode)
+- sb->s_op->dirty_inode(inode, flags);
++ sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
+
+ trace_writeback_dirty_inode(inode, flags);
+
+--
+2.35.1
+
--- /dev/null
+From 8139a7d3a79e5c63d33a578821e0cc2785e0d377 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Aug 2022 12:06:57 +0200
+Subject: fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+[ Upstream commit cbfecb927f429a6fa613d74b998496bd71e4438a ]
+
+Currently the I_DIRTY_TIME will never get set if the inode already has
+I_DIRTY_INODE with assumption that it supersedes I_DIRTY_TIME. That's
+true, however ext4 will only update the on-disk inode in
+->dirty_inode(), not on actual writeback. As a result if the inode
+already has I_DIRTY_INODE state by the time we get to
+__mark_inode_dirty() only with I_DIRTY_TIME, the time was already filled
+into on-disk inode and will not get updated until the next I_DIRTY_INODE
+update, which might never come if we crash or get a power failure.
+
+The problem can be reproduced on ext4 by running xfstest generic/622
+with -o iversion mount option.
+
+Fix it by allowing I_DIRTY_TIME to be set even if the inode already has
+I_DIRTY_INODE. Also make sure that the case is properly handled in
+writeback_single_inode() as well. Additionally changes in
+xfs_fs_dirty_inode() was made to accommodate for I_DIRTY_TIME in flag.
+
+Thanks Jan Kara for suggestions on how to make this work properly.
+
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Suggested-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220825100657.44217-1-lczerner@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/filesystems/vfs.rst | 3 +++
+ fs/fs-writeback.c | 37 +++++++++++++++++++++----------
+ fs/xfs/xfs_super.c | 10 +++++++--
+ include/linux/fs.h | 9 ++++----
+ 4 files changed, 41 insertions(+), 18 deletions(-)
+
+diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
+index 287b80948a40..ee69d60818b5 100644
+--- a/Documentation/filesystems/vfs.rst
++++ b/Documentation/filesystems/vfs.rst
+@@ -274,6 +274,9 @@ or bottom half).
+ This is specifically for the inode itself being marked dirty,
+ not its data. If the update needs to be persisted by fdatasync(),
+ then I_DIRTY_DATASYNC will be set in the flags argument.
++ I_DIRTY_TIME will be set in the flags in case lazytime is enabled
++ and struct inode has times updated since the last ->dirty_inode
++ call.
+
+ ``write_inode``
+ this method is called when the VFS needs to write an inode to
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 4c667662a4d9..f47797e15685 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -1618,9 +1618,14 @@ static int writeback_single_inode(struct inode *inode,
+ */
+ if (!(inode->i_state & I_DIRTY_ALL))
+ inode_cgwb_move_to_attached(inode, wb);
+- else if (!(inode->i_state & I_SYNC_QUEUED) &&
+- (inode->i_state & I_DIRTY))
+- redirty_tail_locked(inode, wb);
++ else if (!(inode->i_state & I_SYNC_QUEUED)) {
++ if ((inode->i_state & I_DIRTY))
++ redirty_tail_locked(inode, wb);
++ else if (inode->i_state & I_DIRTY_TIME) {
++ inode->dirtied_when = jiffies;
++ inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
++ }
++ }
+
+ spin_unlock(&wb->list_lock);
+ inode_sync_complete(inode);
+@@ -2276,6 +2281,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ trace_writeback_mark_inode_dirty(inode, flags);
+
+ if (flags & I_DIRTY_INODE) {
++ /*
++ * Inode timestamp update will piggback on this dirtying.
++ * We tell ->dirty_inode callback that timestamps need to
++ * be updated by setting I_DIRTY_TIME in flags.
++ */
++ if (inode->i_state & I_DIRTY_TIME) {
++ spin_lock(&inode->i_lock);
++ if (inode->i_state & I_DIRTY_TIME) {
++ inode->i_state &= ~I_DIRTY_TIME;
++ flags |= I_DIRTY_TIME;
++ }
++ spin_unlock(&inode->i_lock);
++ }
++
+ /*
+ * Notify the filesystem about the inode being dirtied, so that
+ * (if needed) it can update on-disk fields and journal the
+@@ -2285,7 +2304,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ */
+ trace_writeback_dirty_inode_start(inode, flags);
+ if (sb->s_op->dirty_inode)
+- sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
++ sb->s_op->dirty_inode(inode,
++ flags & (I_DIRTY_INODE | I_DIRTY_TIME));
+ trace_writeback_dirty_inode(inode, flags);
+
+ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
+@@ -2306,21 +2326,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ */
+ smp_mb();
+
+- if (((inode->i_state & flags) == flags) ||
+- (dirtytime && (inode->i_state & I_DIRTY_INODE)))
++ if ((inode->i_state & flags) == flags)
+ return;
+
+ spin_lock(&inode->i_lock);
+- if (dirtytime && (inode->i_state & I_DIRTY_INODE))
+- goto out_unlock_inode;
+ if ((inode->i_state & flags) != flags) {
+ const int was_dirty = inode->i_state & I_DIRTY;
+
+ inode_attach_wb(inode, NULL);
+
+- /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
+- if (flags & I_DIRTY_INODE)
+- inode->i_state &= ~I_DIRTY_TIME;
+ inode->i_state |= flags;
+
+ /*
+@@ -2393,7 +2407,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ out_unlock:
+ if (wb)
+ spin_unlock(&wb->list_lock);
+-out_unlock_inode:
+ spin_unlock(&inode->i_lock);
+ }
+ EXPORT_SYMBOL(__mark_inode_dirty);
+diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
+index 434c87cc9fbf..3cc20640b3da 100644
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -668,7 +668,7 @@ xfs_fs_destroy_inode(
+ static void
+ xfs_fs_dirty_inode(
+ struct inode *inode,
+- int flag)
++ int flags)
+ {
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+@@ -676,7 +676,13 @@ xfs_fs_dirty_inode(
+
+ if (!(inode->i_sb->s_flags & SB_LAZYTIME))
+ return;
+- if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
++
++ /*
++ * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
++ * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
++ * in flags possibly together with I_DIRTY_SYNC.
++ */
++ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
+ return;
+
+ if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 8ee26322a527..ae7cd6ee1142 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2104,13 +2104,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+ * don't have to write inode on fdatasync() when only
+ * e.g. the timestamps have changed.
+ * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
+- * I_DIRTY_TIME The inode itself only has dirty timestamps, and the
++ * I_DIRTY_TIME The inode itself has dirty timestamps, and the
+ * lazytime mount option is enabled. We keep track of this
+ * separately from I_DIRTY_SYNC in order to implement
+ * lazytime. This gets cleared if I_DIRTY_INODE
+- * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
+- * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
+- * i_state, but not both. I_DIRTY_PAGES may still be set.
++ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
++ * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
++ * in place because writeback might already be in progress
++ * and we don't want to lose the time update
+ * I_NEW Serves as both a mutex and completion notification.
+ * New inodes set I_NEW. If two processes both create
+ * the same inode, one of them will release its inode and
+--
+2.35.1
+
--- /dev/null
+From a38fb7f951aa2e47325aa0740605ec977f988757 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Oct 2022 20:27:47 +0200
+Subject: HID: magicmouse: Do not set BTN_MOUSE on double report
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: José Expósito <jose.exposito89@gmail.com>
+
+[ Upstream commit bb5f0c855dcfc893ae5ed90e4c646bde9e4498bf ]
+
+Under certain conditions the Magic Trackpad can group 2 reports in a
+single packet. The packet is split and the raw event function is
+invoked recursively for each part.
+
+However, after processing each part, the BTN_MOUSE status is updated,
+sending multiple click events. [1]
+
+Return after processing double reports to avoid this issue.
+
+Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/811 # [1]
+Fixes: a462230e16ac ("HID: magicmouse: enable Magic Trackpad support")
+Reported-by: Nulo <git@nulo.in>
+Signed-off-by: José Expósito <jose.exposito89@gmail.com>
+Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Link: https://lore.kernel.org/r/20221009182747.90730-1-jose.exposito89@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-magicmouse.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
+index fc4c07459753..28158d2f2352 100644
+--- a/drivers/hid/hid-magicmouse.c
++++ b/drivers/hid/hid-magicmouse.c
+@@ -387,7 +387,7 @@ static int magicmouse_raw_event(struct hid_device *hdev,
+ magicmouse_raw_event(hdev, report, data + 2, data[1]);
+ magicmouse_raw_event(hdev, report, data + 2 + data[1],
+ size - 2 - data[1]);
+- break;
++ return 0;
+ default:
+ return 0;
+ }
+--
+2.35.1
+
--- /dev/null
+From 0420cc9ad4be789293baf3783b27ad18b422d261 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Oct 2022 13:54:40 -0700
+Subject: i40e: Fix DMA mappings leak
+
+From: Jan Sokolowski <jan.sokolowski@intel.com>
+
+[ Upstream commit aae425efdfd1b1d8452260a3cb49344ebf20b1f5 ]
+
+During reallocation of RX buffers, new DMA mappings are created for
+those buffers.
+
+steps for reproduction:
+while :
+do
+for ((i=0; i<=8160; i=i+32))
+do
+ethtool -G enp130s0f0 rx $i tx $i
+sleep 0.5
+ethtool -g enp130s0f0
+done
+done
+
+This resulted in crash:
+i40e 0000:01:00.1: Unable to allocate memory for the Rx descriptor ring, size=65536
+Driver BUG
+WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:141 xdp_rxq_info_unreg+0x43/0x50
+Call Trace:
+i40e_free_rx_resources+0x70/0x80 [i40e]
+i40e_set_ringparam+0x27c/0x800 [i40e]
+ethnl_set_rings+0x1b2/0x290
+genl_family_rcv_msg_doit.isra.15+0x10f/0x150
+genl_family_rcv_msg+0xb3/0x160
+? rings_fill_reply+0x1a0/0x1a0
+genl_rcv_msg+0x47/0x90
+? genl_family_rcv_msg+0x160/0x160
+netlink_rcv_skb+0x4c/0x120
+genl_rcv+0x24/0x40
+netlink_unicast+0x196/0x230
+netlink_sendmsg+0x204/0x3d0
+sock_sendmsg+0x4c/0x50
+__sys_sendto+0xee/0x160
+? handle_mm_fault+0xbe/0x1e0
+? syscall_trace_enter+0x1d3/0x2c0
+__x64_sys_sendto+0x24/0x30
+do_syscall_64+0x5b/0x1a0
+entry_SYSCALL_64_after_hwframe+0x65/0xca
+RIP: 0033:0x7f5eac8b035b
+Missing register, driver bug
+WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:119 xdp_rxq_info_unreg_mem_model+0x69/0x140
+Call Trace:
+xdp_rxq_info_unreg+0x1e/0x50
+i40e_free_rx_resources+0x70/0x80 [i40e]
+i40e_set_ringparam+0x27c/0x800 [i40e]
+ethnl_set_rings+0x1b2/0x290
+genl_family_rcv_msg_doit.isra.15+0x10f/0x150
+genl_family_rcv_msg+0xb3/0x160
+? rings_fill_reply+0x1a0/0x1a0
+genl_rcv_msg+0x47/0x90
+? genl_family_rcv_msg+0x160/0x160
+netlink_rcv_skb+0x4c/0x120
+genl_rcv+0x24/0x40
+netlink_unicast+0x196/0x230
+netlink_sendmsg+0x204/0x3d0
+sock_sendmsg+0x4c/0x50
+__sys_sendto+0xee/0x160
+? handle_mm_fault+0xbe/0x1e0
+? syscall_trace_enter+0x1d3/0x2c0
+__x64_sys_sendto+0x24/0x30
+do_syscall_64+0x5b/0x1a0
+entry_SYSCALL_64_after_hwframe+0x65/0xca
+RIP: 0033:0x7f5eac8b035b
+
+This was caused because of new buffers with different RX ring count should
+substitute older ones, but those buffers were freed in
+i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi,
+thus kfree on rx_bi caused leak of already mapped DMA.
+
+Fix this by reallocating ZC with rx_bi_zc struct when BPF program loads. Additionally
+reallocate back to rx_bi when BPF program unloads.
+
+If BPF program is loaded/unloaded and XSK pools are created, reallocate
+RX queues accordingly in XSP_SETUP_XSK_POOL handler.
+
+Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings")
+Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Chandan <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/intel/i40e/i40e_ethtool.c | 3 -
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++--
+ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 13 ++--
+ drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 -
+ drivers/net/ethernet/intel/i40e/i40e_xsk.c | 67 ++++++++++++++++---
+ drivers/net/ethernet/intel/i40e/i40e_xsk.h | 2 +-
+ 6 files changed, 74 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+index 63054061966e..cc5f5c237774 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+@@ -2081,9 +2081,6 @@ static int i40e_set_ringparam(struct net_device *netdev,
+ */
+ rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
+ err = i40e_setup_rx_descriptors(&rx_rings[i]);
+- if (err)
+- goto rx_unwind;
+- err = i40e_alloc_rx_bi(&rx_rings[i]);
+ if (err)
+ goto rx_unwind;
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index c7f243ddbcf7..ea6a984c6d12 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -3409,12 +3409,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
+ if (ring->vsi->type == I40E_VSI_MAIN)
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+- kfree(ring->rx_bi);
+ ring->xsk_pool = i40e_xsk_pool(ring);
+ if (ring->xsk_pool) {
+- ret = i40e_alloc_rx_bi_zc(ring);
+- if (ret)
+- return ret;
+ ring->rx_buf_len =
+ xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ /* For AF_XDP ZC, we disallow packets to span on
+@@ -3432,9 +3428,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
+ ring->queue_index);
+
+ } else {
+- ret = i40e_alloc_rx_bi(ring);
+- if (ret)
+- return ret;
+ ring->rx_buf_len = vsi->rx_buf_len;
+ if (ring->vsi->type == I40E_VSI_MAIN) {
+ ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+@@ -12684,6 +12677,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
+ i40e_reset_and_rebuild(pf, true, true);
+ }
+
++ if (!i40e_enabled_xdp_vsi(vsi) && prog) {
++ if (i40e_realloc_rx_bi_zc(vsi, true))
++ return -ENOMEM;
++ } else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
++ if (i40e_realloc_rx_bi_zc(vsi, false))
++ return -ENOMEM;
++ }
++
+ for (i = 0; i < vsi->num_queue_pairs; i++)
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+
+@@ -12916,6 +12917,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
+
+ i40e_queue_pair_disable_irq(vsi, queue_pair);
+ err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
++ i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
+ i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
+ i40e_queue_pair_clean_rings(vsi, queue_pair);
+ i40e_queue_pair_reset_stats(vsi, queue_pair);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+index 5ad28129fab2..43be33d87e39 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+@@ -1305,14 +1305,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
+ return -ENOMEM;
+ }
+
+-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
+-{
+- unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
+-
+- rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
+- return rx_ring->rx_bi ? 0 : -ENOMEM;
+-}
+-
+ static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
+ {
+ memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
+@@ -1443,6 +1435,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
+
+ rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
+
++ rx_ring->rx_bi =
++ kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
++ if (!rx_ring->rx_bi)
++ return -ENOMEM;
++
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+index 93ac201f68b8..af843e8169f7 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+@@ -465,7 +465,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
+ bool __i40e_chk_linearize(struct sk_buff *skb);
+ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
+
+ /**
+ * i40e_get_head - Retrieve head from head writeback
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+index 75e4a698c3db..7f1226123629 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+@@ -9,14 +9,6 @@
+ #include "i40e_txrx_common.h"
+ #include "i40e_xsk.h"
+
+-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
+-{
+- unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
+-
+- rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
+- return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
+-}
+-
+ void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
+ {
+ memset(rx_ring->rx_bi_zc, 0,
+@@ -28,6 +20,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+ return &rx_ring->rx_bi_zc[idx];
+ }
+
++/**
++ * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer
++ * @rx_ring: Current rx ring
++ * @pool_present: is pool for XSK present
++ *
++ * Try allocating memory and return ENOMEM, if failed to allocate.
++ * If allocation was successful, substitute buffer with allocated one.
++ * Returns 0 on success, negative on failure
++ */
++static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
++{
++ size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
++ sizeof(*rx_ring->rx_bi);
++ void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
++
++ if (!sw_ring)
++ return -ENOMEM;
++
++ if (pool_present) {
++ kfree(rx_ring->rx_bi);
++ rx_ring->rx_bi = NULL;
++ rx_ring->rx_bi_zc = sw_ring;
++ } else {
++ kfree(rx_ring->rx_bi_zc);
++ rx_ring->rx_bi_zc = NULL;
++ rx_ring->rx_bi = sw_ring;
++ }
++ return 0;
++}
++
++/**
++ * i40e_realloc_rx_bi_zc - reallocate rx SW rings
++ * @vsi: Current VSI
++ * @zc: is zero copy set
++ *
++ * Reallocate buffer for rx_rings that might be used by XSK.
++ * XDP requires more memory, than rx_buf provides.
++ * Returns 0 on success, negative on failure
++ */
++int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
++{
++ struct i40e_ring *rx_ring;
++ unsigned long q;
++
++ for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
++ rx_ring = vsi->rx_rings[q];
++ if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
++ return -ENOMEM;
++ }
++ return 0;
++}
++
+ /**
+ * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
+ * certain ring/qid
+@@ -68,6 +112,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
+ if (err)
+ return err;
+
++ err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
++ if (err)
++ return err;
++
+ err = i40e_queue_pair_enable(vsi, qid);
+ if (err)
+ return err;
+@@ -112,6 +160,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
+ xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
+
+ if (if_running) {
++ err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
++ if (err)
++ return err;
+ err = i40e_queue_pair_enable(vsi, qid);
+ if (err)
+ return err;
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+index 7adfd8539247..36f5b6d20601 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+@@ -17,7 +17,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
+
+ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
+ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
++int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
+ void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
+
+ #endif /* _I40E_XSK_H_ */
+--
+2.35.1
+
--- /dev/null
+From 0201257917d0e6b24431f2cbd8141ceda6d30a19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 08:44:46 +0800
+Subject: iommu/vt-d: Allow NVS regions in arch_rmrr_sanity_check()
+
+From: Charlotte Tan <charlotte@extrahop.com>
+
+[ Upstream commit 5566e68d829f5d87670d5984c1c2ccb4c518405f ]
+
+arch_rmrr_sanity_check() warns if the RMRR is not covered by an ACPI
+Reserved region, but it seems like it should accept an NVS region as
+well. The ACPI spec
+https://uefi.org/specs/ACPI/6.5/15_System_Address_Map_Interfaces.html
+uses similar wording for "Reserved" and "NVS" region types; for NVS
+regions it says "This range of addresses is in use or reserved by the
+system and must not be used by the operating system."
+
+There is an old comment on this mailing list that also suggests NVS
+regions should pass the arch_rmrr_sanity_check() test:
+
+ The warnings come from arch_rmrr_sanity_check() since it checks whether
+ the region is E820_TYPE_RESERVED. However, if the purpose of the check
+ is to detect RMRR has regions that may be used by OS as free memory,
+ isn't E820_TYPE_NVS safe, too?
+
+This patch overlaps with another proposed patch that would add the region
+type to the log since sometimes the bug reporter sees this log on the
+console but doesn't know to include the kernel log:
+
+https://lore.kernel.org/lkml/20220611204859.234975-3-atomlin@redhat.com/
+
+Here's an example of the "Firmware Bug" apparent false positive (wrapped
+for line length):
+
+ DMAR: [Firmware Bug]: No firmware reserved region can cover this RMRR
+ [0x000000006f760000-0x000000006f762fff], contact BIOS vendor for
+ fixes
+ DMAR: [Firmware Bug]: Your BIOS is broken; bad RMRR
+ [0x000000006f760000-0x000000006f762fff]
+
+This is the snippet from the e820 table:
+
+ BIOS-e820: [mem 0x0000000068bff000-0x000000006ebfefff] reserved
+ BIOS-e820: [mem 0x000000006ebff000-0x000000006f9fefff] ACPI NVS
+ BIOS-e820: [mem 0x000000006f9ff000-0x000000006fffefff] ACPI data
+
+Fixes: f036c7fa0ab6 ("iommu/vt-d: Check VT-d RMRR region in BIOS is reported as reserved")
+Cc: Will Mortensen <will@extrahop.com>
+Link: https://lore.kernel.org/linux-iommu/64a5843d-850d-e58c-4fc2-0a0eeeb656dc@nec.com/
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216443
+Signed-off-by: Charlotte Tan <charlotte@extrahop.com>
+Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
+Link: https://lore.kernel.org/r/20220929044449.32515-1-charlotte@extrahop.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/iommu.h | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
+index bf1ed2ddc74b..7a983119bc40 100644
+--- a/arch/x86/include/asm/iommu.h
++++ b/arch/x86/include/asm/iommu.h
+@@ -17,8 +17,10 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+ {
+ u64 start = rmrr->base_address;
+ u64 end = rmrr->end_address + 1;
++ int entry_type;
+
+- if (e820__mapped_all(start, end, E820_TYPE_RESERVED))
++ entry_type = e820__get_entry_type(start, end);
++ if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS)
+ return 0;
+
+ pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n",
+--
+2.35.1
+
--- /dev/null
+From ae123e32d0af7507f3bcc2929df90460c40693d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 08:44:47 +0800
+Subject: iommu/vt-d: Clean up si_domain in the init_dmars() error path
+
+From: Jerry Snitselaar <jsnitsel@redhat.com>
+
+[ Upstream commit 620bf9f981365c18cc2766c53d92bf8131c63f32 ]
+
+A splat from kmem_cache_destroy() was seen with a kernel prior to
+commit ee2653bbe89d ("iommu/vt-d: Remove domain and devinfo mempool")
+when there was a failure in init_dmars(), because the iommu_domain
+cache still had objects. While the mempool code is now gone, there
+still is a leak of the si_domain memory if init_dmars() fails. So
+clean up si_domain in the init_dmars() error path.
+
+Cc: Lu Baolu <baolu.lu@linux.intel.com>
+Cc: Joerg Roedel <joro@8bytes.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: Robin Murphy <robin.murphy@arm.com>
+Fixes: 86080ccc223a ("iommu/vt-d: Allocate si_domain in init_dmars()")
+Signed-off-by: Jerry Snitselaar <jsnitsel@redhat.com>
+Link: https://lore.kernel.org/r/20221010144842.308890-1-jsnitsel@redhat.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/iommu.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index c48cf737b521..f23329b7f97c 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -2846,6 +2846,7 @@ static int __init si_domain_init(int hw)
+
+ if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ domain_exit(si_domain);
++ si_domain = NULL;
+ return -EFAULT;
+ }
+
+@@ -3505,6 +3506,10 @@ static int __init init_dmars(void)
+ disable_dmar_iommu(iommu);
+ free_dmar_iommu(iommu);
+ }
++ if (si_domain) {
++ domain_exit(si_domain);
++ si_domain = NULL;
++ }
+
+ kfree(g_iommus);
+
+--
+2.35.1
+
--- /dev/null
+From 80770d513ebb3eba4958bc0eeb817f3dae31de82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Oct 2022 16:31:23 -0700
+Subject: ionic: catch NULL pointer issue on reconfig
+
+From: Brett Creeley <brett@pensando.io>
+
+[ Upstream commit aa1d7e1267c12e07d979aa34c613716a89029db2 ]
+
+It's possible that the driver will dereference a qcq that doesn't exist
+when calling ionic_reconfigure_queues(), which causes a page fault BUG.
+
+If a reduction in the number of queues is followed by a different
+reconfig such as changing the ring size, the driver can hit a NULL
+pointer when trying to clean up non-existent queues.
+
+Fix this by checking to make sure both the qcqs array and qcq entry
+exist before trying to use and free the entry.
+
+Fixes: 101b40a0171f ("ionic: change queue count with no reset")
+Signed-off-by: Brett Creeley <brett@pensando.io>
+Signed-off-by: Shannon Nelson <snelson@pensando.io>
+Link: https://lore.kernel.org/r/20221017233123.15869-1-snelson@pensando.io
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/pensando/ionic/ionic_lif.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+index e42520f909fe..cb12d0171517 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+@@ -2383,11 +2383,15 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
+ * than the full array, but leave the qcq shells in place
+ */
+ for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) {
+- lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+- ionic_qcq_free(lif, lif->txqcqs[i]);
++ if (lif->txqcqs && lif->txqcqs[i]) {
++ lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
++ ionic_qcq_free(lif, lif->txqcqs[i]);
++ }
+
+- lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+- ionic_qcq_free(lif, lif->rxqcqs[i]);
++ if (lif->rxqcqs && lif->rxqcqs[i]) {
++ lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
++ ionic_qcq_free(lif, lif->rxqcqs[i]);
++ }
+ }
+
+ return err;
+--
+2.35.1
+
--- /dev/null
+From dcfe8c010accdad535bc505e4f6257168cf5c1f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Sep 2022 14:17:23 +0200
+Subject: kernfs: fix use-after-free in __kernfs_remove
+
+From: Christian A. Ehrhardt <lk@c--e.de>
+
+[ Upstream commit 4abc99652812a2ddf932f137515d5c5a04723538 ]
+
+Syzkaller managed to trigger concurrent calls to
+kernfs_remove_by_name_ns() for the same file resulting in
+a KASAN detected use-after-free. The race occurs when the root
+node is freed during kernfs_drain().
+
+To prevent this acquire an additional reference for the root
+of the tree that is removed before calling __kernfs_remove().
+
+Found by syzkaller with the following reproducer (slab_nomerge is
+required):
+
+syz_mount_image$ext4(0x0, &(0x7f0000000100)='./file0\x00', 0x100000, 0x0, 0x0, 0x0, 0x0)
+r0 = openat(0xffffffffffffff9c, &(0x7f0000000080)='/proc/self/exe\x00', 0x0, 0x0)
+close(r0)
+pipe2(&(0x7f0000000140)={0xffffffffffffffff, <r1=>0xffffffffffffffff}, 0x800)
+mount$9p_fd(0x0, &(0x7f0000000040)='./file0\x00', &(0x7f00000000c0), 0x408, &(0x7f0000000280)={'trans=fd,', {'rfdno', 0x3d, r0}, 0x2c, {'wfdno', 0x3d, r1}, 0x2c, {[{@cache_loose}, {@mmap}, {@loose}, {@loose}, {@mmap}], [{@mask={'mask', 0x3d, '^MAY_EXEC'}}, {@fsmagic={'fsmagic', 0x3d, 0x10001}}, {@dont_hash}]}})
+
+Sample report:
+
+==================================================================
+BUG: KASAN: use-after-free in kernfs_type include/linux/kernfs.h:335 [inline]
+BUG: KASAN: use-after-free in kernfs_leftmost_descendant fs/kernfs/dir.c:1261 [inline]
+BUG: KASAN: use-after-free in __kernfs_remove.part.0+0x843/0x960 fs/kernfs/dir.c:1369
+Read of size 2 at addr ffff8880088807f0 by task syz-executor.2/857
+
+CPU: 0 PID: 857 Comm: syz-executor.2 Not tainted 6.0.0-rc3-00363-g7726d4c3e60b #5
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x6e/0x91 lib/dump_stack.c:106
+ print_address_description mm/kasan/report.c:317 [inline]
+ print_report.cold+0x5e/0x5e5 mm/kasan/report.c:433
+ kasan_report+0xa3/0x130 mm/kasan/report.c:495
+ kernfs_type include/linux/kernfs.h:335 [inline]
+ kernfs_leftmost_descendant fs/kernfs/dir.c:1261 [inline]
+ __kernfs_remove.part.0+0x843/0x960 fs/kernfs/dir.c:1369
+ __kernfs_remove fs/kernfs/dir.c:1356 [inline]
+ kernfs_remove_by_name_ns+0x108/0x190 fs/kernfs/dir.c:1589
+ sysfs_slab_add+0x133/0x1e0 mm/slub.c:5943
+ __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899
+ create_cache mm/slab_common.c:229 [inline]
+ kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335
+ p9_client_create+0xd4d/0x1190 net/9p/client.c:993
+ v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408
+ v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126
+ legacy_get_tree+0xf1/0x200 fs/fs_context.c:610
+ vfs_get_tree+0x85/0x2e0 fs/super.c:1530
+ do_new_mount fs/namespace.c:3040 [inline]
+ path_mount+0x675/0x1d00 fs/namespace.c:3370
+ do_mount fs/namespace.c:3383 [inline]
+ __do_sys_mount fs/namespace.c:3591 [inline]
+ __se_sys_mount fs/namespace.c:3568 [inline]
+ __x64_sys_mount+0x282/0x300 fs/namespace.c:3568
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f725f983aed
+Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f725f0f7028 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
+RAX: ffffffffffffffda RBX: 00007f725faa3f80 RCX: 00007f725f983aed
+RDX: 00000000200000c0 RSI: 0000000020000040 RDI: 0000000000000000
+RBP: 00007f725f9f419c R08: 0000000020000280 R09: 0000000000000000
+R10: 0000000000000408 R11: 0000000000000246 R12: 0000000000000000
+R13: 0000000000000006 R14: 00007f725faa3f80 R15: 00007f725f0d7000
+ </TASK>
+
+Allocated by task 855:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track mm/kasan/common.c:45 [inline]
+ set_alloc_info mm/kasan/common.c:437 [inline]
+ __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:470
+ kasan_slab_alloc include/linux/kasan.h:224 [inline]
+ slab_post_alloc_hook mm/slab.h:727 [inline]
+ slab_alloc_node mm/slub.c:3243 [inline]
+ slab_alloc mm/slub.c:3251 [inline]
+ __kmem_cache_alloc_lru mm/slub.c:3258 [inline]
+ kmem_cache_alloc+0xbf/0x200 mm/slub.c:3268
+ kmem_cache_zalloc include/linux/slab.h:723 [inline]
+ __kernfs_new_node+0xd4/0x680 fs/kernfs/dir.c:593
+ kernfs_new_node fs/kernfs/dir.c:655 [inline]
+ kernfs_create_dir_ns+0x9c/0x220 fs/kernfs/dir.c:1010
+ sysfs_create_dir_ns+0x127/0x290 fs/sysfs/dir.c:59
+ create_dir lib/kobject.c:63 [inline]
+ kobject_add_internal+0x24a/0x8d0 lib/kobject.c:223
+ kobject_add_varg lib/kobject.c:358 [inline]
+ kobject_init_and_add+0x101/0x160 lib/kobject.c:441
+ sysfs_slab_add+0x156/0x1e0 mm/slub.c:5954
+ __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899
+ create_cache mm/slab_common.c:229 [inline]
+ kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335
+ p9_client_create+0xd4d/0x1190 net/9p/client.c:993
+ v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408
+ v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126
+ legacy_get_tree+0xf1/0x200 fs/fs_context.c:610
+ vfs_get_tree+0x85/0x2e0 fs/super.c:1530
+ do_new_mount fs/namespace.c:3040 [inline]
+ path_mount+0x675/0x1d00 fs/namespace.c:3370
+ do_mount fs/namespace.c:3383 [inline]
+ __do_sys_mount fs/namespace.c:3591 [inline]
+ __se_sys_mount fs/namespace.c:3568 [inline]
+ __x64_sys_mount+0x282/0x300 fs/namespace.c:3568
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Freed by task 857:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track+0x21/0x30 mm/kasan/common.c:45
+ kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:370
+ ____kasan_slab_free mm/kasan/common.c:367 [inline]
+ ____kasan_slab_free mm/kasan/common.c:329 [inline]
+ __kasan_slab_free+0x108/0x190 mm/kasan/common.c:375
+ kasan_slab_free include/linux/kasan.h:200 [inline]
+ slab_free_hook mm/slub.c:1754 [inline]
+ slab_free_freelist_hook mm/slub.c:1780 [inline]
+ slab_free mm/slub.c:3534 [inline]
+ kmem_cache_free+0x9c/0x340 mm/slub.c:3551
+ kernfs_put.part.0+0x2b2/0x520 fs/kernfs/dir.c:547
+ kernfs_put+0x42/0x50 fs/kernfs/dir.c:521
+ __kernfs_remove.part.0+0x72d/0x960 fs/kernfs/dir.c:1407
+ __kernfs_remove fs/kernfs/dir.c:1356 [inline]
+ kernfs_remove_by_name_ns+0x108/0x190 fs/kernfs/dir.c:1589
+ sysfs_slab_add+0x133/0x1e0 mm/slub.c:5943
+ __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899
+ create_cache mm/slab_common.c:229 [inline]
+ kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335
+ p9_client_create+0xd4d/0x1190 net/9p/client.c:993
+ v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408
+ v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126
+ legacy_get_tree+0xf1/0x200 fs/fs_context.c:610
+ vfs_get_tree+0x85/0x2e0 fs/super.c:1530
+ do_new_mount fs/namespace.c:3040 [inline]
+ path_mount+0x675/0x1d00 fs/namespace.c:3370
+ do_mount fs/namespace.c:3383 [inline]
+ __do_sys_mount fs/namespace.c:3591 [inline]
+ __se_sys_mount fs/namespace.c:3568 [inline]
+ __x64_sys_mount+0x282/0x300 fs/namespace.c:3568
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+The buggy address belongs to the object at ffff888008880780
+ which belongs to the cache kernfs_node_cache of size 128
+The buggy address is located 112 bytes inside of
+ 128-byte region [ffff888008880780, ffff888008880800)
+
+The buggy address belongs to the physical page:
+page:00000000732833f8 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x8880
+flags: 0x100000000000200(slab|node=0|zone=1)
+raw: 0100000000000200 0000000000000000 dead000000000122 ffff888001147280
+raw: 0000000000000000 0000000000150015 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff888008880680: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb
+ ffff888008880700: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
+>ffff888008880780: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff888008880800: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb
+ ffff888008880880: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
+==================================================================
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Cc: stable <stable@kernel.org> # -rc3
+Signed-off-by: Christian A. Ehrhardt <lk@c--e.de>
+Link: https://lore.kernel.org/r/20220913121723.691454-1-lk@c--e.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/kernfs/dir.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
+index afb39e1bbe3b..8b3c86a502da 100644
+--- a/fs/kernfs/dir.c
++++ b/fs/kernfs/dir.c
+@@ -1519,8 +1519,11 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
+ mutex_lock(&kernfs_mutex);
+
+ kn = kernfs_find_ns(parent, name, ns);
+- if (kn)
++ if (kn) {
++ kernfs_get(kn);
+ __kernfs_remove(kn);
++ kernfs_put(kn);
++ }
+
+ mutex_unlock(&kernfs_mutex);
+
+--
+2.35.1
+
--- /dev/null
+From a5ba4a9bb0982e29a8a046ca76e578bb2f8f74c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jan 2022 10:26:19 -0500
+Subject: libbpf: Use IS_ERR_OR_NULL() in hashmap__free()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mauricio Vásquez <mauricio@kinvolk.io>
+
+[ Upstream commit fba60b171a0322830b446dd28170092c47243d39 ]
+
+hashmap__new() uses ERR_PTR() to return an error so it's better to
+use IS_ERR_OR_NULL() in order to check the pointer before calling
+free(). This will prevent freeing an invalid pointer if somebody calls
+hashmap__free() with the result of a failed hashmap__new() call.
+
+Signed-off-by: Mauricio Vásquez <mauricio@kinvolk.io>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Song Liu <songliubraving@fb.com>
+Link: https://lore.kernel.org/bpf/20220107152620.192327-1-mauricio@kinvolk.io
+Stable-dep-of: 1fcc064b305a ("netfilter: rpfilter/fib: Set ->flowic_uid correctly for user namespaces.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/lib/bpf/hashmap.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
+index 3c20b126d60d..aeb09c288716 100644
+--- a/tools/lib/bpf/hashmap.c
++++ b/tools/lib/bpf/hashmap.c
+@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
+
+ void hashmap__free(struct hashmap *map)
+ {
+- if (!map)
++ if (IS_ERR_OR_NULL(map))
+ return;
+
+ hashmap__clear(map);
+@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
+
+ return true;
+ }
+-
+--
+2.35.1
+
--- /dev/null
+From 3e2553510c2410a41e71de7c48c969021996b5da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Sep 2022 12:57:44 +0300
+Subject: mmc: core: Add SD card quirk for broken discard
+
+From: Avri Altman <avri.altman@wdc.com>
+
+[ Upstream commit 07d2872bf4c864eb83d034263c155746a2fb7a3b ]
+
+Some SD-cards from Sandisk that are SDA-6.0 compliant reports they supports
+discard, while they actually don't. This might cause mk2fs to fail while
+trying to format the card and revert it to a read-only mode.
+
+To fix this problem, let's add a card quirk (MMC_QUIRK_BROKEN_SD_DISCARD)
+to indicate that we shall fall-back to use the legacy erase command
+instead.
+
+Signed-off-by: Avri Altman <avri.altman@wdc.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220928095744.16455-1-avri.altman@wdc.com
+[Ulf: Updated the commit message]
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mmc/core/block.c | 6 +++++-
+ drivers/mmc/core/card.h | 6 ++++++
+ drivers/mmc/core/quirks.h | 6 ++++++
+ include/linux/mmc/card.h | 1 +
+ 4 files changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
+index a9f9a45eafe4..cdf43f790f92 100644
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -1100,8 +1100,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
+ {
+ struct mmc_blk_data *md = mq->blkdata;
+ struct mmc_card *card = md->queue.card;
++ unsigned int arg = card->erase_arg;
+
+- mmc_blk_issue_erase_rq(mq, req, MMC_BLK_DISCARD, card->erase_arg);
++ if (mmc_card_broken_sd_discard(card))
++ arg = SD_ERASE_ARG;
++
++ mmc_blk_issue_erase_rq(mq, req, MMC_BLK_DISCARD, arg);
+ }
+
+ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
+diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
+index 7bd392d55cfa..5c6986131faf 100644
+--- a/drivers/mmc/core/card.h
++++ b/drivers/mmc/core/card.h
+@@ -70,6 +70,7 @@ struct mmc_fixup {
+ #define EXT_CSD_REV_ANY (-1u)
+
+ #define CID_MANFID_SANDISK 0x2
++#define CID_MANFID_SANDISK_SD 0x3
+ #define CID_MANFID_ATP 0x9
+ #define CID_MANFID_TOSHIBA 0x11
+ #define CID_MANFID_MICRON 0x13
+@@ -222,4 +223,9 @@ static inline int mmc_card_broken_hpi(const struct mmc_card *c)
+ return c->quirks & MMC_QUIRK_BROKEN_HPI;
+ }
+
++static inline int mmc_card_broken_sd_discard(const struct mmc_card *c)
++{
++ return c->quirks & MMC_QUIRK_BROKEN_SD_DISCARD;
++}
++
+ #endif
+diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
+index d68e6e513a4f..c8c0f50a2076 100644
+--- a/drivers/mmc/core/quirks.h
++++ b/drivers/mmc/core/quirks.h
+@@ -99,6 +99,12 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
+ MMC_FIXUP("V10016", CID_MANFID_KINGSTON, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_TRIM_BROKEN),
+
++ /*
++ * Some SD cards reports discard support while they don't
++ */
++ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
++ MMC_QUIRK_BROKEN_SD_DISCARD),
++
+ END_FIXUP
+ };
+
+diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
+index 42df06c6b19c..ef870d1f4f5f 100644
+--- a/include/linux/mmc/card.h
++++ b/include/linux/mmc/card.h
+@@ -270,6 +270,7 @@ struct mmc_card {
+ #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */
+ #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */
+ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */
++#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */
+
+ bool reenable_cmdq; /* Re-enable Command Queue */
+
+--
+2.35.1
+
--- /dev/null
+From 264f7b493a2cd529c07fb76d3679e16665aae18f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 17:21:18 +0200
+Subject: mmc: core: Support zeroout using TRIM for eMMC
+
+From: Vincent Whitchurch <vincent.whitchurch@axis.com>
+
+[ Upstream commit f7b6fc327327698924ef3afa0c3e87a5b7466af3 ]
+
+If an eMMC card supports TRIM and indicates that it erases to zeros, we can
+use it to support hardware offloading of REQ_OP_WRITE_ZEROES, so let's add
+support for this.
+
+Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
+Reviewed-by: Avri Altman <Avri.Altman@wdc.com>
+Link: https://lore.kernel.org/r/20220429152118.3617303-1-vincent.whitchurch@axis.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Stable-dep-of: 07d2872bf4c8 ("mmc: core: Add SD card quirk for broken discard")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mmc/core/block.c | 26 ++++++++++++++++++++++----
+ drivers/mmc/core/queue.c | 2 ++
+ 2 files changed, 24 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
+index 66a00b7c751f..a9f9a45eafe4 100644
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -118,6 +118,7 @@ struct mmc_blk_data {
+ #define MMC_BLK_DISCARD BIT(2)
+ #define MMC_BLK_SECDISCARD BIT(3)
+ #define MMC_BLK_CQE_RECOVERY BIT(4)
++#define MMC_BLK_TRIM BIT(5)
+
+ /*
+ * Only set in main mmc_blk_data associated
+@@ -1052,12 +1053,13 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req)
+ blk_mq_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
+ }
+
+-static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
++static void mmc_blk_issue_erase_rq(struct mmc_queue *mq, struct request *req,
++ int type, unsigned int erase_arg)
+ {
+ struct mmc_blk_data *md = mq->blkdata;
+ struct mmc_card *card = md->queue.card;
+ unsigned int from, nr;
+- int err = 0, type = MMC_BLK_DISCARD;
++ int err = 0;
+ blk_status_t status = BLK_STS_OK;
+
+ if (!mmc_can_erase(card)) {
+@@ -1073,13 +1075,13 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
+ if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ INAND_CMD38_ARG_EXT_CSD,
+- card->erase_arg == MMC_TRIM_ARG ?
++ erase_arg == MMC_TRIM_ARG ?
+ INAND_CMD38_ARG_TRIM :
+ INAND_CMD38_ARG_ERASE,
+ card->ext_csd.generic_cmd6_time);
+ }
+ if (!err)
+- err = mmc_erase(card, from, nr, card->erase_arg);
++ err = mmc_erase(card, from, nr, erase_arg);
+ } while (err == -EIO && !mmc_blk_reset(md, card->host, type));
+ if (err)
+ status = BLK_STS_IOERR;
+@@ -1089,6 +1091,19 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
+ blk_mq_end_request(req, status);
+ }
+
++static void mmc_blk_issue_trim_rq(struct mmc_queue *mq, struct request *req)
++{
++ mmc_blk_issue_erase_rq(mq, req, MMC_BLK_TRIM, MMC_TRIM_ARG);
++}
++
++static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
++{
++ struct mmc_blk_data *md = mq->blkdata;
++ struct mmc_card *card = md->queue.card;
++
++ mmc_blk_issue_erase_rq(mq, req, MMC_BLK_DISCARD, card->erase_arg);
++}
++
+ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
+ struct request *req)
+ {
+@@ -2227,6 +2242,9 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req)
+ case REQ_OP_SECURE_ERASE:
+ mmc_blk_issue_secdiscard_rq(mq, req);
+ break;
++ case REQ_OP_WRITE_ZEROES:
++ mmc_blk_issue_trim_rq(mq, req);
++ break;
+ case REQ_OP_FLUSH:
+ mmc_blk_issue_flush(mq, req);
+ break;
+diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
+index 002426e3cf76..ae235828e909 100644
+--- a/drivers/mmc/core/queue.c
++++ b/drivers/mmc/core/queue.c
+@@ -193,6 +193,8 @@ static void mmc_queue_setup_discard(struct request_queue *q,
+ q->limits.discard_granularity = SECTOR_SIZE;
+ if (mmc_can_secure_erase_trim(card))
+ blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
++ if (mmc_can_trim(card) && card->erased_byte == 0)
++ blk_queue_max_write_zeroes_sectors(q, max_discard);
+ }
+
+ static unsigned int mmc_get_max_segments(struct mmc_host *host)
+--
+2.35.1
+
--- /dev/null
+From c88ccb28fba44468457fface9dd8b3f677069569 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Oct 2022 18:36:22 +0530
+Subject: mmc: sdhci-tegra: Use actual clock rate for SW tuning correction
+
+From: Prathamesh Shete <pshete@nvidia.com>
+
+[ Upstream commit b78870e7f41534cc719c295d1f8809aca93aeeab ]
+
+Ensure tegra_host member "curr_clk_rate" holds the actual clock rate
+instead of requested clock rate for proper use during tuning correction
+algorithm. Actual clk rate may not be the same as the requested clk
+frequency depending on the parent clock source set. Tuning correction
+algorithm depends on certain parameters which are sensitive to current
+clk rate. If the host clk is selected instead of the actual clock rate,
+tuning correction algorithm may end up applying invalid correction,
+which could result in errors
+
+Fixes: ea8fc5953e8b ("mmc: tegra: update hw tuning process")
+Signed-off-by: Aniruddha TVS Rao <anrao@nvidia.com>
+Signed-off-by: Prathamesh Shete <pshete@nvidia.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Acked-by: Thierry Reding <treding@nvidia.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20221006130622.22900-4-pshete@nvidia.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mmc/host/sdhci-tegra.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
+index d50b691f6c44..67211fc42d24 100644
+--- a/drivers/mmc/host/sdhci-tegra.c
++++ b/drivers/mmc/host/sdhci-tegra.c
+@@ -760,7 +760,7 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+ */
+ host_clk = tegra_host->ddr_signaling ? clock * 2 : clock;
+ clk_set_rate(pltfm_host->clk, host_clk);
+- tegra_host->curr_clk_rate = host_clk;
++ tegra_host->curr_clk_rate = clk_get_rate(pltfm_host->clk);
+ if (tegra_host->ddr_signaling)
+ host->max_clk = host_clk;
+ else
+--
+2.35.1
+
--- /dev/null
+From 53d987f5ef7009ac9c9d3b5dfd897ab2269b68f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Oct 2022 10:05:40 +0800
+Subject: net/atm: fix proc_mpc_write incorrect return value
+
+From: Xiaobo Liu <cppcoffee@gmail.com>
+
+[ Upstream commit d8bde3bf7f82dac5fc68a62c2816793a12cafa2a ]
+
+Then the input contains '\0' or '\n', proc_mpc_write has read them,
+so the return value needs +1.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Xiaobo Liu <cppcoffee@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/atm/mpoa_proc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
+index 829db9eba0cb..aaf64b953915 100644
+--- a/net/atm/mpoa_proc.c
++++ b/net/atm/mpoa_proc.c
+@@ -219,11 +219,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
+ if (!page)
+ return -ENOMEM;
+
+- for (p = page, len = 0; len < nbytes; p++, len++) {
++ for (p = page, len = 0; len < nbytes; p++) {
+ if (get_user(*p, buff++)) {
+ free_page((unsigned long)page);
+ return -EFAULT;
+ }
++ len += 1;
+ if (*p == '\0' || *p == '\n')
+ break;
+ }
+--
+2.35.1
+
--- /dev/null
+From add2519edb109b88b6ae6c0ae30fd2be6a06589b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 20:24:51 +0800
+Subject: net: hns: fix possible memory leak in hnae_ae_register()
+
+From: Yang Yingliang <yangyingliang@huawei.com>
+
+[ Upstream commit ff2f5ec5d009844ec28f171123f9e58750cef4bf ]
+
+Inject fault while probing module, if device_register() fails,
+but the refcount of kobject is not decreased to 0, the name
+allocated in dev_set_name() is leaked. Fix this by calling
+put_device(), so that name can be freed in callback function
+kobject_cleanup().
+
+unreferenced object 0xffff00c01aba2100 (size 128):
+ comm "systemd-udevd", pid 1259, jiffies 4294903284 (age 294.152s)
+ hex dump (first 32 bytes):
+ 68 6e 61 65 30 00 00 00 18 21 ba 1a c0 00 ff ff hnae0....!......
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<0000000034783f26>] slab_post_alloc_hook+0xa0/0x3e0
+ [<00000000748188f2>] __kmem_cache_alloc_node+0x164/0x2b0
+ [<00000000ab0743e8>] __kmalloc_node_track_caller+0x6c/0x390
+ [<000000006c0ffb13>] kvasprintf+0x8c/0x118
+ [<00000000fa27bfe1>] kvasprintf_const+0x60/0xc8
+ [<0000000083e10ed7>] kobject_set_name_vargs+0x3c/0xc0
+ [<000000000b87affc>] dev_set_name+0x7c/0xa0
+ [<000000003fd8fe26>] hnae_ae_register+0xcc/0x190 [hnae]
+ [<00000000fe97edc9>] hns_dsaf_ae_init+0x9c/0x108 [hns_dsaf]
+ [<00000000c36ff1eb>] hns_dsaf_probe+0x548/0x748 [hns_dsaf]
+
+Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support")
+Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20221018122451.1749171-1-yangyingliang@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
+index 00fafc0f8512..430eccea8e5e 100644
+--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
++++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
+@@ -419,8 +419,10 @@ int hnae_ae_register(struct hnae_ae_dev *hdev, struct module *owner)
+ hdev->cls_dev.release = hnae_release;
+ (void)dev_set_name(&hdev->cls_dev, "hnae%d", hdev->id);
+ ret = device_register(&hdev->cls_dev);
+- if (ret)
++ if (ret) {
++ put_device(&hdev->cls_dev);
+ return ret;
++ }
+
+ __module_get(THIS_MODULE);
+
+--
+2.35.1
+
--- /dev/null
+From 815ad26e6f7245e68fd931d2b6e2dd1d48ef694c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Oct 2022 16:59:28 +0000
+Subject: net: hsr: avoid possible NULL deref in skb_clone()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d8b57135fd9ffe9a5b445350a686442a531c5339 ]
+
+syzbot got a crash [1] in skb_clone(), caused by a bug
+in hsr_get_untagged_frame().
+
+When/if create_stripped_skb_hsr() returns NULL, we must
+not attempt to call skb_clone().
+
+While we are at it, replace a WARN_ONCE() by netdev_warn_once().
+
+[1]
+general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] PREEMPT SMP KASAN
+KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f]
+CPU: 1 PID: 754 Comm: syz-executor.0 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022
+RIP: 0010:skb_clone+0x108/0x3c0 net/core/skbuff.c:1641
+Code: 93 02 00 00 49 83 7c 24 28 00 0f 85 e9 00 00 00 e8 5d 4a 29 fa 4c 8d 75 7e 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 84 c0 0f 85 9e 01 00 00
+RSP: 0018:ffffc90003ccf4e0 EFLAGS: 00010207
+
+RAX: dffffc0000000000 RBX: ffffc90003ccf5f8 RCX: ffffc9000c24b000
+RDX: 000000000000000f RSI: ffffffff8751cb13 RDI: 0000000000000000
+RBP: 0000000000000000 R08: 00000000000000f0 R09: 0000000000000140
+R10: fffffbfff181d972 R11: 0000000000000000 R12: ffff888161fc3640
+R13: 0000000000000a20 R14: 000000000000007e R15: ffffffff8dc5f620
+FS: 00007feb621e4700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007feb621e3ff8 CR3: 00000001643a9000 CR4: 00000000003506e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+<TASK>
+hsr_get_untagged_frame+0x4e/0x610 net/hsr/hsr_forward.c:164
+hsr_forward_do net/hsr/hsr_forward.c:461 [inline]
+hsr_forward_skb+0xcca/0x1d50 net/hsr/hsr_forward.c:623
+hsr_handle_frame+0x588/0x7c0 net/hsr/hsr_slave.c:69
+__netif_receive_skb_core+0x9fe/0x38f0 net/core/dev.c:5379
+__netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483
+__netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599
+netif_receive_skb_internal net/core/dev.c:5685 [inline]
+netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744
+tun_rx_batched+0x4ab/0x7a0 drivers/net/tun.c:1544
+tun_get_user+0x2686/0x3a00 drivers/net/tun.c:1995
+tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2025
+call_write_iter include/linux/fs.h:2187 [inline]
+new_sync_write fs/read_write.c:491 [inline]
+vfs_write+0x9e9/0xdd0 fs/read_write.c:584
+ksys_write+0x127/0x250 fs/read_write.c:637
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Fixes: f266a683a480 ("net/hsr: Better frame dispatch")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20221017165928.2150130-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_forward.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
+index baf4765be6d7..908324b46328 100644
+--- a/net/hsr/hsr_forward.c
++++ b/net/hsr/hsr_forward.c
+@@ -108,15 +108,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port)
+ {
+ if (!frame->skb_std) {
+- if (frame->skb_hsr) {
++ if (frame->skb_hsr)
+ frame->skb_std =
+ create_stripped_skb_hsr(frame->skb_hsr, frame);
+- } else {
+- /* Unexpected */
+- WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
+- __FILE__, __LINE__, port->dev->name);
++ else
++ netdev_warn_once(port->dev,
++ "Unexpected frame received in hsr_get_untagged_frame()\n");
++
++ if (!frame->skb_std)
+ return NULL;
+- }
+ }
+
+ return skb_clone(frame->skb_std, GFP_ATOMIC);
+--
+2.35.1
+
--- /dev/null
+From 3e3da7b204b323fad2e25355323618b85bef3a96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 12:47:54 +0200
+Subject: net: phy: dp83822: disable MDI crossover status change interrupt
+
+From: Felix Riemann <felix.riemann@sma.de>
+
+[ Upstream commit 7f378c03aa4952507521174fb0da7b24a9ad0be6 ]
+
+If the cable is disconnected the PHY seems to toggle between MDI and
+MDI-X modes. With the MDI crossover status interrupt active this causes
+roughly 10 interrupts per second.
+
+As the crossover status isn't checked by the driver, the interrupt can
+be disabled to reduce the interrupt load.
+
+Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission")
+Signed-off-by: Felix Riemann <felix.riemann@sma.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20221018104755.30025-1-svc.sw.rte.linux@sma.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/dp83822.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
+index 3a8849716459..db651649e0b8 100644
+--- a/drivers/net/phy/dp83822.c
++++ b/drivers/net/phy/dp83822.c
+@@ -268,8 +268,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
+ DP83822_EEE_ERROR_CHANGE_INT_EN);
+
+ if (!dp83822->fx_enabled)
+- misr_status |= DP83822_MDI_XOVER_INT_EN |
+- DP83822_ANEG_ERR_INT_EN |
++ misr_status |= DP83822_ANEG_ERR_INT_EN |
+ DP83822_WOL_PKT_INT_EN;
+
+ err = phy_write(phydev, MII_DP83822_MISR2, misr_status);
+--
+2.35.1
+
--- /dev/null
+From 64e9beb9998ad18102560f4328d09e147f1aa484 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Oct 2022 12:17:35 +0530
+Subject: net: phy: dp83867: Extend RX strap quirk for SGMII mode
+
+From: Harini Katakam <harini.katakam@amd.com>
+
+[ Upstream commit 0c9efbd5c50c64ead434960a404c9c9a097b0403 ]
+
+When RX strap in HW is not set to MODE 3 or 4, bit 7 and 8 in CF4
+register should be set. The former is already handled in
+dp83867_config_init; add the latter in SGMII specific initialization.
+
+Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy")
+Signed-off-by: Harini Katakam <harini.katakam@amd.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/dp83867.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
+index f86acad0aad4..c8031e297faf 100644
+--- a/drivers/net/phy/dp83867.c
++++ b/drivers/net/phy/dp83867.c
+@@ -757,6 +757,14 @@ static int dp83867_config_init(struct phy_device *phydev)
+ else
+ val &= ~DP83867_SGMII_TYPE;
+ phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_SGMIICTL, val);
++
++ /* This is a SW workaround for link instability if RX_CTRL is
++ * not strapped to mode 3 or 4 in HW. This is required for SGMII
++ * in addition to clearing bit 7, handled above.
++ */
++ if (dp83867->rxctrl_strap_quirk)
++ phy_set_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4,
++ BIT(8));
+ }
+
+ val = phy_read(phydev, DP83867_CFG3);
+--
+2.35.1
+
--- /dev/null
+From 1f4d25ad81cae5cc01f9c96c9ebcd5d7a890292f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 14:31:59 +0800
+Subject: net: sched: cake: fix null pointer access issue when cake_init()
+ fails
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit 51f9a8921ceacd7bf0d3f47fa867a64988ba1dcb ]
+
+When the default qdisc is cake, if the qdisc of dev_queue fails to be
+inited during mqprio_init(), cake_reset() is invoked to clear
+resources. In this case, the tins is NULL, and it will cause gpf issue.
+
+The process is as follows:
+qdisc_create_dflt()
+ cake_init()
+ q->tins = kvcalloc(...) --->failed, q->tins is NULL
+ ...
+ qdisc_put()
+ ...
+ cake_reset()
+ ...
+ cake_dequeue_one()
+ b = &q->tins[...] --->q->tins is NULL
+
+The following is the Call Trace information:
+general protection fault, probably for non-canonical address
+0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN
+KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
+RIP: 0010:cake_dequeue_one+0xc9/0x3c0
+Call Trace:
+<TASK>
+cake_reset+0xb1/0x140
+qdisc_reset+0xed/0x6f0
+qdisc_destroy+0x82/0x4c0
+qdisc_put+0x9e/0xb0
+qdisc_create_dflt+0x2c3/0x4a0
+mqprio_init+0xa71/0x1760
+qdisc_create+0x3eb/0x1000
+tc_modify_qdisc+0x408/0x1720
+rtnetlink_rcv_msg+0x38e/0xac0
+netlink_rcv_skb+0x12d/0x3a0
+netlink_unicast+0x4a2/0x740
+netlink_sendmsg+0x826/0xcc0
+sock_sendmsg+0xc5/0x100
+____sys_sendmsg+0x583/0x690
+___sys_sendmsg+0xe8/0x160
+__sys_sendmsg+0xbf/0x160
+do_syscall_64+0x35/0x80
+entry_SYSCALL_64_after_hwframe+0x46/0xb0
+RIP: 0033:0x7f89e5122d04
+</TASK>
+
+Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_cake.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
+index c580139fcede..5dc7a3c310c9 100644
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -2224,8 +2224,12 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
+
+ static void cake_reset(struct Qdisc *sch)
+ {
++ struct cake_sched_data *q = qdisc_priv(sch);
+ u32 c;
+
++ if (!q->tins)
++ return;
++
+ for (c = 0; c < CAKE_MAX_TINS; c++)
+ cake_clear_tin(sch, c);
+ }
+--
+2.35.1
+
--- /dev/null
+From 70ee65259c1725b99adc372f8a23e101edd4520c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Aug 2022 08:52:31 +0800
+Subject: net: sched: delete duplicate cleanup of backlog and qlen
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit c19d893fbf3f2f8fa864ae39652c7fee939edde2 ]
+
+qdisc_reset() is clearing qdisc->q.qlen and qdisc->qstats.backlog
+_after_ calling qdisc->ops->reset. There is no need to clear them
+again in the specific reset function.
+
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Link: https://lore.kernel.org/r/20220824005231.345727-1-shaozhengchao@huawei.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 2a3fc78210b9 ("net: sched: sfb: fix null pointer access issue when sfb_init() fails")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sch_generic.h | 1 -
+ net/sched/sch_atm.c | 1 -
+ net/sched/sch_cbq.c | 1 -
+ net/sched/sch_choke.c | 2 --
+ net/sched/sch_drr.c | 2 --
+ net/sched/sch_dsmark.c | 2 --
+ net/sched/sch_etf.c | 3 ---
+ net/sched/sch_ets.c | 2 --
+ net/sched/sch_fq_codel.c | 2 --
+ net/sched/sch_fq_pie.c | 3 ---
+ net/sched/sch_hfsc.c | 2 --
+ net/sched/sch_htb.c | 2 --
+ net/sched/sch_multiq.c | 1 -
+ net/sched/sch_prio.c | 2 --
+ net/sched/sch_qfq.c | 2 --
+ net/sched/sch_red.c | 2 --
+ net/sched/sch_sfb.c | 2 --
+ net/sched/sch_skbprio.c | 3 ---
+ net/sched/sch_taprio.c | 2 --
+ net/sched/sch_tbf.c | 2 --
+ net/sched/sch_teql.c | 1 -
+ 21 files changed, 40 deletions(-)
+
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index bed2387af456..e7e8c318925d 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -1178,7 +1178,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
+ static inline void qdisc_reset_queue(struct Qdisc *sch)
+ {
+ __qdisc_reset_queue(&sch->q);
+- sch->qstats.backlog = 0;
+ }
+
+ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
+diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
+index 1c281cc81f57..794c7377cd7e 100644
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -575,7 +575,6 @@ static void atm_tc_reset(struct Qdisc *sch)
+ pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
+ list_for_each_entry(flow, &p->flows, list)
+ qdisc_reset(flow->q);
+- sch->q.qlen = 0;
+ }
+
+ static void atm_tc_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
+index 4a78fcf5d4f9..9a3dff02b7a2 100644
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1053,7 +1053,6 @@ cbq_reset(struct Qdisc *sch)
+ cl->cpriority = cl->priority;
+ }
+ }
+- sch->q.qlen = 0;
+ }
+
+
+diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
+index 2adbd945bf15..25d2daaa8122 100644
+--- a/net/sched/sch_choke.c
++++ b/net/sched/sch_choke.c
+@@ -315,8 +315,6 @@ static void choke_reset(struct Qdisc *sch)
+ rtnl_qdisc_drop(skb, sch);
+ }
+
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ q->head = q->tail = 0;
+diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
+index dde564670ad8..08424aac6da8 100644
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -443,8 +443,6 @@ static void drr_reset_qdisc(struct Qdisc *sch)
+ qdisc_reset(cl->qdisc);
+ }
+ }
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void drr_destroy_qdisc(struct Qdisc *sch)
+diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
+index 76ed1a05ded2..a75bc7f80cd7 100644
+--- a/net/sched/sch_dsmark.c
++++ b/net/sched/sch_dsmark.c
+@@ -408,8 +408,6 @@ static void dsmark_reset(struct Qdisc *sch)
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
+ if (p->q)
+ qdisc_reset(p->q);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void dsmark_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
+index c48f91075b5c..d96103b0e2bf 100644
+--- a/net/sched/sch_etf.c
++++ b/net/sched/sch_etf.c
+@@ -445,9 +445,6 @@ static void etf_reset(struct Qdisc *sch)
+ timesortedlist_clear(sch);
+ __qdisc_reset_queue(&sch->q);
+
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+-
+ q->last = 0;
+ }
+
+diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
+index 9c224872ef03..05817c55692f 100644
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -722,8 +722,6 @@ static void ets_qdisc_reset(struct Qdisc *sch)
+ }
+ for (band = 0; band < q->nbands; band++)
+ qdisc_reset(q->classes[band].qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void ets_qdisc_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
+index 99e8db262198..01d6eea5b0ce 100644
+--- a/net/sched/sch_fq_codel.c
++++ b/net/sched/sch_fq_codel.c
+@@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch)
+ codel_vars_init(&flow->cvars);
+ }
+ memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ q->memory_usage = 0;
+ }
+
+diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
+index c70802785518..cf04f70e96bf 100644
+--- a/net/sched/sch_fq_pie.c
++++ b/net/sched/sch_fq_pie.c
+@@ -521,9 +521,6 @@ static void fq_pie_reset(struct Qdisc *sch)
+ INIT_LIST_HEAD(&flow->flowchain);
+ pie_vars_init(&flow->vars);
+ }
+-
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ }
+
+ static void fq_pie_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
+index d1902fca9844..cdc43a06aa9b 100644
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -1484,8 +1484,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
+ }
+ q->eligible = RB_ROOT;
+ qdisc_watchdog_cancel(&q->watchdog);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void
+diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
+index cd70dbcbd72f..c3ba018fd083 100644
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -966,8 +966,6 @@ static void htb_reset(struct Qdisc *sch)
+ }
+ qdisc_watchdog_cancel(&q->watchdog);
+ __qdisc_reset_queue(&q->direct_queue);
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ memset(q->hlevel, 0, sizeof(q->hlevel));
+ memset(q->row_mask, 0, sizeof(q->row_mask));
+ }
+diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
+index 5c27b4270b90..1c6dbcfa89b8 100644
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch)
+
+ for (band = 0; band < q->bands; band++)
+ qdisc_reset(q->queues[band]);
+- sch->q.qlen = 0;
+ q->curband = 0;
+ }
+
+diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
+index 3eabb871a1d5..1c805fe05b82 100644
+--- a/net/sched/sch_prio.c
++++ b/net/sched/sch_prio.c
+@@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch)
+
+ for (prio = 0; prio < q->bands; prio++)
+ qdisc_reset(q->queues[prio]);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
+diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
+index af8c63a9ec18..1d1d81aeb389 100644
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -1458,8 +1458,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
+ qdisc_reset(cl->qdisc);
+ }
+ }
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void qfq_destroy_qdisc(struct Qdisc *sch)
+diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
+index 40adf1f07a82..f1e013e3f04a 100644
+--- a/net/sched/sch_red.c
++++ b/net/sched/sch_red.c
+@@ -176,8 +176,6 @@ static void red_reset(struct Qdisc *sch)
+ struct red_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ red_restart(&q->vars);
+ }
+
+diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
+index b2724057629f..0e1cb517b0d9 100644
+--- a/net/sched/sch_sfb.c
++++ b/net/sched/sch_sfb.c
+@@ -456,8 +456,6 @@ static void sfb_reset(struct Qdisc *sch)
+ struct sfb_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ q->slot = 0;
+ q->double_buffering = false;
+ sfb_zero_all_buckets(q);
+diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
+index 7a5e4c454715..df72fb83d9c7 100644
+--- a/net/sched/sch_skbprio.c
++++ b/net/sched/sch_skbprio.c
+@@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch)
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ int prio;
+
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+-
+ for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+ __skb_queue_purge(&q->qdiscs[prio]);
+
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index ab8835a72cee..7f33b31c7b8b 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1626,8 +1626,6 @@ static void taprio_reset(struct Qdisc *sch)
+ if (q->qdiscs[i])
+ qdisc_reset(q->qdiscs[i]);
+ }
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void taprio_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
+index 6eb17004a9e4..7461e5c67d50 100644
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -316,8 +316,6 @@ static void tbf_reset(struct Qdisc *sch)
+ struct tbf_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ q->t_c = ktime_get_ns();
+ q->tokens = q->buffer;
+ q->ptokens = q->mtu;
+diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
+index 6af6b95bdb67..79aaab51cbf5 100644
+--- a/net/sched/sch_teql.c
++++ b/net/sched/sch_teql.c
+@@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch)
+ struct teql_sched_data *dat = qdisc_priv(sch);
+
+ skb_queue_purge(&dat->q);
+- sch->q.qlen = 0;
+ }
+
+ static void
+--
+2.35.1
+
--- /dev/null
+From 3ce60e54a28dbfc95e759897fa347346f38d3e2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 20:32:58 +0000
+Subject: net: sched: fix race condition in qdisc_graft()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ebda44da44f6f309d302522b049f43d6f829f7aa ]
+
+We had one syzbot report [1] in syzbot queue for a while.
+I was waiting for more occurrences and/or a repro but
+Dmitry Vyukov spotted the issue right away.
+
+<quoting Dmitry>
+qdisc_graft() drops reference to qdisc in notify_and_destroy
+while it's still assigned to dev->qdisc
+</quoting>
+
+Indeed, RCU rules are clear when replacing a data structure.
+The visible pointer (dev->qdisc in this case) must be updated
+to the new object _before_ RCU grace period is started
+(qdisc_put(old) in this case).
+
+[1]
+BUG: KASAN: use-after-free in __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066
+Read of size 4 at addr ffff88802065e038 by task syz-executor.4/21027
+
+CPU: 0 PID: 21027 Comm: syz-executor.4 Not tainted 6.0.0-rc3-syzkaller-00363-g7726d4c3e60b #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022
+Call Trace:
+<TASK>
+__dump_stack lib/dump_stack.c:88 [inline]
+dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+print_address_description mm/kasan/report.c:317 [inline]
+print_report.cold+0x2ba/0x719 mm/kasan/report.c:433
+kasan_report+0xb1/0x1e0 mm/kasan/report.c:495
+__tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066
+__tcf_qdisc_find net/sched/cls_api.c:1051 [inline]
+tc_new_tfilter+0x34f/0x2200 net/sched/cls_api.c:2018
+rtnetlink_rcv_msg+0x955/0xca0 net/core/rtnetlink.c:6081
+netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501
+netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345
+netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921
+sock_sendmsg_nosec net/socket.c:714 [inline]
+sock_sendmsg+0xcf/0x120 net/socket.c:734
+____sys_sendmsg+0x6eb/0x810 net/socket.c:2482
+___sys_sendmsg+0x110/0x1b0 net/socket.c:2536
+__sys_sendmsg+0xf3/0x1c0 net/socket.c:2565
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f5efaa89279
+Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f5efbc31168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007f5efab9bf80 RCX: 00007f5efaa89279
+RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005
+RBP: 00007f5efaae32e9 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007f5efb0cfb1f R14: 00007f5efbc31300 R15: 0000000000022000
+</TASK>
+
+Allocated by task 21027:
+kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+kasan_set_track mm/kasan/common.c:45 [inline]
+set_alloc_info mm/kasan/common.c:437 [inline]
+____kasan_kmalloc mm/kasan/common.c:516 [inline]
+____kasan_kmalloc mm/kasan/common.c:475 [inline]
+__kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:525
+kmalloc_node include/linux/slab.h:623 [inline]
+kzalloc_node include/linux/slab.h:744 [inline]
+qdisc_alloc+0xb0/0xc50 net/sched/sch_generic.c:938
+qdisc_create_dflt+0x71/0x4a0 net/sched/sch_generic.c:997
+attach_one_default_qdisc net/sched/sch_generic.c:1152 [inline]
+netdev_for_each_tx_queue include/linux/netdevice.h:2437 [inline]
+attach_default_qdiscs net/sched/sch_generic.c:1170 [inline]
+dev_activate+0x760/0xcd0 net/sched/sch_generic.c:1229
+__dev_open+0x393/0x4d0 net/core/dev.c:1441
+__dev_change_flags+0x583/0x750 net/core/dev.c:8556
+rtnl_configure_link+0xee/0x240 net/core/rtnetlink.c:3189
+rtnl_newlink_create net/core/rtnetlink.c:3371 [inline]
+__rtnl_newlink+0x10b8/0x17e0 net/core/rtnetlink.c:3580
+rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593
+rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090
+netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501
+netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345
+netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921
+sock_sendmsg_nosec net/socket.c:714 [inline]
+sock_sendmsg+0xcf/0x120 net/socket.c:734
+____sys_sendmsg+0x6eb/0x810 net/socket.c:2482
+___sys_sendmsg+0x110/0x1b0 net/socket.c:2536
+__sys_sendmsg+0xf3/0x1c0 net/socket.c:2565
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Freed by task 21020:
+kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+kasan_set_track+0x21/0x30 mm/kasan/common.c:45
+kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370
+____kasan_slab_free mm/kasan/common.c:367 [inline]
+____kasan_slab_free+0x166/0x1c0 mm/kasan/common.c:329
+kasan_slab_free include/linux/kasan.h:200 [inline]
+slab_free_hook mm/slub.c:1754 [inline]
+slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1780
+slab_free mm/slub.c:3534 [inline]
+kfree+0xe2/0x580 mm/slub.c:4562
+rcu_do_batch kernel/rcu/tree.c:2245 [inline]
+rcu_core+0x7b5/0x1890 kernel/rcu/tree.c:2505
+__do_softirq+0x1d3/0x9c6 kernel/softirq.c:571
+
+Last potentially related work creation:
+kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+__kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348
+call_rcu+0x99/0x790 kernel/rcu/tree.c:2793
+qdisc_put+0xcd/0xe0 net/sched/sch_generic.c:1083
+notify_and_destroy net/sched/sch_api.c:1012 [inline]
+qdisc_graft+0xeb1/0x1270 net/sched/sch_api.c:1084
+tc_modify_qdisc+0xbb7/0x1a00 net/sched/sch_api.c:1671
+rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090
+netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501
+netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345
+netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921
+sock_sendmsg_nosec net/socket.c:714 [inline]
+sock_sendmsg+0xcf/0x120 net/socket.c:734
+____sys_sendmsg+0x6eb/0x810 net/socket.c:2482
+___sys_sendmsg+0x110/0x1b0 net/socket.c:2536
+__sys_sendmsg+0xf3/0x1c0 net/socket.c:2565
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Second to last potentially related work creation:
+kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+__kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348
+kvfree_call_rcu+0x74/0x940 kernel/rcu/tree.c:3322
+neigh_destroy+0x431/0x630 net/core/neighbour.c:912
+neigh_release include/net/neighbour.h:454 [inline]
+neigh_cleanup_and_release+0x1f8/0x330 net/core/neighbour.c:103
+neigh_del net/core/neighbour.c:225 [inline]
+neigh_remove_one+0x37d/0x460 net/core/neighbour.c:246
+neigh_forced_gc net/core/neighbour.c:276 [inline]
+neigh_alloc net/core/neighbour.c:447 [inline]
+___neigh_create+0x18b5/0x29a0 net/core/neighbour.c:642
+ip6_finish_output2+0xfb8/0x1520 net/ipv6/ip6_output.c:125
+__ip6_finish_output net/ipv6/ip6_output.c:195 [inline]
+ip6_finish_output+0x690/0x1160 net/ipv6/ip6_output.c:206
+NF_HOOK_COND include/linux/netfilter.h:296 [inline]
+ip6_output+0x1ed/0x540 net/ipv6/ip6_output.c:227
+dst_output include/net/dst.h:451 [inline]
+NF_HOOK include/linux/netfilter.h:307 [inline]
+NF_HOOK include/linux/netfilter.h:301 [inline]
+mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820
+mld_send_cr net/ipv6/mcast.c:2121 [inline]
+mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2653
+process_one_work+0x991/0x1610 kernel/workqueue.c:2289
+worker_thread+0x665/0x1080 kernel/workqueue.c:2436
+kthread+0x2e4/0x3a0 kernel/kthread.c:376
+ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306
+
+The buggy address belongs to the object at ffff88802065e000
+which belongs to the cache kmalloc-1k of size 1024
+The buggy address is located 56 bytes inside of
+1024-byte region [ffff88802065e000, ffff88802065e400)
+
+The buggy address belongs to the physical page:
+page:ffffea0000819600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x20658
+head:ffffea0000819600 order:3 compound_mapcount:0 compound_pincount:0
+flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff)
+raw: 00fff00000010200 0000000000000000 dead000000000001 ffff888011841dc0
+raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+page_owner tracks the page as allocated
+page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 3523, tgid 3523 (sshd), ts 41495190986, free_ts 41417713212
+prep_new_page mm/page_alloc.c:2532 [inline]
+get_page_from_freelist+0x109b/0x2ce0 mm/page_alloc.c:4283
+__alloc_pages+0x1c7/0x510 mm/page_alloc.c:5515
+alloc_pages+0x1a6/0x270 mm/mempolicy.c:2270
+alloc_slab_page mm/slub.c:1824 [inline]
+allocate_slab+0x27e/0x3d0 mm/slub.c:1969
+new_slab mm/slub.c:2029 [inline]
+___slab_alloc+0x7f1/0xe10 mm/slub.c:3031
+__slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3118
+slab_alloc_node mm/slub.c:3209 [inline]
+__kmalloc_node_track_caller+0x2f2/0x380 mm/slub.c:4955
+kmalloc_reserve net/core/skbuff.c:358 [inline]
+__alloc_skb+0xd9/0x2f0 net/core/skbuff.c:430
+alloc_skb_fclone include/linux/skbuff.h:1307 [inline]
+tcp_stream_alloc_skb+0x38/0x580 net/ipv4/tcp.c:861
+tcp_sendmsg_locked+0xc36/0x2f80 net/ipv4/tcp.c:1325
+tcp_sendmsg+0x2b/0x40 net/ipv4/tcp.c:1483
+inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819
+sock_sendmsg_nosec net/socket.c:714 [inline]
+sock_sendmsg+0xcf/0x120 net/socket.c:734
+sock_write_iter+0x291/0x3d0 net/socket.c:1108
+call_write_iter include/linux/fs.h:2187 [inline]
+new_sync_write fs/read_write.c:491 [inline]
+vfs_write+0x9e9/0xdd0 fs/read_write.c:578
+ksys_write+0x1e8/0x250 fs/read_write.c:631
+page last free stack trace:
+reset_page_owner include/linux/page_owner.h:24 [inline]
+free_pages_prepare mm/page_alloc.c:1449 [inline]
+free_pcp_prepare+0x5e4/0xd20 mm/page_alloc.c:1499
+free_unref_page_prepare mm/page_alloc.c:3380 [inline]
+free_unref_page+0x19/0x4d0 mm/page_alloc.c:3476
+__unfreeze_partials+0x17c/0x1a0 mm/slub.c:2548
+qlink_free mm/kasan/quarantine.c:168 [inline]
+qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187
+kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:294
+__kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:447
+kasan_slab_alloc include/linux/kasan.h:224 [inline]
+slab_post_alloc_hook mm/slab.h:727 [inline]
+slab_alloc_node mm/slub.c:3243 [inline]
+slab_alloc mm/slub.c:3251 [inline]
+__kmem_cache_alloc_lru mm/slub.c:3258 [inline]
+kmem_cache_alloc+0x267/0x3b0 mm/slub.c:3268
+kmem_cache_zalloc include/linux/slab.h:723 [inline]
+alloc_buffer_head+0x20/0x140 fs/buffer.c:2974
+alloc_page_buffers+0x280/0x790 fs/buffer.c:829
+create_empty_buffers+0x2c/0xee0 fs/buffer.c:1558
+ext4_block_write_begin+0x1004/0x1530 fs/ext4/inode.c:1074
+ext4_da_write_begin+0x422/0xae0 fs/ext4/inode.c:2996
+generic_perform_write+0x246/0x560 mm/filemap.c:3738
+ext4_buffered_write_iter+0x15b/0x460 fs/ext4/file.c:270
+ext4_file_write_iter+0x44a/0x1660 fs/ext4/file.c:679
+call_write_iter include/linux/fs.h:2187 [inline]
+new_sync_write fs/read_write.c:491 [inline]
+vfs_write+0x9e9/0xdd0 fs/read_write.c:578
+
+Fixes: af356afa010f ("net_sched: reintroduce dev->qdisc for use by sch_api")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Diagnosed-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20221018203258.2793282-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_api.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index 6e18aa417782..d8ffe4114385 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -1081,12 +1081,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+
+ skip:
+ if (!ingress) {
+- notify_and_destroy(net, skb, n, classid,
+- rtnl_dereference(dev->qdisc), new);
++ old = rtnl_dereference(dev->qdisc);
+ if (new && !new->ops->attach)
+ qdisc_refcount_inc(new);
+ rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
+
++ notify_and_destroy(net, skb, n, classid, old, new);
++
+ if (new && new->ops->attach)
+ new->ops->attach(new);
+ } else {
+--
+2.35.1
+
--- /dev/null
+From cc3e73723c9f5894842ced0cab05a90c753aa995 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 14:32:01 +0800
+Subject: net: sched: sfb: fix null pointer access issue when sfb_init() fails
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit 2a3fc78210b9f0e85372a2435368962009f480fc ]
+
+When the default qdisc is sfb, if the qdisc of a dev_queue fails to be
+initialized during mqprio_init(), sfb_reset() is invoked to clear resources.
+In this case, q->qdisc is NULL, which causes a general protection fault (GPF).
+
+The process is as follows:
+qdisc_create_dflt()
+ sfb_init()
+ tcf_block_get() --->failed, q->qdisc is NULL
+ ...
+ qdisc_put()
+ ...
+ sfb_reset()
+ qdisc_reset(q->qdisc) --->q->qdisc is NULL
+ ops = qdisc->ops
+
+The following is the Call Trace information:
+general protection fault, probably for non-canonical address
+0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN
+KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
+RIP: 0010:qdisc_reset+0x2b/0x6f0
+Call Trace:
+<TASK>
+sfb_reset+0x37/0xd0
+qdisc_reset+0xed/0x6f0
+qdisc_destroy+0x82/0x4c0
+qdisc_put+0x9e/0xb0
+qdisc_create_dflt+0x2c3/0x4a0
+mqprio_init+0xa71/0x1760
+qdisc_create+0x3eb/0x1000
+tc_modify_qdisc+0x408/0x1720
+rtnetlink_rcv_msg+0x38e/0xac0
+netlink_rcv_skb+0x12d/0x3a0
+netlink_unicast+0x4a2/0x740
+netlink_sendmsg+0x826/0xcc0
+sock_sendmsg+0xc5/0x100
+____sys_sendmsg+0x583/0x690
+___sys_sendmsg+0xe8/0x160
+__sys_sendmsg+0xbf/0x160
+do_syscall_64+0x35/0x80
+entry_SYSCALL_64_after_hwframe+0x46/0xb0
+RIP: 0033:0x7f2164122d04
+</TASK>
+
+Fixes: e13e02a3c68d ("net_sched: SFB flow scheduler")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_sfb.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
+index 0e1cb517b0d9..9ded56228ea1 100644
+--- a/net/sched/sch_sfb.c
++++ b/net/sched/sch_sfb.c
+@@ -455,7 +455,8 @@ static void sfb_reset(struct Qdisc *sch)
+ {
+ struct sfb_sched_data *q = qdisc_priv(sch);
+
+- qdisc_reset(q->qdisc);
++ if (likely(q->qdisc))
++ qdisc_reset(q->qdisc);
+ q->slot = 0;
+ q->double_buffering = false;
+ sfb_zero_all_buckets(q);
+--
+2.35.1
+
--- /dev/null
+From 34e09e5c9a1611aa6be5e62844ff84e408ee3701 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Nov 2020 14:48:29 -0500
+Subject: NFSD: Add common helpers to decode void args and encode void results
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 788f7183fba86b46074c16e7d57ea09302badff4 ]
+
+Start off the conversion to xdr_stream by de-duplicating the functions
+that decode void arguments and encode void results.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs2acl.c | 21 ++++-----------------
+ fs/nfsd/nfs3acl.c | 8 ++++----
+ fs/nfsd/nfs3proc.c | 10 ++++------
+ fs/nfsd/nfs3xdr.c | 11 -----------
+ fs/nfsd/nfs4proc.c | 11 ++++-------
+ fs/nfsd/nfs4xdr.c | 12 ------------
+ fs/nfsd/nfsd.h | 8 ++++++++
+ fs/nfsd/nfsproc.c | 25 ++++++++++++-------------
+ fs/nfsd/nfssvc.c | 28 ++++++++++++++++++++++++++++
+ fs/nfsd/nfsxdr.c | 10 ----------
+ fs/nfsd/xdr.h | 2 --
+ fs/nfsd/xdr3.h | 2 --
+ fs/nfsd/xdr4.h | 2 --
+ 13 files changed, 64 insertions(+), 86 deletions(-)
+
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index 6a900f770dd2..b0f66604532a 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -185,10 +185,6 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
+ /*
+ * XDR decode functions
+ */
+-static int nfsaclsvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return 1;
+-}
+
+ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
+ {
+@@ -255,15 +251,6 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+ * XDR encode functions
+ */
+
+-/*
+- * There must be an encoding function for void results so svc_process
+- * will work properly.
+- */
+-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return xdr_ressize_check(rqstp, p);
+-}
+-
+ /* GETACL */
+ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+ {
+@@ -378,10 +365,10 @@ struct nfsd3_voidargs { int dummy; };
+ static const struct svc_procedure nfsd_acl_procedures2[5] = {
+ [ACLPROC2_NULL] = {
+ .pc_func = nfsacld_proc_null,
+- .pc_decode = nfsaclsvc_decode_voidarg,
+- .pc_encode = nfsaclsvc_encode_voidres,
+- .pc_argsize = sizeof(struct nfsd3_voidargs),
+- .pc_ressize = sizeof(struct nfsd3_voidargs),
++ .pc_decode = nfssvc_decode_voidarg,
++ .pc_encode = nfssvc_encode_voidres,
++ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+ },
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index 34a394e50e1d..7c30876a31a1 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -245,10 +245,10 @@ struct nfsd3_voidargs { int dummy; };
+ static const struct svc_procedure nfsd_acl_procedures3[3] = {
+ [ACLPROC3_NULL] = {
+ .pc_func = nfsd3_proc_null,
+- .pc_decode = nfs3svc_decode_voidarg,
+- .pc_encode = nfs3svc_encode_voidres,
+- .pc_argsize = sizeof(struct nfsd3_voidargs),
+- .pc_ressize = sizeof(struct nfsd3_voidargs),
++ .pc_decode = nfssvc_decode_voidarg,
++ .pc_encode = nfssvc_encode_voidres,
++ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+ },
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 981a4e4c9a3c..a4dfe8160d55 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -698,8 +698,6 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
+ #define nfsd3_attrstatres nfsd3_attrstat
+ #define nfsd3_wccstatres nfsd3_attrstat
+ #define nfsd3_createres nfsd3_diropres
+-#define nfsd3_voidres nfsd3_voidargs
+-struct nfsd3_voidargs { int dummy; };
+
+ #define ST 1 /* status*/
+ #define FH 17 /* filehandle with length */
+@@ -710,10 +708,10 @@ struct nfsd3_voidargs { int dummy; };
+ static const struct svc_procedure nfsd_procedures3[22] = {
+ [NFS3PROC_NULL] = {
+ .pc_func = nfsd3_proc_null,
+- .pc_decode = nfs3svc_decode_voidarg,
+- .pc_encode = nfs3svc_encode_voidres,
+- .pc_argsize = sizeof(struct nfsd3_voidargs),
+- .pc_ressize = sizeof(struct nfsd3_voidres),
++ .pc_decode = nfssvc_decode_voidarg,
++ .pc_encode = nfssvc_encode_voidres,
++ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+ },
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 716566da400e..9dc22d917bd2 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -304,11 +304,6 @@ void fill_post_wcc(struct svc_fh *fhp)
+ /*
+ * XDR decode functions
+ */
+-int
+-nfs3svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return 1;
+-}
+
+ int
+ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
+@@ -642,12 +637,6 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
+ * XDR encode functions
+ */
+
+-int
+-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return xdr_ressize_check(rqstp, p);
+-}
+-
+ /* GETATTR */
+ int
+ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 9aeeb51e8c61..1acafc39f008 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -3293,16 +3293,13 @@ static const char *nfsd4_op_name(unsigned opnum)
+ return "unknown_operation";
+ }
+
+-#define nfsd4_voidres nfsd4_voidargs
+-struct nfsd4_voidargs { int dummy; };
+-
+ static const struct svc_procedure nfsd_procedures4[2] = {
+ [NFSPROC4_NULL] = {
+ .pc_func = nfsd4_proc_null,
+- .pc_decode = nfs4svc_decode_voidarg,
+- .pc_encode = nfs4svc_encode_voidres,
+- .pc_argsize = sizeof(struct nfsd4_voidargs),
+- .pc_ressize = sizeof(struct nfsd4_voidres),
++ .pc_decode = nfssvc_decode_voidarg,
++ .pc_encode = nfssvc_encode_voidres,
++ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 1,
+ },
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index cc605ee0b2fa..e7b891a19bf8 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5263,12 +5263,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
+ p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
+ }
+
+-int
+-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return xdr_ressize_check(rqstp, p);
+-}
+-
+ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
+ {
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+@@ -5286,12 +5280,6 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
+ }
+ }
+
+-int
+-nfs4svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return 1;
+-}
+-
+ int
+ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
+ {
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index cb742e17e04a..7907de3f2ee6 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -73,6 +73,14 @@ extern unsigned long nfsd_drc_mem_used;
+
+ extern const struct seq_operations nfs_exports_op;
+
++/*
++ * Common void argument and result helpers
++ */
++struct nfsd_voidargs { };
++struct nfsd_voidres { };
++int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p);
++int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
++
+ /*
+ * Function prototypes.
+ */
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index bbd01e8397f6..dbd8d3604653 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -609,7 +609,6 @@ nfsd_proc_statfs(struct svc_rqst *rqstp)
+ * NFSv2 Server procedures.
+ * Only the results of non-idempotent operations are cached.
+ */
+-struct nfsd_void { int dummy; };
+
+ #define ST 1 /* status */
+ #define FH 8 /* filehandle */
+@@ -618,10 +617,10 @@ struct nfsd_void { int dummy; };
+ static const struct svc_procedure nfsd_procedures2[18] = {
+ [NFSPROC_NULL] = {
+ .pc_func = nfsd_proc_null,
+- .pc_decode = nfssvc_decode_void,
+- .pc_encode = nfssvc_encode_void,
+- .pc_argsize = sizeof(struct nfsd_void),
+- .pc_ressize = sizeof(struct nfsd_void),
++ .pc_decode = nfssvc_decode_voidarg,
++ .pc_encode = nfssvc_encode_voidres,
++ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+ },
+@@ -647,10 +646,10 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ },
+ [NFSPROC_ROOT] = {
+ .pc_func = nfsd_proc_root,
+- .pc_decode = nfssvc_decode_void,
+- .pc_encode = nfssvc_encode_void,
+- .pc_argsize = sizeof(struct nfsd_void),
+- .pc_ressize = sizeof(struct nfsd_void),
++ .pc_decode = nfssvc_decode_voidarg,
++ .pc_encode = nfssvc_encode_voidres,
++ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+ },
+@@ -685,10 +684,10 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ },
+ [NFSPROC_WRITECACHE] = {
+ .pc_func = nfsd_proc_writecache,
+- .pc_decode = nfssvc_decode_void,
+- .pc_encode = nfssvc_encode_void,
+- .pc_argsize = sizeof(struct nfsd_void),
+- .pc_ressize = sizeof(struct nfsd_void),
++ .pc_decode = nfssvc_decode_voidarg,
++ .pc_encode = nfssvc_encode_voidres,
++ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+ },
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index ad6fedf37a40..8b675e8e6a6e 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1074,6 +1074,34 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ return 1;
+ }
+
++/**
++ * nfssvc_decode_voidarg - Decode void arguments
++ * @rqstp: Server RPC transaction context
++ * @p: buffer containing arguments to decode
++ *
++ * Return values:
++ * %0: Arguments were not valid
++ * %1: Decoding was successful
++ */
++int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
++{
++ return 1;
++}
++
++/**
++ * nfssvc_encode_voidres - Encode void results
++ * @rqstp: Server RPC transaction context
++ * @p: buffer in which to encode results
++ *
++ * Return values:
++ * %0: Local error while encoding
++ * %1: Encoding was successful
++ */
++int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
++{
++ return xdr_ressize_check(rqstp, p);
++}
++
+ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
+ {
+ int ret;
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index 8a288c8fcd57..13df5464a087 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -192,11 +192,6 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
+ /*
+ * XDR decode functions
+ */
+-int
+-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return xdr_argsize_check(rqstp, p);
+-}
+
+ int
+ nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
+@@ -423,11 +418,6 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
+ /*
+ * XDR encode functions
+ */
+-int
+-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
+-{
+- return xdr_ressize_check(rqstp, p);
+-}
+
+ int
+ nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p)
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index b8cc6a4b2e0e..edd87688ff86 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -144,7 +144,6 @@ union nfsd_xdrstore {
+ #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
+
+
+-int nfssvc_decode_void(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+@@ -156,7 +155,6 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
+-int nfssvc_encode_void(struct svc_rqst *, __be32 *);
+ int nfssvc_encode_stat(struct svc_rqst *, __be32 *);
+ int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
+ int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index ae6fa6c9cb46..456fcd7a1038 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -273,7 +273,6 @@ union nfsd3_xdrstore {
+
+ #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
+
+-int nfs3svc_decode_voidarg(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+@@ -290,7 +289,6 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
+ int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
+ int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+ int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 679d40af1bbb..37f89ad5e992 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -781,8 +781,6 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+
+
+ bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
+-int nfs4svc_decode_voidarg(struct svc_rqst *, __be32 *);
+-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
+ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
+ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
+ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
+--
+2.35.1
+
--- /dev/null
+From 3d69c488be74d18b9be2d42a67f4f2326a499362 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Feb 2022 15:19:34 -0500
+Subject: NFSD: Fix the behavior of READ near OFFSET_MAX
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 0cb4d23ae08c48f6bf3c29a8e5c4a74b8388b960 ]
+
+Dan Aloni reports:
+> Due to commit 8cfb9015280d ("NFS: Always provide aligned buffers to
+> the RPC read layers") on the client, a read of 0xfff is aligned up
+> to server rsize of 0x1000.
+>
+> As a result, in a test where the server has a file of size
+> 0x7fffffffffffffff, and the client tries to read from the offset
+> 0x7ffffffffffff000, the read causes loff_t overflow in the server
+> and it returns an NFS code of EINVAL to the client. The client as
+> a result indefinitely retries the request.
+
+The Linux NFS client does not handle NFS?ERR_INVAL, even though all
+NFS specifications permit servers to return that status code for a
+READ.
+
+Instead of NFS?ERR_INVAL, have out-of-range READ requests succeed
+and return a short result. Set the EOF flag in the result to prevent
+the client from retrying the READ request. This behavior appears to
+be consistent with Solaris NFS servers.
+
+Note that NFSv3 and NFSv4 use u64 offset values on the wire. These
+must be converted to loff_t internally before use -- an implicit
+type cast is not adequate for this purpose. Otherwise VFS checks
+against sb->s_maxbytes do not work properly.
+
+Reported-by: Dan Aloni <dan.aloni@vastdata.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs3proc.c | 8 ++++++--
+ fs/nfsd/nfs4proc.c | 8 ++++++--
+ fs/nfsd/nfs4xdr.c | 8 ++------
+ 3 files changed, 14 insertions(+), 10 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 104e7d705ea8..60faa5b8eccf 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -148,13 +148,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
+ unsigned int len;
+ int v;
+
+- argp->count = min_t(u32, argp->count, max_blocksize);
+-
+ dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
+ SVCFH_fmt(&argp->fh),
+ (unsigned long) argp->count,
+ (unsigned long long) argp->offset);
+
++ argp->count = min_t(u32, argp->count, max_blocksize);
++ if (argp->offset > (u64)OFFSET_MAX)
++ argp->offset = (u64)OFFSET_MAX;
++ if (argp->offset + argp->count > (u64)OFFSET_MAX)
++ argp->count = (u64)OFFSET_MAX - argp->offset;
++
+ v = 0;
+ len = argp->count;
+ while (len > 0) {
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 5054dc66cbf9..363df0a795bc 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -772,12 +772,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ __be32 status;
+
+ read->rd_nf = NULL;
+- if (read->rd_offset >= OFFSET_MAX)
+- return nfserr_inval;
+
+ trace_nfsd_read_start(rqstp, &cstate->current_fh,
+ read->rd_offset, read->rd_length);
+
++ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
++ if (read->rd_offset > (u64)OFFSET_MAX)
++ read->rd_offset = (u64)OFFSET_MAX;
++ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
++ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
++
+ /*
+ * If we do a zero copy read, then a client will see read data
+ * that reflects the state of the file *after* performing the
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index d0af93a0558f..930bed3e40a4 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3754,10 +3754,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ }
+ xdr_commit_encode(xdr);
+
+- maxcount = svc_max_payload(resp->rqstp);
+- maxcount = min_t(unsigned long, maxcount,
++ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));
+- maxcount = min_t(unsigned long, maxcount, read->rd_length);
+
+ if (file->f_op->splice_read &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
+@@ -4585,10 +4583,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+ return nfserr_resource;
+ xdr_commit_encode(xdr);
+
+- maxcount = svc_max_payload(resp->rqstp);
+- maxcount = min_t(unsigned long, maxcount,
++ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));
+- maxcount = min_t(unsigned long, maxcount, read->rd_length);
+ count = maxcount;
+
+ eof = read->rd_offset >= i_size_read(file_inode(file));
+--
+2.35.1
+
--- /dev/null
+From 371805ae7d57acb15b4ae15945abde5c27336ac4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 15:10:18 -0400
+Subject: NFSD: Protect against send buffer overflow in NFSv2 READ
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 401bc1f90874280a80b93f23be33a0e7e2d1f912 ]
+
+Since before the git era, NFSD has conserved the number of pages
+held by each nfsd thread by combining the RPC receive and send
+buffers into a single array of pages. This works because there are
+no cases where an operation needs a large RPC Call message and a
+large RPC Reply at the same time.
+
+Once an RPC Call has been received, svc_process() updates
+svc_rqst::rq_res to describe the part of rq_pages that can be
+used for constructing the Reply. This means that the send buffer
+(rq_res) shrinks when the received RPC record containing the RPC
+Call is large.
+
+A client can force this shrinkage on TCP by sending a correctly-
+formed RPC Call header contained in an RPC record that is
+excessively large. The full maximum payload size cannot be
+constructed in that case.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfsproc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index deaa34b89251..c540326c8e00 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -180,6 +180,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
+ argp->count, argp->offset);
+
+ argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
+
+ v = 0;
+ len = argp->count;
+--
+2.35.1
+
--- /dev/null
+From 4d327c12bb49db2ecc125a939bbc32a273a6607a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 15:10:24 -0400
+Subject: NFSD: Protect against send buffer overflow in NFSv3 READ
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit fa6be9cc6e80ec79892ddf08a8c10cabab9baf38 ]
+
+Since before the git era, NFSD has conserved the number of pages
+held by each nfsd thread by combining the RPC receive and send
+buffers into a single array of pages. This works because there are
+no cases where an operation needs a large RPC Call message and a
+large RPC Reply at the same time.
+
+Once an RPC Call has been received, svc_process() updates
+svc_rqst::rq_res to describe the part of rq_pages that can be
+used for constructing the Reply. This means that the send buffer
+(rq_res) shrinks when the received RPC record containing the RPC
+Call is large.
+
+A client can force this shrinkage on TCP by sending a correctly-
+formed RPC Call header contained in an RPC record that is
+excessively large. The full maximum payload size cannot be
+constructed in that case.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs3proc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 60faa5b8eccf..84e700a54d01 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -144,7 +144,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_readargs *argp = rqstp->rq_argp;
+ struct nfsd3_readres *resp = rqstp->rq_resp;
+- u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned int len;
+ int v;
+
+@@ -153,7 +152,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
+ (unsigned long) argp->count,
+ (unsigned long long) argp->offset);
+
+- argp->count = min_t(u32, argp->count, max_blocksize);
++ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp));
++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+--
+2.35.1
+
--- /dev/null
+From add4e4ab8734bc7519f9c446cb6b5a4aa1bb5b63 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Nov 2020 13:19:51 -0500
+Subject: NFSD: Replace READ* macros in nfsd4_decode_commit()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit cbd9abb3706e96563b36af67595707a7054ab693 ]
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4xdr.c | 12 +++++-------
+ include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++
+ 2 files changed, 26 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 69233350b061..d0af93a0558f 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -568,13 +568,11 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+ static __be32
+ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
+ {
+- DECODE_HEAD;
+-
+- READ_BUF(12);
+- p = xdr_decode_hyper(p, &commit->co_offset);
+- commit->co_count = be32_to_cpup(p++);
+-
+- DECODE_TAIL;
++ if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0)
++ return nfserr_bad_xdr;
++ if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
++ return nfserr_bad_xdr;
++ return nfs_ok;
+ }
+
+ static __be32
+diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
+index c03f7bf585c9..6b1757543747 100644
+--- a/include/linux/sunrpc/xdr.h
++++ b/include/linux/sunrpc/xdr.h
+@@ -569,6 +569,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
+ return 0;
+ }
+
++/**
++ * xdr_stream_decode_u64 - Decode a 64-bit integer
++ * @xdr: pointer to xdr_stream
++ * @ptr: location to store 64-bit integer
++ *
++ * Return values:
++ * %0 on success
++ * %-EBADMSG on XDR buffer overflow
++ */
++static inline ssize_t
++xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr)
++{
++ const size_t count = sizeof(*ptr);
++ __be32 *p = xdr_inline_decode(xdr, count);
++
++ if (unlikely(!p))
++ return -EBADMSG;
++ xdr_decode_hyper(p, ptr);
++ return 0;
++}
++
+ /**
+ * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data
+ * @xdr: pointer to xdr_stream
+--
+2.35.1
+
--- /dev/null
+From fbbbdfeb61de7dfbaa5e5498af2254b49f0cb417 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Nov 2020 11:54:23 -0500
+Subject: NFSD: Replace the internals of the READ_BUF() macro
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c1346a1216ab5cb04a265380ac9035d91b16b6d5 ]
+
+Convert the READ_BUF macro in nfs4xdr.c from open code to instead
+use the new xdr_stream-style decoders already in use by the encode
+side (and by the in-kernel NFS client implementation). Once this
+conversion is done, each individual NFSv4 argument decoder can be
+independently cleaned up to replace these macros with C code.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4proc.c | 4 +-
+ fs/nfsd/nfs4xdr.c | 181 ++++++-------------------------------
+ fs/nfsd/xdr4.h | 10 +-
+ include/linux/sunrpc/xdr.h | 2 +
+ net/sunrpc/xdr.c | 45 +++++++++
+ 5 files changed, 77 insertions(+), 165 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 1acafc39f008..5054dc66cbf9 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1024,8 +1024,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+
+ write->wr_how_written = write->wr_stable_how;
+
+- nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
+- &write->wr_head, write->wr_buflen);
++ nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages,
++ write->wr_payload.head, write->wr_buflen);
+ WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
+
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index e7b891a19bf8..69233350b061 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -129,90 +129,13 @@ xdr_error: \
+ memcpy((x), p, nbytes); \
+ p += XDR_QUADLEN(nbytes); \
+ } while (0)
+-
+-/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */
+-#define READ_BUF(nbytes) do { \
+- if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \
+- p = argp->p; \
+- argp->p += XDR_QUADLEN(nbytes); \
+- } else if (!(p = read_buf(argp, nbytes))) { \
+- dprintk("NFSD: xdr error (%s:%d)\n", \
+- __FILE__, __LINE__); \
+- goto xdr_error; \
+- } \
+-} while (0)
+-
+-static void next_decode_page(struct nfsd4_compoundargs *argp)
+-{
+- argp->p = page_address(argp->pagelist[0]);
+- argp->pagelist++;
+- if (argp->pagelen < PAGE_SIZE) {
+- argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
+- argp->pagelen = 0;
+- } else {
+- argp->end = argp->p + (PAGE_SIZE>>2);
+- argp->pagelen -= PAGE_SIZE;
+- }
+-}
+-
+-static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
+-{
+- /* We want more bytes than seem to be available.
+- * Maybe we need a new page, maybe we have just run out
+- */
+- unsigned int avail = (char *)argp->end - (char *)argp->p;
+- __be32 *p;
+-
+- if (argp->pagelen == 0) {
+- struct kvec *vec = &argp->rqstp->rq_arg.tail[0];
+-
+- if (!argp->tail) {
+- argp->tail = true;
+- avail = vec->iov_len;
+- argp->p = vec->iov_base;
+- argp->end = vec->iov_base + avail;
+- }
+-
+- if (avail < nbytes)
+- return NULL;
+-
+- p = argp->p;
+- argp->p += XDR_QUADLEN(nbytes);
+- return p;
+- }
+-
+- if (avail + argp->pagelen < nbytes)
+- return NULL;
+- if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
+- return NULL;
+- /* ok, we can do it with the current plus the next page */
+- if (nbytes <= sizeof(argp->tmp))
+- p = argp->tmp;
+- else {
+- kfree(argp->tmpp);
+- p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL);
+- if (!p)
+- return NULL;
+-
+- }
+- /*
+- * The following memcpy is safe because read_buf is always
+- * called with nbytes > avail, and the two cases above both
+- * guarantee p points to at least nbytes bytes.
+- */
+- memcpy(p, argp->p, avail);
+- next_decode_page(argp);
+- memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
+- argp->p += XDR_QUADLEN(nbytes - avail);
+- return p;
+-}
+-
+-static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
+-{
+- unsigned int this = (char *)argp->end - (char *)argp->p;
+-
+- return this + argp->pagelen;
+-}
++#define READ_BUF(nbytes) \
++ do { \
++ p = xdr_inline_decode(argp->xdr,\
++ nbytes); \
++ if (!p) \
++ goto xdr_error; \
++ } while (0)
+
+ static int zero_clientid(clientid_t *clid)
+ {
+@@ -259,44 +182,6 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
+ return p;
+ }
+
+-static __be32
+-svcxdr_construct_vector(struct nfsd4_compoundargs *argp, struct kvec *head,
+- struct page ***pagelist, u32 buflen)
+-{
+- int avail;
+- int len;
+- int pages;
+-
+- /* Sorry .. no magic macros for this.. *
+- * READ_BUF(write->wr_buflen);
+- * SAVEMEM(write->wr_buf, write->wr_buflen);
+- */
+- avail = (char *)argp->end - (char *)argp->p;
+- if (avail + argp->pagelen < buflen) {
+- dprintk("NFSD: xdr error (%s:%d)\n",
+- __FILE__, __LINE__);
+- return nfserr_bad_xdr;
+- }
+- head->iov_base = argp->p;
+- head->iov_len = avail;
+- *pagelist = argp->pagelist;
+-
+- len = XDR_QUADLEN(buflen) << 2;
+- if (len >= avail) {
+- len -= avail;
+-
+- pages = len >> PAGE_SHIFT;
+- argp->pagelist += pages;
+- argp->pagelen -= pages * PAGE_SIZE;
+- len -= pages * PAGE_SIZE;
+-
+- next_decode_page(argp);
+- }
+- argp->p += XDR_QUADLEN(len);
+-
+- return 0;
+-}
+-
+ /**
+ * savemem - duplicate a chunk of memory for later processing
+ * @argp: NFSv4 compound argument structure to be freed with
+@@ -396,7 +281,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
+ READ_BUF(4); len += 4;
+ nace = be32_to_cpup(p++);
+
+- if (nace > compoundargs_bytes_left(argp)/20)
++ if (nace > xdr_stream_remaining(argp->xdr) / sizeof(struct nfs4_ace))
+ /*
+ * Even with 4-byte names there wouldn't be
+ * space for that many aces; something fishy is
+@@ -927,7 +812,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)
+
+ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
+ {
+- __be32 *p;
++ DECODE_HEAD;
+
+ READ_BUF(4);
+ o->len = be32_to_cpup(p++);
+@@ -937,9 +822,8 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne
+
+ READ_BUF(o->len);
+ SAVEMEM(o->data, o->len);
+- return nfs_ok;
+-xdr_error:
+- return nfserr_bad_xdr;
++
++ DECODE_TAIL;
+ }
+
+ static __be32
+@@ -1317,10 +1201,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+ goto xdr_error;
+ write->wr_buflen = be32_to_cpup(p++);
+
+- status = svcxdr_construct_vector(argp, &write->wr_head,
+- &write->wr_pagelist, write->wr_buflen);
+- if (status)
+- return status;
++ if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen))
++ goto xdr_error;
+
+ DECODE_TAIL;
+ }
+@@ -1889,13 +1771,14 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+ */
+
+ /*
+- * Decode data into buffer. Uses head and pages constructed by
+- * svcxdr_construct_vector.
++ * Decode data into buffer.
+ */
+ static __be32
+-nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head,
+- struct page **pages, char **bufp, u32 buflen)
++nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr,
++ char **bufp, u32 buflen)
+ {
++ struct page **pages = xdr->pages;
++ struct kvec *head = xdr->head;
+ char *tmp, *dp;
+ u32 len;
+
+@@ -2010,8 +1893,6 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+ {
+ DECODE_HEAD;
+ u32 flags, maxcount, size;
+- struct kvec head;
+- struct page **pagelist;
+
+ READ_BUF(4);
+ flags = be32_to_cpup(p++);
+@@ -2034,12 +1915,12 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+
+ setxattr->setxa_len = size;
+ if (size > 0) {
+- status = svcxdr_construct_vector(argp, &head, &pagelist, size);
+- if (status)
+- return status;
++ struct xdr_buf payload;
+
+- status = nfsd4_vbuf_from_vector(argp, &head, pagelist,
+- &setxattr->setxa_buf, size);
++ if (!xdr_stream_subsegment(argp->xdr, &payload, size))
++ goto xdr_error;
++ status = nfsd4_vbuf_from_vector(argp, &payload,
++ &setxattr->setxa_buf, size);
+ }
+
+ DECODE_TAIL;
+@@ -5271,8 +5152,6 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
+ kfree(args->ops);
+ args->ops = args->iops;
+ }
+- kfree(args->tmpp);
+- args->tmpp = NULL;
+ while (args->to_free) {
+ struct svcxdr_tmpbuf *tb = args->to_free;
+ args->to_free = tb->next;
+@@ -5285,19 +5164,11 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
+ {
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
+- if (rqstp->rq_arg.head[0].iov_len % 4) {
+- /* client is nuts */
+- dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
+- __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
+- return 0;
+- }
+- args->p = p;
+- args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
+- args->pagelist = rqstp->rq_arg.pages;
+- args->pagelen = rqstp->rq_arg.page_len;
+- args->tail = false;
++ /* svcxdr_tmp_alloc */
+ args->tmpp = NULL;
+ args->to_free = NULL;
++
++ args->xdr = &rqstp->rq_arg_stream;
+ args->ops = args->iops;
+ args->rqstp = rqstp;
+
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 37f89ad5e992..0eb13bd603ea 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -419,8 +419,7 @@ struct nfsd4_write {
+ u64 wr_offset; /* request */
+ u32 wr_stable_how; /* request */
+ u32 wr_buflen; /* request */
+- struct kvec wr_head;
+- struct page ** wr_pagelist; /* request */
++ struct xdr_buf wr_payload; /* request */
+
+ u32 wr_bytes_written; /* response */
+ u32 wr_how_written; /* response */
+@@ -696,15 +695,10 @@ struct svcxdr_tmpbuf {
+
+ struct nfsd4_compoundargs {
+ /* scratch variables for XDR decode */
+- __be32 * p;
+- __be32 * end;
+- struct page ** pagelist;
+- int pagelen;
+- bool tail;
+ __be32 tmp[8];
+ __be32 * tmpp;
++ struct xdr_stream *xdr;
+ struct svcxdr_tmpbuf *to_free;
+-
+ struct svc_rqst *rqstp;
+
+ u32 taglen;
+diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
+index 0c8cab6210b3..c03f7bf585c9 100644
+--- a/include/linux/sunrpc/xdr.h
++++ b/include/linux/sunrpc/xdr.h
+@@ -252,6 +252,8 @@ extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
+ extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
+ extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t);
+ extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t);
++extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
++ unsigned int len);
+
+ /**
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
+index 02adc5c7f034..722586696fad 100644
+--- a/net/sunrpc/xdr.c
++++ b/net/sunrpc/xdr.c
+@@ -1412,6 +1412,51 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
+ }
+ EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
+
++/**
++ * xdr_stream_subsegment - set @subbuf to a portion of @xdr
++ * @xdr: an xdr_stream set up for decoding
++ * @subbuf: the result buffer
++ * @nbytes: length of @xdr to extract, in bytes
++ *
++ * Sets up @subbuf to represent a portion of @xdr. The portion
++ * starts at the current offset in @xdr, and extends for a length
++ * of @nbytes. If this is successful, @xdr is advanced to the next
++ * position following that portion.
++ *
++ * Return values:
++ * %true: @subbuf has been initialized, and @xdr has been advanced.
++ * %false: a bounds error has occurred
++ */
++bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
++ unsigned int nbytes)
++{
++ unsigned int remaining, offset, len;
++
++ if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes))
++ return false;
++
++ if (subbuf->head[0].iov_len)
++ if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len))
++ return false;
++
++ remaining = subbuf->page_len;
++ offset = subbuf->page_base;
++ while (remaining) {
++ len = min_t(unsigned int, remaining, PAGE_SIZE) - offset;
++
++ if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
++ return false;
++ if (!__xdr_inline_decode(xdr, len))
++ return false;
++
++ remaining -= len;
++ offset = 0;
++ }
++
++ return true;
++}
++EXPORT_SYMBOL_GPL(xdr_stream_subsegment);
++
+ /**
+ * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
+ * @buf: buf to be trimmed
+--
+2.35.1
+
--- /dev/null
+From f846eda22099bbc03b9706b9a0e4318b753d8077 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Oct 2020 14:30:02 -0400
+Subject: NFSD: Update GETATTR3args decoder to use struct xdr_stream
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 9575363a9e4c8d7e2f9ba5e79884d623fff0be6f ]
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs3proc.c | 3 +--
+ fs/nfsd/nfs3xdr.c | 31 +++++++++++++++++++++++++------
+ fs/nfsd/xdr3.h | 2 +-
+ 3 files changed, 27 insertions(+), 9 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index a4dfe8160d55..3d741ea482f4 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -688,7 +688,6 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
+ * NFSv3 Server procedures.
+ * Only the results of non-idempotent operations are cached.
+ */
+-#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle
+ #define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat
+ #define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat
+ #define nfsd3_mkdirargs nfsd3_createargs
+@@ -720,7 +719,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_decode = nfs3svc_decode_fhandleargs,
+ .pc_encode = nfs3svc_encode_attrstatres,
+ .pc_release = nfs3svc_release_fhandle,
+- .pc_argsize = sizeof(struct nfsd3_fhandleargs),
++ .pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd3_attrstatres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 9dc22d917bd2..ed500f254337 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -29,8 +29,9 @@ static u32 nfs3_ftypes[] = {
+
+
+ /*
+- * XDR functions for basic NFS types
++ * Basic NFSv3 data types (RFC 1813 Sections 2.5 and 2.6)
+ */
++
+ static __be32 *
+ encode_time3(__be32 *p, struct timespec64 *time)
+ {
+@@ -46,6 +47,26 @@ decode_time3(__be32 *p, struct timespec64 *time)
+ return p;
+ }
+
++static bool
++svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp)
++{
++ __be32 *p;
++ u32 size;
++
++ if (xdr_stream_decode_u32(xdr, &size) < 0)
++ return false;
++ if (size == 0 || size > NFS3_FHSIZE)
++ return false;
++ p = xdr_inline_decode(xdr, size);
++ if (!p)
++ return false;
++ fh_init(fhp, NFS3_FHSIZE);
++ fhp->fh_handle.fh_size = size;
++ memcpy(&fhp->fh_handle.fh_base, p, size);
++
++ return true;
++}
++
+ static __be32 *
+ decode_fh(__be32 *p, struct svc_fh *fhp)
+ {
+@@ -306,14 +327,12 @@ void fill_post_wcc(struct svc_fh *fhp)
+ */
+
+ int
+-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
+ {
++ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_fhandle *args = rqstp->rq_argp;
+
+- p = decode_fh(p, &args->fh);
+- if (!p)
+- return 0;
+- return xdr_argsize_check(rqstp, p);
++ return svcxdr_decode_nfs_fh3(xdr, &args->fh);
+ }
+
+ int
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index 456fcd7a1038..62ea669768cf 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -273,7 +273,7 @@ union nfsd3_xdrstore {
+
+ #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
+
+-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
++int nfs3svc_decode_fhandleargs(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+ int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+--
+2.35.1
+
--- /dev/null
+From ec8c3337fcceb6bc1f0cbbd1ae44d5e419cbf171 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Oct 2020 14:34:40 -0400
+Subject: NFSD: Update READ3arg decoder to use struct xdr_stream
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit be63bd2ac6bbf8c065a0ef6dfbea76934326c352 ]
+
+The code that sets up rq_vec is refactored so that it is now
+adjacent to the nfsd_read() call site where it is used.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs3proc.c | 23 ++++++++++++++++++-----
+ fs/nfsd/nfs3xdr.c | 28 +++++++---------------------
+ fs/nfsd/xdr3.h | 1 -
+ 3 files changed, 25 insertions(+), 27 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 3d741ea482f4..104e7d705ea8 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -144,25 +144,38 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_readargs *argp = rqstp->rq_argp;
+ struct nfsd3_readres *resp = rqstp->rq_resp;
+- u32 max_blocksize = svc_max_payload(rqstp);
+- unsigned long cnt = min(argp->count, max_blocksize);
++ u32 max_blocksize = svc_max_payload(rqstp);
++ unsigned int len;
++ int v;
++
++ argp->count = min_t(u32, argp->count, max_blocksize);
+
+ dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
+ SVCFH_fmt(&argp->fh),
+ (unsigned long) argp->count,
+ (unsigned long long) argp->offset);
+
++ v = 0;
++ len = argp->count;
++ while (len > 0) {
++ struct page *page = *(rqstp->rq_next_page++);
++
++ rqstp->rq_vec[v].iov_base = page_address(page);
++ rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
++ len -= rqstp->rq_vec[v].iov_len;
++ v++;
++ }
++
+ /* Obtain buffer pointer for payload.
+ * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
+ * + 1 (xdr opaque byte count) = 26
+ */
+- resp->count = cnt;
++ resp->count = argp->count;
+ svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
+
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
+- rqstp->rq_vec, argp->vlen, &resp->count,
+- &resp->eof);
++ rqstp->rq_vec, v, &resp->count, &resp->eof);
+ return rpc_success;
+ }
+
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index ed500f254337..0c51f241c047 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -382,31 +382,17 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+ int
+ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+ {
++ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_readargs *args = rqstp->rq_argp;
+- unsigned int len;
+- int v;
+- u32 max_blocksize = svc_max_payload(rqstp);
+
+- p = decode_fh(p, &args->fh);
+- if (!p)
++ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
++ return 0;
++ if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
++ return 0;
++ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return 0;
+- p = xdr_decode_hyper(p, &args->offset);
+-
+- args->count = ntohl(*p++);
+- len = min(args->count, max_blocksize);
+-
+- /* set up the kvec */
+- v=0;
+- while (len > 0) {
+- struct page *p = *(rqstp->rq_next_page++);
+
+- rqstp->rq_vec[v].iov_base = page_address(p);
+- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
+- len -= rqstp->rq_vec[v].iov_len;
+- v++;
+- }
+- args->vlen = v;
+- return xdr_argsize_check(rqstp, p);
++ return 1;
+ }
+
+ int
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index 62ea669768cf..68cbb9e24afa 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -32,7 +32,6 @@ struct nfsd3_readargs {
+ struct svc_fh fh;
+ __u64 offset;
+ __u32 count;
+- int vlen;
+ };
+
+ struct nfsd3_writeargs {
+--
+2.35.1
+
--- /dev/null
+From 1e265741e8e5554e2fd1e4a42aa49e827a518da2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Oct 2020 12:14:23 -0400
+Subject: NFSD: Update the NFSv2 GETATTR argument decoder to use struct
+ xdr_stream
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit ebcd8e8b28535b643a4c06685bd363b3b73a96af ]
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfsproc.c | 4 ++--
+ fs/nfsd/nfsxdr.c | 26 ++++++++++++++++++++------
+ fs/nfsd/xdr.h | 2 +-
+ 3 files changed, 23 insertions(+), 9 deletions(-)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index dbd8d3604653..5c187d3bcb57 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -626,7 +626,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ },
+ [NFSPROC_GETATTR] = {
+ .pc_func = nfsd_proc_getattr,
+- .pc_decode = nfssvc_decode_fhandle,
++ .pc_decode = nfssvc_decode_fhandleargs,
+ .pc_encode = nfssvc_encode_attrstat,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
+@@ -776,7 +776,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ },
+ [NFSPROC_STATFS] = {
+ .pc_func = nfsd_proc_statfs,
+- .pc_decode = nfssvc_decode_fhandle,
++ .pc_decode = nfssvc_decode_fhandleargs,
+ .pc_encode = nfssvc_encode_statfsres,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd_statfsres),
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index 13df5464a087..45d980d624f4 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -23,8 +23,9 @@ static u32 nfs_ftypes[] = {
+
+
+ /*
+- * XDR functions for basic NFS types
++ * Basic NFSv2 data types (RFC 1094 Section 2.3)
+ */
++
+ static __be32 *
+ decode_fh(__be32 *p, struct svc_fh *fhp)
+ {
+@@ -37,6 +38,21 @@ decode_fh(__be32 *p, struct svc_fh *fhp)
+ return p + (NFS_FHSIZE >> 2);
+ }
+
++static bool
++svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp)
++{
++ __be32 *p;
++
++ p = xdr_inline_decode(xdr, NFS_FHSIZE);
++ if (!p)
++ return false;
++ fh_init(fhp, NFS_FHSIZE);
++ memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE);
++ fhp->fh_handle.fh_size = NFS_FHSIZE;
++
++ return true;
++}
++
+ /* Helper function for NFSv2 ACL code */
+ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
+ {
+@@ -194,14 +210,12 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
+ */
+
+ int
+-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
+ {
++ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_fhandle *args = rqstp->rq_argp;
+
+- p = decode_fh(p, &args->fh);
+- if (!p)
+- return 0;
+- return xdr_argsize_check(rqstp, p);
++ return svcxdr_decode_fhandle(xdr, &args->fh);
+ }
+
+ int
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index edd87688ff86..50466ac6200c 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -144,7 +144,7 @@ union nfsd_xdrstore {
+ #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
+
+
+-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
++int nfssvc_decode_fhandleargs(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+ int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+--
+2.35.1
+
--- /dev/null
+From 8085ba35427e0fa7b6378cddbd1d5cc0e89c8b09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Oct 2020 12:15:51 -0400
+Subject: NFSD: Update the NFSv2 READ argument decoder to use struct xdr_stream
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 8c293ef993c8df0b1bea9ecb0de6eb96dec3ac9d ]
+
+The code that sets up rq_vec is refactored so that it is now
+adjacent to the nfsd_read() call site where it is used.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfsproc.c | 32 ++++++++++++++++++--------------
+ fs/nfsd/nfsxdr.c | 36 ++++++++++++------------------------
+ fs/nfsd/xdr.h | 1 -
+ 3 files changed, 30 insertions(+), 39 deletions(-)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 5c187d3bcb57..deaa34b89251 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -171,32 +171,36 @@ nfsd_proc_read(struct svc_rqst *rqstp)
+ {
+ struct nfsd_readargs *argp = rqstp->rq_argp;
+ struct nfsd_readres *resp = rqstp->rq_resp;
++ unsigned int len;
+ u32 eof;
++ int v;
+
+ dprintk("nfsd: READ %s %d bytes at %d\n",
+ SVCFH_fmt(&argp->fh),
+ argp->count, argp->offset);
+
++ argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
++
++ v = 0;
++ len = argp->count;
++ while (len > 0) {
++ struct page *page = *(rqstp->rq_next_page++);
++
++ rqstp->rq_vec[v].iov_base = page_address(page);
++ rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
++ len -= rqstp->rq_vec[v].iov_len;
++ v++;
++ }
++
+ /* Obtain buffer pointer for payload. 19 is 1 word for
+ * status, 17 words for fattr, and 1 word for the byte count.
+ */
+-
+- if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
+- char buf[RPC_MAX_ADDRBUFLEN];
+- printk(KERN_NOTICE
+- "oversized read request from %s (%d bytes)\n",
+- svc_print_addr(rqstp, buf, sizeof(buf)),
+- argp->count);
+- argp->count = NFSSVC_MAXBLKSIZE_V2;
+- }
+ svc_reserve_auth(rqstp, (19<<2) + argp->count + 4);
+
+ resp->count = argp->count;
+- resp->status = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
+- argp->offset,
+- rqstp->rq_vec, argp->vlen,
+- &resp->count,
+- &eof);
++ fh_copy(&resp->fh, &argp->fh);
++ resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
++ rqstp->rq_vec, v, &resp->count, &eof);
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index 45d980d624f4..a44c42d35351 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -246,33 +246,21 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
+ int
+ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+ {
++ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_readargs *args = rqstp->rq_argp;
+- unsigned int len;
+- int v;
+- p = decode_fh(p, &args->fh);
+- if (!p)
+- return 0;
++ u32 totalcount;
+
+- args->offset = ntohl(*p++);
+- len = args->count = ntohl(*p++);
+- p++; /* totalcount - unused */
+-
+- len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
++ if (!svcxdr_decode_fhandle(xdr, &args->fh))
++ return 0;
++ if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
++ return 0;
++ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
++ return 0;
++ /* totalcount is ignored */
++ if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
++ return 0;
+
+- /* set up somewhere to store response.
+- * We take pages, put them on reslist and include in iovec
+- */
+- v=0;
+- while (len > 0) {
+- struct page *p = *(rqstp->rq_next_page++);
+-
+- rqstp->rq_vec[v].iov_base = page_address(p);
+- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
+- len -= rqstp->rq_vec[v].iov_len;
+- v++;
+- }
+- args->vlen = v;
+- return xdr_argsize_check(rqstp, p);
++ return 1;
+ }
+
+ int
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index 50466ac6200c..7c704fa3215e 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -27,7 +27,6 @@ struct nfsd_readargs {
+ struct svc_fh fh;
+ __u32 offset;
+ __u32 count;
+- int vlen;
+ };
+
+ struct nfsd_writeargs {
+--
+2.35.1
+
--- /dev/null
+From 5d0a2f22b089bc5f655f8fe3365e7cb780b89ebe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 16:55:55 +0200
+Subject: nvme-hwmon: consistently ignore errors from nvme_hwmon_init
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 6b8cf94005187952f794c0c4ed3920a1e8accfa3 ]
+
+An NVMe controller works perfectly fine even when the hwmon
+initialization fails. Stop returning errors that do not come from a
+controller reset from nvme_hwmon_init to handle this case consistently.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Guenter Roeck <linux@roeck-us.net>
+Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
+Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c | 6 +++++-
+ drivers/nvme/host/hwmon.c | 13 ++++++++-----
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 51e5c12988fe..3f106771d15b 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -3232,8 +3232,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
+ return ret;
+
+ if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
++ /*
++ * Do not return errors unless we are in a controller reset,
++ * the controller works perfectly fine without hwmon.
++ */
+ ret = nvme_hwmon_init(ctrl);
+- if (ret < 0)
++ if (ret == -EINTR)
+ return ret;
+ }
+
+diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
+index 0a586d712920..23918bb7bdca 100644
+--- a/drivers/nvme/host/hwmon.c
++++ b/drivers/nvme/host/hwmon.c
+@@ -230,7 +230,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+- return 0;
++ return -ENOMEM;
+
+ data->ctrl = ctrl;
+ mutex_init(&data->read_lock);
+@@ -238,8 +238,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ err = nvme_hwmon_get_smart_log(data);
+ if (err) {
+ dev_warn(dev, "Failed to read smart log (error %d)\n", err);
+- kfree(data);
+- return err;
++ goto err_free_data;
+ }
+
+ hwmon = hwmon_device_register_with_info(dev, "nvme",
+@@ -247,11 +246,15 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ NULL);
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+- kfree(data);
+- return PTR_ERR(hwmon);
++ err = PTR_ERR(hwmon);
++ goto err_free_data;
+ }
+ ctrl->hwmon_device = hwmon;
+ return 0;
++
++err_free_data:
++ kfree(data);
++ return err;
+ }
+
+ void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
+--
+2.35.1
+
--- /dev/null
+From b6006cb7a37dfc50438b101482cb35a2872bc944 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 17:33:52 +0200
+Subject: nvme-hwmon: kmalloc the NVME SMART log buffer
+
+From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
+
+[ Upstream commit c94b7f9bab22ac504f9153767676e659988575ad ]
+
+Recent commit 52fde2c07da6 ("nvme: set dma alignment to dword") has
+caused a regression on our platform.
+
+It turned out that the nvme_get_log() method invocation caused the
+nvme_hwmon_data structure instance corruption. In particular the
+nvme_hwmon_data.ctrl pointer was overwritten either with zeros or with
+garbage. After some research we discovered that the problem happened
+even before the actual NVME DMA execution, but during the buffer mapping.
+Since our platform is DMA-noncoherent, the mapping implied the cache-line
+invalidations or write-backs depending on the DMA-direction parameter.
+In case of the NVME SMART log getting the DMA was performed
+from-device-to-memory, thus the cache-invalidation was activated during
+the buffer mapping. Since the log-buffer isn't cache-line aligned, the
+cache-invalidation caused the neighbour data to be discarded. The
+neighbouring data turned to be the data surrounding the buffer in the
+framework of the nvme_hwmon_data structure.
+
+In order to fix that we need to make sure that the whole log-buffer is
+defined within the cache-line-aligned memory region so the
+cache-invalidation procedure wouldn't involve the adjacent data. One of
+the option to guarantee that is to kmalloc the DMA-buffer [1]. Seeing the
+rest of the NVME core driver prefer that method it has been chosen to fix
+this problem too.
+
+Note after a deeper researches we found out that the denoted commit wasn't
+a root cause of the problem. It just revealed the invalidity by activating
+the DMA-based NVME SMART log getting performed in the framework of the
+NVME hwmon driver. The problem was here since the initial commit of the
+driver.
+
+[1] Documentation/core-api/dma-api-howto.rst
+
+Fixes: 400b6a7b13a3 ("nvme: Add hardware monitoring support")
+Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/hwmon.c | 23 ++++++++++++++++-------
+ 1 file changed, 16 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
+index 23918bb7bdca..9e6e56c20ec9 100644
+--- a/drivers/nvme/host/hwmon.c
++++ b/drivers/nvme/host/hwmon.c
+@@ -12,7 +12,7 @@
+
+ struct nvme_hwmon_data {
+ struct nvme_ctrl *ctrl;
+- struct nvme_smart_log log;
++ struct nvme_smart_log *log;
+ struct mutex read_lock;
+ };
+
+@@ -60,14 +60,14 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
+ {
+ return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
+- NVME_CSI_NVM, &data->log, sizeof(data->log), 0);
++ NVME_CSI_NVM, data->log, sizeof(*data->log), 0);
+ }
+
+ static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+ {
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+- struct nvme_smart_log *log = &data->log;
++ struct nvme_smart_log *log = data->log;
+ int temp;
+ int err;
+
+@@ -163,7 +163,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
+ case hwmon_temp_max:
+ case hwmon_temp_min:
+ if ((!channel && data->ctrl->wctemp) ||
+- (channel && data->log.temp_sensor[channel - 1])) {
++ (channel && data->log->temp_sensor[channel - 1])) {
+ if (data->ctrl->quirks &
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
+ return 0444;
+@@ -176,7 +176,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
+ break;
+ case hwmon_temp_input:
+ case hwmon_temp_label:
+- if (!channel || data->log.temp_sensor[channel - 1])
++ if (!channel || data->log->temp_sensor[channel - 1])
+ return 0444;
+ break;
+ default:
+@@ -232,13 +232,19 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ if (!data)
+ return -ENOMEM;
+
++ data->log = kzalloc(sizeof(*data->log), GFP_KERNEL);
++ if (!data->log) {
++ err = -ENOMEM;
++ goto err_free_data;
++ }
++
+ data->ctrl = ctrl;
+ mutex_init(&data->read_lock);
+
+ err = nvme_hwmon_get_smart_log(data);
+ if (err) {
+ dev_warn(dev, "Failed to read smart log (error %d)\n", err);
+- goto err_free_data;
++ goto err_free_log;
+ }
+
+ hwmon = hwmon_device_register_with_info(dev, "nvme",
+@@ -247,11 +253,13 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+ err = PTR_ERR(hwmon);
+- goto err_free_data;
++ goto err_free_log;
+ }
+ ctrl->hwmon_device = hwmon;
+ return 0;
+
++err_free_log:
++ kfree(data->log);
+ err_free_data:
+ kfree(data);
+ return err;
+@@ -265,6 +273,7 @@ void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
+
+ hwmon_device_unregister(ctrl->hwmon_device);
+ ctrl->hwmon_device = NULL;
++ kfree(data->log);
+ kfree(data);
+ }
+ }
+--
+2.35.1
+
--- /dev/null
+From 9b1decb3ed2711a2dbb96720490578f9a9d6f236 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Feb 2021 10:30:15 +0100
+Subject: nvme-hwmon: Return error code when registration fails
+
+From: Daniel Wagner <dwagner@suse.de>
+
+[ Upstream commit 78570f8873c8cd44c12714c7fa7db2601ec5617d ]
+
+The hwmon pointer wont be NULL if the registration fails. Though the
+exit code path will assign it to ctrl->hwmon_device. Later
+nvme_hwmon_exit() will try to free the invalid pointer. Avoid this by
+returning the error code from hwmon_device_register_with_info().
+
+Fixes: ed7770f66286 ("nvme/hwmon: rework to avoid devm allocation")
+Signed-off-by: Daniel Wagner <dwagner@suse.de>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/hwmon.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
+index 8f9e96986780..0a586d712920 100644
+--- a/drivers/nvme/host/hwmon.c
++++ b/drivers/nvme/host/hwmon.c
+@@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+ kfree(data);
++ return PTR_ERR(hwmon);
+ }
+ ctrl->hwmon_device = hwmon;
+ return 0;
+--
+2.35.1
+
--- /dev/null
+From 10615e7e2ba8e3473dbb0063ad45919c1a4689fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Jan 2021 07:43:18 +0100
+Subject: nvme-hwmon: rework to avoid devm allocation
+
+From: Hannes Reinecke <hare@suse.de>
+
+[ Upstream commit ed7770f6628691c13c9423bce7eee7cff2399c12 ]
+
+The original design to use device-managed resource allocation
+doesn't really work as the NVMe controller has a vastly different
+lifetime than the hwmon sysfs attributes, causing warning about
+duplicate sysfs entries upon reconnection.
+This patch reworks the hwmon allocation to avoid device-managed
+resource allocation, and uses the NVMe controller as parent for
+the sysfs attributes.
+
+Cc: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Hannes Reinecke <hare@suse.de>
+Tested-by: Enzo Matsumiya <ematsumiya@suse.de>
+Tested-by: Daniel Wagner <dwagner@suse.de>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c | 1 +
+ drivers/nvme/host/hwmon.c | 31 +++++++++++++++++++++----------
+ drivers/nvme/host/nvme.h | 8 ++++++++
+ 3 files changed, 30 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index e9c13804760e..51e5c12988fe 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -4485,6 +4485,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+
+ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+ {
++ nvme_hwmon_exit(ctrl);
+ nvme_fault_inject_fini(&ctrl->fault_inject);
+ dev_pm_qos_hide_latency_tolerance(ctrl->device);
+ cdev_device_del(&ctrl->cdev, ctrl->device);
+diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
+index 552dbc04567b..8f9e96986780 100644
+--- a/drivers/nvme/host/hwmon.c
++++ b/drivers/nvme/host/hwmon.c
+@@ -223,12 +223,12 @@ static const struct hwmon_chip_info nvme_hwmon_chip_info = {
+
+ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ {
+- struct device *dev = ctrl->dev;
++ struct device *dev = ctrl->device;
+ struct nvme_hwmon_data *data;
+ struct device *hwmon;
+ int err;
+
+- data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return 0;
+
+@@ -237,19 +237,30 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+
+ err = nvme_hwmon_get_smart_log(data);
+ if (err) {
+- dev_warn(ctrl->device,
+- "Failed to read smart log (error %d)\n", err);
+- devm_kfree(dev, data);
++ dev_warn(dev, "Failed to read smart log (error %d)\n", err);
++ kfree(data);
+ return err;
+ }
+
+- hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data,
+- &nvme_hwmon_chip_info,
+- NULL);
++ hwmon = hwmon_device_register_with_info(dev, "nvme",
++ data, &nvme_hwmon_chip_info,
++ NULL);
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+- devm_kfree(dev, data);
++ kfree(data);
+ }
+-
++ ctrl->hwmon_device = hwmon;
+ return 0;
+ }
++
++void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
++{
++ if (ctrl->hwmon_device) {
++ struct nvme_hwmon_data *data =
++ dev_get_drvdata(ctrl->hwmon_device);
++
++ hwmon_device_unregister(ctrl->hwmon_device);
++ ctrl->hwmon_device = NULL;
++ kfree(data);
++ }
++}
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index 58cf9e39d613..abae7ef2ac51 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -257,6 +257,9 @@ struct nvme_ctrl {
+ struct rw_semaphore namespaces_rwsem;
+ struct device ctrl_device;
+ struct device *device; /* char device */
++#ifdef CONFIG_NVME_HWMON
++ struct device *hwmon_device;
++#endif
+ struct cdev cdev;
+ struct work_struct reset_work;
+ struct work_struct delete_work;
+@@ -876,11 +879,16 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
+
+ #ifdef CONFIG_NVME_HWMON
+ int nvme_hwmon_init(struct nvme_ctrl *ctrl);
++void nvme_hwmon_exit(struct nvme_ctrl *ctrl);
+ #else
+ static inline int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ {
+ return 0;
+ }
++
++static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
++{
++}
+ #endif
+
+ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+--
+2.35.1
+
--- /dev/null
+From 5ba80251870dc82e661ccc83e6d60223d07dd2a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Mar 2021 13:11:38 +0800
+Subject: perf pmu: Validate raw event with sysfs exported format bits
+
+From: Jin Yao <yao.jin@linux.intel.com>
+
+[ Upstream commit e40647762fb5881360874e08e03e972d58d63c42 ]
+
+A raw PMU event (eventsel+umask) in the form of rNNN is supported
+by perf but lacks of checking for the validity of raw encoding.
+
+For example, bit 16 and bit 17 are not valid on KBL but perf doesn't
+report warning when encoding with these bits.
+
+Before:
+
+ # ./perf stat -e cpu/r031234/ -a -- sleep 1
+
+ Performance counter stats for 'system wide':
+
+ 0 cpu/r031234/
+
+ 1.003798924 seconds time elapsed
+
+It may silently measure the wrong event!
+
+The kernel supported bits have been exported through
+/sys/devices/<pmu>/format/. Perf collects the information to
+'struct perf_pmu_format' and links it to 'pmu->format' list.
+
+The 'struct perf_pmu_format' has a bitmap which records the
+valid bits for this format. For example,
+
+ root@kbl-ppc:/sys/devices/cpu/format# cat umask
+ config:8-15
+
+The valid bits (bit8-bit15) are recorded in bitmap of format 'umask'.
+
+We collect total valid bits of all formats, save to a local variable
+'masks' and reverse it. Now '~masks' represents total invalid bits.
+
+bits = config & ~masks;
+
+The set bits in 'bits' indicate the invalid bits used in config.
+Finally we use bitmap_scnprintf to report the invalid bits.
+
+Some architectures may not export supported bits through sysfs,
+so if masks is 0, perf_pmu__warn_invalid_config directly returns.
+
+After:
+
+Single event without name:
+
+ # ./perf stat -e cpu/r031234/ -a -- sleep 1
+ WARNING: event 'N/A' not valid (bits 16-17 of config '31234' not supported by kernel)!
+
+ Performance counter stats for 'system wide':
+
+ 0 cpu/r031234/
+
+ 1.001597373 seconds time elapsed
+
+Multiple events with names:
+
+ # ./perf stat -e cpu/rf01234,name=aaa/,cpu/r031234,name=bbb/ -a -- sleep 1
+ WARNING: event 'aaa' not valid (bits 20,22 of config 'f01234' not supported by kernel)!
+ WARNING: event 'bbb' not valid (bits 16-17 of config '31234' not supported by kernel)!
+
+ Performance counter stats for 'system wide':
+
+ 0 aaa
+ 0 bbb
+
+ 1.001573787 seconds time elapsed
+
+Warnings are reported for invalid bits.
+
+Co-developed-by: Jiri Olsa <jolsa@redhat.com>
+Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
+Reviewed-by: Jiri Olsa <jolsa@redhat.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Jin Yao <yao.jin@intel.com>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lore.kernel.org/lkml/20210310051138.12154-1-yao.jin@linux.intel.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: e552b7be12ed ("perf: Skip and warn on unknown format 'configN' attrs")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/parse-events.c | 3 +++
+ tools/perf/util/pmu.c | 33 +++++++++++++++++++++++++++++++++
+ tools/perf/util/pmu.h | 3 +++
+ 3 files changed, 39 insertions(+)
+
+diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
+index 3a0a7930cd10..36969fc8f1fc 100644
+--- a/tools/perf/util/parse-events.c
++++ b/tools/perf/util/parse-events.c
+@@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx,
+ struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
+ cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+
++ if (pmu && attr->type == PERF_TYPE_RAW)
++ perf_pmu__warn_invalid_config(pmu, attr->config, name);
++
+ if (init_attr)
+ event_attr_init(attr);
+
+diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
+index d41caeb35cf6..349012f7defb 100644
+--- a/tools/perf/util/pmu.c
++++ b/tools/perf/util/pmu.c
+@@ -1716,3 +1716,36 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
+
+ return nr_caps;
+ }
++
++void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
++ char *name)
++{
++ struct perf_pmu_format *format;
++ __u64 masks = 0, bits;
++ char buf[100];
++ unsigned int i;
++
++ list_for_each_entry(format, &pmu->format, list) {
++ if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG)
++ continue;
++
++ for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS)
++ masks |= 1ULL << i;
++ }
++
++ /*
++ * Kernel doesn't export any valid format bits.
++ */
++ if (masks == 0)
++ return;
++
++ bits = config & ~masks;
++ if (bits == 0)
++ return;
++
++ bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf));
++
++ pr_warning("WARNING: event '%s' not valid (bits %s of config "
++ "'%llx' not supported by kernel)!\n",
++ name ?: "N/A", buf, config);
++}
+diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
+index a64e9c9ce731..d9aa8c958d21 100644
+--- a/tools/perf/util/pmu.h
++++ b/tools/perf/util/pmu.h
+@@ -120,4 +120,7 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+
+ int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
++void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
++ char *name);
++
+ #endif /* __PMU_H */
+--
+2.35.1
+
--- /dev/null
+From bec74548fda891a38764e2bf08e0063c3f82dc33 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Oct 2022 14:12:35 -0500
+Subject: perf: Skip and warn on unknown format 'configN' attrs
+
+From: Rob Herring <robh@kernel.org>
+
+[ Upstream commit e552b7be12ed62357df84392efa525ecb01910fb ]
+
+If the kernel exposes a new perf_event_attr field in a format attr, perf
+will return an error stating the specified PMU can't be found. For
+example, a format attr with 'config3:0-63' causes an error as config3 is
+unknown to perf. This causes a compatibility issue between a newer
+kernel with older perf tool.
+
+Before this change with a kernel adding 'config3' I get:
+
+ $ perf record -e arm_spe// -- true
+ event syntax error: 'arm_spe//'
+ \___ Cannot find PMU `arm_spe'. Missing kernel support?
+ Run 'perf list' for a list of valid events
+
+ Usage: perf record [<options>] [<command>]
+ or: perf record [<options>] -- <command> [<options>]
+
+ -e, --event <event> event selector. use 'perf list' to list
+ available events
+
+After this change, I get:
+
+ $ perf record -e arm_spe// -- true
+ WARNING: 'arm_spe_0' format 'inv_event_filter' requires 'perf_event_attr::config3' which is not supported by this version of perf!
+ [ perf record: Woken up 2 times to write data ]
+ [ perf record: Captured and wrote 0.091 MB perf.data ]
+
+To support unknown configN formats, rework the YACC implementation to
+pass any config[0-9]+ format to perf_pmu__new_format() to handle with a
+warning.
+
+Reviewed-by: Namhyung Kim <namhyung@kernel.org>
+Signed-off-by: Rob Herring <robh@kernel.org>
+Tested-by: Leo Yan <leo.yan@linaro.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: James Clark <james.clark@arm.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220914-arm-perf-tool-spe1-2-v2-v4-1-83c098e6212e@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/parse-events.c | 3 +++
+ tools/perf/util/pmu.c | 17 +++++++++++++++++
+ tools/perf/util/pmu.h | 2 ++
+ tools/perf/util/pmu.l | 2 --
+ tools/perf/util/pmu.y | 15 ++++-----------
+ 5 files changed, 26 insertions(+), 13 deletions(-)
+
+diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
+index 36969fc8f1fc..c56a4d9c3be9 100644
+--- a/tools/perf/util/parse-events.c
++++ b/tools/perf/util/parse-events.c
+@@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx,
+ struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
+ cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+
++ if (pmu)
++ perf_pmu__warn_invalid_formats(pmu);
++
+ if (pmu && attr->type == PERF_TYPE_RAW)
+ perf_pmu__warn_invalid_config(pmu, attr->config, name);
+
+diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
+index 349012f7defb..ac45da0302a7 100644
+--- a/tools/perf/util/pmu.c
++++ b/tools/perf/util/pmu.c
+@@ -862,6 +862,23 @@ static struct perf_pmu *pmu_lookup(const char *name)
+ return pmu;
+ }
+
++void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
++{
++ struct perf_pmu_format *format;
++
++ /* fake pmu doesn't have format list */
++ if (pmu == &perf_pmu__fake)
++ return;
++
++ list_for_each_entry(format, &pmu->format, list)
++ if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) {
++ pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'"
++ "which is not supported by this version of perf!\n",
++ pmu->name, format->name, format->value);
++ return;
++ }
++}
++
+ static struct perf_pmu *pmu_find(const char *name)
+ {
+ struct perf_pmu *pmu;
+diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
+index d9aa8c958d21..7d208b850769 100644
+--- a/tools/perf/util/pmu.h
++++ b/tools/perf/util/pmu.h
+@@ -15,6 +15,7 @@ enum {
+ PERF_PMU_FORMAT_VALUE_CONFIG,
+ PERF_PMU_FORMAT_VALUE_CONFIG1,
+ PERF_PMU_FORMAT_VALUE_CONFIG2,
++ PERF_PMU_FORMAT_VALUE_CONFIG_END,
+ };
+
+ #define PERF_PMU_FORMAT_BITS 64
+@@ -122,5 +123,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
+ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+ char *name);
++void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
+
+ #endif /* __PMU_H */
+diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l
+index a15d9fbd7c0e..58b4926cfaca 100644
+--- a/tools/perf/util/pmu.l
++++ b/tools/perf/util/pmu.l
+@@ -27,8 +27,6 @@ num_dec [0-9]+
+
+ {num_dec} { return value(10); }
+ config { return PP_CONFIG; }
+-config1 { return PP_CONFIG1; }
+-config2 { return PP_CONFIG2; }
+ - { return '-'; }
+ : { return ':'; }
+ , { return ','; }
+diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
+index bfd7e8509869..283efe059819 100644
+--- a/tools/perf/util/pmu.y
++++ b/tools/perf/util/pmu.y
+@@ -20,7 +20,7 @@ do { \
+
+ %}
+
+-%token PP_CONFIG PP_CONFIG1 PP_CONFIG2
++%token PP_CONFIG
+ %token PP_VALUE PP_ERROR
+ %type <num> PP_VALUE
+ %type <bits> bit_term
+@@ -47,18 +47,11 @@ PP_CONFIG ':' bits
+ $3));
+ }
+ |
+-PP_CONFIG1 ':' bits
++PP_CONFIG PP_VALUE ':' bits
+ {
+ ABORT_ON(perf_pmu__new_format(format, name,
+- PERF_PMU_FORMAT_VALUE_CONFIG1,
+- $3));
+-}
+-|
+-PP_CONFIG2 ':' bits
+-{
+- ABORT_ON(perf_pmu__new_format(format, name,
+- PERF_PMU_FORMAT_VALUE_CONFIG2,
+- $3));
++ $2,
++ $4));
+ }
+
+ bits:
+--
+2.35.1
+
--- /dev/null
+From 44dd123969ab1fda162e65b27c98f6cc86979678 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Sep 2022 11:49:13 +0100
+Subject: Revert "crypto: qat - reduce size of mapped region"
+
+From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+
+[ Upstream commit 9c5f21b198d259bfe1191b1fedf08e2eab15b33b ]
+
+This reverts commit e48767c17718067ba21fb2ef461779ec2506f845.
+
+In an attempt to resolve a set of warnings reported by the static
+analyzer Smatch, the reverted commit improperly reduced the sizes of the
+DMA mappings used for the input and output parameters for both RSA and
+DH creating a mismatch (map size=8 bytes, unmap size=64 bytes).
+
+This issue is reported when CONFIG_DMA_API_DEBUG is selected, when the
+crypto self test is run. The function dma_unmap_single() reports a
+warning similar to the one below, saying that the `device driver frees
+DMA memory with different size`.
+
+ DMA-API: 4xxx 0000:06:00.0: device driver frees DMA memory with different size [device address=0x0000000123206c80] [map size=8 bytes] [unmap size=64 bytes]
+ WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:973 check_unmap+0x3d0/0x8c0\
+ ...
+ Call Trace:
+ <IRQ>
+ debug_dma_unmap_page+0x5c/0x60
+ qat_dh_cb+0xd7/0x110 [intel_qat]
+ qat_alg_asym_callback+0x1a/0x30 [intel_qat]
+ adf_response_handler+0xbd/0x1a0 [intel_qat]
+ tasklet_action_common.constprop.0+0xcd/0xe0
+ __do_softirq+0xf8/0x30c
+ __irq_exit_rcu+0xbf/0x140
+ common_interrupt+0xb9/0xd0
+ </IRQ>
+ <TASK>
+
+The original commit was correct.
+
+Cc: <stable@vger.kernel.org>
+Reported-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/crypto/qat/qat_common/qat_asym_algs.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
+index 2b1aca487fc3..846569ec9066 100644
+--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
++++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
+@@ -326,13 +326,13 @@ static int qat_dh_compute_value(struct kpp_request *req)
+ qat_req->out.dh.out_tab[1] = 0;
+ /* Mapping in.in.b or in.in_g2.xa is the same */
+ qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b,
+- sizeof(qat_req->in.dh.in.b),
++ sizeof(struct qat_dh_input_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+ qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r,
+- sizeof(qat_req->out.dh.r),
++ sizeof(struct qat_dh_output_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+@@ -721,13 +721,13 @@ static int qat_rsa_enc(struct akcipher_request *req)
+ qat_req->in.rsa.in_tab[3] = 0;
+ qat_req->out.rsa.out_tab[1] = 0;
+ qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m,
+- sizeof(qat_req->in.rsa.enc.m),
++ sizeof(struct qat_rsa_input_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+ qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c,
+- sizeof(qat_req->out.rsa.enc.c),
++ sizeof(struct qat_rsa_output_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+@@ -869,13 +869,13 @@ static int qat_rsa_dec(struct akcipher_request *req)
+ qat_req->in.rsa.in_tab[3] = 0;
+ qat_req->out.rsa.out_tab[1] = 0;
+ qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c,
+- sizeof(qat_req->in.rsa.dec.c),
++ sizeof(struct qat_rsa_input_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+ qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m,
+- sizeof(qat_req->out.rsa.dec.m),
++ sizeof(struct qat_rsa_output_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+--
+2.35.1
+
--- /dev/null
+From f70267a06e7f8e62e815e2253de4b2ac7fa5a28b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Nov 2020 19:44:15 +0800
+Subject: riscv: Add machine name to kernel boot log and stack dump output
+
+From: Kefeng Wang <wangkefeng.wang@huawei.com>
+
+[ Upstream commit 46ad48e8a28da7cc37a16c7e7fc632ecf906e4bf ]
+
+Add the machine name to kernel boot-up log, and install
+the machine name to stack dump for DT boot mode.
+
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Reviewed-by: Atish Patra <atish.patra@wdc.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Stable-dep-of: 10f6913c548b ("riscv: always honor the CONFIG_CMDLINE_FORCE when parsing dtb")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/setup.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
+index 117f3212a8e4..5d17d3ce36fd 100644
+--- a/arch/riscv/kernel/setup.c
++++ b/arch/riscv/kernel/setup.c
+@@ -54,8 +54,15 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices);
+ static void __init parse_dtb(void)
+ {
+ /* Early scan of device tree from init memory */
+- if (early_init_dt_scan(dtb_early_va))
++ if (early_init_dt_scan(dtb_early_va)) {
++ const char *name = of_flat_dt_get_machine_name();
++
++ if (name) {
++ pr_info("Machine model: %s\n", name);
++ dump_stack_set_arch_desc("%s (DT)", name);
++ }
+ return;
++ }
+
+ pr_err("No DTB passed to the kernel\n");
+ #ifdef CONFIG_CMDLINE_FORCE
+--
+2.35.1
+
--- /dev/null
+From 2c967e87d16194adbf78dfc575762eb3f5d226e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 16:38:22 -0400
+Subject: riscv: always honor the CONFIG_CMDLINE_FORCE when parsing dtb
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Wenting Zhang <zephray@outlook.com>
+
+[ Upstream commit 10f6913c548b32ecb73801a16b120e761c6957ea ]
+
+When CONFIG_CMDLINE_FORCE is enabled, cmdline provided by
+CONFIG_CMDLINE are always used. This allows CONFIG_CMDLINE to be
+used regardless of the result of device tree scanning.
+
+This especially fixes the case where a device tree without the
+chosen node is supplied to the kernel. In such cases,
+early_init_dt_scan would return true. But inside
+early_init_dt_scan_chosen, the cmdline won't be updated as there
+is no chosen node in the device tree. As a result, CONFIG_CMDLINE
+is not copied into boot_command_line even if CONFIG_CMDLINE_FORCE
+is enabled. This commit allows boot_command_line to be properly
+updated in this situation.
+
+Fixes: 8fd6e05c7463 ("arch: riscv: support kernel command line forcing when no DTB passed")
+Signed-off-by: Wenting Zhang <zephray@outlook.com>
+Reviewed-by: Björn Töpel <bjorn@kernel.org>
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/PSBPR04MB399135DFC54928AB958D0638B1829@PSBPR04MB3991.apcprd04.prod.outlook.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/setup.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
+index 5d17d3ce36fd..cc85858f7fe8 100644
+--- a/arch/riscv/kernel/setup.c
++++ b/arch/riscv/kernel/setup.c
+@@ -61,10 +61,10 @@ static void __init parse_dtb(void)
+ pr_info("Machine model: %s\n", name);
+ dump_stack_set_arch_desc("%s (DT)", name);
+ }
+- return;
++ } else {
++ pr_err("No DTB passed to the kernel\n");
+ }
+
+- pr_err("No DTB passed to the kernel\n");
+ #ifdef CONFIG_CMDLINE_FORCE
+ strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ pr_info("Forcing kernel command line to: %s\n", boot_command_line);
+--
+2.35.1
+
blk-wbt-call-rq_qos_add-after-wb_normal-is-initialized.patch
arm64-errata-remove-aes-hwcap-for-compat-tasks.patch
r8152-add-pid-for-the-lenovo-onelink-dock.patch
+btrfs-fix-processing-of-delayed-data-refs-during-bac.patch
+btrfs-fix-processing-of-delayed-tree-block-refs-duri.patch
+acpi-extlog-handle-multiple-records.patch
+tipc-fix-recognition-of-trial-period.patch
+tipc-fix-an-information-leak-in-tipc_topsrv_kern_sub.patch
+i40e-fix-dma-mappings-leak.patch
+hid-magicmouse-do-not-set-btn_mouse-on-double-report.patch
+sfc-change-vf-mac-via-pf-as-first-preference-if-avai.patch
+net-atm-fix-proc_mpc_write-incorrect-return-value.patch
+net-phy-dp83867-extend-rx-strap-quirk-for-sgmii-mode.patch
+tcp-add-num_closed_socks-to-struct-sock_reuseport.patch
+udp-update-reuse-has_conns-under-reuseport_lock.patch
+cifs-fix-xid-leak-in-cifs_copy_file_range.patch
+cifs-fix-xid-leak-in-cifs_flock.patch
+cifs-fix-xid-leak-in-cifs_ses_add_channel.patch
+net-hsr-avoid-possible-null-deref-in-skb_clone.patch
+ionic-catch-null-pointer-issue-on-reconfig.patch
+libbpf-use-is_err_or_null-in-hashmap__free.patch
+nvme-hwmon-rework-to-avoid-devm-allocation.patch
+nvme-hwmon-return-error-code-when-registration-fails.patch
+nvme-hwmon-consistently-ignore-errors-from-nvme_hwmo.patch
+nvme-hwmon-kmalloc-the-nvme-smart-log-buffer.patch
+net-sched-cake-fix-null-pointer-access-issue-when-ca.patch
+net-sched-delete-duplicate-cleanup-of-backlog-and-ql.patch
+net-sched-sfb-fix-null-pointer-access-issue-when-sfb.patch
+sfc-include-vport_id-in-filter-spec-hash-and-equal.patch
+net-hns-fix-possible-memory-leak-in-hnae_ae_register.patch
+net-sched-fix-race-condition-in-qdisc_graft.patch
+net-phy-dp83822-disable-mdi-crossover-status-change-.patch
+iommu-vt-d-allow-nvs-regions-in-arch_rmrr_sanity_che.patch
+iommu-vt-d-clean-up-si_domain-in-the-init_dmars-erro.patch
+drm-virtio-use-appropriate-atomic-state-in-virtio_gp.patch
+crypto-qat-reduce-size-of-mapped-region.patch
+revert-crypto-qat-reduce-size-of-mapped-region.patch
+usb-add-reset_resume-quirk-for-nvidia-jetson-devices.patch
+kernfs-fix-use-after-free-in-__kernfs_remove.patch
+sunrpc-add-xdr_set_scratch_page-and-xdr_reset_scratc.patch
+sunrpc-prepare-for-xdr_stream-style-decoding-on-the-.patch
+nfsd-add-common-helpers-to-decode-void-args-and-enco.patch
+nfsd-update-the-nfsv2-getattr-argument-decoder-to-us.patch
+nfsd-update-the-nfsv2-read-argument-decoder-to-use-s.patch
+nfsd-protect-against-send-buffer-overflow-in-nfsv2-r.patch
+nfsd-replace-the-internals-of-the-read_buf-macro.patch
+nfsd-replace-read-macros-in-nfsd4_decode_commit.patch
+nfsd-update-getattr3args-decoder-to-use-struct-xdr_s.patch
+nfsd-update-read3arg-decoder-to-use-struct-xdr_strea.patch
+nfsd-fix-the-behavior-of-read-near-offset_max.patch
+nfsd-protect-against-send-buffer-overflow-in-nfsv3-r.patch
+dmaengine-mxs-dma-remove-the-unused-.id_table.patch
+dmaengine-mxs-use-platform_driver_register.patch
+alsa-hda-realtek-fix-speakers-and-micmute-on-hp-855-.patch
+writeback-don-t-warn-on-an-unregistered-bdi-in-__mar.patch
+fs-correctly-document-the-inode-dirty-flags.patch
+fs-don-t-call-dirty_inode-for-lazytime-timestamp-upd.patch
+fs-pass-only-i_dirty_inode-flags-to-dirty_inode.patch
+fs-clean-up-__mark_inode_dirty-a-bit.patch
+writeback-cgroup-keep-list-of-inodes-attached-to-bdi.patch
+writeback-avoid-skipping-inode-writeback.patch
+writeback-fix-inode-i_io_list-not-be-protected-by-in.patch
+fs-record-i_dirty_time-even-if-inode-already-has-i_d.patch
+tracing-simplify-conditional-compilation-code-in-tra.patch
+tracing-do-not-free-snapshot-if-tracer-is-on-cmdline.patch
+xen-assume-xenfeat_gnttab_map_avail_bits-being-set-f.patch
+xen-gntdev-accommodate-vma-splitting.patch
+mmc-core-support-zeroout-using-trim-for-emmc.patch
+mmc-core-add-sd-card-quirk-for-broken-discard.patch
+mmc-sdhci-tegra-use-actual-clock-rate-for-sw-tuning-.patch
+riscv-add-machine-name-to-kernel-boot-log-and-stack-.patch
+riscv-always-honor-the-config_cmdline_force-when-par.patch
+perf-pmu-validate-raw-event-with-sysfs-exported-form.patch
+perf-skip-and-warn-on-unknown-format-confign-attrs.patch
+fcntl-make-f_getown-ex-return-0-on-dead-owner-task.patch
+fcntl-fix-potential-deadlocks-for-fown_struct.lock.patch
--- /dev/null
+From 5614b64913269b7e6ee3aab8e68784a56e2755c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Oct 2022 10:55:53 +0100
+Subject: sfc: Change VF mac via PF as first preference if available.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jonathan Cooper <jonathan.s.cooper@amd.com>
+
+[ Upstream commit a8aed7b35becfd21f22a77c7014029ea837b018f ]
+
+Changing a VF's mac address through the VF (rather than via the PF)
+fails with EPERM because the latter part of efx_ef10_set_mac_address
+attempts to change the vport mac address list as the VF.
+Even with this fixed it still fails with EBUSY because the vadaptor
+is still assigned on the VF - the vadaptor reassignment must be within
+a section where the VF has torn down its state.
+
+A major reason this has broken is because we have two functions that
+ostensibly do the same thing - have a PF and VF cooperate to change a
+VF mac address. Rather than do this, if we are changing the mac of a VF
+that has a link to the PF in the same VM then simply call
+sriov_set_vf_mac instead, which is a proven working function that does
+that.
+
+If there is no PF available, or that fails non-fatally, then attempt to
+change the VF's mac address as we would a PF, without updating the PF's
+data.
+
+Test case:
+Create a VF:
+ echo 1 > /sys/class/net/<if>/device/sriov_numvfs
+Set the mac address of the VF directly:
+ ip link set <vf> addr 00:11:22:33:44:55
+Set the MAC address of the VF via the PF:
+ ip link set <pf> vf 0 mac 00:11:22:33:44:66
+Without this patch the last command will fail with ENOENT.
+
+Signed-off-by: Jonathan Cooper <jonathan.s.cooper@amd.com>
+Reported-by: Íñigo Huguet <ihuguet@redhat.com>
+Fixes: 910c8789a777 ("set the MAC address using MC_CMD_VADAPTOR_SET_MAC")
+Acked-by: Edward Cree <ecree.xilinx@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/sfc/ef10.c | 58 ++++++++++++++-------------------
+ 1 file changed, 24 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
+index 5b7413305be6..eb1be7302082 100644
+--- a/drivers/net/ethernet/sfc/ef10.c
++++ b/drivers/net/ethernet/sfc/ef10.c
+@@ -3255,6 +3255,30 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
+ bool was_enabled = efx->port_enabled;
+ int rc;
+
++#ifdef CONFIG_SFC_SRIOV
++ /* If this function is a VF and we have access to the parent PF,
++ * then use the PF control path to attempt to change the VF MAC address.
++ */
++ if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) {
++ struct efx_nic *efx_pf = pci_get_drvdata(efx->pci_dev->physfn);
++ struct efx_ef10_nic_data *nic_data = efx->nic_data;
++ u8 mac[ETH_ALEN];
++
++ /* net_dev->dev_addr can be zeroed by efx_net_stop in
++ * efx_ef10_sriov_set_vf_mac, so pass in a copy.
++ */
++ ether_addr_copy(mac, efx->net_dev->dev_addr);
++
++ rc = efx_ef10_sriov_set_vf_mac(efx_pf, nic_data->vf_index, mac);
++ if (!rc)
++ return 0;
++
++ netif_dbg(efx, drv, efx->net_dev,
++ "Updating VF mac via PF failed (%d), setting directly\n",
++ rc);
++ }
++#endif
++
+ efx_device_detach_sync(efx);
+ efx_net_stop(efx->net_dev);
+
+@@ -3277,40 +3301,6 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
+ efx_net_open(efx->net_dev);
+ efx_device_attach_if_not_resetting(efx);
+
+-#ifdef CONFIG_SFC_SRIOV
+- if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) {
+- struct efx_ef10_nic_data *nic_data = efx->nic_data;
+- struct pci_dev *pci_dev_pf = efx->pci_dev->physfn;
+-
+- if (rc == -EPERM) {
+- struct efx_nic *efx_pf;
+-
+- /* Switch to PF and change MAC address on vport */
+- efx_pf = pci_get_drvdata(pci_dev_pf);
+-
+- rc = efx_ef10_sriov_set_vf_mac(efx_pf,
+- nic_data->vf_index,
+- efx->net_dev->dev_addr);
+- } else if (!rc) {
+- struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf);
+- struct efx_ef10_nic_data *nic_data = efx_pf->nic_data;
+- unsigned int i;
+-
+- /* MAC address successfully changed by VF (with MAC
+- * spoofing) so update the parent PF if possible.
+- */
+- for (i = 0; i < efx_pf->vf_count; ++i) {
+- struct ef10_vf *vf = nic_data->vf + i;
+-
+- if (vf->efx == efx) {
+- ether_addr_copy(vf->mac,
+- efx->net_dev->dev_addr);
+- return 0;
+- }
+- }
+- }
+- } else
+-#endif
+ if (rc == -EPERM) {
+ netif_err(efx, drv, efx->net_dev,
+ "Cannot change MAC address; use sfboot to enable"
+--
+2.35.1
+
--- /dev/null
+From 62bb027c64c869690a4be307a2421d93aea96c68 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 10:28:41 +0100
+Subject: sfc: include vport_id in filter spec hash and equal()
+
+From: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
+
+[ Upstream commit c2bf23e4a5af37a4d77901d9ff14c50a269f143d ]
+
+Filters on different vports are qualified by different implicit MACs and/or
+VLANs, so shouldn't be considered equal even if their other match fields
+are identical.
+
+Fixes: 7c460d9be610 ("sfc: Extend and abstract efx_filter_spec to cover Huntington/EF10")
+Co-developed-by: Edward Cree <ecree.xilinx@gmail.com>
+Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
+Signed-off-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
+Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com>
+Link: https://lore.kernel.org/r/20221018092841.32206-1-pieter.jansen-van-vuuren@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/sfc/filter.h | 3 ++-
+ drivers/net/ethernet/sfc/rx_common.c | 10 +++++-----
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h
+index 40b2af8bfb81..2ac3c8f1b04b 100644
+--- a/drivers/net/ethernet/sfc/filter.h
++++ b/drivers/net/ethernet/sfc/filter.h
+@@ -157,7 +157,8 @@ struct efx_filter_spec {
+ u32 flags:6;
+ u32 dmaq_id:12;
+ u32 rss_context;
+- __be16 outer_vid __aligned(4); /* allow jhash2() of match values */
++ u32 vport_id;
++ __be16 outer_vid;
+ __be16 inner_vid;
+ u8 loc_mac[ETH_ALEN];
+ u8 rem_mac[ETH_ALEN];
+diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
+index 2c09afac5beb..36b46ddb6710 100644
+--- a/drivers/net/ethernet/sfc/rx_common.c
++++ b/drivers/net/ethernet/sfc/rx_common.c
+@@ -676,17 +676,17 @@ bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
+ return false;
+
+- return memcmp(&left->outer_vid, &right->outer_vid,
++ return memcmp(&left->vport_id, &right->vport_id,
+ sizeof(struct efx_filter_spec) -
+- offsetof(struct efx_filter_spec, outer_vid)) == 0;
++ offsetof(struct efx_filter_spec, vport_id)) == 0;
+ }
+
+ u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
+ {
+- BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
+- return jhash2((const u32 *)&spec->outer_vid,
++ BUILD_BUG_ON(offsetof(struct efx_filter_spec, vport_id) & 3);
++ return jhash2((const u32 *)&spec->vport_id,
+ (sizeof(struct efx_filter_spec) -
+- offsetof(struct efx_filter_spec, outer_vid)) / 4,
++ offsetof(struct efx_filter_spec, vport_id)) / 4,
+ 0);
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 520c12fe7527e8c6fa85982c46d0d079fa704a08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Nov 2020 15:52:47 -0500
+Subject: SUNRPC: Add xdr_set_scratch_page() and xdr_reset_scratch_buffer()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 0ae4c3e8a64ace1b8d7de033b0751afe43024416 ]
+
+Clean up: De-duplicate some frequently-used code.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/blocklayout/blocklayout.c | 2 +-
+ fs/nfs/blocklayout/dev.c | 2 +-
+ fs/nfs/dir.c | 2 +-
+ fs/nfs/filelayout/filelayout.c | 2 +-
+ fs/nfs/filelayout/filelayoutdev.c | 2 +-
+ fs/nfs/flexfilelayout/flexfilelayout.c | 2 +-
+ fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +-
+ fs/nfs/nfs42xdr.c | 2 +-
+ fs/nfs/nfs4xdr.c | 6 ++--
+ fs/nfsd/nfs4proc.c | 2 +-
+ include/linux/sunrpc/xdr.h | 44 ++++++++++++++++++++++-
+ net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +-
+ net/sunrpc/xdr.c | 28 +++------------
+ 13 files changed, 59 insertions(+), 39 deletions(-)
+
+diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
+index 08108b6d2fa1..3be6836074ae 100644
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -697,7 +697,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
+
+ xdr_init_decode_pages(&xdr, &buf,
+ lgr->layoutp->pages, lgr->layoutp->len);
+- xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(&xdr, scratch);
+
+ status = -EIO;
+ p = xdr_inline_decode(&xdr, 4);
+diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
+index dec5880ac6de..acb1d22907da 100644
+--- a/fs/nfs/blocklayout/dev.c
++++ b/fs/nfs/blocklayout/dev.c
+@@ -510,7 +510,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
+ goto out;
+
+ xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
+- xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(&xdr, scratch);
+
+ p = xdr_inline_decode(&xdr, sizeof(__be32));
+ if (!p)
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 9f88ca7b2001..935029632d5f 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -576,7 +576,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
+ goto out_nopages;
+
+ xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
+- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(&stream, scratch);
+
+ do {
+ if (entry->label)
+diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
+index ae5ed3a07494..d2103852475f 100644
+--- a/fs/nfs/filelayout/filelayout.c
++++ b/fs/nfs/filelayout/filelayout.c
+@@ -666,7 +666,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
+ return -ENOMEM;
+
+ xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
+- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(&stream, scratch);
+
+ /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
+ * num_fh (4) */
+diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
+index d913e818858f..86c3f7e69ec4 100644
+--- a/fs/nfs/filelayout/filelayoutdev.c
++++ b/fs/nfs/filelayout/filelayoutdev.c
+@@ -82,7 +82,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
+ goto out_err;
+
+ xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
+- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(&stream, scratch);
+
+ /* Get the stripe count (number of stripe index) */
+ p = xdr_inline_decode(&stream, 4);
+diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
+index a8a02081942d..0200d96b8d5b 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -378,7 +378,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
+
+ xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
+ lgr->layoutp->len);
+- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(&stream, scratch);
+
+ /* stripe unit and mirror_array_cnt */
+ rc = -EIO;
+diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+index 1f12297109b4..bfa7202ca7be 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+@@ -69,7 +69,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
+ INIT_LIST_HEAD(&dsaddrs);
+
+ xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
+- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(&stream, scratch);
+
+ /* multipath count */
+ p = xdr_inline_decode(&stream, 4);
+diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
+index f2248d9d4db5..df5bee2f505c 100644
+--- a/fs/nfs/nfs42xdr.c
++++ b/fs/nfs/nfs42xdr.c
+@@ -1536,7 +1536,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
+ struct compound_hdr hdr;
+ int status;
+
+- xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE);
++ xdr_set_scratch_page(xdr, res->scratch);
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
+index e2f0e3446e22..f21dc4284468 100644
+--- a/fs/nfs/nfs4xdr.c
++++ b/fs/nfs/nfs4xdr.c
+@@ -6406,10 +6406,8 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct compound_hdr hdr;
+ int status;
+
+- if (res->acl_scratch != NULL) {
+- void *p = page_address(res->acl_scratch);
+- xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+- }
++ if (res->acl_scratch != NULL)
++ xdr_set_scratch_page(xdr, res->acl_scratch);
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 735ee8a79870..9aeeb51e8c61 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2274,7 +2274,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
+ xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
+ /* Tail and page_len should be zero at this point: */
+ buf->len = buf->head[0].iov_len;
+- xdr->scratch.iov_len = 0;
++ xdr_reset_scratch_buffer(xdr);
+ xdr->page_ptr = buf->pages - 1;
+ buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+ - rqstp->rq_auth_slack;
+diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
+index 6d9d1520612b..0c8cab6210b3 100644
+--- a/include/linux/sunrpc/xdr.h
++++ b/include/linux/sunrpc/xdr.h
+@@ -246,7 +246,6 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
+ __be32 *p, struct rpc_rqst *rqst);
+ extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
+ struct page **pages, unsigned int len);
+-extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
+ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
+ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
+ extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
+@@ -254,6 +253,49 @@ extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned in
+ extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t);
+ extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t);
+
++/**
++ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
++ * @xdr: pointer to xdr_stream struct
++ * @buf: pointer to an empty buffer
++ * @buflen: size of 'buf'
++ *
++ * The scratch buffer is used when decoding from an array of pages.
++ * If an xdr_inline_decode() call spans across page boundaries, then
++ * we copy the data into the scratch buffer in order to allow linear
++ * access.
++ */
++static inline void
++xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
++{
++ xdr->scratch.iov_base = buf;
++ xdr->scratch.iov_len = buflen;
++}
++
++/**
++ * xdr_set_scratch_page - Attach a scratch buffer for decoding data
++ * @xdr: pointer to xdr_stream struct
++ * @page: an anonymous page
++ *
++ * See xdr_set_scratch_buffer().
++ */
++static inline void
++xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page)
++{
++ xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE);
++}
++
++/**
++ * xdr_reset_scratch_buffer - Clear scratch buffer information
++ * @xdr: pointer to xdr_stream struct
++ *
++ * See xdr_set_scratch_buffer().
++ */
++static inline void
++xdr_reset_scratch_buffer(struct xdr_stream *xdr)
++{
++ xdr_set_scratch_buffer(xdr, NULL, 0);
++}
++
+ /**
+ * xdr_stream_remaining - Return the number of bytes remaining in the stream
+ * @xdr: pointer to struct xdr_stream
+diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
+index 2ff7b7083eba..c636c648849b 100644
+--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
++++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
+@@ -789,7 +789,7 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
+ scratch = alloc_page(GFP_KERNEL);
+ if (!scratch)
+ return -ENOMEM;
+- xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE);
++ xdr_set_scratch_page(xdr, scratch);
+
+ /* res->status */
+ err = gssx_dec_status(xdr, &res->status);
+diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
+index d84bb5037bb5..02adc5c7f034 100644
+--- a/net/sunrpc/xdr.c
++++ b/net/sunrpc/xdr.c
+@@ -669,7 +669,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+ struct kvec *iov = buf->head;
+ int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
+
+- xdr_set_scratch_buffer(xdr, NULL, 0);
++ xdr_reset_scratch_buffer(xdr);
+ BUG_ON(scratch_len < 0);
+ xdr->buf = buf;
+ xdr->iov = iov;
+@@ -713,7 +713,7 @@ inline void xdr_commit_encode(struct xdr_stream *xdr)
+ page = page_address(*xdr->page_ptr);
+ memcpy(xdr->scratch.iov_base, page, shift);
+ memmove(page, page + shift, (void *)xdr->p - page);
+- xdr->scratch.iov_len = 0;
++ xdr_reset_scratch_buffer(xdr);
+ }
+ EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+@@ -743,8 +743,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ * the "scratch" iov to track any temporarily unused fragment of
+ * space at the end of the previous buffer:
+ */
+- xdr->scratch.iov_base = xdr->p;
+- xdr->scratch.iov_len = frag1bytes;
++ xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
+ p = page_address(*xdr->page_ptr);
+ /*
+ * Note this is where the next encode will start after we've
+@@ -1056,8 +1055,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+ struct rpc_rqst *rqst)
+ {
+ xdr->buf = buf;
+- xdr->scratch.iov_base = NULL;
+- xdr->scratch.iov_len = 0;
++ xdr_reset_scratch_buffer(xdr);
+ xdr->nwords = XDR_QUADLEN(buf->len);
+ if (buf->head[0].iov_len != 0)
+ xdr_set_iov(xdr, buf->head, buf->len);
+@@ -1105,24 +1103,6 @@ static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
+ return p;
+ }
+
+-/**
+- * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+- * @xdr: pointer to xdr_stream struct
+- * @buf: pointer to an empty buffer
+- * @buflen: size of 'buf'
+- *
+- * The scratch buffer is used when decoding from an array of pages.
+- * If an xdr_inline_decode() call spans across page boundaries, then
+- * we copy the data into the scratch buffer in order to allow linear
+- * access.
+- */
+-void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+-{
+- xdr->scratch.iov_base = buf;
+- xdr->scratch.iov_len = buflen;
+-}
+-EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
+-
+ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
+ {
+ __be32 *p;
+--
+2.35.1
+
--- /dev/null
+From ba564cda1b99d035764257f9c3fc615b81efb780 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Nov 2020 11:19:42 -0500
+Subject: SUNRPC: Prepare for xdr_stream-style decoding on the server-side
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5191955d6fc65e6d4efe8f4f10a6028298f57281 ]
+
+A "permanent" struct xdr_stream is allocated in struct svc_rqst so
+that it is usable by all server-side decoders. A per-rqst scratch
+buffer is also allocated to handle decoding XDR data items that
+cross page boundaries.
+
+To demonstrate how it will be used, add the first call site for the
+new svcxdr_init_decode() API.
+
+As an additional part of the overall conversion, add symbolic
+constants for successful and failed XDR operations. Returning "0" is
+overloaded. Sometimes it means something failed, but sometimes it
+means success. To make it more clear when XDR decoding functions
+succeed or fail, introduce symbolic constants.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfssvc.c | 2 ++
+ include/linux/sunrpc/svc.h | 16 ++++++++++++++++
+ net/sunrpc/svc.c | 5 +++++
+ 3 files changed, 23 insertions(+)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 9323e30a7eaf..ad6fedf37a40 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1019,6 +1019,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ * (necessary in the NFSv4.0 compound case)
+ */
+ rqstp->rq_cachetype = proc->pc_cachetype;
++
++ svcxdr_init_decode(rqstp);
+ if (!proc->pc_decode(rqstp, argv->iov_base))
+ goto out_decode_err;
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 386628b36bc7..6ce2e12589cb 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -247,6 +247,8 @@ struct svc_rqst {
+
+ size_t rq_xprt_hlen; /* xprt header len */
+ struct xdr_buf rq_arg;
++ struct xdr_stream rq_arg_stream;
++ struct page *rq_scratch_page;
+ struct xdr_buf rq_res;
+ struct page *rq_pages[RPCSVC_MAXPAGES + 1];
+ struct page * *rq_respages; /* points into rq_pages */
+@@ -557,4 +559,18 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
+ svc_reserve(rqstp, space + rqstp->rq_auth_slack);
+ }
+
++/**
++ * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding
++ * @rqstp: controlling server RPC transaction context
++ *
++ */
++static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
++{
++ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
++ struct kvec *argv = rqstp->rq_arg.head;
++
++ xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL);
++ xdr_set_scratch_page(xdr, rqstp->rq_scratch_page);
++}
++
+ #endif /* SUNRPC_SVC_H */
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index d38788cd9433..bb55f124b62e 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -614,6 +614,10 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
+ rqstp->rq_server = serv;
+ rqstp->rq_pool = pool;
+
++ rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0);
++ if (!rqstp->rq_scratch_page)
++ goto out_enomem;
++
+ rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
+ if (!rqstp->rq_argp)
+ goto out_enomem;
+@@ -842,6 +846,7 @@ void
+ svc_rqst_free(struct svc_rqst *rqstp)
+ {
+ svc_release_buffer(rqstp);
++ put_page(rqstp->rq_scratch_page);
+ kfree(rqstp->rq_resp);
+ kfree(rqstp->rq_argp);
+ kfree(rqstp->rq_auth_data);
+--
+2.35.1
+
--- /dev/null
+From b1cdd1c781f9b8335f6b5c708bd99b180896c5bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 21:32:15 +0900
+Subject: tcp: Add num_closed_socks to struct sock_reuseport.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+
+[ Upstream commit 5c040eaf5d1753aafe12989ca712175df0b9c436 ]
+
+As noted in the following commit, a closed listener has to hold the
+reference to the reuseport group for socket migration. This patch adds a
+field (num_closed_socks) to struct sock_reuseport to manage closed sockets
+within the same reuseport group. Moreover, this and the following commits
+introduce some helper functions to split socks[] into two sections and keep
+TCP_LISTEN and TCP_CLOSE sockets in each section. Like a double-ended
+queue, we will place TCP_LISTEN sockets from the front and TCP_CLOSE
+sockets from the end.
+
+ TCP_LISTEN----------> <-------TCP_CLOSE
+ +---+---+ --- +---+ --- +---+ --- +---+
+ | 0 | 1 | ... | i | ... | j | ... | k |
+ +---+---+ --- +---+ --- +---+ --- +---+
+
+ i = num_socks - 1
+ j = max_socks - num_closed_socks
+ k = max_socks - 1
+
+This patch also extends reuseport_add_sock() and reuseport_grow() to
+support num_closed_socks.
+
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Link: https://lore.kernel.org/bpf/20210612123224.12525-3-kuniyu@amazon.co.jp
+Stable-dep-of: 69421bf98482 ("udp: Update reuse->has_conns under reuseport_lock.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock_reuseport.h | 5 ++-
+ net/core/sock_reuseport.c | 75 +++++++++++++++++++++++++++---------
+ 2 files changed, 60 insertions(+), 20 deletions(-)
+
+diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
+index 505f1e18e9bf..0e558ca7afbf 100644
+--- a/include/net/sock_reuseport.h
++++ b/include/net/sock_reuseport.h
+@@ -13,8 +13,9 @@ extern spinlock_t reuseport_lock;
+ struct sock_reuseport {
+ struct rcu_head rcu;
+
+- u16 max_socks; /* length of socks */
+- u16 num_socks; /* elements in socks */
++ u16 max_socks; /* length of socks */
++ u16 num_socks; /* elements in socks */
++ u16 num_closed_socks; /* closed elements in socks */
+ /* The last synq overflow event timestamp of this
+ * reuse->socks[] group.
+ */
+diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
+index b065f0a103ed..f478c65a281b 100644
+--- a/net/core/sock_reuseport.c
++++ b/net/core/sock_reuseport.c
+@@ -18,6 +18,49 @@ DEFINE_SPINLOCK(reuseport_lock);
+
+ static DEFINE_IDA(reuseport_ida);
+
++static int reuseport_sock_index(struct sock *sk,
++ const struct sock_reuseport *reuse,
++ bool closed)
++{
++ int left, right;
++
++ if (!closed) {
++ left = 0;
++ right = reuse->num_socks;
++ } else {
++ left = reuse->max_socks - reuse->num_closed_socks;
++ right = reuse->max_socks;
++ }
++
++ for (; left < right; left++)
++ if (reuse->socks[left] == sk)
++ return left;
++ return -1;
++}
++
++static void __reuseport_add_sock(struct sock *sk,
++ struct sock_reuseport *reuse)
++{
++ reuse->socks[reuse->num_socks] = sk;
++ /* paired with smp_rmb() in reuseport_select_sock() */
++ smp_wmb();
++ reuse->num_socks++;
++}
++
++static bool __reuseport_detach_sock(struct sock *sk,
++ struct sock_reuseport *reuse)
++{
++ int i = reuseport_sock_index(sk, reuse, false);
++
++ if (i == -1)
++ return false;
++
++ reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
++ reuse->num_socks--;
++
++ return true;
++}
++
+ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
+ {
+ unsigned int size = sizeof(struct sock_reuseport) +
+@@ -72,9 +115,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
+ }
+
+ reuse->reuseport_id = id;
++ reuse->bind_inany = bind_inany;
+ reuse->socks[0] = sk;
+ reuse->num_socks = 1;
+- reuse->bind_inany = bind_inany;
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+ out:
+@@ -98,6 +141,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
+ return NULL;
+
+ more_reuse->num_socks = reuse->num_socks;
++ more_reuse->num_closed_socks = reuse->num_closed_socks;
+ more_reuse->prog = reuse->prog;
+ more_reuse->reuseport_id = reuse->reuseport_id;
+ more_reuse->bind_inany = reuse->bind_inany;
+@@ -105,9 +149,13 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
+
+ memcpy(more_reuse->socks, reuse->socks,
+ reuse->num_socks * sizeof(struct sock *));
++ memcpy(more_reuse->socks +
++ (more_reuse->max_socks - more_reuse->num_closed_socks),
++ reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
++ reuse->num_closed_socks * sizeof(struct sock *));
+ more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
+
+- for (i = 0; i < reuse->num_socks; ++i)
++ for (i = 0; i < reuse->max_socks; ++i)
+ rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
+ more_reuse);
+
+@@ -158,7 +206,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
+ return -EBUSY;
+ }
+
+- if (reuse->num_socks == reuse->max_socks) {
++ if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
+ reuse = reuseport_grow(reuse);
+ if (!reuse) {
+ spin_unlock_bh(&reuseport_lock);
+@@ -166,10 +214,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
+ }
+ }
+
+- reuse->socks[reuse->num_socks] = sk;
+- /* paired with smp_rmb() in reuseport_select_sock() */
+- smp_wmb();
+- reuse->num_socks++;
++ __reuseport_add_sock(sk, reuse);
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+ spin_unlock_bh(&reuseport_lock);
+@@ -183,7 +228,6 @@ EXPORT_SYMBOL(reuseport_add_sock);
+ void reuseport_detach_sock(struct sock *sk)
+ {
+ struct sock_reuseport *reuse;
+- int i;
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+@@ -200,16 +244,11 @@ void reuseport_detach_sock(struct sock *sk)
+ bpf_sk_reuseport_detach(sk);
+
+ rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
++ __reuseport_detach_sock(sk, reuse);
++
++ if (reuse->num_socks + reuse->num_closed_socks == 0)
++ call_rcu(&reuse->rcu, reuseport_free_rcu);
+
+- for (i = 0; i < reuse->num_socks; i++) {
+- if (reuse->socks[i] == sk) {
+- reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+- reuse->num_socks--;
+- if (reuse->num_socks == 0)
+- call_rcu(&reuse->rcu, reuseport_free_rcu);
+- break;
+- }
+- }
+ spin_unlock_bh(&reuseport_lock);
+ }
+ EXPORT_SYMBOL(reuseport_detach_sock);
+@@ -274,7 +313,7 @@ struct sock *reuseport_select_sock(struct sock *sk,
+ prog = rcu_dereference(reuse->prog);
+ socks = READ_ONCE(reuse->num_socks);
+ if (likely(socks)) {
+- /* paired with smp_wmb() in reuseport_add_sock() */
++ /* paired with smp_wmb() in __reuseport_add_sock() */
+ smp_rmb();
+
+ if (!prog || !skb)
+--
+2.35.1
+
--- /dev/null
+From 680f5683bb4055bc0d86d2aa1961ec9e1d8f728e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Oct 2022 17:25:14 +0200
+Subject: tipc: fix an information leak in tipc_topsrv_kern_subscr
+
+From: Alexander Potapenko <glider@google.com>
+
+[ Upstream commit 777ecaabd614d47c482a5c9031579e66da13989a ]
+
+Use an 8-byte write to initialize sub.usr_handle in
+tipc_topsrv_kern_subscr(), otherwise four bytes remain uninitialized
+when issuing setsockopt(..., SOL_TIPC, ...).
+This resulted in an infoleak reported by KMSAN when the packet was
+received:
+
+ =====================================================
+ BUG: KMSAN: kernel-infoleak in copyout+0xbc/0x100 lib/iov_iter.c:169
+ instrument_copy_to_user ./include/linux/instrumented.h:121
+ copyout+0xbc/0x100 lib/iov_iter.c:169
+ _copy_to_iter+0x5c0/0x20a0 lib/iov_iter.c:527
+ copy_to_iter ./include/linux/uio.h:176
+ simple_copy_to_iter+0x64/0xa0 net/core/datagram.c:513
+ __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:419
+ skb_copy_datagram_iter+0x58/0x200 net/core/datagram.c:527
+ skb_copy_datagram_msg ./include/linux/skbuff.h:3903
+ packet_recvmsg+0x521/0x1e70 net/packet/af_packet.c:3469
+ ____sys_recvmsg+0x2c4/0x810 net/socket.c:?
+ ___sys_recvmsg+0x217/0x840 net/socket.c:2743
+ __sys_recvmsg net/socket.c:2773
+ __do_sys_recvmsg net/socket.c:2783
+ __se_sys_recvmsg net/socket.c:2780
+ __x64_sys_recvmsg+0x364/0x540 net/socket.c:2780
+ do_syscall_x64 arch/x86/entry/common.c:50
+ do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120
+
+ ...
+
+ Uninit was stored to memory at:
+ tipc_sub_subscribe+0x42d/0xb50 net/tipc/subscr.c:156
+ tipc_conn_rcv_sub+0x246/0x620 net/tipc/topsrv.c:375
+ tipc_topsrv_kern_subscr+0x2e8/0x400 net/tipc/topsrv.c:579
+ tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190
+ tipc_sk_join+0x2a8/0x770 net/tipc/socket.c:3084
+ tipc_setsockopt+0xae5/0xe40 net/tipc/socket.c:3201
+ __sys_setsockopt+0x87f/0xdc0 net/socket.c:2252
+ __do_sys_setsockopt net/socket.c:2263
+ __se_sys_setsockopt net/socket.c:2260
+ __x64_sys_setsockopt+0xe0/0x160 net/socket.c:2260
+ do_syscall_x64 arch/x86/entry/common.c:50
+ do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120
+
+ Local variable sub created at:
+ tipc_topsrv_kern_subscr+0x57/0x400 net/tipc/topsrv.c:562
+ tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190
+
+ Bytes 84-87 of 88 are uninitialized
+ Memory access of size 88 starts at ffff88801ed57cd0
+ Data copied to user address 0000000020000400
+ ...
+ =====================================================
+
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Fixes: 026321c6d056a5 ("tipc: rename tipc_server to tipc_topsrv")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/topsrv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
+index 13f3143609f9..d9e2c0fea3f2 100644
+--- a/net/tipc/topsrv.c
++++ b/net/tipc/topsrv.c
+@@ -568,7 +568,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = filter;
+- *(u32 *)&sub.usr_handle = port;
++ *(u64 *)&sub.usr_handle = (u64)port;
+
+ con = tipc_conn_alloc(tipc_topsrv(net));
+ if (IS_ERR(con))
+--
+2.35.1
+
--- /dev/null
+From c7e813435954cb70adcbcad1079b3ae4db834d85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Oct 2022 15:46:13 +1300
+Subject: tipc: Fix recognition of trial period
+
+From: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+
+[ Upstream commit 28be7ca4fcfd69a2d52aaa331adbf9dbe91f9e6e ]
+
+The trial period exists until jiffies is after addr_trial_end. But as
+jiffies will eventually overflow, just using time_after will eventually
+give incorrect results. As the node address is set once the trial period
+ends, this can be used to know that we are not in the trial period.
+
+Fixes: e415577f57f4 ("tipc: correct discovery message handling during address trial period")
+Signed-off-by: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/discover.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/tipc/discover.c b/net/tipc/discover.c
+index 14bc20604051..2ae268b67465 100644
+--- a/net/tipc/discover.c
++++ b/net/tipc/discover.c
+@@ -147,8 +147,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+ {
+ struct net *net = d->net;
+ struct tipc_net *tn = tipc_net(net);
+- bool trial = time_before(jiffies, tn->addr_trial_end);
+ u32 self = tipc_own_addr(net);
++ bool trial = time_before(jiffies, tn->addr_trial_end) && !self;
+
+ if (mtyp == DSC_TRIAL_FAIL_MSG) {
+ if (!trial)
+--
+2.35.1
+
--- /dev/null
+From 9eeabe2096e18c6b3c110aec18bdcb17fcc3d8a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Oct 2022 11:37:57 -0400
+Subject: tracing: Do not free snapshot if tracer is on cmdline
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit a541a9559bb0a8ecc434de01d3e4826c32e8bb53 ]
+
+The ftrace_boot_snapshot and alloc_snapshot cmdline options allocate the
+snapshot buffer at boot up for use later. The ftrace_boot_snapshot in
+particular requires the snapshot to be allocated because it will take a
+snapshot at the end of boot up allowing to see the traces that happened
+during boot so that it's not lost when user space takes over.
+
+When a tracer is registered (started) there's a path that checks if it
+requires the snapshot buffer or not, and if it does not and it was
+allocated it will do a synchronization and free the snapshot buffer.
+
+This is only required if the previous tracer was using it for "max
+latency" snapshots, as it needs to make sure all max snapshots are
+complete before freeing. But this is only needed if the previous tracer
+was using the snapshot buffer for latency (like irqoff tracer and
+friends). But it does not make sense to free it, if the previous tracer
+was not using it, and the snapshot was allocated by the cmdline
+parameters. This basically takes away the point of allocating it in the
+first place!
+
+Note, the allocated snapshot worked fine for just trace events, but fails
+when a tracer is enabled on the cmdline.
+
+Further investigation, this goes back even further and it does not require
+a tracer on the cmdline to fail. Simply enable snapshots and then enable a
+tracer, and it will remove the snapshot.
+
+Link: https://lkml.kernel.org/r/20221005113757.041df7fe@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Fixes: 45ad21ca5530 ("tracing: Have trace_array keep track if snapshot buffer is allocated")
+Reported-by: Ross Zwisler <zwisler@kernel.org>
+Tested-by: Ross Zwisler <zwisler@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 870033f9c198..b7cb9147f0c5 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -6008,12 +6008,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ if (tr->current_trace->reset)
+ tr->current_trace->reset(tr);
+
++#ifdef CONFIG_TRACER_MAX_TRACE
++ had_max_tr = tr->current_trace->use_max_tr;
++
+ /* Current trace needs to be nop_trace before synchronize_rcu */
+ tr->current_trace = &nop_trace;
+
+-#ifdef CONFIG_TRACER_MAX_TRACE
+- had_max_tr = tr->allocated_snapshot;
+-
+ if (had_max_tr && !t->use_max_tr) {
+ /*
+ * We need to make sure that the update_max_tr sees that
+@@ -6026,11 +6026,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ free_snapshot(tr);
+ }
+
+- if (t->use_max_tr && !had_max_tr) {
++ if (t->use_max_tr && !tr->allocated_snapshot) {
+ ret = tracing_alloc_snapshot_instance(tr);
+ if (ret < 0)
+ goto out;
+ }
++#else
++ tr->current_trace = &nop_trace;
+ #endif
+
+ if (t->init) {
+--
+2.35.1
+
--- /dev/null
+From c7f70e344e8cc18a5dcfd489c091922085641031 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jun 2022 22:06:13 +0800
+Subject: tracing: Simplify conditional compilation code in
+ tracing_set_tracer()
+
+From: sunliming <sunliming@kylinos.cn>
+
+[ Upstream commit f4b0d318097e45cbac5e14976f8bb56aa2cef504 ]
+
+Two conditional compilation directives "#ifdef CONFIG_TRACER_MAX_TRACE"
+are used consecutively, and no other code in between. Simplify conditional
+the compilation code and only use one "#ifdef CONFIG_TRACER_MAX_TRACE".
+
+Link: https://lkml.kernel.org/r/20220602140613.545069-1-sunliming@kylinos.cn
+
+Signed-off-by: sunliming <sunliming@kylinos.cn>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: a541a9559bb0 ("tracing: Do not free snapshot if tracer is on cmdline")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index a5245362ce7a..870033f9c198 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -6025,9 +6025,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ synchronize_rcu();
+ free_snapshot(tr);
+ }
+-#endif
+
+-#ifdef CONFIG_TRACER_MAX_TRACE
+ if (t->use_max_tr && !had_max_tr) {
+ ret = tracing_alloc_snapshot_instance(tr);
+ if (ret < 0)
+--
+2.35.1
+
--- /dev/null
+From 4b84c215237ebacc8ee55b1ab4c829614b619842 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Oct 2022 11:26:25 -0700
+Subject: udp: Update reuse->has_conns under reuseport_lock.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 69421bf98482d089e50799f45e48b25ce4a8d154 ]
+
+When we call connect() for a UDP socket in a reuseport group, we have
+to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel
+could select an unconnected socket wrongly for packets sent to the
+connected socket.
+
+However, the current way to set has_conns is illegal and possible to
+trigger that problem. reuseport_has_conns() changes has_conns under
+rcu_read_lock(), which upgrades the RCU reader to the updater. Then,
+it must do the update under the updater's lock, reuseport_lock, but
+it doesn't for now.
+
+For this reason, there is a race below where we fail to set has_conns
+resulting in the wrong socket selection. To avoid the race, let's split
+the reader and updater with proper locking.
+
+ cpu1 cpu2
++----+ +----+
+
+__ip[46]_datagram_connect() reuseport_grow()
+. .
+|- reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size)
+| . |
+| |- rcu_read_lock()
+| |- reuse = rcu_dereference(sk->sk_reuseport_cb)
+| |
+| | | /* reuse->has_conns == 0 here */
+| | |- more_reuse->has_conns = reuse->has_conns
+| |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */
+| | |
+| | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
+| | | more_reuse)
+| `- rcu_read_unlock() `- kfree_rcu(reuse, rcu)
+|
+|- sk->sk_state = TCP_ESTABLISHED
+
+Note the likely(reuse) in reuseport_has_conns_set() is always true,
+but we put the test there for ease of review. [0]
+
+For the record, usually, sk_reuseport_cb is changed under lock_sock().
+The only exception is reuseport_grow() & TCP reqsk migration case.
+
+ 1) shutdown() TCP listener, which is moved into the latter part of
+ reuse->socks[] to migrate reqsk.
+
+ 2) New listen() overflows reuse->socks[] and call reuseport_grow().
+
+ 3) reuse->max_socks overflows u16 with the new listener.
+
+ 4) reuseport_grow() pops the old shutdown()ed listener from the array
+ and update its sk->sk_reuseport_cb as NULL without lock_sock().
+
+shutdown()ed TCP sk->sk_reuseport_cb can be changed without lock_sock(),
+but, reuseport_has_conns_set() is called only for UDP under lock_sock(),
+so likely(reuse) never be false in reuseport_has_conns_set().
+
+[0]: https://lore.kernel.org/netdev/CANn89iLja=eQHbsM_Ta2sQF0tOGU8vAGrh_izRuuHjuO1ouUag@mail.gmail.com/
+
+Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20221014182625.89913-1-kuniyu@amazon.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock_reuseport.h | 11 +++++------
+ net/core/sock_reuseport.c | 16 ++++++++++++++++
+ net/ipv4/datagram.c | 2 +-
+ net/ipv4/udp.c | 2 +-
+ net/ipv6/datagram.c | 2 +-
+ net/ipv6/udp.c | 2 +-
+ 6 files changed, 25 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
+index 0e558ca7afbf..6348c6f26903 100644
+--- a/include/net/sock_reuseport.h
++++ b/include/net/sock_reuseport.h
+@@ -39,21 +39,20 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
+ extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
+ extern int reuseport_detach_prog(struct sock *sk);
+
+-static inline bool reuseport_has_conns(struct sock *sk, bool set)
++static inline bool reuseport_has_conns(struct sock *sk)
+ {
+ struct sock_reuseport *reuse;
+ bool ret = false;
+
+ rcu_read_lock();
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+- if (reuse) {
+- if (set)
+- reuse->has_conns = 1;
+- ret = reuse->has_conns;
+- }
++ if (reuse && reuse->has_conns)
++ ret = true;
+ rcu_read_unlock();
+
+ return ret;
+ }
+
++void reuseport_has_conns_set(struct sock *sk);
++
+ #endif /* _SOCK_REUSEPORT_H */
+diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
+index f478c65a281b..364cf6c6912b 100644
+--- a/net/core/sock_reuseport.c
++++ b/net/core/sock_reuseport.c
+@@ -18,6 +18,22 @@ DEFINE_SPINLOCK(reuseport_lock);
+
+ static DEFINE_IDA(reuseport_ida);
+
++void reuseport_has_conns_set(struct sock *sk)
++{
++ struct sock_reuseport *reuse;
++
++ if (!rcu_access_pointer(sk->sk_reuseport_cb))
++ return;
++
++ spin_lock_bh(&reuseport_lock);
++ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
++ lockdep_is_held(&reuseport_lock));
++ if (likely(reuse))
++ reuse->has_conns = 1;
++ spin_unlock_bh(&reuseport_lock);
++}
++EXPORT_SYMBOL(reuseport_has_conns_set);
++
+ static int reuseport_sock_index(struct sock *sk,
+ const struct sock_reuseport *reuse,
+ bool closed)
+diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
+index 4a8550c49202..112c6e892d30 100644
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
+ }
+ inet->inet_daddr = fl4->daddr;
+ inet->inet_dport = usin->sin_port;
+- reuseport_has_conns(sk, true);
++ reuseport_has_conns_set(sk);
+ sk->sk_state = TCP_ESTABLISHED;
+ sk_set_txhash(sk);
+ inet->inet_id = prandom_u32();
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 4446aa8237ff..b093daaa3deb 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -446,7 +446,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ /* Fall back to scoring if group has connections */
+- if (result && !reuseport_has_conns(sk, false))
++ if (result && !reuseport_has_conns(sk))
+ return result;
+
+ result = result ? : sk;
+diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
+index 206f66310a88..f4559e5bc84b 100644
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
+ goto out;
+ }
+
+- reuseport_has_conns(sk, true);
++ reuseport_has_conns_set(sk);
+ sk->sk_state = TCP_ESTABLISHED;
+ sk_set_txhash(sk);
+ out:
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 9b504bf49214..514e6a55959f 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -179,7 +179,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ /* Fall back to scoring if group has connections */
+- if (result && !reuseport_has_conns(sk, false))
++ if (result && !reuseport_has_conns(sk))
+ return result;
+
+ result = result ? : sk;
+--
+2.35.1
+
--- /dev/null
+From 15bd6fce2c8be3bc43ba3d6e682eb958e2ad5800 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Sep 2022 20:16:10 +0300
+Subject: USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM
+
+From: Hannu Hartikainen <hannu@hrtk.in>
+
+[ Upstream commit fc4ade55c617dc73c7e9756b57f3230b4ff24540 ]
+
+NVIDIA Jetson devices in Force Recovery mode (RCM) do not support
+suspending, ie. flashing fails if the device has been suspended. The
+devices are still visible in lsusb and seem to work otherwise, making
+the issue hard to debug. This has been discovered in various forum
+posts, eg. [1].
+
+The patch has been tested on NVIDIA Jetson AGX Xavier, but I'm adding
+all the Jetson models listed in [2] on the assumption that they all
+behave similarly.
+
+[1]: https://forums.developer.nvidia.com/t/flashing-not-working/72365
+[2]: https://docs.nvidia.com/jetson/archives/l4t-archived/l4t-3271/index.html#page/Tegra%20Linux%20Driver%20Package%20Development%20Guide/quick_start.html
+
+Signed-off-by: Hannu Hartikainen <hannu@hrtk.in>
+Cc: stable <stable@kernel.org> # after 6.1-rc3
+Link: https://lore.kernel.org/r/20220919171610.30484-1-hannu@hrtk.in
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/core/quirks.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
+index 03473e20e218..eb3ea45d5d13 100644
+--- a/drivers/usb/core/quirks.c
++++ b/drivers/usb/core/quirks.c
+@@ -388,6 +388,15 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* Kingston DataTraveler 3.0 */
+ { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM },
+
++ /* NVIDIA Jetson devices in Force Recovery mode */
++ { USB_DEVICE(0x0955, 0x7018), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7019), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7418), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7721), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7c18), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7e19), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7f21), .driver_info = USB_QUIRK_RESET_RESUME },
++
+ /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */
+ { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF },
+
+--
+2.35.1
+
--- /dev/null
+From c48b354b20bb7b07b2e92f5e3a77a4a22cec038e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 May 2022 10:35:14 +0800
+Subject: writeback: Avoid skipping inode writeback
+
+From: Jing Xia <jing.xia@unisoc.com>
+
+[ Upstream commit 846a3351ddfe4a86eede4bb26a205c3f38ef84d3 ]
+
+We have run into an issue that a task gets stuck in
+balance_dirty_pages_ratelimited() when perform I/O stress testing.
+The reason we observed is that an I_DIRTY_PAGES inode with lots
+of dirty pages is in b_dirty_time list and standard background
+writeback cannot writeback the inode.
+After studying the relevant code, the following scenario may lead
+to the issue:
+
+task1 task2
+----- -----
+fuse_flush
+ write_inode_now //in b_dirty_time
+ writeback_single_inode
+ __writeback_single_inode
+ fuse_write_end
+ filemap_dirty_folio
+ __xa_set_mark:PAGECACHE_TAG_DIRTY
+ lock inode->i_lock
+ if mapping tagged PAGECACHE_TAG_DIRTY
+ inode->i_state |= I_DIRTY_PAGES
+ unlock inode->i_lock
+ __mark_inode_dirty:I_DIRTY_PAGES
+ lock inode->i_lock
+ -was dirty,inode stays in
+ -b_dirty_time
+ unlock inode->i_lock
+
+ if(!(inode->i_state & I_DIRTY_All))
+ -not true,so nothing done
+
+This patch moves the dirty inode to b_dirty list when the inode
+currently is not queued in b_io or b_more_io list at the end of
+writeback_single_inode.
+
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+CC: stable@vger.kernel.org
+Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
+Signed-off-by: Jing Xia <jing.xia@unisoc.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220510023514.27399-1-jing.xia@unisoc.com
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 489514bcd7e1..645e3f6ffe44 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -1612,6 +1612,10 @@ static int writeback_single_inode(struct inode *inode,
+ */
+ if (!(inode->i_state & I_DIRTY_ALL))
+ inode_cgwb_move_to_attached(inode, wb);
++ else if (!(inode->i_state & I_SYNC_QUEUED) &&
++ (inode->i_state & I_DIRTY))
++ redirty_tail_locked(inode, wb);
++
+ spin_unlock(&wb->list_lock);
+ inode_sync_complete(inode);
+ out:
+--
+2.35.1
+
--- /dev/null
+From 6ebfdd72f515b6ccb3f7f3900a4f12c715dd8290 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Jun 2021 19:35:53 -0700
+Subject: writeback, cgroup: keep list of inodes attached to bdi_writeback
+
+From: Roman Gushchin <guro@fb.com>
+
+[ Upstream commit f3b6a6df38aa514d97e8c6fcc748be1d4142bec9 ]
+
+Currently there is no way to iterate over inodes attached to a specific
+cgwb structure. It limits the ability to efficiently reclaim the
+writeback structure itself and associated memory and block cgroup
+structures without scanning all inodes belonging to a sb, which can be
+prohibitively expensive.
+
+While dirty/in-active-writeback an inode belongs to one of the
+bdi_writeback's io lists: b_dirty, b_io, b_more_io and b_dirty_time. Once
+cleaned up, it's removed from all io lists. So the inode->i_io_list can
+be reused to maintain the list of inodes, attached to a bdi_writeback
+structure.
+
+This patch introduces a new wb->b_attached list, which contains all inodes
+which were dirty at least once and are attached to the given cgwb. Inodes
+attached to the root bdi_writeback structures are never placed on such
+list. The following patch will use this list to try to release cgwbs
+structures more efficiently.
+
+Link: https://lkml.kernel.org/r/20210608230225.2078447-6-guro@fb.com
+Signed-off-by: Roman Gushchin <guro@fb.com>
+Suggested-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Acked-by: Tejun Heo <tj@kernel.org>
+Acked-by: Dennis Zhou <dennis@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Dave Chinner <dchinner@redhat.com>
+Cc: Jan Kara <jack@suse.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 93 ++++++++++++++++++++------------
+ include/linux/backing-dev-defs.h | 1 +
+ mm/backing-dev.c | 2 +
+ 3 files changed, 62 insertions(+), 34 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 71043e847e7c..489514bcd7e1 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -131,25 +131,6 @@ static bool inode_io_list_move_locked(struct inode *inode,
+ return false;
+ }
+
+-/**
+- * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
+- * @inode: inode to be removed
+- * @wb: bdi_writeback @inode is being removed from
+- *
+- * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
+- * clear %WB_has_dirty_io if all are empty afterwards.
+- */
+-static void inode_io_list_del_locked(struct inode *inode,
+- struct bdi_writeback *wb)
+-{
+- assert_spin_locked(&wb->list_lock);
+- assert_spin_locked(&inode->i_lock);
+-
+- inode->i_state &= ~I_SYNC_QUEUED;
+- list_del_init(&inode->i_io_list);
+- wb_io_lists_depopulated(wb);
+-}
+-
+ static void wb_wakeup(struct bdi_writeback *wb)
+ {
+ spin_lock_bh(&wb->work_lock);
+@@ -278,6 +259,28 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
+ }
+ EXPORT_SYMBOL_GPL(__inode_attach_wb);
+
++/**
++ * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
++ * @inode: inode of interest with i_lock held
++ * @wb: target bdi_writeback
++ *
++ * Remove the inode from wb's io lists and if necessarily put onto b_attached
++ * list. Only inodes attached to cgwb's are kept on this list.
++ */
++static void inode_cgwb_move_to_attached(struct inode *inode,
++ struct bdi_writeback *wb)
++{
++ assert_spin_locked(&wb->list_lock);
++ assert_spin_locked(&inode->i_lock);
++
++ inode->i_state &= ~I_SYNC_QUEUED;
++ if (wb != &wb->bdi->wb)
++ list_move(&inode->i_io_list, &wb->b_attached);
++ else
++ list_del_init(&inode->i_io_list);
++ wb_io_lists_depopulated(wb);
++}
++
+ /**
+ * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
+ * @inode: inode of interest with i_lock held
+@@ -419,21 +422,28 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+ wb_get(new_wb);
+
+ /*
+- * Transfer to @new_wb's IO list if necessary. The specific list
+- * @inode was on is ignored and the inode is put on ->b_dirty which
+- * is always correct including from ->b_dirty_time. The transfer
+- * preserves @inode->dirtied_when ordering.
++ * Transfer to @new_wb's IO list if necessary. If the @inode is dirty,
++ * the specific list @inode was on is ignored and the @inode is put on
++ * ->b_dirty which is always correct including from ->b_dirty_time.
++ * The transfer preserves @inode->dirtied_when ordering. If the @inode
++ * was clean, it means it was on the b_attached list, so move it onto
++ * the b_attached list of @new_wb.
+ */
+ if (!list_empty(&inode->i_io_list)) {
+- struct inode *pos;
+-
+- inode_io_list_del_locked(inode, old_wb);
+ inode->i_wb = new_wb;
+- list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+- if (time_after_eq(inode->dirtied_when,
+- pos->dirtied_when))
+- break;
+- inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
++
++ if (inode->i_state & I_DIRTY_ALL) {
++ struct inode *pos;
++
++ list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
++ if (time_after_eq(inode->dirtied_when,
++ pos->dirtied_when))
++ break;
++ inode_io_list_move_locked(inode, new_wb,
++ pos->i_io_list.prev);
++ } else {
++ inode_cgwb_move_to_attached(inode, new_wb);
++ }
+ } else {
+ inode->i_wb = new_wb;
+ }
+@@ -1030,6 +1040,17 @@ fs_initcall(cgroup_writeback_init);
+ static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
++static void inode_cgwb_move_to_attached(struct inode *inode,
++ struct bdi_writeback *wb)
++{
++ assert_spin_locked(&wb->list_lock);
++ assert_spin_locked(&inode->i_lock);
++
++ inode->i_state &= ~I_SYNC_QUEUED;
++ list_del_init(&inode->i_io_list);
++ wb_io_lists_depopulated(wb);
++}
++
+ static struct bdi_writeback *
+ locked_inode_to_wb_and_lock_list(struct inode *inode)
+ __releases(&inode->i_lock)
+@@ -1130,7 +1151,11 @@ void inode_io_list_del(struct inode *inode)
+
+ wb = inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
+- inode_io_list_del_locked(inode, wb);
++
++ inode->i_state &= ~I_SYNC_QUEUED;
++ list_del_init(&inode->i_io_list);
++ wb_io_lists_depopulated(wb);
++
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&wb->list_lock);
+ }
+@@ -1443,7 +1468,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
+ inode->i_state &= ~I_SYNC_QUEUED;
+ } else {
+ /* The inode is clean. Remove from writeback lists. */
+- inode_io_list_del_locked(inode, wb);
++ inode_cgwb_move_to_attached(inode, wb);
+ }
+ }
+
+@@ -1586,7 +1611,7 @@ static int writeback_single_inode(struct inode *inode,
+ * touch it. See comment above for explanation.
+ */
+ if (!(inode->i_state & I_DIRTY_ALL))
+- inode_io_list_del_locked(inode, wb);
++ inode_cgwb_move_to_attached(inode, wb);
+ spin_unlock(&wb->list_lock);
+ inode_sync_complete(inode);
+ out:
+diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
+index fff9367a6348..e5dc238ebe4f 100644
+--- a/include/linux/backing-dev-defs.h
++++ b/include/linux/backing-dev-defs.h
+@@ -154,6 +154,7 @@ struct bdi_writeback {
+ struct cgroup_subsys_state *blkcg_css; /* and blkcg */
+ struct list_head memcg_node; /* anchored at memcg->cgwb_list */
+ struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */
++ struct list_head b_attached; /* attached inodes, protected by list_lock */
+
+ union {
+ struct work_struct release_work;
+diff --git a/mm/backing-dev.c b/mm/backing-dev.c
+index ca770a783a9f..1c1b44fcaf7d 100644
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -397,6 +397,7 @@ static void cgwb_release_workfn(struct work_struct *work)
+ fprop_local_destroy_percpu(&wb->memcg_completions);
+ percpu_ref_exit(&wb->refcnt);
+ wb_exit(wb);
++ WARN_ON_ONCE(!list_empty(&wb->b_attached));
+ kfree_rcu(wb, rcu);
+ }
+
+@@ -473,6 +474,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
+
+ wb->memcg_css = memcg_css;
+ wb->blkcg_css = blkcg_css;
++ INIT_LIST_HEAD(&wb->b_attached);
+ INIT_WORK(&wb->release_work, cgwb_release_workfn);
+ set_bit(WB_registered, &wb->state);
+
+--
+2.35.1
+
--- /dev/null
+From 4854300901e994f74b1801192c89cbab97d1a9da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Sep 2020 14:26:13 +0200
+Subject: writeback: don't warn on an unregistered BDI in __mark_inode_dirty
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit f7387170339afb473a0d95b7732f904346f9795e ]
+
+BDIs get unregistered during device removal, and this WARN can be
+trivially triggered by hot-removing a NVMe device while running fsx
+It is otherwise harmless as we still hold a BDI reference, and the
+writeback has been shut down already.
+
+Link: https://lore.kernel.org/r/20200928122613.434820-1-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 46c15dd2405c..2011199476ea 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -2307,10 +2307,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+
+ wb = locked_inode_to_wb_and_lock_list(inode);
+
+- WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) &&
+- !test_bit(WB_registered, &wb->state),
+- "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
+-
+ inode->dirtied_when = jiffies;
+ if (dirtytime)
+ inode->dirtied_time_when = jiffies;
+--
+2.35.1
+
--- /dev/null
+From a6d977e8d36035d60883d72a9a881a77520bfc03 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 May 2022 08:05:40 -0700
+Subject: writeback: Fix inode->i_io_list not be protected by inode->i_lock
+ error
+
+From: Jchao Sun <sunjunchao2870@gmail.com>
+
+[ Upstream commit 10e14073107dd0b6d97d9516a02845a8e501c2c9 ]
+
+Commit b35250c0816c ("writeback: Protect inode->i_io_list with
+inode->i_lock") made inode->i_io_list not only protected by
+wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
+was missed. Add lock there and also update comment describing
+things protected by inode->i_lock. This also fixes a race where
+__mark_inode_dirty() could move inode under flush worker's hands
+and thus sync(2) could miss writing some inodes.
+
+Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock")
+Link: https://lore.kernel.org/r/20220524150540.12552-1-sunjunchao2870@gmail.com
+CC: stable@vger.kernel.org
+Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 37 ++++++++++++++++++++++++++++---------
+ fs/inode.c | 2 +-
+ 2 files changed, 29 insertions(+), 10 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 645e3f6ffe44..4c667662a4d9 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -120,6 +120,7 @@ static bool inode_io_list_move_locked(struct inode *inode,
+ struct list_head *head)
+ {
+ assert_spin_locked(&wb->list_lock);
++ assert_spin_locked(&inode->i_lock);
+
+ list_move(&inode->i_io_list, head);
+
+@@ -1282,9 +1283,9 @@ static int move_expired_inodes(struct list_head *delaying_queue,
+ inode = wb_inode(delaying_queue->prev);
+ if (inode_dirtied_after(inode, dirtied_before))
+ break;
++ spin_lock(&inode->i_lock);
+ list_move(&inode->i_io_list, &tmp);
+ moved++;
+- spin_lock(&inode->i_lock);
+ inode->i_state |= I_SYNC_QUEUED;
+ spin_unlock(&inode->i_lock);
+ if (sb_is_blkdev_sb(inode->i_sb))
+@@ -1300,7 +1301,12 @@ static int move_expired_inodes(struct list_head *delaying_queue,
+ goto out;
+ }
+
+- /* Move inodes from one superblock together */
++ /*
++ * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue',
++ * we don't take inode->i_lock here because it is just a pointless overhead.
++ * Inode is already marked as I_SYNC_QUEUED so writeback list handling is
++ * fully under our control.
++ */
+ while (!list_empty(&tmp)) {
+ sb = wb_inode(tmp.prev)->i_sb;
+ list_for_each_prev_safe(pos, node, &tmp) {
+@@ -1726,8 +1732,8 @@ static long writeback_sb_inodes(struct super_block *sb,
+ * We'll have another go at writing back this inode
+ * when we completed a full scan of b_io.
+ */
+- spin_unlock(&inode->i_lock);
+ requeue_io(inode, wb);
++ spin_unlock(&inode->i_lock);
+ trace_writeback_sb_inodes_requeue(inode);
+ continue;
+ }
+@@ -2265,6 +2271,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ {
+ struct super_block *sb = inode->i_sb;
+ int dirtytime = 0;
++ struct bdi_writeback *wb = NULL;
+
+ trace_writeback_mark_inode_dirty(inode, flags);
+
+@@ -2316,6 +2323,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ inode->i_state &= ~I_DIRTY_TIME;
+ inode->i_state |= flags;
+
++ /*
++ * Grab inode's wb early because it requires dropping i_lock and we
++ * need to make sure following checks happen atomically with dirty
++ * list handling so that we don't move inodes under flush worker's
++ * hands.
++ */
++ if (!was_dirty) {
++ wb = locked_inode_to_wb_and_lock_list(inode);
++ spin_lock(&inode->i_lock);
++ }
++
+ /*
+ * If the inode is queued for writeback by flush worker, just
+ * update its dirty state. Once the flush worker is done with
+@@ -2323,7 +2341,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ * list, based upon its state.
+ */
+ if (inode->i_state & I_SYNC_QUEUED)
+- goto out_unlock_inode;
++ goto out_unlock;
+
+ /*
+ * Only add valid (hashed) inodes to the superblock's
+@@ -2331,22 +2349,19 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ */
+ if (!S_ISBLK(inode->i_mode)) {
+ if (inode_unhashed(inode))
+- goto out_unlock_inode;
++ goto out_unlock;
+ }
+ if (inode->i_state & I_FREEING)
+- goto out_unlock_inode;
++ goto out_unlock;
+
+ /*
+ * If the inode was already on b_dirty/b_io/b_more_io, don't
+ * reposition it (that would break b_dirty time-ordering).
+ */
+ if (!was_dirty) {
+- struct bdi_writeback *wb;
+ struct list_head *dirty_list;
+ bool wakeup_bdi = false;
+
+- wb = locked_inode_to_wb_and_lock_list(inode);
+-
+ inode->dirtied_when = jiffies;
+ if (dirtytime)
+ inode->dirtied_time_when = jiffies;
+@@ -2360,6 +2375,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ dirty_list);
+
+ spin_unlock(&wb->list_lock);
++ spin_unlock(&inode->i_lock);
+ trace_writeback_dirty_inode_enqueue(inode);
+
+ /*
+@@ -2374,6 +2390,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ return;
+ }
+ }
++out_unlock:
++ if (wb)
++ spin_unlock(&wb->list_lock);
+ out_unlock_inode:
+ spin_unlock(&inode->i_lock);
+ }
+diff --git a/fs/inode.c b/fs/inode.c
+index 9f49e0bdc2f7..51726f2ad994 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -28,7 +28,7 @@
+ * Inode locking rules:
+ *
+ * inode->i_lock protects:
+- * inode->i_state, inode->i_hash, __iget()
++ * inode->i_state, inode->i_hash, __iget(), inode->i_io_list
+ * Inode LRU list locks protect:
+ * inode->i_sb->s_inode_lru, inode->i_lru
+ * inode->i_sb->s_inode_list_lock protects:
+--
+2.35.1
+
--- /dev/null
+From d6435ea3208226d59ffeae497311f5ad7759d1d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jul 2021 09:18:04 +0200
+Subject: xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 30dcc56bba911db561c35d4131baf983a41023f8 ]
+
+XENFEAT_gnttab_map_avail_bits is always set in Xen 4.0 and newer.
+Remove coding assuming it might be zero.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Link: https://lore.kernel.org/r/20210730071804.4302-4-jgross@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: 5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/gntdev.c | 36 ++----------------------------------
+ 1 file changed, 2 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
+index 54fee4087bf1..5dd9d1ac755e 100644
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -289,20 +289,13 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+ {
+ struct gntdev_grant_map *map = data;
+ unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+- int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
++ int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
++ (1 << _GNTMAP_guest_avail0);
+ u64 pte_maddr;
+
+ BUG_ON(pgnr >= map->count);
+ pte_maddr = arbitrary_virt_to_machine(pte).maddr;
+
+- /*
+- * Set the PTE as special to force get_user_pages_fast() fall
+- * back to the slow path. If this is not supported as part of
+- * the grant map, it will be done afterwards.
+- */
+- if (xen_feature(XENFEAT_gnttab_map_avail_bits))
+- flags |= (1 << _GNTMAP_guest_avail0);
+-
+ gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
+ map->grants[pgnr].ref,
+ map->grants[pgnr].domid);
+@@ -311,14 +304,6 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+ return 0;
+ }
+
+-#ifdef CONFIG_X86
+-static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
+-{
+- set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
+- return 0;
+-}
+-#endif
+-
+ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
+ {
+ size_t alloced = 0;
+@@ -1102,23 +1087,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ err = vm_map_pages_zero(vma, map->pages, map->count);
+ if (err)
+ goto out_put_map;
+- } else {
+-#ifdef CONFIG_X86
+- /*
+- * If the PTEs were not made special by the grant map
+- * hypercall, do so here.
+- *
+- * This is racy since the mapping is already visible
+- * to userspace but userspace should be well-behaved
+- * enough to not touch it until the mmap() call
+- * returns.
+- */
+- if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) {
+- apply_to_page_range(vma->vm_mm, vma->vm_start,
+- vma->vm_end - vma->vm_start,
+- set_grant_ptes_as_special, NULL);
+- }
+-#endif
+ }
+
+ return 0;
+--
+2.35.1
+
--- /dev/null
+From e4fb93409b5f43ec8c06d049b1901a2e4b1f2870 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 2 Oct 2022 18:20:06 -0400
+Subject: xen/gntdev: Accommodate VMA splitting
+
+From: M. Vefa Bicakci <m.v.b@runbox.com>
+
+[ Upstream commit 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c ]
+
+Prior to this commit, the gntdev driver code did not handle the
+following scenario correctly with paravirtualized (PV) Xen domains:
+
+* User process sets up a gntdev mapping composed of two grant mappings
+ (i.e., two pages shared by another Xen domain).
+* User process munmap()s one of the pages.
+* User process munmap()s the remaining page.
+* User process exits.
+
+In the scenario above, the user process would cause the kernel to log
+the following messages in dmesg for the first munmap(), and the second
+munmap() call would result in similar log messages:
+
+ BUG: Bad page map in process doublemap.test pte:... pmd:...
+ page:0000000057c97bff refcount:1 mapcount:-1 \
+ mapping:0000000000000000 index:0x0 pfn:...
+ ...
+ page dumped because: bad pte
+ ...
+ file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0
+ ...
+ Call Trace:
+ <TASK>
+ dump_stack_lvl+0x46/0x5e
+ print_bad_pte.cold+0x66/0xb6
+ unmap_page_range+0x7e5/0xdc0
+ unmap_vmas+0x78/0xf0
+ unmap_region+0xa8/0x110
+ __do_munmap+0x1ea/0x4e0
+ __vm_munmap+0x75/0x120
+ __x64_sys_munmap+0x28/0x40
+ do_syscall_64+0x38/0x90
+ entry_SYSCALL_64_after_hwframe+0x61/0xcb
+ ...
+
+For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG)
+would print out the following and trigger a general protection fault in
+the affected Xen PV domain:
+
+ (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
+ (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
+
+As of this writing, gntdev_grant_map structure's vma field (referred to
+as map->vma below) is mainly used for checking the start and end
+addresses of mappings. However, with split VMAs, these may change, and
+there could be more than one VMA associated with a gntdev mapping.
+Hence, remove the use of map->vma and rely on map->pages_vm_start for
+the original start address and on (map->count << PAGE_SHIFT) for the
+original mapping size. Let the invalidate() and find_special_page()
+hooks use these.
+
+Also, given that there can be multiple VMAs associated with a gntdev
+mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to
+the end of gntdev_put_map, so that the MMU notifier is only removed
+after the closing of the last remaining VMA.
+
+Finally, use an atomic to prevent inadvertent gntdev mapping re-use,
+instead of using the map->live_grants atomic counter and/or the map->vma
+pointer (the latter of which is now removed). This prevents the
+userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the
+same address range as a previously set up gntdev mapping. This scenario
+can be summarized with the following call-trace, which was valid prior
+to this commit:
+
+ mmap
+ gntdev_mmap
+ mmap (repeat mmap with MAP_FIXED over the same address range)
+ gntdev_invalidate
+ unmap_grant_pages (sets 'being_removed' entries to true)
+ gnttab_unmap_refs_async
+ unmap_single_vma
+ gntdev_mmap (maps the shared pages again)
+ munmap
+ gntdev_invalidate
+ unmap_grant_pages
+ (no-op because 'being_removed' entries are true)
+ unmap_single_vma (For PV domains, Xen reports that a granted page
+ is being unmapped and triggers a general protection fault in the
+ affected domain, if Xen was built with CONFIG_DEBUG)
+
+The fix for this last scenario could be worth its own commit, but we
+opted for a single commit, because removing the gntdev_grant_map
+structure's vma field requires guarding the entry to gntdev_mmap(), and
+the live_grants atomic counter is not sufficient on its own to prevent
+the mmap() over a pre-existing mapping.
+
+Link: https://github.com/QubesOS/qubes-issues/issues/7631
+Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages")
+Cc: stable@vger.kernel.org
+Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/gntdev-common.h | 3 +-
+ drivers/xen/gntdev.c | 58 ++++++++++++++++---------------------
+ 2 files changed, 27 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
+index 40ef379c28ab..9c286b2a1900 100644
+--- a/drivers/xen/gntdev-common.h
++++ b/drivers/xen/gntdev-common.h
+@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
+ };
+
+ struct gntdev_grant_map {
++ atomic_t in_use;
+ struct mmu_interval_notifier notifier;
++ bool notifier_init;
+ struct list_head next;
+- struct vm_area_struct *vma;
+ int index;
+ int count;
+ int flags;
+diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
+index 5dd9d1ac755e..ff195b571763 100644
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -276,6 +276,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
+ */
+ }
+
++ if (use_ptemod && map->notifier_init)
++ mmu_interval_notifier_remove(&map->notifier);
++
+ if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+ notify_remote_via_evtchn(map->notify.event);
+ evtchn_put(map->notify.event);
+@@ -288,7 +291,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
+ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+ {
+ struct gntdev_grant_map *map = data;
+- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
++ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
+ int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
+ (1 << _GNTMAP_guest_avail0);
+ u64 pte_maddr;
+@@ -478,11 +481,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
+ struct gntdev_priv *priv = file->private_data;
+
+ pr_debug("gntdev_vma_close %p\n", vma);
+- if (use_ptemod) {
+- WARN_ON(map->vma != vma);
+- mmu_interval_notifier_remove(&map->notifier);
+- map->vma = NULL;
+- }
++
+ vma->vm_private_data = NULL;
+ gntdev_put_map(priv, map);
+ }
+@@ -510,29 +509,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
+ struct gntdev_grant_map *map =
+ container_of(mn, struct gntdev_grant_map, notifier);
+ unsigned long mstart, mend;
++ unsigned long map_start, map_end;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
++ map_start = map->pages_vm_start;
++ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
++
+ /*
+ * If the VMA is split or otherwise changed the notifier is not
+ * updated, but we don't want to process VA's outside the modified
+ * VMA. FIXME: It would be much more understandable to just prevent
+ * modifying the VMA in the first place.
+ */
+- if (map->vma->vm_start >= range->end ||
+- map->vma->vm_end <= range->start)
++ if (map_start >= range->end || map_end <= range->start)
+ return true;
+
+- mstart = max(range->start, map->vma->vm_start);
+- mend = min(range->end, map->vma->vm_end);
++ mstart = max(range->start, map_start);
++ mend = min(range->end, map_end);
+ pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
+- map->index, map->count,
+- map->vma->vm_start, map->vma->vm_end,
+- range->start, range->end, mstart, mend);
+- unmap_grant_pages(map,
+- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
+- (mend - mstart) >> PAGE_SHIFT);
++ map->index, map->count, map_start, map_end,
++ range->start, range->end, mstart, mend);
++ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
++ (mend - mstart) >> PAGE_SHIFT);
+
+ return true;
+ }
+@@ -1012,18 +1012,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ return -EINVAL;
+
+ pr_debug("map %d+%d at %lx (pgoff %lx)\n",
+- index, count, vma->vm_start, vma->vm_pgoff);
++ index, count, vma->vm_start, vma->vm_pgoff);
+
+ mutex_lock(&priv->lock);
+ map = gntdev_find_map_index(priv, index, count);
+ if (!map)
+ goto unlock_out;
+- if (use_ptemod && map->vma)
+- goto unlock_out;
+- if (atomic_read(&map->live_grants)) {
+- err = -EAGAIN;
++ if (!atomic_add_unless(&map->in_use, 1, 1))
+ goto unlock_out;
+- }
++
+ refcount_inc(&map->users);
+
+ vma->vm_ops = &gntdev_vmops;
+@@ -1044,15 +1041,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ map->flags |= GNTMAP_readonly;
+ }
+
++ map->pages_vm_start = vma->vm_start;
++
+ if (use_ptemod) {
+- map->vma = vma;
+ err = mmu_interval_notifier_insert_locked(
+ &map->notifier, vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
+- if (err) {
+- map->vma = NULL;
++ if (err)
+ goto out_unlock_put;
+- }
++
++ map->notifier_init = true;
+ }
+ mutex_unlock(&priv->lock);
+
+@@ -1069,7 +1067,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ */
+ mmu_interval_read_begin(&map->notifier);
+
+- map->pages_vm_start = vma->vm_start;
+ err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ find_grant_ptes, map);
+@@ -1098,13 +1095,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ out_unlock_put:
+ mutex_unlock(&priv->lock);
+ out_put_map:
+- if (use_ptemod) {
++ if (use_ptemod)
+ unmap_grant_pages(map, 0, map->count);
+- if (map->vma) {
+- mmu_interval_notifier_remove(&map->notifier);
+- map->vma = NULL;
+- }
+- }
+ gntdev_put_map(priv, map);
+ return err;
+ }
+--
+2.35.1
+