From: Sasha Levin Date: Thu, 27 Oct 2022 01:36:32 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v5.10.151~39 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fb48365849e818722645ee57e6230b52674377f8;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/acpi-extlog-handle-multiple-records.patch b/queue-5.10/acpi-extlog-handle-multiple-records.patch new file mode 100644 index 00000000000..598d5a0c075 --- /dev/null +++ b/queue-5.10/acpi-extlog-handle-multiple-records.patch @@ -0,0 +1,93 @@ +From b3a9cae09836af27192786123314c3eb72ec592f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Oct 2022 13:34:23 -0700 +Subject: ACPI: extlog: Handle multiple records + +From: Tony Luck + +[ Upstream commit f6ec01da40e4139b41179f046044ee7c4f6370dc ] + +If there is no user space consumer of extlog_mem trace records, then +Linux properly handles multiple error records in an ELOG block + + extlog_print() + print_extlog_rcd() + __print_extlog_rcd() + cper_estatus_print() + apei_estatus_for_each_section() + +But the other code path hard codes looking for a single record to +output a trace record. + +Fix by using the same apei_estatus_for_each_section() iterator +to step over all records. + +Fixes: 2dfb7d51a61d ("trace, RAS: Add eMCA trace event interface") +Signed-off-by: Tony Luck +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/acpi_extlog.c | 33 ++++++++++++++++++++------------- + 1 file changed, 20 insertions(+), 13 deletions(-) + +diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c +index 72f1fb77abcd..e648158368a7 100644 +--- a/drivers/acpi/acpi_extlog.c ++++ b/drivers/acpi/acpi_extlog.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -138,8 +139,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, + int cpu = mce->extcpu; + struct acpi_hest_generic_status *estatus, *tmp; + struct acpi_hest_generic_data *gdata; +- const guid_t *fru_id = &guid_null; +- char *fru_text = ""; ++ const guid_t *fru_id; ++ char *fru_text; + guid_t *sec_type; + static u32 err_seq; + +@@ -160,17 +161,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, + + /* log event via trace */ + err_seq++; +- gdata = (struct acpi_hest_generic_data *)(tmp + 1); +- if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) +- fru_id = (guid_t *)gdata->fru_id; +- if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) +- fru_text = gdata->fru_text; +- sec_type = (guid_t *)gdata->section_type; +- if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { +- struct cper_sec_mem_err *mem = (void *)(gdata + 1); +- if (gdata->error_data_length >= sizeof(*mem)) +- trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, +- (u8)gdata->error_severity); ++ apei_estatus_for_each_section(tmp, gdata) { ++ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) ++ fru_id = (guid_t *)gdata->fru_id; ++ else ++ fru_id = &guid_null; ++ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) ++ fru_text = gdata->fru_text; ++ else ++ fru_text = ""; ++ sec_type = (guid_t *)gdata->section_type; ++ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { ++ struct cper_sec_mem_err *mem = (void *)(gdata + 1); ++ ++ if (gdata->error_data_length >= sizeof(*mem)) ++ trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, 
++ (u8)gdata->error_severity); ++ } + } + + out: +-- +2.35.1 + diff --git a/queue-5.10/alsa-hda-realtek-fix-speakers-and-micmute-on-hp-855-.patch b/queue-5.10/alsa-hda-realtek-fix-speakers-and-micmute-on-hp-855-.patch new file mode 100644 index 00000000000..c3565b59586 --- /dev/null +++ b/queue-5.10/alsa-hda-realtek-fix-speakers-and-micmute-on-hp-855-.patch @@ -0,0 +1,72 @@ +From ee4183a3c57cf5703185c99b9f359b40e74b09ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jan 2022 19:50:50 +0300 +Subject: ALSA: hda/realtek: fix speakers and micmute on HP 855 G8 + +From: Alexander Sergeyev + +[ Upstream commit 91502a9a0b0d5252cf3f32ebd898823c2f5aadab ] + +There are several PCI ids associated with HP EliteBook 855 G8 Notebook +PC. Commit 0e68c4b11f1e6 ("ALSA: hda/realtek: fix mute/micmute LEDs for +HP 855 G8") covers 0x103c:0x8896, while this commit covers 0x103c:0x8895 +which needs some additional work on top of the quirk from 0e68c4b11f1e6. + +Note that the device can boot up with working speakers and micmute LED +without this patch, but the success rate would be quite low (order of +16 working boots across 709 boots) at least for the built-in drivers +scenario. This also means that there are some timing issues during early +boot and this patch is a workaround. + +With this patch applied speakers and headphones are consistently working, +as well as mute/micmute LEDs and the internal microphone. 
+ +Signed-off-by: Alexander Sergeyev +Link: https://lore.kernel.org/r/20220114165050.ouw2nknuspclynro@localhost.localdomain +Signed-off-by: Takashi Iwai +Stable-dep-of: 225f6e1bc151 ("ALSA: hda/realtek: Add quirk for HP Zbook Firefly 14 G9 model") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 60e3bc124836..5dd786321a18 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -6883,6 +6883,7 @@ enum { + ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, + ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, + ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, ++ ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED, + }; + + /* A special fixup for Lenovo C940 and Yoga Duet 7; +@@ -8693,6 +8694,16 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, ++ [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = { ++ .type = HDA_FIXUP_VERBS, ++ .v.verbs = (const struct hda_verb[]) { ++ { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 }, ++ { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 }, ++ { } ++ }, ++ .chained = true, ++ .chain_id = ALC285_FIXUP_HP_MUTE_LED, ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8915,6 +8926,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED), ++ SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", 
ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), +-- +2.35.1 + diff --git a/queue-5.10/btrfs-fix-processing-of-delayed-data-refs-during-bac.patch b/queue-5.10/btrfs-fix-processing-of-delayed-data-refs-during-bac.patch new file mode 100644 index 00000000000..dd96a04fb53 --- /dev/null +++ b/queue-5.10/btrfs-fix-processing-of-delayed-data-refs-during-bac.patch @@ -0,0 +1,249 @@ +From 991194e6de9c7646460c3e847e3bada2f5a08b5b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Oct 2022 13:16:51 +0100 +Subject: btrfs: fix processing of delayed data refs during backref walking + +From: Filipe Manana + +[ Upstream commit 4fc7b57228243d09c0d878873bf24fa64a90fa01 ] + +When processing delayed data references during backref walking and we are +using a share context (we are being called through fiemap), whenever we +find a delayed data reference for an inode different from the one we are +interested in, then we immediately exit and consider the data extent as +shared. This is wrong, because: + +1) This might be a DROP reference that will cancel out a reference in the + extent tree; + +2) Even if it's an ADD reference, it may be followed by a DROP reference + that cancels it out. + +In either case we should not exit immediately. + +Fix this by never exiting when we find a delayed data reference for +another inode - instead add the reference and if it does not cancel out +other delayed reference, we will exit early when we call +extent_is_shared() after processing all delayed references. If we find +a drop reference, then signal the code that processes references from +the extent tree (add_inline_refs() and add_keyed_refs()) to not exit +immediately if it finds there a reference for another inode, since we +have delayed drop references that may cancel it out. In this latter case +we exit once we don't have references in the rb trees that cancel out +each other and have two references for different inodes. 
+ +Example reproducer for case 1): + + $ cat test-1.sh + #!/bin/bash + + DEV=/dev/sdj + MNT=/mnt/sdj + + mkfs.btrfs -f $DEV + mount $DEV $MNT + + xfs_io -f -c "pwrite 0 64K" $MNT/foo + cp --reflink=always $MNT/foo $MNT/bar + + echo + echo "fiemap after cloning:" + xfs_io -c "fiemap -v" $MNT/foo + + rm -f $MNT/bar + echo + echo "fiemap after removing file bar:" + xfs_io -c "fiemap -v" $MNT/foo + + umount $MNT + +Running it before this patch, the extent is still listed as shared, it has +the flag 0x2000 (FIEMAP_EXTENT_SHARED) set: + + $ ./test-1.sh + fiemap after cloning: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x2001 + + fiemap after removing file bar: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x2001 + +Example reproducer for case 2): + + $ cat test-2.sh + #!/bin/bash + + DEV=/dev/sdj + MNT=/mnt/sdj + + mkfs.btrfs -f $DEV + mount $DEV $MNT + + xfs_io -f -c "pwrite 0 64K" $MNT/foo + cp --reflink=always $MNT/foo $MNT/bar + + # Flush delayed references to the extent tree and commit current + # transaction. 
+ sync + + echo + echo "fiemap after cloning:" + xfs_io -c "fiemap -v" $MNT/foo + + rm -f $MNT/bar + echo + echo "fiemap after removing file bar:" + xfs_io -c "fiemap -v" $MNT/foo + + umount $MNT + +Running it before this patch, the extent is still listed as shared, it has +the flag 0x2000 (FIEMAP_EXTENT_SHARED) set: + + $ ./test-2.sh + fiemap after cloning: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x2001 + + fiemap after removing file bar: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x2001 + +After this patch, after deleting bar in both tests, the extent is not +reported with the 0x2000 flag anymore, it gets only the flag 0x1 +(which is FIEMAP_EXTENT_LAST): + + $ ./test-1.sh + fiemap after cloning: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x2001 + + fiemap after removing file bar: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x1 + + $ ./test-2.sh + fiemap after cloning: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x2001 + + fiemap after removing file bar: + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..127]: 26624..26751 128 0x1 + +These tests will later be converted to a test case for fstests. 
+ +Fixes: dc046b10c8b7d4 ("Btrfs: make fiemap not blow when you have lots of snapshots") +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/backref.c | 33 ++++++++++++++++++++++++--------- + 1 file changed, 24 insertions(+), 9 deletions(-) + +diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c +index baff31a147e7..7e8fac12f3f8 100644 +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -137,6 +137,7 @@ struct share_check { + u64 root_objectid; + u64 inum; + int share_count; ++ bool have_delayed_delete_refs; + }; + + static inline int extent_is_shared(struct share_check *sc) +@@ -881,13 +882,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, + key.offset = ref->offset; + + /* +- * Found a inum that doesn't match our known inum, we +- * know it's shared. ++ * If we have a share check context and a reference for ++ * another inode, we can't exit immediately. This is ++ * because even if this is a BTRFS_ADD_DELAYED_REF ++ * reference we may find next a BTRFS_DROP_DELAYED_REF ++ * which cancels out this ADD reference. ++ * ++ * If this is a DROP reference and there was no previous ++ * ADD reference, then we need to signal that when we ++ * process references from the extent tree (through ++ * add_inline_refs() and add_keyed_refs()), we should ++ * not exit early if we find a reference for another ++ * inode, because one of the delayed DROP references ++ * may cancel that reference in the extent tree. 
+ */ +- if (sc && sc->inum && ref->objectid != sc->inum) { +- ret = BACKREF_FOUND_SHARED; +- goto out; +- } ++ if (sc && count < 0) ++ sc->have_delayed_delete_refs = true; + + ret = add_indirect_ref(fs_info, preftrees, ref->root, + &key, 0, node->bytenr, count, sc, +@@ -917,7 +927,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, + } + if (!ret) + ret = extent_is_shared(sc); +-out: ++ + spin_unlock(&head->lock); + return ret; + } +@@ -1020,7 +1030,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = btrfs_extent_data_ref_offset(leaf, dref); + +- if (sc && sc->inum && key.objectid != sc->inum) { ++ if (sc && sc->inum && key.objectid != sc->inum && ++ !sc->have_delayed_delete_refs) { + ret = BACKREF_FOUND_SHARED; + break; + } +@@ -1030,6 +1041,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, + ret = add_indirect_ref(fs_info, preftrees, root, + &key, 0, bytenr, count, + sc, GFP_NOFS); ++ + break; + } + default: +@@ -1119,7 +1131,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info, + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = btrfs_extent_data_ref_offset(leaf, dref); + +- if (sc && sc->inum && key.objectid != sc->inum) { ++ if (sc && sc->inum && key.objectid != sc->inum && ++ !sc->have_delayed_delete_refs) { + ret = BACKREF_FOUND_SHARED; + break; + } +@@ -1542,6 +1555,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, + .root_objectid = root->root_key.objectid, + .inum = inum, + .share_count = 0, ++ .have_delayed_delete_refs = false, + }; + + ulist_init(roots); +@@ -1576,6 +1590,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, + break; + bytenr = node->val; + shared.share_count = 0; ++ shared.have_delayed_delete_refs = false; + cond_resched(); + } + +-- +2.35.1 + diff --git a/queue-5.10/btrfs-fix-processing-of-delayed-tree-block-refs-duri.patch 
b/queue-5.10/btrfs-fix-processing-of-delayed-tree-block-refs-duri.patch new file mode 100644 index 00000000000..912cd5a1084 --- /dev/null +++ b/queue-5.10/btrfs-fix-processing-of-delayed-tree-block-refs-duri.patch @@ -0,0 +1,204 @@ +From a463ffcdc7131fe14012064f23ce2c0495108b07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Oct 2022 13:16:52 +0100 +Subject: btrfs: fix processing of delayed tree block refs during backref + walking + +From: Filipe Manana + +[ Upstream commit 943553ef9b51db303ab2b955c1025261abfdf6fb ] + +During backref walking, when processing a delayed reference with a type of +BTRFS_TREE_BLOCK_REF_KEY, we have two bugs there: + +1) We are accessing the delayed references extent_op, and its key, without + the protection of the delayed ref head's lock; + +2) If there's no extent op for the delayed ref head, we end up with an + uninitialized key in the stack, variable 'tmp_op_key', and then pass + it to add_indirect_ref(), which adds the reference to the indirect + refs rb tree. + + This is wrong, because indirect references should have a NULL key + when we don't have access to the key, and in that case they should be + added to the indirect_missing_keys rb tree and not to the indirect rb + tree. + + This means that if we have a BTRFS_TREE_BLOCK_REF_KEY delayed ref resulting + from freeing an extent buffer, therefore with a count of -1, it will + not cancel out the corresponding reference we have in the extent tree + (with a count of 1), since both references end up in different rb + trees. + + When using fiemap, where we often need to check if extents are shared + through shared subtrees resulting from snapshots, it means we can + incorrectly report an extent as shared when it's no longer shared. + However this is temporary because after the transaction is committed + the extent is no longer reported as shared, as running the delayed + reference results in deleting the tree block reference from the extent + tree. 
+ + Outside the fiemap context, the result is unpredictable, as the key was + not initialized but it's used when navigating the rb trees to insert + and search for references (prelim_ref_compare()), and we expect all + references in the indirect rb tree to have valid keys. + +The following reproducer triggers the second bug: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdj + MNT=/mnt/sdj + + mkfs.btrfs -f $DEV + mount -o compress $DEV $MNT + + # With a compressed 128M file we get a tree height of 2 (level 1 root). + xfs_io -f -c "pwrite -b 1M 0 128M" $MNT/foo + + btrfs subvolume snapshot $MNT $MNT/snap + + # Fiemap should output 0x2008 in the flags column. + # 0x2000 means shared extent + # 0x8 means encoded extent (because it's compressed) + echo + echo "fiemap after snapshot, range [120M, 120M + 128K):" + xfs_io -c "fiemap -v 120M 128K" $MNT/foo + echo + + # Overwrite one extent and fsync to flush delalloc and COW a new path + # in the snapshot's tree. + # + # After this we have a BTRFS_DROP_DELAYED_REF delayed ref of type + # BTRFS_TREE_BLOCK_REF_KEY with a count of -1 for every COWed extent + # buffer in the path. + # + # In the extent tree we have inline references of type + # BTRFS_TREE_BLOCK_REF_KEY, with a count of 1, for the same extent + # buffers, so they should cancel each other, and the extent buffers in + # the fs tree should no longer be considered as shared. + # + echo "Overwriting file range [120M, 120M + 128K)..." + xfs_io -c "pwrite -b 128K 120M 128K" $MNT/snap/foo + xfs_io -c "fsync" $MNT/snap/foo + + # Fiemap should output 0x8 in the flags column. The extent in the range + # [120M, 120M + 128K) is no longer shared, it's now exclusive to the fs + # tree. + echo + echo "fiemap after overwrite range [120M, 120M + 128K):" + xfs_io -c "fiemap -v 120M 128K" $MNT/foo + echo + + umount $MNT + +Running it before this patch: + + $ ./test.sh + (...) 
+ wrote 134217728/134217728 bytes at offset 0 + 128 MiB, 128 ops; 0.1152 sec (1.085 GiB/sec and 1110.5809 ops/sec) + Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap' + + fiemap after snapshot, range [120M, 120M + 128K): + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [245760..246015]: 34304..34559 256 0x2008 + + Overwriting file range [120M, 120M + 128K)... + wrote 131072/131072 bytes at offset 125829120 + 128 KiB, 1 ops; 0.0001 sec (683.060 MiB/sec and 5464.4809 ops/sec) + + fiemap after overwrite range [120M, 120M + 128K): + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [245760..246015]: 34304..34559 256 0x2008 + +The extent in the range [120M, 120M + 128K) is still reported as shared +(0x2000 bit set) after overwriting that range and flushing delalloc, which +is not correct - an entire path was COWed in the snapshot's tree and the +extent is now only referenced by the original fs tree. + +Running it after this patch: + + $ ./test.sh + (...) + wrote 134217728/134217728 bytes at offset 0 + 128 MiB, 128 ops; 0.1198 sec (1.043 GiB/sec and 1068.2067 ops/sec) + Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap' + + fiemap after snapshot, range [120M, 120M + 128K): + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [245760..246015]: 34304..34559 256 0x2008 + + Overwriting file range [120M, 120M + 128K)... + wrote 131072/131072 bytes at offset 125829120 + 128 KiB, 1 ops; 0.0001 sec (694.444 MiB/sec and 5555.5556 ops/sec) + + fiemap after overwrite range [120M, 120M + 128K): + /mnt/sdj/foo: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [245760..246015]: 34304..34559 256 0x8 + +Now the extent is not reported as shared anymore. + +So fix this by passing a NULL key pointer to add_indirect_ref() when +processing a delayed reference for a tree block if there's no extent op +for our delayed ref head with a defined key. Also access the extent op +only after locking the delayed ref head's lock. 
+ +The reproducer will be converted later to a test case for fstests. + +Fixes: 86d5f994425252 ("btrfs: convert prelimary reference tracking to use rbtrees") +Fixes: a6dbceafb915e8 ("btrfs: Remove unused op_key var from add_delayed_refs") +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/backref.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c +index 7e8fac12f3f8..92cb16c0e5ee 100644 +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -818,16 +818,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, + struct preftrees *preftrees, struct share_check *sc) + { + struct btrfs_delayed_ref_node *node; +- struct btrfs_delayed_extent_op *extent_op = head->extent_op; + struct btrfs_key key; +- struct btrfs_key tmp_op_key; + struct rb_node *n; + int count; + int ret = 0; + +- if (extent_op && extent_op->update_key) +- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); +- + spin_lock(&head->lock); + for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { + node = rb_entry(n, struct btrfs_delayed_ref_node, +@@ -853,10 +848,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, + case BTRFS_TREE_BLOCK_REF_KEY: { + /* NORMAL INDIRECT METADATA backref */ + struct btrfs_delayed_tree_ref *ref; ++ struct btrfs_key *key_ptr = NULL; ++ ++ if (head->extent_op && head->extent_op->update_key) { ++ btrfs_disk_key_to_cpu(&key, &head->extent_op->key); ++ key_ptr = &key; ++ } + + ref = btrfs_delayed_node_to_tree_ref(node); + ret = add_indirect_ref(fs_info, preftrees, ref->root, +- &tmp_op_key, ref->level + 1, ++ key_ptr, ref->level + 1, + node->bytenr, count, sc, + GFP_ATOMIC); + break; +-- +2.35.1 + diff --git a/queue-5.10/cifs-fix-xid-leak-in-cifs_copy_file_range.patch b/queue-5.10/cifs-fix-xid-leak-in-cifs_copy_file_range.patch new file mode 100644 index 00000000000..fa69ecc6f4a --- /dev/null +++ 
b/queue-5.10/cifs-fix-xid-leak-in-cifs_copy_file_range.patch @@ -0,0 +1,42 @@ +From fca36c2f98ba5c022fc1395d6617efa11e21eae9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Oct 2022 22:45:22 +0800 +Subject: cifs: Fix xid leak in cifs_copy_file_range() + +From: Zhang Xiaoxu + +[ Upstream commit 9a97df404a402fe1174d2d1119f87ff2a0ca2fe9 ] + +If the file is used by swap, before return -EOPNOTSUPP, should +free the xid, otherwise, the xid will be leaked. + +Fixes: 4e8aea30f775 ("smb3: enable swap on SMB3 mounts") +Reviewed-by: Paulo Alcantara (SUSE) +Signed-off-by: Zhang Xiaoxu +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/cifs/cifsfs.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c +index bc957e6ca48b..f442ef8b65da 100644 +--- a/fs/cifs/cifsfs.c ++++ b/fs/cifs/cifsfs.c +@@ -1221,8 +1221,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, + ssize_t rc; + struct cifsFileInfo *cfile = dst_file->private_data; + +- if (cfile->swapfile) +- return -EOPNOTSUPP; ++ if (cfile->swapfile) { ++ rc = -EOPNOTSUPP; ++ free_xid(xid); ++ return rc; ++ } + + rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, + len, flags); +-- +2.35.1 + diff --git a/queue-5.10/cifs-fix-xid-leak-in-cifs_flock.patch b/queue-5.10/cifs-fix-xid-leak-in-cifs_flock.patch new file mode 100644 index 00000000000..c9247905dc9 --- /dev/null +++ b/queue-5.10/cifs-fix-xid-leak-in-cifs_flock.patch @@ -0,0 +1,56 @@ +From c842c72c0cbcba31bbc69070e6b8f64337019b6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Oct 2022 22:45:23 +0800 +Subject: cifs: Fix xid leak in cifs_flock() + +From: Zhang Xiaoxu + +[ Upstream commit 575e079c782b9862ec2626403922d041a42e6ed6 ] + +If not flock, before return -ENOLCK, should free the xid, +otherwise, the xid will be leaked. 
+ +Fixes: d0677992d2af ("cifs: add support for flock") +Reviewed-by: Paulo Alcantara (SUSE) +Signed-off-by: Zhang Xiaoxu +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/cifs/file.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/fs/cifs/file.c b/fs/cifs/file.c +index a648146e49cf..144064dc0d38 100644 +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -1735,11 +1735,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) + struct cifsFileInfo *cfile; + __u32 type; + +- rc = -EACCES; + xid = get_xid(); + +- if (!(fl->fl_flags & FL_FLOCK)) +- return -ENOLCK; ++ if (!(fl->fl_flags & FL_FLOCK)) { ++ rc = -ENOLCK; ++ free_xid(xid); ++ return rc; ++ } + + cfile = (struct cifsFileInfo *)file->private_data; + tcon = tlink_tcon(cfile->tlink); +@@ -1758,8 +1760,9 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) + * if no lock or unlock then nothing to do since we do not + * know what it is + */ ++ rc = -EOPNOTSUPP; + free_xid(xid); +- return -EOPNOTSUPP; ++ return rc; + } + + rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, +-- +2.35.1 + diff --git a/queue-5.10/cifs-fix-xid-leak-in-cifs_ses_add_channel.patch b/queue-5.10/cifs-fix-xid-leak-in-cifs_ses_add_channel.patch new file mode 100644 index 00000000000..d73094a9994 --- /dev/null +++ b/queue-5.10/cifs-fix-xid-leak-in-cifs_ses_add_channel.patch @@ -0,0 +1,36 @@ +From 2399563eeac3c1dd136518d78e4e08bfced93409 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Oct 2022 22:45:24 +0800 +Subject: cifs: Fix xid leak in cifs_ses_add_channel() + +From: Zhang Xiaoxu + +[ Upstream commit e909d054bdea75ef1ec48c18c5936affdaecbb2c ] + +Before return, should free the xid, otherwise, the +xid will be leaked. 
+ +Fixes: d70e9fa55884 ("cifs: try opening channels after mounting") +Reviewed-by: Paulo Alcantara (SUSE) +Signed-off-by: Zhang Xiaoxu +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/cifs/sess.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c +index d58c5ffeca0d..cf6fd138d8d5 100644 +--- a/fs/cifs/sess.c ++++ b/fs/cifs/sess.c +@@ -306,6 +306,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) + cifs_put_tcp_session(chan->server, 0); + unload_nls(vol.local_nls); + ++ free_xid(xid); + return rc; + } + +-- +2.35.1 + diff --git a/queue-5.10/crypto-qat-reduce-size-of-mapped-region.patch b/queue-5.10/crypto-qat-reduce-size-of-mapped-region.patch new file mode 100644 index 00000000000..2ddd8ee39c3 --- /dev/null +++ b/queue-5.10/crypto-qat-reduce-size-of-mapped-region.patch @@ -0,0 +1,80 @@ +From ac67c1dc42ffdc167ecc29029b496394be4b6c39 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Jan 2021 17:21:59 +0000 +Subject: crypto: qat - reduce size of mapped region + +From: Adam Guerin + +[ Upstream commit e48767c17718067ba21fb2ef461779ec2506f845 ] + +Restrict size of field to what is required by the operation. 
+ +This issue was detected by smatch: + + drivers/crypto/qat/qat_common/qat_asym_algs.c:328 qat_dh_compute_value() error: dma_map_single_attrs() '&qat_req->in.dh.in.b' too small (8 vs 64) + +Signed-off-by: Adam Guerin +Reviewed-by: Giovanni Cabiddu +Signed-off-by: Giovanni Cabiddu +Signed-off-by: Herbert Xu +Stable-dep-of: 9c5f21b198d2 ("Revert "crypto: qat - reduce size of mapped region"") +Signed-off-by: Sasha Levin +--- + drivers/crypto/qat/qat_common/qat_asym_algs.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c +index 846569ec9066..2b1aca487fc3 100644 +--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c ++++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c +@@ -326,13 +326,13 @@ static int qat_dh_compute_value(struct kpp_request *req) + qat_req->out.dh.out_tab[1] = 0; + /* Mapping in.in.b or in.in_g2.xa is the same */ + qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, +- sizeof(struct qat_dh_input_params), ++ sizeof(qat_req->in.dh.in.b), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, +- sizeof(struct qat_dh_output_params), ++ sizeof(qat_req->out.dh.r), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; +@@ -721,13 +721,13 @@ static int qat_rsa_enc(struct akcipher_request *req) + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, +- sizeof(struct qat_rsa_input_params), ++ sizeof(qat_req->in.rsa.enc.m), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, +- sizeof(struct qat_rsa_output_params), ++ sizeof(qat_req->out.rsa.enc.c), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, 
qat_req->phy_out))) + goto unmap_in_params; +@@ -869,13 +869,13 @@ static int qat_rsa_dec(struct akcipher_request *req) + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, +- sizeof(struct qat_rsa_input_params), ++ sizeof(qat_req->in.rsa.dec.c), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, +- sizeof(struct qat_rsa_output_params), ++ sizeof(qat_req->out.rsa.dec.m), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; +-- +2.35.1 + diff --git a/queue-5.10/dmaengine-mxs-dma-remove-the-unused-.id_table.patch b/queue-5.10/dmaengine-mxs-dma-remove-the-unused-.id_table.patch new file mode 100644 index 00000000000..454df88aeb2 --- /dev/null +++ b/queue-5.10/dmaengine-mxs-dma-remove-the-unused-.id_table.patch @@ -0,0 +1,96 @@ +From 900dd401d452f295b4999cdf68d03de7a81dce68 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Nov 2020 16:30:51 -0300 +Subject: dmaengine: mxs-dma: Remove the unused .id_table + +From: Fabio Estevam + +[ Upstream commit cc2afb0d4c7cbba6743ed6d9564f0883cab6bae1 ] + +The mxs-dma driver is only used by DT platforms and the .id_table +is unused. + +Get rid of it to simplify the code. 
+ +Signed-off-by: Fabio Estevam +Link: https://lore.kernel.org/r/20201123193051.17285-1-festevam@gmail.com +Signed-off-by: Vinod Koul +Stable-dep-of: 26696d465716 ("dmaengine: mxs: use platform_driver_register") +Signed-off-by: Sasha Levin +--- + drivers/dma/mxs-dma.c | 37 +++++-------------------------------- + 1 file changed, 5 insertions(+), 32 deletions(-) + +diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c +index 65f816b40c32..994fc4d2aca4 100644 +--- a/drivers/dma/mxs-dma.c ++++ b/drivers/dma/mxs-dma.c +@@ -167,29 +167,11 @@ static struct mxs_dma_type mxs_dma_types[] = { + } + }; + +-static const struct platform_device_id mxs_dma_ids[] = { +- { +- .name = "imx23-dma-apbh", +- .driver_data = (kernel_ulong_t) &mxs_dma_types[0], +- }, { +- .name = "imx23-dma-apbx", +- .driver_data = (kernel_ulong_t) &mxs_dma_types[1], +- }, { +- .name = "imx28-dma-apbh", +- .driver_data = (kernel_ulong_t) &mxs_dma_types[2], +- }, { +- .name = "imx28-dma-apbx", +- .driver_data = (kernel_ulong_t) &mxs_dma_types[3], +- }, { +- /* end of list */ +- } +-}; +- + static const struct of_device_id mxs_dma_dt_ids[] = { +- { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_ids[0], }, +- { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_ids[1], }, +- { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_ids[2], }, +- { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_ids[3], }, ++ { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_types[0], }, ++ { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_types[1], }, ++ { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_types[2], }, ++ { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_types[3], }, + { /* sentinel */ } + }; + MODULE_DEVICE_TABLE(of, mxs_dma_dt_ids); +@@ -762,8 +744,6 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec, + static int __init mxs_dma_probe(struct platform_device *pdev) + { + struct device_node *np = pdev->dev.of_node; +- const struct platform_device_id 
*id_entry; +- const struct of_device_id *of_id; + const struct mxs_dma_type *dma_type; + struct mxs_dma_engine *mxs_dma; + struct resource *iores; +@@ -779,13 +759,7 @@ static int __init mxs_dma_probe(struct platform_device *pdev) + return ret; + } + +- of_id = of_match_device(mxs_dma_dt_ids, &pdev->dev); +- if (of_id) +- id_entry = of_id->data; +- else +- id_entry = platform_get_device_id(pdev); +- +- dma_type = (struct mxs_dma_type *)id_entry->driver_data; ++ dma_type = (struct mxs_dma_type *)of_device_get_match_data(&pdev->dev); + mxs_dma->type = dma_type->type; + mxs_dma->dev_id = dma_type->id; + +@@ -865,7 +839,6 @@ static struct platform_driver mxs_dma_driver = { + .name = "mxs-dma", + .of_match_table = mxs_dma_dt_ids, + }, +- .id_table = mxs_dma_ids, + }; + + static int __init mxs_dma_module_init(void) +-- +2.35.1 + diff --git a/queue-5.10/dmaengine-mxs-use-platform_driver_register.patch b/queue-5.10/dmaengine-mxs-use-platform_driver_register.patch new file mode 100644 index 00000000000..9552e9f45dd --- /dev/null +++ b/queue-5.10/dmaengine-mxs-use-platform_driver_register.patch @@ -0,0 +1,75 @@ +From 4a0f6b816ed3c2c75d8e78027b0fc26f93ebae8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Sep 2022 19:05:56 +0200 +Subject: dmaengine: mxs: use platform_driver_register + +From: Dario Binacchi + +[ Upstream commit 26696d4657167112a1079f86cba1739765c1360e ] + +Driver registration fails on SOC imx8mn as its supplier, the clock +control module, is probed later than subsys initcall level. This driver +uses platform_driver_probe which is not compatible with deferred probing +and won't be probed again later if probe function fails due to clock not +being available at that time. + +This patch replaces the use of platform_driver_probe with +platform_driver_register which will allow probing the driver later again +when the clock control module will be available. + +The __init annotation has been dropped because it is not compatible with +deferred probing. 
The code is not executed once and its memory cannot be +freed. + +Fixes: a580b8c5429a ("dmaengine: mxs-dma: add dma support for i.MX23/28") +Co-developed-by: Michael Trimarchi +Signed-off-by: Michael Trimarchi +Signed-off-by: Dario Binacchi +Acked-by: Sascha Hauer +Cc: stable@vger.kernel.org + +Link: https://lore.kernel.org/r/20220921170556.1055962-1-dario.binacchi@amarulasolutions.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/dma/mxs-dma.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c +index 994fc4d2aca4..dc147cc2436e 100644 +--- a/drivers/dma/mxs-dma.c ++++ b/drivers/dma/mxs-dma.c +@@ -670,7 +670,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan, + return mxs_chan->status; + } + +-static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma) ++static int mxs_dma_init(struct mxs_dma_engine *mxs_dma) + { + int ret; + +@@ -741,7 +741,7 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec, + ofdma->of_node); + } + +-static int __init mxs_dma_probe(struct platform_device *pdev) ++static int mxs_dma_probe(struct platform_device *pdev) + { + struct device_node *np = pdev->dev.of_node; + const struct mxs_dma_type *dma_type; +@@ -839,10 +839,7 @@ static struct platform_driver mxs_dma_driver = { + .name = "mxs-dma", + .of_match_table = mxs_dma_dt_ids, + }, ++ .probe = mxs_dma_probe, + }; + +-static int __init mxs_dma_module_init(void) +-{ +- return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe); +-} +-subsys_initcall(mxs_dma_module_init); ++builtin_platform_driver(mxs_dma_driver); +-- +2.35.1 + diff --git a/queue-5.10/drm-virtio-use-appropriate-atomic-state-in-virtio_gp.patch b/queue-5.10/drm-virtio-use-appropriate-atomic-state-in-virtio_gp.patch new file mode 100644 index 00000000000..32612090193 --- /dev/null +++ b/queue-5.10/drm-virtio-use-appropriate-atomic-state-in-virtio_gp.patch @@ -0,0 +1,49 @@ +From 
050b21df2c16abb2618671ae082b258d89b1c88f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Jun 2022 23:07:22 +0300 +Subject: drm/virtio: Use appropriate atomic state in + virtio_gpu_plane_cleanup_fb() + +From: Dmitry Osipenko + +[ Upstream commit 4656b3a26a9e9fe5f04bfd2ab55b066266ba7f4d ] + +Make virtio_gpu_plane_cleanup_fb() clean up the state that the DRM core +wants cleaned up, rather than the plane's current state. Normally the older +atomic state is cleaned up, but the newer state could also be cleaned up +in case of aborted commits. + +Cc: stable@vger.kernel.org +Signed-off-by: Dmitry Osipenko +Link: http://patchwork.freedesktop.org/patch/msgid/20220630200726.1884320-6-dmitry.osipenko@collabora.com +Signed-off-by: Gerd Hoffmann +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/virtio/virtgpu_plane.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c +index 6a311cd93440..e6de62734269 100644 +--- a/drivers/gpu/drm/virtio/virtgpu_plane.c ++++ b/drivers/gpu/drm/virtio/virtgpu_plane.c +@@ -213,14 +213,14 @@ static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane, + } + + static void virtio_gpu_cursor_cleanup_fb(struct drm_plane *plane, +- struct drm_plane_state *old_state) ++ struct drm_plane_state *state) + { + struct virtio_gpu_framebuffer *vgfb; + +- if (!plane->state->fb) ++ if (!state->fb) + return; + +- vgfb = to_virtio_gpu_framebuffer(plane->state->fb); ++ vgfb = to_virtio_gpu_framebuffer(state->fb); + if (vgfb->fence) { + dma_fence_put(&vgfb->fence->f); + vgfb->fence = NULL; +-- +2.35.1 + diff --git a/queue-5.10/fcntl-fix-potential-deadlocks-for-fown_struct.lock.patch b/queue-5.10/fcntl-fix-potential-deadlocks-for-fown_struct.lock.patch new file mode 100644 index 00000000000..c96cbc57b78 --- /dev/null +++ b/queue-5.10/fcntl-fix-potential-deadlocks-for-fown_struct.lock.patch @@ -0,0 +1,131 @@ +From b0757e907af9c9647f6dec361f254ac6b2553e40 Mon
Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Jul 2021 17:18:30 +0800 +Subject: fcntl: fix potential deadlocks for &fown_struct.lock + +From: Desmond Cheong Zhi Xi + +[ Upstream commit f671a691e299f58835d4660d642582bf0e8f6fda ] + +Syzbot reports a potential deadlock in do_fcntl: + +======================================================== +WARNING: possible irq lock inversion dependency detected +5.12.0-syzkaller #0 Not tainted +-------------------------------------------------------- +syz-executor132/8391 just changed the state of lock: +ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: f_getown_ex fs/fcntl.c:211 [inline] +ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: do_fcntl+0x8b4/0x1200 fs/fcntl.c:395 +but this lock was taken by another, HARDIRQ-safe lock in the past: + (&dev->event_lock){-...}-{2:2} + +and interrupts could create inverse lock ordering between them. + +other info that might help us debug this: +Chain exists of: + &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock + + Possible interrupt unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(&f->f_owner.lock); + local_irq_disable(); + lock(&dev->event_lock); + lock(&new->fa_lock); + + lock(&dev->event_lock); + + *** DEADLOCK *** + +This happens because there is a lock hierarchy of +&dev->event_lock --> &new->fa_lock --> &f->f_owner.lock +from the following call chain: + + input_inject_event(): + spin_lock_irqsave(&dev->event_lock,...); + input_handle_event(): + input_pass_values(): + input_to_handler(): + evdev_events(): + evdev_pass_values(): + spin_lock(&client->buffer_lock); + __pass_event(): + kill_fasync(): + kill_fasync_rcu(): + read_lock(&fa->fa_lock); + send_sigio(): + read_lock_irqsave(&fown->lock,...); + +However, since &dev->event_lock is HARDIRQ-safe, interrupts have to be +disabled while grabbing &f->f_owner.lock, otherwise we invert the lock +hierarchy. 
+ +Hence, we replace calls to read_lock/read_unlock on &f->f_owner.lock, +with read_lock_irq/read_unlock_irq. + +Reported-and-tested-by: syzbot+e6d5398a02c516ce5e70@syzkaller.appspotmail.com +Signed-off-by: Desmond Cheong Zhi Xi +Signed-off-by: Jeff Layton +Signed-off-by: Sasha Levin +--- + fs/fcntl.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/fs/fcntl.c b/fs/fcntl.c +index 5a56351f1fc3..fcf34f83bf6a 100644 +--- a/fs/fcntl.c ++++ b/fs/fcntl.c +@@ -149,7 +149,8 @@ void f_delown(struct file *filp) + pid_t f_getown(struct file *filp) + { + pid_t pid = 0; +- read_lock(&filp->f_owner.lock); ++ ++ read_lock_irq(&filp->f_owner.lock); + rcu_read_lock(); + if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { + pid = pid_vnr(filp->f_owner.pid); +@@ -157,7 +158,7 @@ pid_t f_getown(struct file *filp) + pid = -pid; + } + rcu_read_unlock(); +- read_unlock(&filp->f_owner.lock); ++ read_unlock_irq(&filp->f_owner.lock); + return pid; + } + +@@ -207,7 +208,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) + struct f_owner_ex owner = {}; + int ret = 0; + +- read_lock(&filp->f_owner.lock); ++ read_lock_irq(&filp->f_owner.lock); + rcu_read_lock(); + if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) + owner.pid = pid_vnr(filp->f_owner.pid); +@@ -230,7 +231,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) + ret = -EINVAL; + break; + } +- read_unlock(&filp->f_owner.lock); ++ read_unlock_irq(&filp->f_owner.lock); + + if (!ret) { + ret = copy_to_user(owner_p, &owner, sizeof(owner)); +@@ -248,10 +249,10 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) + uid_t src[2]; + int err; + +- read_lock(&filp->f_owner.lock); ++ read_lock_irq(&filp->f_owner.lock); + src[0] = from_kuid(user_ns, filp->f_owner.uid); + src[1] = from_kuid(user_ns, filp->f_owner.euid); +- read_unlock(&filp->f_owner.lock); ++ read_unlock_irq(&filp->f_owner.lock); + + err = put_user(src[0], &dst[0]); + err |= 
put_user(src[1], &dst[1]); +-- +2.35.1 + diff --git a/queue-5.10/fcntl-make-f_getown-ex-return-0-on-dead-owner-task.patch b/queue-5.10/fcntl-make-f_getown-ex-return-0-on-dead-owner-task.patch new file mode 100644 index 00000000000..6bad14fd2dd --- /dev/null +++ b/queue-5.10/fcntl-make-f_getown-ex-return-0-on-dead-owner-task.patch @@ -0,0 +1,79 @@ +From 40f5b617ba64bfbd50b469718fb24c2b4557230e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Feb 2021 15:41:56 +0300 +Subject: fcntl: make F_GETOWN(EX) return 0 on dead owner task + +From: Pavel Tikhomirov + +[ Upstream commit cc4a3f885e8f2bc3c86a265972e94fef32d68f67 ] + +Currently there is no way to differentiate the file with alive owner +from the file with dead owner but pid of the owner reused. That's why +CRIU can't actually know if it needs to restore file owner or not, +because if it restores owner but actual owner was dead, this can +introduce unexpected signals to the "false"-owner (which reused the +pid). + +Let's change the api, so that F_GETOWN(EX) returns 0 in case actual +owner is dead already. This comports with the POSIX spec, which +states that a PID of 0 indicates that no signal will be sent. + +Cc: Jeff Layton +Cc: "J. 
Bruce Fields" +Cc: Alexander Viro +Cc: linux-fsdevel@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Cc: Cyrill Gorcunov +Cc: Andrei Vagin +Signed-off-by: Pavel Tikhomirov +Signed-off-by: Jeff Layton +Stable-dep-of: f671a691e299 ("fcntl: fix potential deadlocks for &fown_struct.lock") +Signed-off-by: Sasha Levin +--- + fs/fcntl.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +diff --git a/fs/fcntl.c b/fs/fcntl.c +index 71b43538fa44..5a56351f1fc3 100644 +--- a/fs/fcntl.c ++++ b/fs/fcntl.c +@@ -148,11 +148,15 @@ void f_delown(struct file *filp) + + pid_t f_getown(struct file *filp) + { +- pid_t pid; ++ pid_t pid = 0; + read_lock(&filp->f_owner.lock); +- pid = pid_vnr(filp->f_owner.pid); +- if (filp->f_owner.pid_type == PIDTYPE_PGID) +- pid = -pid; ++ rcu_read_lock(); ++ if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { ++ pid = pid_vnr(filp->f_owner.pid); ++ if (filp->f_owner.pid_type == PIDTYPE_PGID) ++ pid = -pid; ++ } ++ rcu_read_unlock(); + read_unlock(&filp->f_owner.lock); + return pid; + } +@@ -200,11 +204,14 @@ static int f_setown_ex(struct file *filp, unsigned long arg) + static int f_getown_ex(struct file *filp, unsigned long arg) + { + struct f_owner_ex __user *owner_p = (void __user *)arg; +- struct f_owner_ex owner; ++ struct f_owner_ex owner = {}; + int ret = 0; + + read_lock(&filp->f_owner.lock); +- owner.pid = pid_vnr(filp->f_owner.pid); ++ rcu_read_lock(); ++ if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) ++ owner.pid = pid_vnr(filp->f_owner.pid); ++ rcu_read_unlock(); + switch (filp->f_owner.pid_type) { + case PIDTYPE_PID: + owner.type = F_OWNER_TID; +-- +2.35.1 + diff --git a/queue-5.10/fs-clean-up-__mark_inode_dirty-a-bit.patch b/queue-5.10/fs-clean-up-__mark_inode_dirty-a-bit.patch new file mode 100644 index 00000000000..9ccdb4b021e --- /dev/null +++ b/queue-5.10/fs-clean-up-__mark_inode_dirty-a-bit.patch @@ -0,0 +1,120 @@ +From 14e2488e7853fd96ed92fd5443a4e3a5e6cf2174 Mon Sep 17 00:00:00 2001 
+From: Sasha Levin +Date: Tue, 12 Jan 2021 11:02:49 -0800 +Subject: fs: clean up __mark_inode_dirty() a bit + +From: Eric Biggers + +[ Upstream commit 35d14f278e530ecb635ab00de984065ed90ee12f ] + +Improve some comments, and don't bother checking for the I_DIRTY_TIME +flag in the case where we just cleared it. + +Also, warn if I_DIRTY_TIME and I_DIRTY_PAGES are passed to +__mark_inode_dirty() at the same time, as this case isn't handled. + +Link: https://lore.kernel.org/r/20210112190253.64307-8-ebiggers@kernel.org +Reviewed-by: Christoph Hellwig +Reviewed-by: Jan Kara +Signed-off-by: Eric Biggers +Signed-off-by: Jan Kara +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 49 +++++++++++++++++++++++++++++------------------ + 1 file changed, 30 insertions(+), 19 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index b6d572a519fa..71043e847e7c 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -2206,23 +2206,24 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, + } + + /** +- * __mark_inode_dirty - internal function ++ * __mark_inode_dirty - internal function to mark an inode dirty + * + * @inode: inode to mark +- * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) ++ * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of ++ * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined ++ * with I_DIRTY_PAGES. + * +- * Mark an inode as dirty. Callers should use mark_inode_dirty or +- * mark_inode_dirty_sync. ++ * Mark an inode as dirty. We notify the filesystem, then update the inode's ++ * dirty flags. Then, if needed we add the inode to the appropriate dirty list. + * +- * Put the inode on the super block's dirty list. ++ * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync() ++ * instead of calling this directly. + * +- * CAREFUL! 
We mark it dirty unconditionally, but move it onto the +- * dirty list only if it is hashed or if it refers to a blockdev. +- * If it was not hashed, it will never be added to the dirty list +- * even if it is later hashed, as it will have been marked dirty already. ++ * CAREFUL! We only add the inode to the dirty list if it is hashed or if it ++ * refers to a blockdev. Unhashed inodes will never be added to the dirty list ++ * even if they are later hashed, as they will have been marked dirty already. + * +- * In short, make sure you hash any inodes _before_ you start marking +- * them dirty. ++ * In short, ensure you hash any inodes _before_ you start marking them dirty. + * + * Note that for blockdevs, inode->dirtied_when represents the dirtying time of + * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of +@@ -2234,25 +2235,34 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, + void __mark_inode_dirty(struct inode *inode, int flags) + { + struct super_block *sb = inode->i_sb; +- int dirtytime; ++ int dirtytime = 0; + + trace_writeback_mark_inode_dirty(inode, flags); + +- /* +- * Don't do this for I_DIRTY_PAGES - that doesn't actually +- * dirty the inode itself +- */ + if (flags & I_DIRTY_INODE) { ++ /* ++ * Notify the filesystem about the inode being dirtied, so that ++ * (if needed) it can update on-disk fields and journal the ++ * inode. This is only needed when the inode itself is being ++ * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not ++ * for just I_DIRTY_PAGES or I_DIRTY_TIME. ++ */ + trace_writeback_dirty_inode_start(inode, flags); +- + if (sb->s_op->dirty_inode) + sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE); +- + trace_writeback_dirty_inode(inode, flags); + ++ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ + flags &= ~I_DIRTY_TIME; ++ } else { ++ /* ++ * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing. 
++ * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME ++ * in one call to __mark_inode_dirty().) ++ */ ++ dirtytime = flags & I_DIRTY_TIME; ++ WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME); + } +- dirtytime = flags & I_DIRTY_TIME; + + /* + * Paired with smp_mb() in __writeback_single_inode() for the +@@ -2272,6 +2282,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) + + inode_attach_wb(inode, NULL); + ++ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ + if (flags & I_DIRTY_INODE) + inode->i_state &= ~I_DIRTY_TIME; + inode->i_state |= flags; +-- +2.35.1 + diff --git a/queue-5.10/fs-correctly-document-the-inode-dirty-flags.patch b/queue-5.10/fs-correctly-document-the-inode-dirty-flags.patch new file mode 100644 index 00000000000..137acab293f --- /dev/null +++ b/queue-5.10/fs-correctly-document-the-inode-dirty-flags.patch @@ -0,0 +1,65 @@ +From 5aa02147cd3d8e2a637fb16a091ff7e784f74ecd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jan 2021 11:02:44 -0800 +Subject: fs: correctly document the inode dirty flags + +From: Eric Biggers + +[ Upstream commit 1e9d63331f8fa556f31e1406ab12f2a1e5cdb495 ] + +The documentation for I_DIRTY_SYNC and I_DIRTY_DATASYNC is a bit +misleading, and I_DIRTY_TIME isn't documented at all. Fix this. + +Link: https://lore.kernel.org/r/20210112190253.64307-3-ebiggers@kernel.org +Reviewed-by: Christoph Hellwig +Reviewed-by: Jan Kara +Signed-off-by: Eric Biggers +Signed-off-by: Jan Kara +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + include/linux/fs.h | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +diff --git a/include/linux/fs.h b/include/linux/fs.h +index c8f887641878..8ee26322a527 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2087,8 +2087,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, + /* + * Inode state bits. 
Protected by inode->i_lock + * +- * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, +- * I_DIRTY_DATASYNC and I_DIRTY_PAGES. ++ * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, ++ * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. + * + * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, + * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at +@@ -2097,12 +2097,20 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, + * Two bits are used for locking and completion notification, I_NEW and I_SYNC. + * + * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on +- * fdatasync(). i_atime is the usual cause. +- * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of ++ * fdatasync() (unless I_DIRTY_DATASYNC is also set). ++ * Timestamp updates are the usual cause. ++ * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of + * these changes separately from I_DIRTY_SYNC so that we + * don't have to write inode on fdatasync() when only +- * mtime has changed in it. ++ * e.g. the timestamps have changed. + * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. ++ * I_DIRTY_TIME The inode itself only has dirty timestamps, and the ++ * lazytime mount option is enabled. We keep track of this ++ * separately from I_DIRTY_SYNC in order to implement ++ * lazytime. This gets cleared if I_DIRTY_INODE ++ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e. ++ * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in ++ * i_state, but not both. I_DIRTY_PAGES may still be set. + * I_NEW Serves as both a mutex and completion notification. + * New inodes set I_NEW. 
If two processes both create + * the same inode, one of them will release its inode and +-- +2.35.1 + diff --git a/queue-5.10/fs-don-t-call-dirty_inode-for-lazytime-timestamp-upd.patch b/queue-5.10/fs-don-t-call-dirty_inode-for-lazytime-timestamp-upd.patch new file mode 100644 index 00000000000..57f784dde61 --- /dev/null +++ b/queue-5.10/fs-don-t-call-dirty_inode-for-lazytime-timestamp-upd.patch @@ -0,0 +1,119 @@ +From 81815952d81efbb44aab514f0eaa765a0abf9744 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jan 2021 11:02:47 -0800 +Subject: fs: don't call ->dirty_inode for lazytime timestamp updates + +From: Eric Biggers + +[ Upstream commit e2728c5621fd9c68c65a6647875a1d1c67b9f257 ] + +There is no need to call ->dirty_inode for lazytime timestamp updates +(i.e. for __mark_inode_dirty(I_DIRTY_TIME)), since by the definition of +lazytime, filesystems must ignore these updates. Filesystems only need +to care about the updated timestamps when they expire. + +Therefore, only call ->dirty_inode when I_DIRTY_INODE is set. 
+ +Based on a patch from Christoph Hellwig: +https://lore.kernel.org/r/20200325122825.1086872-4-hch@lst.de + +Link: https://lore.kernel.org/r/20210112190253.64307-6-ebiggers@kernel.org +Reviewed-by: Christoph Hellwig +Reviewed-by: Jan Kara +Signed-off-by: Eric Biggers +Signed-off-by: Jan Kara +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 12 +----------- + fs/f2fs/super.c | 3 --- + fs/fs-writeback.c | 6 +++--- + fs/gfs2/super.c | 2 -- + 4 files changed, 4 insertions(+), 19 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 45f31dc1e66f..2a9ce6826d6b 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -6003,26 +6003,16 @@ int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode, + * If the inode is marked synchronous, we don't honour that here - doing + * so would cause a commit on atime updates, which we don't bother doing. + * We handle synchronous inodes at the highest possible level. +- * +- * If only the I_DIRTY_TIME flag is set, we can skip everything. If +- * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need +- * to copy into the on-disk inode structure are the timestamp files. 
+ */ + void ext4_dirty_inode(struct inode *inode, int flags) + { + handle_t *handle; + +- if (flags == I_DIRTY_TIME) +- return; + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) +- goto out; +- ++ return; + ext4_mark_inode_dirty(handle, inode); +- + ext4_journal_stop(handle); +-out: +- return; + } + + int ext4_change_inode_journal_flag(struct inode *inode, int val) +diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c +index fba413ced982..b6a33935528c 100644 +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1213,9 +1213,6 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) + inode->i_ino == F2FS_META_INO(sbi)) + return; + +- if (flags == I_DIRTY_TIME) +- return; +- + if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) + clear_inode_flag(inode, FI_AUTO_RECOVER); + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 2011199476ea..2088046de4ef 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -2242,16 +2242,16 @@ void __mark_inode_dirty(struct inode *inode, int flags) + * Don't do this for I_DIRTY_PAGES - that doesn't actually + * dirty the inode itself + */ +- if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) { ++ if (flags & I_DIRTY_INODE) { + trace_writeback_dirty_inode_start(inode, flags); + + if (sb->s_op->dirty_inode) + sb->s_op->dirty_inode(inode, flags); + + trace_writeback_dirty_inode(inode, flags); +- } +- if (flags & I_DIRTY_INODE) ++ + flags &= ~I_DIRTY_TIME; ++ } + dirtytime = flags & I_DIRTY_TIME; + + /* +diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c +index d14b98aa1c3e..21bb02dc3aed 100644 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@ -506,8 +506,6 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) + int need_endtrans = 0; + int ret; + +- if (!(flags & I_DIRTY_INODE)) +- return; + if (unlikely(gfs2_withdrawn(sdp))) + return; + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { +-- +2.35.1 + diff --git a/queue-5.10/fs-pass-only-i_dirty_inode-flags-to-dirty_inode.patch 
b/queue-5.10/fs-pass-only-i_dirty_inode-flags-to-dirty_inode.patch new file mode 100644 index 00000000000..da6a913a043 --- /dev/null +++ b/queue-5.10/fs-pass-only-i_dirty_inode-flags-to-dirty_inode.patch @@ -0,0 +1,66 @@ +From a0b3b58097f66aa6b33340b0962ff0ee41da9c60 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jan 2021 11:02:48 -0800 +Subject: fs: pass only I_DIRTY_INODE flags to ->dirty_inode + +From: Eric Biggers + +[ Upstream commit a38ed483a72672ee6bdb5d8cf17fc0838377baa0 ] + +->dirty_inode is now only called when I_DIRTY_INODE (I_DIRTY_SYNC and/or +I_DIRTY_DATASYNC) is set. However it may still be passed other dirty +flags at the same time, provided that these other flags happened to be +passed to __mark_inode_dirty() at the same time as I_DIRTY_INODE. + +This doesn't make sense because there is no reason for filesystems to +care about these extra flags. Nor are filesystems notified about all +updates to these other flags. + +Therefore, mask the flags before passing them to ->dirty_inode. + +Also properly document ->dirty_inode in vfs.rst. + +Link: https://lore.kernel.org/r/20210112190253.64307-7-ebiggers@kernel.org +Reviewed-by: Christoph Hellwig +Reviewed-by: Jan Kara +Signed-off-by: Eric Biggers +Signed-off-by: Jan Kara +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + Documentation/filesystems/vfs.rst | 5 ++++- + fs/fs-writeback.c | 2 +- + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst +index ca52c82e5bb5..287b80948a40 100644 +--- a/Documentation/filesystems/vfs.rst ++++ b/Documentation/filesystems/vfs.rst +@@ -270,7 +270,10 @@ or bottom half). + ->alloc_inode. + + ``dirty_inode`` +- this method is called by the VFS to mark an inode dirty. ++ this method is called by the VFS when an inode is marked dirty. 
++ This is specifically for the inode itself being marked dirty, ++ not its data. If the update needs to be persisted by fdatasync(), ++ then I_DIRTY_DATASYNC will be set in the flags argument. + + ``write_inode`` + this method is called when the VFS needs to write an inode to +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 2088046de4ef..b6d572a519fa 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -2246,7 +2246,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) + trace_writeback_dirty_inode_start(inode, flags); + + if (sb->s_op->dirty_inode) +- sb->s_op->dirty_inode(inode, flags); ++ sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE); + + trace_writeback_dirty_inode(inode, flags); + +-- +2.35.1 + diff --git a/queue-5.10/fs-record-i_dirty_time-even-if-inode-already-has-i_d.patch b/queue-5.10/fs-record-i_dirty_time-even-if-inode-already-has-i_d.patch new file mode 100644 index 00000000000..93e4a9b2340 --- /dev/null +++ b/queue-5.10/fs-record-i_dirty_time-even-if-inode-already-has-i_d.patch @@ -0,0 +1,196 @@ +From 8139a7d3a79e5c63d33a578821e0cc2785e0d377 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 25 Aug 2022 12:06:57 +0200 +Subject: fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE + +From: Lukas Czerner + +[ Upstream commit cbfecb927f429a6fa613d74b998496bd71e4438a ] + +Currently the I_DIRTY_TIME will never get set if the inode already has +I_DIRTY_INODE with assumption that it supersedes I_DIRTY_TIME. That's +true, however ext4 will only update the on-disk inode in +->dirty_inode(), not on actual writeback. As a result if the inode +already has I_DIRTY_INODE state by the time we get to +__mark_inode_dirty() only with I_DIRTY_TIME, the time was already filled +into on-disk inode and will not get updated until the next I_DIRTY_INODE +update, which might never come if we crash or get a power failure. + +The problem can be reproduced on ext4 by running xfstest generic/622 +with -o iversion mount option. 
+ +Fix it by allowing I_DIRTY_TIME to be set even if the inode already has +I_DIRTY_INODE. Also make sure that the case is properly handled in +writeback_single_inode() as well. Additionally, changes were made in +xfs_fs_dirty_inode() to accommodate I_DIRTY_TIME in flags. + +Thanks to Jan Kara for suggestions on how to make this work properly. + +Cc: Dave Chinner +Cc: Christoph Hellwig +Cc: stable@kernel.org +Signed-off-by: Lukas Czerner +Suggested-by: Jan Kara +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220825100657.44217-1-lczerner@redhat.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + Documentation/filesystems/vfs.rst | 3 +++ + fs/fs-writeback.c | 37 +++++++++++++++++++++---------- + fs/xfs/xfs_super.c | 10 +++++++-- + include/linux/fs.h | 9 ++++---- + 4 files changed, 41 insertions(+), 18 deletions(-) + +diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst +index 287b80948a40..ee69d60818b5 100644 +--- a/Documentation/filesystems/vfs.rst ++++ b/Documentation/filesystems/vfs.rst +@@ -274,6 +274,9 @@ or bottom half). + This is specifically for the inode itself being marked dirty, + not its data. If the update needs to be persisted by fdatasync(), + then I_DIRTY_DATASYNC will be set in the flags argument. ++ I_DIRTY_TIME will be set in the flags in case lazytime is enabled ++ and struct inode has times updated since the last ->dirty_inode ++ call.
+ + ``write_inode`` + this method is called when the VFS needs to write an inode to +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 4c667662a4d9..f47797e15685 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -1618,9 +1618,14 @@ static int writeback_single_inode(struct inode *inode, + */ + if (!(inode->i_state & I_DIRTY_ALL)) + inode_cgwb_move_to_attached(inode, wb); +- else if (!(inode->i_state & I_SYNC_QUEUED) && +- (inode->i_state & I_DIRTY)) +- redirty_tail_locked(inode, wb); ++ else if (!(inode->i_state & I_SYNC_QUEUED)) { ++ if ((inode->i_state & I_DIRTY)) ++ redirty_tail_locked(inode, wb); ++ else if (inode->i_state & I_DIRTY_TIME) { ++ inode->dirtied_when = jiffies; ++ inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); ++ } ++ } + + spin_unlock(&wb->list_lock); + inode_sync_complete(inode); +@@ -2276,6 +2281,20 @@ void __mark_inode_dirty(struct inode *inode, int flags) + trace_writeback_mark_inode_dirty(inode, flags); + + if (flags & I_DIRTY_INODE) { ++ /* ++ * Inode timestamp update will piggyback on this dirtying. ++ * We tell ->dirty_inode callback that timestamps need to ++ * be updated by setting I_DIRTY_TIME in flags. ++ */ ++ if (inode->i_state & I_DIRTY_TIME) { ++ spin_lock(&inode->i_lock); ++ if (inode->i_state & I_DIRTY_TIME) { ++ inode->i_state &= ~I_DIRTY_TIME; ++ flags |= I_DIRTY_TIME; ++ } ++ spin_unlock(&inode->i_lock); ++ } ++ + /* + * Notify the filesystem about the inode being dirtied, so that + * (if needed) it can update on-disk fields and journal the +@@ -2285,7 +2304,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) + */ + trace_writeback_dirty_inode_start(inode, flags); + if (sb->s_op->dirty_inode) +- sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE); ++ sb->s_op->dirty_inode(inode, ++ flags & (I_DIRTY_INODE | I_DIRTY_TIME)); + trace_writeback_dirty_inode(inode, flags); + + /* I_DIRTY_INODE supersedes I_DIRTY_TIME.
*/ +@@ -2306,21 +2326,15 @@ void __mark_inode_dirty(struct inode *inode, int flags) + */ + smp_mb(); + +- if (((inode->i_state & flags) == flags) || +- (dirtytime && (inode->i_state & I_DIRTY_INODE))) ++ if ((inode->i_state & flags) == flags) + return; + + spin_lock(&inode->i_lock); +- if (dirtytime && (inode->i_state & I_DIRTY_INODE)) +- goto out_unlock_inode; + if ((inode->i_state & flags) != flags) { + const int was_dirty = inode->i_state & I_DIRTY; + + inode_attach_wb(inode, NULL); + +- /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ +- if (flags & I_DIRTY_INODE) +- inode->i_state &= ~I_DIRTY_TIME; + inode->i_state |= flags; + + /* +@@ -2393,7 +2407,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) + out_unlock: + if (wb) + spin_unlock(&wb->list_lock); +-out_unlock_inode: + spin_unlock(&inode->i_lock); + } + EXPORT_SYMBOL(__mark_inode_dirty); +diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c +index 434c87cc9fbf..3cc20640b3da 100644 +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -668,7 +668,7 @@ xfs_fs_destroy_inode( + static void + xfs_fs_dirty_inode( + struct inode *inode, +- int flag) ++ int flags) + { + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; +@@ -676,7 +676,13 @@ xfs_fs_dirty_inode( + + if (!(inode->i_sb->s_flags & SB_LAZYTIME)) + return; +- if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME)) ++ ++ /* ++ * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC) ++ * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed ++ * in flags possibly together with I_DIRTY_SYNC. 
++ */ ++ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME)) + return; + + if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp)) +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 8ee26322a527..ae7cd6ee1142 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2104,13 +2104,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, + * don't have to write inode on fdatasync() when only + * e.g. the timestamps have changed. + * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. +- * I_DIRTY_TIME The inode itself only has dirty timestamps, and the ++ * I_DIRTY_TIME The inode itself has dirty timestamps, and the + * lazytime mount option is enabled. We keep track of this + * separately from I_DIRTY_SYNC in order to implement + * lazytime. This gets cleared if I_DIRTY_INODE +- * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e. +- * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in +- * i_state, but not both. I_DIRTY_PAGES may still be set. ++ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But ++ * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already ++ * in place because writeback might already be in progress ++ * and we don't want to lose the time update + * I_NEW Serves as both a mutex and completion notification. + * New inodes set I_NEW. 
If two processes both create + * the same inode, one of them will release its inode and +-- +2.35.1 + diff --git a/queue-5.10/hid-magicmouse-do-not-set-btn_mouse-on-double-report.patch b/queue-5.10/hid-magicmouse-do-not-set-btn_mouse-on-double-report.patch new file mode 100644 index 00000000000..a79b7f11635 --- /dev/null +++ b/queue-5.10/hid-magicmouse-do-not-set-btn_mouse-on-double-report.patch @@ -0,0 +1,48 @@ +From a38fb7f951aa2e47325aa0740605ec977f988757 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Oct 2022 20:27:47 +0200 +Subject: HID: magicmouse: Do not set BTN_MOUSE on double report +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: José Expósito + +[ Upstream commit bb5f0c855dcfc893ae5ed90e4c646bde9e4498bf ] + +Under certain conditions the Magic Trackpad can group 2 reports in a +single packet. The packet is split and the raw event function is +invoked recursively for each part. + +However, after processing each part, the BTN_MOUSE status is updated, +sending multiple click events. [1] + +Return after processing double reports to avoid this issue. 
+ +Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/811 # [1] +Fixes: a462230e16ac ("HID: magicmouse: enable Magic Trackpad support") +Reported-by: Nulo +Signed-off-by: José Expósito +Signed-off-by: Benjamin Tissoires +Link: https://lore.kernel.org/r/20221009182747.90730-1-jose.exposito89@gmail.com +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-magicmouse.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c +index fc4c07459753..28158d2f2352 100644 +--- a/drivers/hid/hid-magicmouse.c ++++ b/drivers/hid/hid-magicmouse.c +@@ -387,7 +387,7 @@ static int magicmouse_raw_event(struct hid_device *hdev, + magicmouse_raw_event(hdev, report, data + 2, data[1]); + magicmouse_raw_event(hdev, report, data + 2 + data[1], + size - 2 - data[1]); +- break; ++ return 0; + default: + return 0; + } +-- +2.35.1 + diff --git a/queue-5.10/i40e-fix-dma-mappings-leak.patch b/queue-5.10/i40e-fix-dma-mappings-leak.patch new file mode 100644 index 00000000000..1466745761e --- /dev/null +++ b/queue-5.10/i40e-fix-dma-mappings-leak.patch @@ -0,0 +1,323 @@ +From 0420cc9ad4be789293baf3783b27ad18b422d261 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Oct 2022 13:54:40 -0700 +Subject: i40e: Fix DMA mappings leak + +From: Jan Sokolowski + +[ Upstream commit aae425efdfd1b1d8452260a3cb49344ebf20b1f5 ] + +During reallocation of RX buffers, new DMA mappings are created for +those buffers. 
+ +steps for reproduction: +while : +do +for ((i=0; i<=8160; i=i+32)) +do +ethtool -G enp130s0f0 rx $i tx $i +sleep 0.5 +ethtool -g enp130s0f0 +done +done + +This resulted in crash: +i40e 0000:01:00.1: Unable to allocate memory for the Rx descriptor ring, size=65536 +Driver BUG +WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:141 xdp_rxq_info_unreg+0x43/0x50 +Call Trace: +i40e_free_rx_resources+0x70/0x80 [i40e] +i40e_set_ringparam+0x27c/0x800 [i40e] +ethnl_set_rings+0x1b2/0x290 +genl_family_rcv_msg_doit.isra.15+0x10f/0x150 +genl_family_rcv_msg+0xb3/0x160 +? rings_fill_reply+0x1a0/0x1a0 +genl_rcv_msg+0x47/0x90 +? genl_family_rcv_msg+0x160/0x160 +netlink_rcv_skb+0x4c/0x120 +genl_rcv+0x24/0x40 +netlink_unicast+0x196/0x230 +netlink_sendmsg+0x204/0x3d0 +sock_sendmsg+0x4c/0x50 +__sys_sendto+0xee/0x160 +? handle_mm_fault+0xbe/0x1e0 +? syscall_trace_enter+0x1d3/0x2c0 +__x64_sys_sendto+0x24/0x30 +do_syscall_64+0x5b/0x1a0 +entry_SYSCALL_64_after_hwframe+0x65/0xca +RIP: 0033:0x7f5eac8b035b +Missing register, driver bug +WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:119 xdp_rxq_info_unreg_mem_model+0x69/0x140 +Call Trace: +xdp_rxq_info_unreg+0x1e/0x50 +i40e_free_rx_resources+0x70/0x80 [i40e] +i40e_set_ringparam+0x27c/0x800 [i40e] +ethnl_set_rings+0x1b2/0x290 +genl_family_rcv_msg_doit.isra.15+0x10f/0x150 +genl_family_rcv_msg+0xb3/0x160 +? rings_fill_reply+0x1a0/0x1a0 +genl_rcv_msg+0x47/0x90 +? genl_family_rcv_msg+0x160/0x160 +netlink_rcv_skb+0x4c/0x120 +genl_rcv+0x24/0x40 +netlink_unicast+0x196/0x230 +netlink_sendmsg+0x204/0x3d0 +sock_sendmsg+0x4c/0x50 +__sys_sendto+0xee/0x160 +? handle_mm_fault+0xbe/0x1e0 +? 
syscall_trace_enter+0x1d3/0x2c0 +__x64_sys_sendto+0x24/0x30 +do_syscall_64+0x5b/0x1a0 +entry_SYSCALL_64_after_hwframe+0x65/0xca +RIP: 0033:0x7f5eac8b035b + +This was caused because of new buffers with different RX ring count should +substitute older ones, but those buffers were freed in +i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi, +thus kfree on rx_bi caused leak of already mapped DMA. + +Fix this by reallocating ZC with rx_bi_zc struct when BPF program loads. Additionally +reallocate back to rx_bi when BPF program unloads. + +If BPF program is loaded/unloaded and XSK pools are created, reallocate +RX queues accordingly in XDP_SETUP_XSK_POOL handler. + +Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings") +Signed-off-by: Jan Sokolowski +Signed-off-by: Mateusz Palczewski +Signed-off-by: Jacob Keller +Tested-by: Chandan (A Contingent Worker at Intel) +Tested-by: Gurucharan (A Contingent worker at Intel) +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../net/ethernet/intel/i40e/i40e_ethtool.c | 3 - + drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++-- + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 13 ++-- + drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 - + drivers/net/ethernet/intel/i40e/i40e_xsk.c | 67 ++++++++++++++++--- + drivers/net/ethernet/intel/i40e/i40e_xsk.h | 2 +- + 6 files changed, 74 insertions(+), 28 deletions(-) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +index 63054061966e..cc5f5c237774 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +@@ -2081,9 +2081,6 @@ static int i40e_set_ringparam(struct net_device *netdev, + */ + rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS; + err = i40e_setup_rx_descriptors(&rx_rings[i]); +- if (err) +- goto rx_unwind; +- err = i40e_alloc_rx_bi(&rx_rings[i]); + if (err) + goto rx_unwind; + +diff --git
a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index c7f243ddbcf7..ea6a984c6d12 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -3409,12 +3409,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) + if (ring->vsi->type == I40E_VSI_MAIN) + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + +- kfree(ring->rx_bi); + ring->xsk_pool = i40e_xsk_pool(ring); + if (ring->xsk_pool) { +- ret = i40e_alloc_rx_bi_zc(ring); +- if (ret) +- return ret; + ring->rx_buf_len = + xsk_pool_get_rx_frame_size(ring->xsk_pool); + /* For AF_XDP ZC, we disallow packets to span on +@@ -3432,9 +3428,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) + ring->queue_index); + + } else { +- ret = i40e_alloc_rx_bi(ring); +- if (ret) +- return ret; + ring->rx_buf_len = vsi->rx_buf_len; + if (ring->vsi->type == I40E_VSI_MAIN) { + ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, +@@ -12684,6 +12677,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, + i40e_reset_and_rebuild(pf, true, true); + } + ++ if (!i40e_enabled_xdp_vsi(vsi) && prog) { ++ if (i40e_realloc_rx_bi_zc(vsi, true)) ++ return -ENOMEM; ++ } else if (i40e_enabled_xdp_vsi(vsi) && !prog) { ++ if (i40e_realloc_rx_bi_zc(vsi, false)) ++ return -ENOMEM; ++ } ++ + for (i = 0; i < vsi->num_queue_pairs; i++) + WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); + +@@ -12916,6 +12917,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) + + i40e_queue_pair_disable_irq(vsi, queue_pair); + err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); ++ i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); + i40e_queue_pair_clean_rings(vsi, queue_pair); + i40e_queue_pair_reset_stats(vsi, queue_pair); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +index 5ad28129fab2..43be33d87e39 
100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +@@ -1305,14 +1305,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) + return -ENOMEM; + } + +-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring) +-{ +- unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count; +- +- rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL); +- return rx_ring->rx_bi ? 0 : -ENOMEM; +-} +- + static void i40e_clear_rx_bi(struct i40e_ring *rx_ring) + { + memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count); +@@ -1443,6 +1435,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) + + rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; + ++ rx_ring->rx_bi = ++ kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL); ++ if (!rx_ring->rx_bi) ++ return -ENOMEM; ++ + return 0; + } + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h +index 93ac201f68b8..af843e8169f7 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h +@@ -465,7 +465,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); + bool __i40e_chk_linearize(struct sk_buff *skb); + int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); +-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring); + + /** + * i40e_get_head - Retrieve head from head writeback +diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +index 75e4a698c3db..7f1226123629 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +@@ -9,14 +9,6 @@ + #include "i40e_txrx_common.h" + #include "i40e_xsk.h" + +-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring) +-{ +- unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count; +- +- rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL); +- return rx_ring->rx_bi_zc ? 
0 : -ENOMEM; +-} +- + void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring) + { + memset(rx_ring->rx_bi_zc, 0, +@@ -28,6 +20,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx) + return &rx_ring->rx_bi_zc[idx]; + } + ++/** ++ * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer ++ * @rx_ring: Current rx ring ++ * @pool_present: is pool for XSK present ++ * ++ * Try allocating memory and return ENOMEM, if failed to allocate. ++ * If allocation was successful, substitute buffer with allocated one. ++ * Returns 0 on success, negative on failure ++ */ ++static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present) ++{ ++ size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) : ++ sizeof(*rx_ring->rx_bi); ++ void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL); ++ ++ if (!sw_ring) ++ return -ENOMEM; ++ ++ if (pool_present) { ++ kfree(rx_ring->rx_bi); ++ rx_ring->rx_bi = NULL; ++ rx_ring->rx_bi_zc = sw_ring; ++ } else { ++ kfree(rx_ring->rx_bi_zc); ++ rx_ring->rx_bi_zc = NULL; ++ rx_ring->rx_bi = sw_ring; ++ } ++ return 0; ++} ++ ++/** ++ * i40e_realloc_rx_bi_zc - reallocate rx SW rings ++ * @vsi: Current VSI ++ * @zc: is zero copy set ++ * ++ * Reallocate buffer for rx_rings that might be used by XSK. ++ * XDP requires more memory, than rx_buf provides. 
++ * Returns 0 on success, negative on failure ++ */ ++int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc) ++{ ++ struct i40e_ring *rx_ring; ++ unsigned long q; ++ ++ for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) { ++ rx_ring = vsi->rx_rings[q]; ++ if (i40e_realloc_rx_xdp_bi(rx_ring, zc)) ++ return -ENOMEM; ++ } ++ return 0; ++} ++ + /** + * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a + * certain ring/qid +@@ -68,6 +112,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi, + if (err) + return err; + ++ err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true); ++ if (err) ++ return err; ++ + err = i40e_queue_pair_enable(vsi, qid); + if (err) + return err; +@@ -112,6 +160,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid) + xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR); + + if (if_running) { ++ err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false); ++ if (err) ++ return err; + err = i40e_queue_pair_enable(vsi, qid); + if (err) + return err; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h +index 7adfd8539247..36f5b6d20601 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h +@@ -17,7 +17,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); + + bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring); + int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); +-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring); ++int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc); + void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring); + + #endif /* _I40E_XSK_H_ */ +-- +2.35.1 + diff --git a/queue-5.10/iommu-vt-d-allow-nvs-regions-in-arch_rmrr_sanity_che.patch b/queue-5.10/iommu-vt-d-allow-nvs-regions-in-arch_rmrr_sanity_che.patch new file mode 100644 index 00000000000..973c4a980bb --- /dev/null +++ 
b/queue-5.10/iommu-vt-d-allow-nvs-regions-in-arch_rmrr_sanity_che.patch @@ -0,0 +1,79 @@ +From 0201257917d0e6b24431f2cbd8141ceda6d30a19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 08:44:46 +0800 +Subject: iommu/vt-d: Allow NVS regions in arch_rmrr_sanity_check() + +From: Charlotte Tan + +[ Upstream commit 5566e68d829f5d87670d5984c1c2ccb4c518405f ] + +arch_rmrr_sanity_check() warns if the RMRR is not covered by an ACPI +Reserved region, but it seems like it should accept an NVS region as +well. The ACPI spec +https://uefi.org/specs/ACPI/6.5/15_System_Address_Map_Interfaces.html +uses similar wording for "Reserved" and "NVS" region types; for NVS +regions it says "This range of addresses is in use or reserved by the +system and must not be used by the operating system." + +There is an old comment on this mailing list that also suggests NVS +regions should pass the arch_rmrr_sanity_check() test: + + The warnings come from arch_rmrr_sanity_check() since it checks whether + the region is E820_TYPE_RESERVED. However, if the purpose of the check + is to detect RMRR has regions that may be used by OS as free memory, + isn't E820_TYPE_NVS safe, too? 
+ +This patch overlaps with another proposed patch that would add the region +type to the log since sometimes the bug reporter sees this log on the +console but doesn't know to include the kernel log: + +https://lore.kernel.org/lkml/20220611204859.234975-3-atomlin@redhat.com/ + +Here's an example of the "Firmware Bug" apparent false positive (wrapped +for line length): + + DMAR: [Firmware Bug]: No firmware reserved region can cover this RMRR + [0x000000006f760000-0x000000006f762fff], contact BIOS vendor for + fixes + DMAR: [Firmware Bug]: Your BIOS is broken; bad RMRR + [0x000000006f760000-0x000000006f762fff] + +This is the snippet from the e820 table: + + BIOS-e820: [mem 0x0000000068bff000-0x000000006ebfefff] reserved + BIOS-e820: [mem 0x000000006ebff000-0x000000006f9fefff] ACPI NVS + BIOS-e820: [mem 0x000000006f9ff000-0x000000006fffefff] ACPI data + +Fixes: f036c7fa0ab6 ("iommu/vt-d: Check VT-d RMRR region in BIOS is reported as reserved") +Cc: Will Mortensen +Link: https://lore.kernel.org/linux-iommu/64a5843d-850d-e58c-4fc2-0a0eeeb656dc@nec.com/ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216443 +Signed-off-by: Charlotte Tan +Reviewed-by: Aaron Tomlin +Link: https://lore.kernel.org/r/20220929044449.32515-1-charlotte@extrahop.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/iommu.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h +index bf1ed2ddc74b..7a983119bc40 100644 +--- a/arch/x86/include/asm/iommu.h ++++ b/arch/x86/include/asm/iommu.h +@@ -17,8 +17,10 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) + { + u64 start = rmrr->base_address; + u64 end = rmrr->end_address + 1; ++ int entry_type; + +- if (e820__mapped_all(start, end, E820_TYPE_RESERVED)) ++ entry_type = e820__get_entry_type(start, end); ++ if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS) + return 0; + + 
pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n", +-- +2.35.1 + diff --git a/queue-5.10/iommu-vt-d-clean-up-si_domain-in-the-init_dmars-erro.patch b/queue-5.10/iommu-vt-d-clean-up-si_domain-in-the-init_dmars-erro.patch new file mode 100644 index 00000000000..743b141bd49 --- /dev/null +++ b/queue-5.10/iommu-vt-d-clean-up-si_domain-in-the-init_dmars-erro.patch @@ -0,0 +1,56 @@ +From ae123e32d0af7507f3bcc2929df90460c40693d4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 08:44:47 +0800 +Subject: iommu/vt-d: Clean up si_domain in the init_dmars() error path + +From: Jerry Snitselaar + +[ Upstream commit 620bf9f981365c18cc2766c53d92bf8131c63f32 ] + +A splat from kmem_cache_destroy() was seen with a kernel prior to +commit ee2653bbe89d ("iommu/vt-d: Remove domain and devinfo mempool") +when there was a failure in init_dmars(), because the iommu_domain +cache still had objects. While the mempool code is now gone, there +still is a leak of the si_domain memory if init_dmars() fails. So +clean up si_domain in the init_dmars() error path. 
+ +Cc: Lu Baolu +Cc: Joerg Roedel +Cc: Will Deacon +Cc: Robin Murphy +Fixes: 86080ccc223a ("iommu/vt-d: Allocate si_domain in init_dmars()") +Signed-off-by: Jerry Snitselaar +Link: https://lore.kernel.org/r/20221010144842.308890-1-jsnitsel@redhat.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + drivers/iommu/intel/iommu.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c +index c48cf737b521..f23329b7f97c 100644 +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -2846,6 +2846,7 @@ static int __init si_domain_init(int hw) + + if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + domain_exit(si_domain); ++ si_domain = NULL; + return -EFAULT; + } + +@@ -3505,6 +3506,10 @@ static int __init init_dmars(void) + disable_dmar_iommu(iommu); + free_dmar_iommu(iommu); + } ++ if (si_domain) { ++ domain_exit(si_domain); ++ si_domain = NULL; ++ } + + kfree(g_iommus); + +-- +2.35.1 + diff --git a/queue-5.10/ionic-catch-null-pointer-issue-on-reconfig.patch b/queue-5.10/ionic-catch-null-pointer-issue-on-reconfig.patch new file mode 100644 index 00000000000..78c1076cb86 --- /dev/null +++ b/queue-5.10/ionic-catch-null-pointer-issue-on-reconfig.patch @@ -0,0 +1,56 @@ +From 80770d513ebb3eba4958bc0eeb817f3dae31de82 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Oct 2022 16:31:23 -0700 +Subject: ionic: catch NULL pointer issue on reconfig + +From: Brett Creeley + +[ Upstream commit aa1d7e1267c12e07d979aa34c613716a89029db2 ] + +It's possible that the driver will dereference a qcq that doesn't exist +when calling ionic_reconfigure_queues(), which causes a page fault BUG. + +If a reduction in the number of queues is followed by a different +reconfig such as changing the ring size, the driver can hit a NULL +pointer when trying to clean up non-existent queues. 
+ +Fix this by checking to make sure both the qcqs array and qcq entry +exist before trying to use and free the entry. + +Fixes: 101b40a0171f ("ionic: change queue count with no reset") +Signed-off-by: Brett Creeley +Signed-off-by: Shannon Nelson +Link: https://lore.kernel.org/r/20221017233123.15869-1-snelson@pensando.io +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/pensando/ionic/ionic_lif.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +index e42520f909fe..cb12d0171517 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +@@ -2383,11 +2383,15 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, + * than the full array, but leave the qcq shells in place + */ + for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) { +- lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; +- ionic_qcq_free(lif, lif->txqcqs[i]); ++ if (lif->txqcqs && lif->txqcqs[i]) { ++ lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; ++ ionic_qcq_free(lif, lif->txqcqs[i]); ++ } + +- lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; +- ionic_qcq_free(lif, lif->rxqcqs[i]); ++ if (lif->rxqcqs && lif->rxqcqs[i]) { ++ lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; ++ ionic_qcq_free(lif, lif->rxqcqs[i]); ++ } + } + + return err; +-- +2.35.1 + diff --git a/queue-5.10/kernfs-fix-use-after-free-in-__kernfs_remove.patch b/queue-5.10/kernfs-fix-use-after-free-in-__kernfs_remove.patch new file mode 100644 index 00000000000..0fdf15e40a6 --- /dev/null +++ b/queue-5.10/kernfs-fix-use-after-free-in-__kernfs_remove.patch @@ -0,0 +1,202 @@ +From dcfe8c010accdad535bc505e4f6257168cf5c1f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Sep 2022 14:17:23 +0200 +Subject: kernfs: fix use-after-free in __kernfs_remove + +From: Christian A.
Ehrhardt + +[ Upstream commit 4abc99652812a2ddf932f137515d5c5a04723538 ] + +Syzkaller managed to trigger concurrent calls to +kernfs_remove_by_name_ns() for the same file resulting in +a KASAN detected use-after-free. The race occurs when the root +node is freed during kernfs_drain(). + +To prevent this acquire an additional reference for the root +of the tree that is removed before calling __kernfs_remove(). + +Found by syzkaller with the following reproducer (slab_nomerge is +required): + +syz_mount_image$ext4(0x0, &(0x7f0000000100)='./file0\x00', 0x100000, 0x0, 0x0, 0x0, 0x0) +r0 = openat(0xffffffffffffff9c, &(0x7f0000000080)='/proc/self/exe\x00', 0x0, 0x0) +close(r0) +pipe2(&(0x7f0000000140)={0xffffffffffffffff, 0xffffffffffffffff}, 0x800) +mount$9p_fd(0x0, &(0x7f0000000040)='./file0\x00', &(0x7f00000000c0), 0x408, &(0x7f0000000280)={'trans=fd,', {'rfdno', 0x3d, r0}, 0x2c, {'wfdno', 0x3d, r1}, 0x2c, {[{@cache_loose}, {@mmap}, {@loose}, {@loose}, {@mmap}], [{@mask={'mask', 0x3d, '^MAY_EXEC'}}, {@fsmagic={'fsmagic', 0x3d, 0x10001}}, {@dont_hash}]}}) + +Sample report: + +================================================================== +BUG: KASAN: use-after-free in kernfs_type include/linux/kernfs.h:335 [inline] +BUG: KASAN: use-after-free in kernfs_leftmost_descendant fs/kernfs/dir.c:1261 [inline] +BUG: KASAN: use-after-free in __kernfs_remove.part.0+0x843/0x960 fs/kernfs/dir.c:1369 +Read of size 2 at addr ffff8880088807f0 by task syz-executor.2/857 + +CPU: 0 PID: 857 Comm: syz-executor.2 Not tainted 6.0.0-rc3-00363-g7726d4c3e60b #5 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x6e/0x91 lib/dump_stack.c:106 + print_address_description mm/kasan/report.c:317 [inline] + print_report.cold+0x5e/0x5e5 mm/kasan/report.c:433 + kasan_report+0xa3/0x130 mm/kasan/report.c:495 + kernfs_type include/linux/kernfs.h:335 [inline] + kernfs_leftmost_descendant 
fs/kernfs/dir.c:1261 [inline] + __kernfs_remove.part.0+0x843/0x960 fs/kernfs/dir.c:1369 + __kernfs_remove fs/kernfs/dir.c:1356 [inline] + kernfs_remove_by_name_ns+0x108/0x190 fs/kernfs/dir.c:1589 + sysfs_slab_add+0x133/0x1e0 mm/slub.c:5943 + __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899 + create_cache mm/slab_common.c:229 [inline] + kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335 + p9_client_create+0xd4d/0x1190 net/9p/client.c:993 + v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408 + v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126 + legacy_get_tree+0xf1/0x200 fs/fs_context.c:610 + vfs_get_tree+0x85/0x2e0 fs/super.c:1530 + do_new_mount fs/namespace.c:3040 [inline] + path_mount+0x675/0x1d00 fs/namespace.c:3370 + do_mount fs/namespace.c:3383 [inline] + __do_sys_mount fs/namespace.c:3591 [inline] + __se_sys_mount fs/namespace.c:3568 [inline] + __x64_sys_mount+0x282/0x300 fs/namespace.c:3568 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7f725f983aed +Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f725f0f7028 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 +RAX: ffffffffffffffda RBX: 00007f725faa3f80 RCX: 00007f725f983aed +RDX: 00000000200000c0 RSI: 0000000020000040 RDI: 0000000000000000 +RBP: 00007f725f9f419c R08: 0000000020000280 R09: 0000000000000000 +R10: 0000000000000408 R11: 0000000000000246 R12: 0000000000000000 +R13: 0000000000000006 R14: 00007f725faa3f80 R15: 00007f725f0d7000 + + +Allocated by task 855: + kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 + kasan_set_track mm/kasan/common.c:45 [inline] + set_alloc_info mm/kasan/common.c:437 [inline] + __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:470 + kasan_slab_alloc include/linux/kasan.h:224 [inline] + slab_post_alloc_hook 
mm/slab.h:727 [inline] + slab_alloc_node mm/slub.c:3243 [inline] + slab_alloc mm/slub.c:3251 [inline] + __kmem_cache_alloc_lru mm/slub.c:3258 [inline] + kmem_cache_alloc+0xbf/0x200 mm/slub.c:3268 + kmem_cache_zalloc include/linux/slab.h:723 [inline] + __kernfs_new_node+0xd4/0x680 fs/kernfs/dir.c:593 + kernfs_new_node fs/kernfs/dir.c:655 [inline] + kernfs_create_dir_ns+0x9c/0x220 fs/kernfs/dir.c:1010 + sysfs_create_dir_ns+0x127/0x290 fs/sysfs/dir.c:59 + create_dir lib/kobject.c:63 [inline] + kobject_add_internal+0x24a/0x8d0 lib/kobject.c:223 + kobject_add_varg lib/kobject.c:358 [inline] + kobject_init_and_add+0x101/0x160 lib/kobject.c:441 + sysfs_slab_add+0x156/0x1e0 mm/slub.c:5954 + __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899 + create_cache mm/slab_common.c:229 [inline] + kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335 + p9_client_create+0xd4d/0x1190 net/9p/client.c:993 + v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408 + v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126 + legacy_get_tree+0xf1/0x200 fs/fs_context.c:610 + vfs_get_tree+0x85/0x2e0 fs/super.c:1530 + do_new_mount fs/namespace.c:3040 [inline] + path_mount+0x675/0x1d00 fs/namespace.c:3370 + do_mount fs/namespace.c:3383 [inline] + __do_sys_mount fs/namespace.c:3591 [inline] + __se_sys_mount fs/namespace.c:3568 [inline] + __x64_sys_mount+0x282/0x300 fs/namespace.c:3568 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Freed by task 857: + kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 + kasan_set_track+0x21/0x30 mm/kasan/common.c:45 + kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:370 + ____kasan_slab_free mm/kasan/common.c:367 [inline] + ____kasan_slab_free mm/kasan/common.c:329 [inline] + __kasan_slab_free+0x108/0x190 mm/kasan/common.c:375 + kasan_slab_free include/linux/kasan.h:200 [inline] + slab_free_hook mm/slub.c:1754 [inline] + slab_free_freelist_hook mm/slub.c:1780 [inline] + slab_free 
mm/slub.c:3534 [inline] + kmem_cache_free+0x9c/0x340 mm/slub.c:3551 + kernfs_put.part.0+0x2b2/0x520 fs/kernfs/dir.c:547 + kernfs_put+0x42/0x50 fs/kernfs/dir.c:521 + __kernfs_remove.part.0+0x72d/0x960 fs/kernfs/dir.c:1407 + __kernfs_remove fs/kernfs/dir.c:1356 [inline] + kernfs_remove_by_name_ns+0x108/0x190 fs/kernfs/dir.c:1589 + sysfs_slab_add+0x133/0x1e0 mm/slub.c:5943 + __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899 + create_cache mm/slab_common.c:229 [inline] + kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335 + p9_client_create+0xd4d/0x1190 net/9p/client.c:993 + v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408 + v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126 + legacy_get_tree+0xf1/0x200 fs/fs_context.c:610 + vfs_get_tree+0x85/0x2e0 fs/super.c:1530 + do_new_mount fs/namespace.c:3040 [inline] + path_mount+0x675/0x1d00 fs/namespace.c:3370 + do_mount fs/namespace.c:3383 [inline] + __do_sys_mount fs/namespace.c:3591 [inline] + __se_sys_mount fs/namespace.c:3568 [inline] + __x64_sys_mount+0x282/0x300 fs/namespace.c:3568 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +The buggy address belongs to the object at ffff888008880780 + which belongs to the cache kernfs_node_cache of size 128 +The buggy address is located 112 bytes inside of + 128-byte region [ffff888008880780, ffff888008880800) + +The buggy address belongs to the physical page: +page:00000000732833f8 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x8880 +flags: 0x100000000000200(slab|node=0|zone=1) +raw: 0100000000000200 0000000000000000 dead000000000122 ffff888001147280 +raw: 0000000000000000 0000000000150015 00000001ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff888008880680: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb + ffff888008880700: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc +>ffff888008880780: 
fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff888008880800: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb + ffff888008880880: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc +================================================================== + +Acked-by: Tejun Heo +Cc: stable # -rc3 +Signed-off-by: Christian A. Ehrhardt +Link: https://lore.kernel.org/r/20220913121723.691454-1-lk@c--e.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + fs/kernfs/dir.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c +index afb39e1bbe3b..8b3c86a502da 100644 +--- a/fs/kernfs/dir.c ++++ b/fs/kernfs/dir.c +@@ -1519,8 +1519,11 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, + mutex_lock(&kernfs_mutex); + + kn = kernfs_find_ns(parent, name, ns); +- if (kn) ++ if (kn) { ++ kernfs_get(kn); + __kernfs_remove(kn); ++ kernfs_put(kn); ++ } + + mutex_unlock(&kernfs_mutex); + +-- +2.35.1 + diff --git a/queue-5.10/libbpf-use-is_err_or_null-in-hashmap__free.patch b/queue-5.10/libbpf-use-is_err_or_null-in-hashmap__free.patch new file mode 100644 index 00000000000..8b2ca061698 --- /dev/null +++ b/queue-5.10/libbpf-use-is_err_or_null-in-hashmap__free.patch @@ -0,0 +1,48 @@ +From a5ba4a9bb0982e29a8a046ca76e578bb2f8f74c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jan 2022 10:26:19 -0500 +Subject: libbpf: Use IS_ERR_OR_NULL() in hashmap__free() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mauricio Vásquez + +[ Upstream commit fba60b171a0322830b446dd28170092c47243d39 ] + +hashmap__new() uses ERR_PTR() to return an error so it's better to +use IS_ERR_OR_NULL() in order to check the pointer before calling +free(). This will prevent freeing an invalid pointer if somebody calls +hashmap__free() with the result of a failed hashmap__new() call. 
+ +Signed-off-by: Mauricio Vásquez +Signed-off-by: Andrii Nakryiko +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20220107152620.192327-1-mauricio@kinvolk.io +Stable-dep-of: 1fcc064b305a ("netfilter: rpfilter/fib: Set ->flowic_uid correctly for user namespaces.") +Signed-off-by: Sasha Levin +--- + tools/lib/bpf/hashmap.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c +index 3c20b126d60d..aeb09c288716 100644 +--- a/tools/lib/bpf/hashmap.c ++++ b/tools/lib/bpf/hashmap.c +@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map) + + void hashmap__free(struct hashmap *map) + { +- if (!map) ++ if (IS_ERR_OR_NULL(map)) + return; + + hashmap__clear(map); +@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key, + + return true; + } +- +-- +2.35.1 + diff --git a/queue-5.10/mmc-core-add-sd-card-quirk-for-broken-discard.patch b/queue-5.10/mmc-core-add-sd-card-quirk-for-broken-discard.patch new file mode 100644 index 00000000000..32c302ab180 --- /dev/null +++ b/queue-5.10/mmc-core-add-sd-card-quirk-for-broken-discard.patch @@ -0,0 +1,102 @@ +From 3e2553510c2410a41e71de7c48c969021996b5da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Sep 2022 12:57:44 +0300 +Subject: mmc: core: Add SD card quirk for broken discard + +From: Avri Altman + +[ Upstream commit 07d2872bf4c864eb83d034263c155746a2fb7a3b ] + +Some SD-cards from Sandisk that are SDA-6.0 compliant report that they support +discard, while they actually don't. This might cause mk2fs to fail while +trying to format the card and revert it to a read-only mode. + +To fix this problem, let's add a card quirk (MMC_QUIRK_BROKEN_SD_DISCARD) +to indicate that we shall fall-back to use the legacy erase command +instead. 
+ +Signed-off-by: Avri Altman +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220928095744.16455-1-avri.altman@wdc.com +[Ulf: Updated the commit message] +Signed-off-by: Ulf Hansson +Signed-off-by: Sasha Levin +--- + drivers/mmc/core/block.c | 6 +++++- + drivers/mmc/core/card.h | 6 ++++++ + drivers/mmc/core/quirks.h | 6 ++++++ + include/linux/mmc/card.h | 1 + + 4 files changed, 18 insertions(+), 1 deletion(-) + +diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c +index a9f9a45eafe4..cdf43f790f92 100644 +--- a/drivers/mmc/core/block.c ++++ b/drivers/mmc/core/block.c +@@ -1100,8 +1100,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) + { + struct mmc_blk_data *md = mq->blkdata; + struct mmc_card *card = md->queue.card; ++ unsigned int arg = card->erase_arg; + +- mmc_blk_issue_erase_rq(mq, req, MMC_BLK_DISCARD, card->erase_arg); ++ if (mmc_card_broken_sd_discard(card)) ++ arg = SD_ERASE_ARG; ++ ++ mmc_blk_issue_erase_rq(mq, req, MMC_BLK_DISCARD, arg); + } + + static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, +diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h +index 7bd392d55cfa..5c6986131faf 100644 +--- a/drivers/mmc/core/card.h ++++ b/drivers/mmc/core/card.h +@@ -70,6 +70,7 @@ struct mmc_fixup { + #define EXT_CSD_REV_ANY (-1u) + + #define CID_MANFID_SANDISK 0x2 ++#define CID_MANFID_SANDISK_SD 0x3 + #define CID_MANFID_ATP 0x9 + #define CID_MANFID_TOSHIBA 0x11 + #define CID_MANFID_MICRON 0x13 +@@ -222,4 +223,9 @@ static inline int mmc_card_broken_hpi(const struct mmc_card *c) + return c->quirks & MMC_QUIRK_BROKEN_HPI; + } + ++static inline int mmc_card_broken_sd_discard(const struct mmc_card *c) ++{ ++ return c->quirks & MMC_QUIRK_BROKEN_SD_DISCARD; ++} ++ + #endif +diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h +index d68e6e513a4f..c8c0f50a2076 100644 +--- a/drivers/mmc/core/quirks.h ++++ b/drivers/mmc/core/quirks.h +@@ -99,6 +99,12 @@ static const struct 
mmc_fixup __maybe_unused mmc_blk_fixups[] = { + MMC_FIXUP("V10016", CID_MANFID_KINGSTON, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_TRIM_BROKEN), + ++ /* ++ * Some SD cards reports discard support while they don't ++ */ ++ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, ++ MMC_QUIRK_BROKEN_SD_DISCARD), ++ + END_FIXUP + }; + +diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h +index 42df06c6b19c..ef870d1f4f5f 100644 +--- a/include/linux/mmc/card.h ++++ b/include/linux/mmc/card.h +@@ -270,6 +270,7 @@ struct mmc_card { + #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ + #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ + #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ ++#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ + + bool reenable_cmdq; /* Re-enable Command Queue */ + +-- +2.35.1 + diff --git a/queue-5.10/mmc-core-support-zeroout-using-trim-for-emmc.patch b/queue-5.10/mmc-core-support-zeroout-using-trim-for-emmc.patch new file mode 100644 index 00000000000..64e98e61860 --- /dev/null +++ b/queue-5.10/mmc-core-support-zeroout-using-trim-for-emmc.patch @@ -0,0 +1,114 @@ +From 264f7b493a2cd529c07fb76d3679e16665aae18f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Apr 2022 17:21:18 +0200 +Subject: mmc: core: Support zeroout using TRIM for eMMC + +From: Vincent Whitchurch + +[ Upstream commit f7b6fc327327698924ef3afa0c3e87a5b7466af3 ] + +If an eMMC card supports TRIM and indicates that it erases to zeros, we can +use it to support hardware offloading of REQ_OP_WRITE_ZEROES, so let's add +support for this. 
+ +Signed-off-by: Vincent Whitchurch +Reviewed-by: Avri Altman +Link: https://lore.kernel.org/r/20220429152118.3617303-1-vincent.whitchurch@axis.com +Signed-off-by: Ulf Hansson +Stable-dep-of: 07d2872bf4c8 ("mmc: core: Add SD card quirk for broken discard") +Signed-off-by: Sasha Levin +--- + drivers/mmc/core/block.c | 26 ++++++++++++++++++++++---- + drivers/mmc/core/queue.c | 2 ++ + 2 files changed, 24 insertions(+), 4 deletions(-) + +diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c +index 66a00b7c751f..a9f9a45eafe4 100644 +--- a/drivers/mmc/core/block.c ++++ b/drivers/mmc/core/block.c +@@ -118,6 +118,7 @@ struct mmc_blk_data { + #define MMC_BLK_DISCARD BIT(2) + #define MMC_BLK_SECDISCARD BIT(3) + #define MMC_BLK_CQE_RECOVERY BIT(4) ++#define MMC_BLK_TRIM BIT(5) + + /* + * Only set in main mmc_blk_data associated +@@ -1052,12 +1053,13 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) + blk_mq_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK); + } + +-static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) ++static void mmc_blk_issue_erase_rq(struct mmc_queue *mq, struct request *req, ++ int type, unsigned int erase_arg) + { + struct mmc_blk_data *md = mq->blkdata; + struct mmc_card *card = md->queue.card; + unsigned int from, nr; +- int err = 0, type = MMC_BLK_DISCARD; ++ int err = 0; + blk_status_t status = BLK_STS_OK; + + if (!mmc_can_erase(card)) { +@@ -1073,13 +1075,13 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) + if (card->quirks & MMC_QUIRK_INAND_CMD38) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + INAND_CMD38_ARG_EXT_CSD, +- card->erase_arg == MMC_TRIM_ARG ? ++ erase_arg == MMC_TRIM_ARG ? 
+ INAND_CMD38_ARG_TRIM : + INAND_CMD38_ARG_ERASE, + card->ext_csd.generic_cmd6_time); + } + if (!err) +- err = mmc_erase(card, from, nr, card->erase_arg); ++ err = mmc_erase(card, from, nr, erase_arg); + } while (err == -EIO && !mmc_blk_reset(md, card->host, type)); + if (err) + status = BLK_STS_IOERR; +@@ -1089,6 +1091,19 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) + blk_mq_end_request(req, status); + } + ++static void mmc_blk_issue_trim_rq(struct mmc_queue *mq, struct request *req) ++{ ++ mmc_blk_issue_erase_rq(mq, req, MMC_BLK_TRIM, MMC_TRIM_ARG); ++} ++ ++static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) ++{ ++ struct mmc_blk_data *md = mq->blkdata; ++ struct mmc_card *card = md->queue.card; ++ ++ mmc_blk_issue_erase_rq(mq, req, MMC_BLK_DISCARD, card->erase_arg); ++} ++ + static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, + struct request *req) + { +@@ -2227,6 +2242,9 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) + case REQ_OP_SECURE_ERASE: + mmc_blk_issue_secdiscard_rq(mq, req); + break; ++ case REQ_OP_WRITE_ZEROES: ++ mmc_blk_issue_trim_rq(mq, req); ++ break; + case REQ_OP_FLUSH: + mmc_blk_issue_flush(mq, req); + break; +diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c +index 002426e3cf76..ae235828e909 100644 +--- a/drivers/mmc/core/queue.c ++++ b/drivers/mmc/core/queue.c +@@ -193,6 +193,8 @@ static void mmc_queue_setup_discard(struct request_queue *q, + q->limits.discard_granularity = SECTOR_SIZE; + if (mmc_can_secure_erase_trim(card)) + blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); ++ if (mmc_can_trim(card) && card->erased_byte == 0) ++ blk_queue_max_write_zeroes_sectors(q, max_discard); + } + + static unsigned int mmc_get_max_segments(struct mmc_host *host) +-- +2.35.1 + diff --git a/queue-5.10/mmc-sdhci-tegra-use-actual-clock-rate-for-sw-tuning-.patch b/queue-5.10/mmc-sdhci-tegra-use-actual-clock-rate-for-sw-tuning-.patch new 
file mode 100644 index 00000000000..c4b8c575342 --- /dev/null +++ b/queue-5.10/mmc-sdhci-tegra-use-actual-clock-rate-for-sw-tuning-.patch @@ -0,0 +1,47 @@ +From c88ccb28fba44468457fface9dd8b3f677069569 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Oct 2022 18:36:22 +0530 +Subject: mmc: sdhci-tegra: Use actual clock rate for SW tuning correction + +From: Prathamesh Shete + +[ Upstream commit b78870e7f41534cc719c295d1f8809aca93aeeab ] + +Ensure tegra_host member "curr_clk_rate" holds the actual clock rate +instead of requested clock rate for proper use during tuning correction +algorithm. Actual clk rate may not be the same as the requested clk +frequency depending on the parent clock source set. Tuning correction +algorithm depends on certain parameters which are sensitive to current +clk rate. If the host clk is selected instead of the actual clock rate, +tuning correction algorithm may end up applying invalid correction, +which could result in errors + +Fixes: ea8fc5953e8b ("mmc: tegra: update hw tuning process") +Signed-off-by: Aniruddha TVS Rao +Signed-off-by: Prathamesh Shete +Acked-by: Adrian Hunter +Acked-by: Thierry Reding +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20221006130622.22900-4-pshete@nvidia.com +Signed-off-by: Ulf Hansson +Signed-off-by: Sasha Levin +--- + drivers/mmc/host/sdhci-tegra.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c +index d50b691f6c44..67211fc42d24 100644 +--- a/drivers/mmc/host/sdhci-tegra.c ++++ b/drivers/mmc/host/sdhci-tegra.c +@@ -760,7 +760,7 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) + */ + host_clk = tegra_host->ddr_signaling ? 
clock * 2 : clock; + clk_set_rate(pltfm_host->clk, host_clk); +- tegra_host->curr_clk_rate = host_clk; ++ tegra_host->curr_clk_rate = clk_get_rate(pltfm_host->clk); + if (tegra_host->ddr_signaling) + host->max_clk = host_clk; + else +-- +2.35.1 + diff --git a/queue-5.10/net-atm-fix-proc_mpc_write-incorrect-return-value.patch b/queue-5.10/net-atm-fix-proc_mpc_write-incorrect-return-value.patch new file mode 100644 index 00000000000..88a54798857 --- /dev/null +++ b/queue-5.10/net-atm-fix-proc_mpc_write-incorrect-return-value.patch @@ -0,0 +1,41 @@ +From 53d987f5ef7009ac9c9d3b5dfd897ab2269b68f9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Oct 2022 10:05:40 +0800 +Subject: net/atm: fix proc_mpc_write incorrect return value + +From: Xiaobo Liu + +[ Upstream commit d8bde3bf7f82dac5fc68a62c2816793a12cafa2a ] + +When the input contains '\0' or '\n', proc_mpc_write has read them, +so the return value needs +1. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Xiaobo Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/atm/mpoa_proc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c +index 829db9eba0cb..aaf64b953915 100644 +--- a/net/atm/mpoa_proc.c ++++ b/net/atm/mpoa_proc.c +@@ -219,11 +219,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff, + if (!page) + return -ENOMEM; + +- for (p = page, len = 0; len < nbytes; p++, len++) { ++ for (p = page, len = 0; len < nbytes; p++) { + if (get_user(*p, buff++)) { + free_page((unsigned long)page); + return -EFAULT; + } ++ len += 1; + if (*p == '\0' || *p == '\n') + break; + } +-- +2.35.1 + diff --git a/queue-5.10/net-hns-fix-possible-memory-leak-in-hnae_ae_register.patch b/queue-5.10/net-hns-fix-possible-memory-leak-in-hnae_ae_register.patch new file mode 100644 index 00000000000..bc47a2f2d2d --- /dev/null +++ b/queue-5.10/net-hns-fix-possible-memory-leak-in-hnae_ae_register.patch @@ -0,0 +1,61 @@ +From add2519edb109b88b6ae6c0ae30fd2be6a06589b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 20:24:51 +0800 +Subject: net: hns: fix possible memory leak in hnae_ae_register() + +From: Yang Yingliang + +[ Upstream commit ff2f5ec5d009844ec28f171123f9e58750cef4bf ] + +Inject fault while probing module, if device_register() fails, +but the refcount of kobject is not decreased to 0, the name +allocated in dev_set_name() is leaked. Fix this by calling +put_device(), so that name can be freed in callback function +kobject_cleanup(). + +unreferenced object 0xffff00c01aba2100 (size 128): + comm "systemd-udevd", pid 1259, jiffies 4294903284 (age 294.152s) + hex dump (first 32 bytes): + 68 6e 61 65 30 00 00 00 18 21 ba 1a c0 00 ff ff hnae0....!...... + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [<0000000034783f26>] slab_post_alloc_hook+0xa0/0x3e0 + [<00000000748188f2>] __kmem_cache_alloc_node+0x164/0x2b0 + [<00000000ab0743e8>] __kmalloc_node_track_caller+0x6c/0x390 + [<000000006c0ffb13>] kvasprintf+0x8c/0x118 + [<00000000fa27bfe1>] kvasprintf_const+0x60/0xc8 + [<0000000083e10ed7>] kobject_set_name_vargs+0x3c/0xc0 + [<000000000b87affc>] dev_set_name+0x7c/0xa0 + [<000000003fd8fe26>] hnae_ae_register+0xcc/0x190 [hnae] + [<00000000fe97edc9>] hns_dsaf_ae_init+0x9c/0x108 [hns_dsaf] + [<00000000c36ff1eb>] hns_dsaf_probe+0x548/0x748 [hns_dsaf] + +Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support") +Signed-off-by: Yang Yingliang +Reviewed-by: Leon Romanovsky +Link: https://lore.kernel.org/r/20221018122451.1749171-1-yangyingliang@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c +index 00fafc0f8512..430eccea8e5e 100644 +--- a/drivers/net/ethernet/hisilicon/hns/hnae.c ++++ b/drivers/net/ethernet/hisilicon/hns/hnae.c +@@ -419,8 +419,10 @@ int hnae_ae_register(struct hnae_ae_dev *hdev, struct module *owner) + hdev->cls_dev.release = hnae_release; + (void)dev_set_name(&hdev->cls_dev, "hnae%d", hdev->id); + ret = device_register(&hdev->cls_dev); +- if (ret) ++ if (ret) { ++ put_device(&hdev->cls_dev); + return ret; ++ } + + __module_get(THIS_MODULE); + +-- +2.35.1 + diff --git a/queue-5.10/net-hsr-avoid-possible-null-deref-in-skb_clone.patch b/queue-5.10/net-hsr-avoid-possible-null-deref-in-skb_clone.patch new file mode 100644 index 00000000000..76b63e269ca --- /dev/null +++ b/queue-5.10/net-hsr-avoid-possible-null-deref-in-skb_clone.patch @@ -0,0 +1,97 @@ +From 815ad26e6f7245e68fd931d2b6e2dd1d48ef694c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Oct 2022 16:59:28 +0000 +Subject: net: 
hsr: avoid possible NULL deref in skb_clone() + +From: Eric Dumazet + +[ Upstream commit d8b57135fd9ffe9a5b445350a686442a531c5339 ] + +syzbot got a crash [1] in skb_clone(), caused by a bug +in hsr_get_untagged_frame(). + +When/if create_stripped_skb_hsr() returns NULL, we must +not attempt to call skb_clone(). + +While we are at it, replace a WARN_ONCE() by netdev_warn_once(). + +[1] +general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] PREEMPT SMP KASAN +KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f] +CPU: 1 PID: 754 Comm: syz-executor.0 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 +RIP: 0010:skb_clone+0x108/0x3c0 net/core/skbuff.c:1641 +Code: 93 02 00 00 49 83 7c 24 28 00 0f 85 e9 00 00 00 e8 5d 4a 29 fa 4c 8d 75 7e 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 84 c0 0f 85 9e 01 00 00 +RSP: 0018:ffffc90003ccf4e0 EFLAGS: 00010207 + +RAX: dffffc0000000000 RBX: ffffc90003ccf5f8 RCX: ffffc9000c24b000 +RDX: 000000000000000f RSI: ffffffff8751cb13 RDI: 0000000000000000 +RBP: 0000000000000000 R08: 00000000000000f0 R09: 0000000000000140 +R10: fffffbfff181d972 R11: 0000000000000000 R12: ffff888161fc3640 +R13: 0000000000000a20 R14: 000000000000007e R15: ffffffff8dc5f620 +FS: 00007feb621e4700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007feb621e3ff8 CR3: 00000001643a9000 CR4: 00000000003506e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + +hsr_get_untagged_frame+0x4e/0x610 net/hsr/hsr_forward.c:164 +hsr_forward_do net/hsr/hsr_forward.c:461 [inline] +hsr_forward_skb+0xcca/0x1d50 net/hsr/hsr_forward.c:623 +hsr_handle_frame+0x588/0x7c0 net/hsr/hsr_slave.c:69 +__netif_receive_skb_core+0x9fe/0x38f0 
net/core/dev.c:5379 +__netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483 +__netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 +netif_receive_skb_internal net/core/dev.c:5685 [inline] +netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744 +tun_rx_batched+0x4ab/0x7a0 drivers/net/tun.c:1544 +tun_get_user+0x2686/0x3a00 drivers/net/tun.c:1995 +tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2025 +call_write_iter include/linux/fs.h:2187 [inline] +new_sync_write fs/read_write.c:491 [inline] +vfs_write+0x9e9/0xdd0 fs/read_write.c:584 +ksys_write+0x127/0x250 fs/read_write.c:637 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Fixes: f266a683a480 ("net/hsr: Better frame dispatch") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20221017165928.2150130-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/hsr/hsr_forward.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c +index baf4765be6d7..908324b46328 100644 +--- a/net/hsr/hsr_forward.c ++++ b/net/hsr/hsr_forward.c +@@ -108,15 +108,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port) + { + if (!frame->skb_std) { +- if (frame->skb_hsr) { ++ if (frame->skb_hsr) + frame->skb_std = + create_stripped_skb_hsr(frame->skb_hsr, frame); +- } else { +- /* Unexpected */ +- WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", +- __FILE__, __LINE__, port->dev->name); ++ else ++ netdev_warn_once(port->dev, ++ "Unexpected frame received in hsr_get_untagged_frame()\n"); ++ ++ if (!frame->skb_std) + return NULL; +- } + } + + return skb_clone(frame->skb_std, GFP_ATOMIC); +-- +2.35.1 + diff --git a/queue-5.10/net-phy-dp83822-disable-mdi-crossover-status-change-.patch 
b/queue-5.10/net-phy-dp83822-disable-mdi-crossover-status-change-.patch new file mode 100644 index 00000000000..e69eb70d86b --- /dev/null +++ b/queue-5.10/net-phy-dp83822-disable-mdi-crossover-status-change-.patch @@ -0,0 +1,43 @@ +From 3e3da7b204b323fad2e25355323618b85bef3a96 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 12:47:54 +0200 +Subject: net: phy: dp83822: disable MDI crossover status change interrupt + +From: Felix Riemann + +[ Upstream commit 7f378c03aa4952507521174fb0da7b24a9ad0be6 ] + +If the cable is disconnected the PHY seems to toggle between MDI and +MDI-X modes. With the MDI crossover status interrupt active this causes +roughly 10 interrupts per second. + +As the crossover status isn't checked by the driver, the interrupt can +be disabled to reduce the interrupt load. + +Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") +Signed-off-by: Felix Riemann +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/20221018104755.30025-1-svc.sw.rte.linux@sma.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/dp83822.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c +index 3a8849716459..db651649e0b8 100644 +--- a/drivers/net/phy/dp83822.c ++++ b/drivers/net/phy/dp83822.c +@@ -268,8 +268,7 @@ static int dp83822_config_intr(struct phy_device *phydev) + DP83822_EEE_ERROR_CHANGE_INT_EN); + + if (!dp83822->fx_enabled) +- misr_status |= DP83822_MDI_XOVER_INT_EN | +- DP83822_ANEG_ERR_INT_EN | ++ misr_status |= DP83822_ANEG_ERR_INT_EN | + DP83822_WOL_PKT_INT_EN; + + err = phy_write(phydev, MII_DP83822_MISR2, misr_status); +-- +2.35.1 + diff --git a/queue-5.10/net-phy-dp83867-extend-rx-strap-quirk-for-sgmii-mode.patch b/queue-5.10/net-phy-dp83867-extend-rx-strap-quirk-for-sgmii-mode.patch new file mode 100644 index 00000000000..6505f2bb7b4 --- /dev/null +++ 
b/queue-5.10/net-phy-dp83867-extend-rx-strap-quirk-for-sgmii-mode.patch @@ -0,0 +1,44 @@ +From 64e9beb9998ad18102560f4328d09e147f1aa484 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Oct 2022 12:17:35 +0530 +Subject: net: phy: dp83867: Extend RX strap quirk for SGMII mode + +From: Harini Katakam + +[ Upstream commit 0c9efbd5c50c64ead434960a404c9c9a097b0403 ] + +When RX strap in HW is not set to MODE 3 or 4, bit 7 and 8 in CF4 +register should be set. The former is already handled in +dp83867_config_init; add the latter in SGMII specific initialization. + +Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy") +Signed-off-by: Harini Katakam +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/dp83867.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c +index f86acad0aad4..c8031e297faf 100644 +--- a/drivers/net/phy/dp83867.c ++++ b/drivers/net/phy/dp83867.c +@@ -757,6 +757,14 @@ static int dp83867_config_init(struct phy_device *phydev) + else + val &= ~DP83867_SGMII_TYPE; + phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_SGMIICTL, val); ++ ++ /* This is a SW workaround for link instability if RX_CTRL is ++ * not strapped to mode 3 or 4 in HW. This is required for SGMII ++ * in addition to clearing bit 7, handled above. 
++ */ ++ if (dp83867->rxctrl_strap_quirk) ++ phy_set_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, ++ BIT(8)); + } + + val = phy_read(phydev, DP83867_CFG3); +-- +2.35.1 + diff --git a/queue-5.10/net-sched-cake-fix-null-pointer-access-issue-when-ca.patch b/queue-5.10/net-sched-cake-fix-null-pointer-access-issue-when-ca.patch new file mode 100644 index 00000000000..f2110b4c905 --- /dev/null +++ b/queue-5.10/net-sched-cake-fix-null-pointer-access-issue-when-ca.patch @@ -0,0 +1,86 @@ +From 1f4d25ad81cae5cc01f9c96c9ebcd5d7a890292f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 14:31:59 +0800 +Subject: net: sched: cake: fix null pointer access issue when cake_init() + fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Zhengchao Shao + +[ Upstream commit 51f9a8921ceacd7bf0d3f47fa867a64988ba1dcb ] + +When the default qdisc is cake, if the qdisc of dev_queue fails to be +inited during mqprio_init(), cake_reset() is invoked to clear +resources. In this case, the tins is NULL, and it will cause gpf issue. + +The process is as follows: +qdisc_create_dflt() + cake_init() + q->tins = kvcalloc(...) --->failed, q->tins is NULL + ... + qdisc_put() + ... + cake_reset() + ... + cake_dequeue_one() + b = &q->tins[...] 
--->q->tins is NULL + +The following is the Call Trace information: +general protection fault, probably for non-canonical address +0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN +KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] +RIP: 0010:cake_dequeue_one+0xc9/0x3c0 +Call Trace: + +cake_reset+0xb1/0x140 +qdisc_reset+0xed/0x6f0 +qdisc_destroy+0x82/0x4c0 +qdisc_put+0x9e/0xb0 +qdisc_create_dflt+0x2c3/0x4a0 +mqprio_init+0xa71/0x1760 +qdisc_create+0x3eb/0x1000 +tc_modify_qdisc+0x408/0x1720 +rtnetlink_rcv_msg+0x38e/0xac0 +netlink_rcv_skb+0x12d/0x3a0 +netlink_unicast+0x4a2/0x740 +netlink_sendmsg+0x826/0xcc0 +sock_sendmsg+0xc5/0x100 +____sys_sendmsg+0x583/0x690 +___sys_sendmsg+0xe8/0x160 +__sys_sendmsg+0xbf/0x160 +do_syscall_64+0x35/0x80 +entry_SYSCALL_64_after_hwframe+0x46/0xb0 +RIP: 0033:0x7f89e5122d04 + + +Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") +Signed-off-by: Zhengchao Shao +Acked-by: Toke Høiland-Jørgensen +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_cake.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c +index c580139fcede..5dc7a3c310c9 100644 +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -2224,8 +2224,12 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) + + static void cake_reset(struct Qdisc *sch) + { ++ struct cake_sched_data *q = qdisc_priv(sch); + u32 c; + ++ if (!q->tins) ++ return; ++ + for (c = 0; c < CAKE_MAX_TINS; c++) + cake_clear_tin(sch, c); + } +-- +2.35.1 + diff --git a/queue-5.10/net-sched-delete-duplicate-cleanup-of-backlog-and-ql.patch b/queue-5.10/net-sched-delete-duplicate-cleanup-of-backlog-and-ql.patch new file mode 100644 index 00000000000..847ac7f0774 --- /dev/null +++ b/queue-5.10/net-sched-delete-duplicate-cleanup-of-backlog-and-ql.patch @@ -0,0 +1,316 @@ +From 70ee65259c1725b99adc372f8a23e101edd4520c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Aug 2022 08:52:31 +0800 +Subject: net: sched: delete duplicate cleanup of backlog and qlen + +From: Zhengchao Shao + +[ Upstream commit c19d893fbf3f2f8fa864ae39652c7fee939edde2 ] + +qdisc_reset() is clearing qdisc->q.qlen and qdisc->qstats.backlog +_after_ calling qdisc->ops->reset. There is no need to clear them +again in the specific reset function. 
+ +Signed-off-by: Zhengchao Shao +Link: https://lore.kernel.org/r/20220824005231.345727-1-shaozhengchao@huawei.com +Signed-off-by: Paolo Abeni +Stable-dep-of: 2a3fc78210b9 ("net: sched: sfb: fix null pointer access issue when sfb_init() fails") +Signed-off-by: Sasha Levin +--- + include/net/sch_generic.h | 1 - + net/sched/sch_atm.c | 1 - + net/sched/sch_cbq.c | 1 - + net/sched/sch_choke.c | 2 -- + net/sched/sch_drr.c | 2 -- + net/sched/sch_dsmark.c | 2 -- + net/sched/sch_etf.c | 3 --- + net/sched/sch_ets.c | 2 -- + net/sched/sch_fq_codel.c | 2 -- + net/sched/sch_fq_pie.c | 3 --- + net/sched/sch_hfsc.c | 2 -- + net/sched/sch_htb.c | 2 -- + net/sched/sch_multiq.c | 1 - + net/sched/sch_prio.c | 2 -- + net/sched/sch_qfq.c | 2 -- + net/sched/sch_red.c | 2 -- + net/sched/sch_sfb.c | 2 -- + net/sched/sch_skbprio.c | 3 --- + net/sched/sch_taprio.c | 2 -- + net/sched/sch_tbf.c | 2 -- + net/sched/sch_teql.c | 1 - + 21 files changed, 40 deletions(-) + +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h +index bed2387af456..e7e8c318925d 100644 +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -1178,7 +1178,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) + static inline void qdisc_reset_queue(struct Qdisc *sch) + { + __qdisc_reset_queue(&sch->q); +- sch->qstats.backlog = 0; + } + + static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, +diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c +index 1c281cc81f57..794c7377cd7e 100644 +--- a/net/sched/sch_atm.c ++++ b/net/sched/sch_atm.c +@@ -575,7 +575,6 @@ static void atm_tc_reset(struct Qdisc *sch) + pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); + list_for_each_entry(flow, &p->flows, list) + qdisc_reset(flow->q); +- sch->q.qlen = 0; + } + + static void atm_tc_destroy(struct Qdisc *sch) +diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c +index 4a78fcf5d4f9..9a3dff02b7a2 100644 +--- a/net/sched/sch_cbq.c ++++ b/net/sched/sch_cbq.c +@@ 
-1053,7 +1053,6 @@ cbq_reset(struct Qdisc *sch) + cl->cpriority = cl->priority; + } + } +- sch->q.qlen = 0; + } + + +diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c +index 2adbd945bf15..25d2daaa8122 100644 +--- a/net/sched/sch_choke.c ++++ b/net/sched/sch_choke.c +@@ -315,8 +315,6 @@ static void choke_reset(struct Qdisc *sch) + rtnl_qdisc_drop(skb, sch); + } + +- sch->q.qlen = 0; +- sch->qstats.backlog = 0; + if (q->tab) + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + q->head = q->tail = 0; +diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c +index dde564670ad8..08424aac6da8 100644 +--- a/net/sched/sch_drr.c ++++ b/net/sched/sch_drr.c +@@ -443,8 +443,6 @@ static void drr_reset_qdisc(struct Qdisc *sch) + qdisc_reset(cl->qdisc); + } + } +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + } + + static void drr_destroy_qdisc(struct Qdisc *sch) +diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c +index 76ed1a05ded2..a75bc7f80cd7 100644 +--- a/net/sched/sch_dsmark.c ++++ b/net/sched/sch_dsmark.c +@@ -408,8 +408,6 @@ static void dsmark_reset(struct Qdisc *sch) + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); + if (p->q) + qdisc_reset(p->q); +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + } + + static void dsmark_destroy(struct Qdisc *sch) +diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c +index c48f91075b5c..d96103b0e2bf 100644 +--- a/net/sched/sch_etf.c ++++ b/net/sched/sch_etf.c +@@ -445,9 +445,6 @@ static void etf_reset(struct Qdisc *sch) + timesortedlist_clear(sch); + __qdisc_reset_queue(&sch->q); + +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; +- + q->last = 0; + } + +diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c +index 9c224872ef03..05817c55692f 100644 +--- a/net/sched/sch_ets.c ++++ b/net/sched/sch_ets.c +@@ -722,8 +722,6 @@ static void ets_qdisc_reset(struct Qdisc *sch) + } + for (band = 0; band < q->nbands; band++) + qdisc_reset(q->classes[band].qdisc); +- sch->qstats.backlog = 0; +- 
sch->q.qlen = 0; + } + + static void ets_qdisc_destroy(struct Qdisc *sch) +diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c +index 99e8db262198..01d6eea5b0ce 100644 +--- a/net/sched/sch_fq_codel.c ++++ b/net/sched/sch_fq_codel.c +@@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch) + codel_vars_init(&flow->cvars); + } + memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); +- sch->q.qlen = 0; +- sch->qstats.backlog = 0; + q->memory_usage = 0; + } + +diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c +index c70802785518..cf04f70e96bf 100644 +--- a/net/sched/sch_fq_pie.c ++++ b/net/sched/sch_fq_pie.c +@@ -521,9 +521,6 @@ static void fq_pie_reset(struct Qdisc *sch) + INIT_LIST_HEAD(&flow->flowchain); + pie_vars_init(&flow->vars); + } +- +- sch->q.qlen = 0; +- sch->qstats.backlog = 0; + } + + static void fq_pie_destroy(struct Qdisc *sch) +diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c +index d1902fca9844..cdc43a06aa9b 100644 +--- a/net/sched/sch_hfsc.c ++++ b/net/sched/sch_hfsc.c +@@ -1484,8 +1484,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) + } + q->eligible = RB_ROOT; + qdisc_watchdog_cancel(&q->watchdog); +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + } + + static void +diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c +index cd70dbcbd72f..c3ba018fd083 100644 +--- a/net/sched/sch_htb.c ++++ b/net/sched/sch_htb.c +@@ -966,8 +966,6 @@ static void htb_reset(struct Qdisc *sch) + } + qdisc_watchdog_cancel(&q->watchdog); + __qdisc_reset_queue(&q->direct_queue); +- sch->q.qlen = 0; +- sch->qstats.backlog = 0; + memset(q->hlevel, 0, sizeof(q->hlevel)); + memset(q->row_mask, 0, sizeof(q->row_mask)); + } +diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c +index 5c27b4270b90..1c6dbcfa89b8 100644 +--- a/net/sched/sch_multiq.c ++++ b/net/sched/sch_multiq.c +@@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch) + + for (band = 0; band < q->bands; band++) + qdisc_reset(q->queues[band]); +- sch->q.qlen = 0; + q->curband = 
0; + } + +diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c +index 3eabb871a1d5..1c805fe05b82 100644 +--- a/net/sched/sch_prio.c ++++ b/net/sched/sch_prio.c +@@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch) + + for (prio = 0; prio < q->bands; prio++) + qdisc_reset(q->queues[prio]); +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + } + + static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) +diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c +index af8c63a9ec18..1d1d81aeb389 100644 +--- a/net/sched/sch_qfq.c ++++ b/net/sched/sch_qfq.c +@@ -1458,8 +1458,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch) + qdisc_reset(cl->qdisc); + } + } +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + } + + static void qfq_destroy_qdisc(struct Qdisc *sch) +diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c +index 40adf1f07a82..f1e013e3f04a 100644 +--- a/net/sched/sch_red.c ++++ b/net/sched/sch_red.c +@@ -176,8 +176,6 @@ static void red_reset(struct Qdisc *sch) + struct red_sched_data *q = qdisc_priv(sch); + + qdisc_reset(q->qdisc); +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + red_restart(&q->vars); + } + +diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c +index b2724057629f..0e1cb517b0d9 100644 +--- a/net/sched/sch_sfb.c ++++ b/net/sched/sch_sfb.c +@@ -456,8 +456,6 @@ static void sfb_reset(struct Qdisc *sch) + struct sfb_sched_data *q = qdisc_priv(sch); + + qdisc_reset(q->qdisc); +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); +diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c +index 7a5e4c454715..df72fb83d9c7 100644 +--- a/net/sched/sch_skbprio.c ++++ b/net/sched/sch_skbprio.c +@@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch) + struct skbprio_sched_data *q = qdisc_priv(sch); + int prio; + +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; +- + for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) + __skb_queue_purge(&q->qdiscs[prio]); + +diff 
--git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index ab8835a72cee..7f33b31c7b8b 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -1626,8 +1626,6 @@ static void taprio_reset(struct Qdisc *sch) + if (q->qdiscs[i]) + qdisc_reset(q->qdiscs[i]); + } +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + } + + static void taprio_destroy(struct Qdisc *sch) +diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c +index 6eb17004a9e4..7461e5c67d50 100644 +--- a/net/sched/sch_tbf.c ++++ b/net/sched/sch_tbf.c +@@ -316,8 +316,6 @@ static void tbf_reset(struct Qdisc *sch) + struct tbf_sched_data *q = qdisc_priv(sch); + + qdisc_reset(q->qdisc); +- sch->qstats.backlog = 0; +- sch->q.qlen = 0; + q->t_c = ktime_get_ns(); + q->tokens = q->buffer; + q->ptokens = q->mtu; +diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c +index 6af6b95bdb67..79aaab51cbf5 100644 +--- a/net/sched/sch_teql.c ++++ b/net/sched/sch_teql.c +@@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch) + struct teql_sched_data *dat = qdisc_priv(sch); + + skb_queue_purge(&dat->q); +- sch->q.qlen = 0; + } + + static void +-- +2.35.1 + diff --git a/queue-5.10/net-sched-fix-race-condition-in-qdisc_graft.patch b/queue-5.10/net-sched-fix-race-condition-in-qdisc_graft.patch new file mode 100644 index 00000000000..39cdb6aa9a9 --- /dev/null +++ b/queue-5.10/net-sched-fix-race-condition-in-qdisc_graft.patch @@ -0,0 +1,265 @@ +From 3ce60e54a28dbfc95e759897fa347346f38d3e2a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 20:32:58 +0000 +Subject: net: sched: fix race condition in qdisc_graft() + +From: Eric Dumazet + +[ Upstream commit ebda44da44f6f309d302522b049f43d6f829f7aa ] + +We had one syzbot report [1] in syzbot queue for a while. +I was waiting for more occurrences and/or a repro but +Dmitry Vyukov spotted the issue right away. 
+ + +qdisc_graft() drops reference to qdisc in notify_and_destroy +while it's still assigned to dev->qdisc + + +Indeed, RCU rules are clear when replacing a data structure. +The visible pointer (dev->qdisc in this case) must be updated +to the new object _before_ RCU grace period is started +(qdisc_put(old) in this case). + +[1] +BUG: KASAN: use-after-free in __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 +Read of size 4 at addr ffff88802065e038 by task syz-executor.4/21027 + +CPU: 0 PID: 21027 Comm: syz-executor.4 Not tainted 6.0.0-rc3-syzkaller-00363-g7726d4c3e60b #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 +Call Trace: + +__dump_stack lib/dump_stack.c:88 [inline] +dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 +print_address_description mm/kasan/report.c:317 [inline] +print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 +kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 +__tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 +__tcf_qdisc_find net/sched/cls_api.c:1051 [inline] +tc_new_tfilter+0x34f/0x2200 net/sched/cls_api.c:2018 +rtnetlink_rcv_msg+0x955/0xca0 net/core/rtnetlink.c:6081 +netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 +netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] +netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 +netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 +sock_sendmsg_nosec net/socket.c:714 [inline] +sock_sendmsg+0xcf/0x120 net/socket.c:734 +____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 +___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 +__sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7f5efaa89279 +Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 
89 01 48 +RSP: 002b:00007f5efbc31168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f5efab9bf80 RCX: 00007f5efaa89279 +RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 +RBP: 00007f5efaae32e9 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007f5efb0cfb1f R14: 00007f5efbc31300 R15: 0000000000022000 + + +Allocated by task 21027: +kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 +kasan_set_track mm/kasan/common.c:45 [inline] +set_alloc_info mm/kasan/common.c:437 [inline] +____kasan_kmalloc mm/kasan/common.c:516 [inline] +____kasan_kmalloc mm/kasan/common.c:475 [inline] +__kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:525 +kmalloc_node include/linux/slab.h:623 [inline] +kzalloc_node include/linux/slab.h:744 [inline] +qdisc_alloc+0xb0/0xc50 net/sched/sch_generic.c:938 +qdisc_create_dflt+0x71/0x4a0 net/sched/sch_generic.c:997 +attach_one_default_qdisc net/sched/sch_generic.c:1152 [inline] +netdev_for_each_tx_queue include/linux/netdevice.h:2437 [inline] +attach_default_qdiscs net/sched/sch_generic.c:1170 [inline] +dev_activate+0x760/0xcd0 net/sched/sch_generic.c:1229 +__dev_open+0x393/0x4d0 net/core/dev.c:1441 +__dev_change_flags+0x583/0x750 net/core/dev.c:8556 +rtnl_configure_link+0xee/0x240 net/core/rtnetlink.c:3189 +rtnl_newlink_create net/core/rtnetlink.c:3371 [inline] +__rtnl_newlink+0x10b8/0x17e0 net/core/rtnetlink.c:3580 +rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593 +rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 +netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 +netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] +netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 +netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 +sock_sendmsg_nosec net/socket.c:714 [inline] +sock_sendmsg+0xcf/0x120 net/socket.c:734 +____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 +___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 
+__sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Freed by task 21020: +kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 +kasan_set_track+0x21/0x30 mm/kasan/common.c:45 +kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 +____kasan_slab_free mm/kasan/common.c:367 [inline] +____kasan_slab_free+0x166/0x1c0 mm/kasan/common.c:329 +kasan_slab_free include/linux/kasan.h:200 [inline] +slab_free_hook mm/slub.c:1754 [inline] +slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1780 +slab_free mm/slub.c:3534 [inline] +kfree+0xe2/0x580 mm/slub.c:4562 +rcu_do_batch kernel/rcu/tree.c:2245 [inline] +rcu_core+0x7b5/0x1890 kernel/rcu/tree.c:2505 +__do_softirq+0x1d3/0x9c6 kernel/softirq.c:571 + +Last potentially related work creation: +kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 +__kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 +call_rcu+0x99/0x790 kernel/rcu/tree.c:2793 +qdisc_put+0xcd/0xe0 net/sched/sch_generic.c:1083 +notify_and_destroy net/sched/sch_api.c:1012 [inline] +qdisc_graft+0xeb1/0x1270 net/sched/sch_api.c:1084 +tc_modify_qdisc+0xbb7/0x1a00 net/sched/sch_api.c:1671 +rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 +netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 +netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] +netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 +netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 +sock_sendmsg_nosec net/socket.c:714 [inline] +sock_sendmsg+0xcf/0x120 net/socket.c:734 +____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 +___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 +__sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Second to last potentially related work creation: +kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 
+__kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 +kvfree_call_rcu+0x74/0x940 kernel/rcu/tree.c:3322 +neigh_destroy+0x431/0x630 net/core/neighbour.c:912 +neigh_release include/net/neighbour.h:454 [inline] +neigh_cleanup_and_release+0x1f8/0x330 net/core/neighbour.c:103 +neigh_del net/core/neighbour.c:225 [inline] +neigh_remove_one+0x37d/0x460 net/core/neighbour.c:246 +neigh_forced_gc net/core/neighbour.c:276 [inline] +neigh_alloc net/core/neighbour.c:447 [inline] +___neigh_create+0x18b5/0x29a0 net/core/neighbour.c:642 +ip6_finish_output2+0xfb8/0x1520 net/ipv6/ip6_output.c:125 +__ip6_finish_output net/ipv6/ip6_output.c:195 [inline] +ip6_finish_output+0x690/0x1160 net/ipv6/ip6_output.c:206 +NF_HOOK_COND include/linux/netfilter.h:296 [inline] +ip6_output+0x1ed/0x540 net/ipv6/ip6_output.c:227 +dst_output include/net/dst.h:451 [inline] +NF_HOOK include/linux/netfilter.h:307 [inline] +NF_HOOK include/linux/netfilter.h:301 [inline] +mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820 +mld_send_cr net/ipv6/mcast.c:2121 [inline] +mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2653 +process_one_work+0x991/0x1610 kernel/workqueue.c:2289 +worker_thread+0x665/0x1080 kernel/workqueue.c:2436 +kthread+0x2e4/0x3a0 kernel/kthread.c:376 +ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 + +The buggy address belongs to the object at ffff88802065e000 +which belongs to the cache kmalloc-1k of size 1024 +The buggy address is located 56 bytes inside of +1024-byte region [ffff88802065e000, ffff88802065e400) + +The buggy address belongs to the physical page: +page:ffffea0000819600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x20658 +head:ffffea0000819600 order:3 compound_mapcount:0 compound_pincount:0 +flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) +raw: 00fff00000010200 0000000000000000 dead000000000001 ffff888011841dc0 +raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 +page dumped because: kasan: bad access detected +page_owner 
tracks the page as allocated +page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 3523, tgid 3523 (sshd), ts 41495190986, free_ts 41417713212 +prep_new_page mm/page_alloc.c:2532 [inline] +get_page_from_freelist+0x109b/0x2ce0 mm/page_alloc.c:4283 +__alloc_pages+0x1c7/0x510 mm/page_alloc.c:5515 +alloc_pages+0x1a6/0x270 mm/mempolicy.c:2270 +alloc_slab_page mm/slub.c:1824 [inline] +allocate_slab+0x27e/0x3d0 mm/slub.c:1969 +new_slab mm/slub.c:2029 [inline] +___slab_alloc+0x7f1/0xe10 mm/slub.c:3031 +__slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3118 +slab_alloc_node mm/slub.c:3209 [inline] +__kmalloc_node_track_caller+0x2f2/0x380 mm/slub.c:4955 +kmalloc_reserve net/core/skbuff.c:358 [inline] +__alloc_skb+0xd9/0x2f0 net/core/skbuff.c:430 +alloc_skb_fclone include/linux/skbuff.h:1307 [inline] +tcp_stream_alloc_skb+0x38/0x580 net/ipv4/tcp.c:861 +tcp_sendmsg_locked+0xc36/0x2f80 net/ipv4/tcp.c:1325 +tcp_sendmsg+0x2b/0x40 net/ipv4/tcp.c:1483 +inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 +sock_sendmsg_nosec net/socket.c:714 [inline] +sock_sendmsg+0xcf/0x120 net/socket.c:734 +sock_write_iter+0x291/0x3d0 net/socket.c:1108 +call_write_iter include/linux/fs.h:2187 [inline] +new_sync_write fs/read_write.c:491 [inline] +vfs_write+0x9e9/0xdd0 fs/read_write.c:578 +ksys_write+0x1e8/0x250 fs/read_write.c:631 +page last free stack trace: +reset_page_owner include/linux/page_owner.h:24 [inline] +free_pages_prepare mm/page_alloc.c:1449 [inline] +free_pcp_prepare+0x5e4/0xd20 mm/page_alloc.c:1499 +free_unref_page_prepare mm/page_alloc.c:3380 [inline] +free_unref_page+0x19/0x4d0 mm/page_alloc.c:3476 +__unfreeze_partials+0x17c/0x1a0 mm/slub.c:2548 +qlink_free mm/kasan/quarantine.c:168 [inline] +qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 +kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:294 +__kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:447 +kasan_slab_alloc 
include/linux/kasan.h:224 [inline] +slab_post_alloc_hook mm/slab.h:727 [inline] +slab_alloc_node mm/slub.c:3243 [inline] +slab_alloc mm/slub.c:3251 [inline] +__kmem_cache_alloc_lru mm/slub.c:3258 [inline] +kmem_cache_alloc+0x267/0x3b0 mm/slub.c:3268 +kmem_cache_zalloc include/linux/slab.h:723 [inline] +alloc_buffer_head+0x20/0x140 fs/buffer.c:2974 +alloc_page_buffers+0x280/0x790 fs/buffer.c:829 +create_empty_buffers+0x2c/0xee0 fs/buffer.c:1558 +ext4_block_write_begin+0x1004/0x1530 fs/ext4/inode.c:1074 +ext4_da_write_begin+0x422/0xae0 fs/ext4/inode.c:2996 +generic_perform_write+0x246/0x560 mm/filemap.c:3738 +ext4_buffered_write_iter+0x15b/0x460 fs/ext4/file.c:270 +ext4_file_write_iter+0x44a/0x1660 fs/ext4/file.c:679 +call_write_iter include/linux/fs.h:2187 [inline] +new_sync_write fs/read_write.c:491 [inline] +vfs_write+0x9e9/0xdd0 fs/read_write.c:578 + +Fixes: af356afa010f ("net_sched: reintroduce dev->qdisc for use by sch_api") +Reported-by: syzbot +Diagnosed-by: Dmitry Vyukov +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20221018203258.2793282-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_api.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index 6e18aa417782..d8ffe4114385 100644 +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -1081,12 +1081,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, + + skip: + if (!ingress) { +- notify_and_destroy(net, skb, n, classid, +- rtnl_dereference(dev->qdisc), new); ++ old = rtnl_dereference(dev->qdisc); + if (new && !new->ops->attach) + qdisc_refcount_inc(new); + rcu_assign_pointer(dev->qdisc, new ? 
: &noop_qdisc); + ++ notify_and_destroy(net, skb, n, classid, old, new); ++ + if (new && new->ops->attach) + new->ops->attach(new); + } else { +-- +2.35.1 + diff --git a/queue-5.10/net-sched-sfb-fix-null-pointer-access-issue-when-sfb.patch b/queue-5.10/net-sched-sfb-fix-null-pointer-access-issue-when-sfb.patch new file mode 100644 index 00000000000..6e18799fc02 --- /dev/null +++ b/queue-5.10/net-sched-sfb-fix-null-pointer-access-issue-when-sfb.patch @@ -0,0 +1,77 @@ +From cc3e73723c9f5894842ced0cab05a90c753aa995 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 14:32:01 +0800 +Subject: net: sched: sfb: fix null pointer access issue when sfb_init() fails + +From: Zhengchao Shao + +[ Upstream commit 2a3fc78210b9f0e85372a2435368962009f480fc ] + +When the default qdisc is sfb, if the qdisc of dev_queue fails to be +inited during mqprio_init(), sfb_reset() is invoked to clear resources. +In this case, the q->qdisc is NULL, and it will cause gpf issue. + +The process is as follows: +qdisc_create_dflt() + sfb_init() + tcf_block_get() --->failed, q->qdisc is NULL + ... + qdisc_put() + ... 
+ sfb_reset() + qdisc_reset(q->qdisc) --->q->qdisc is NULL + ops = qdisc->ops + +The following is the Call Trace information: +general protection fault, probably for non-canonical address +0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN +KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] +RIP: 0010:qdisc_reset+0x2b/0x6f0 +Call Trace: + +sfb_reset+0x37/0xd0 +qdisc_reset+0xed/0x6f0 +qdisc_destroy+0x82/0x4c0 +qdisc_put+0x9e/0xb0 +qdisc_create_dflt+0x2c3/0x4a0 +mqprio_init+0xa71/0x1760 +qdisc_create+0x3eb/0x1000 +tc_modify_qdisc+0x408/0x1720 +rtnetlink_rcv_msg+0x38e/0xac0 +netlink_rcv_skb+0x12d/0x3a0 +netlink_unicast+0x4a2/0x740 +netlink_sendmsg+0x826/0xcc0 +sock_sendmsg+0xc5/0x100 +____sys_sendmsg+0x583/0x690 +___sys_sendmsg+0xe8/0x160 +__sys_sendmsg+0xbf/0x160 +do_syscall_64+0x35/0x80 +entry_SYSCALL_64_after_hwframe+0x46/0xb0 +RIP: 0033:0x7f2164122d04 + + +Fixes: e13e02a3c68d ("net_sched: SFB flow scheduler") +Signed-off-by: Zhengchao Shao +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_sfb.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c +index 0e1cb517b0d9..9ded56228ea1 100644 +--- a/net/sched/sch_sfb.c ++++ b/net/sched/sch_sfb.c +@@ -455,7 +455,8 @@ static void sfb_reset(struct Qdisc *sch) + { + struct sfb_sched_data *q = qdisc_priv(sch); + +- qdisc_reset(q->qdisc); ++ if (likely(q->qdisc)) ++ qdisc_reset(q->qdisc); + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); +-- +2.35.1 + diff --git a/queue-5.10/nfsd-add-common-helpers-to-decode-void-args-and-enco.patch b/queue-5.10/nfsd-add-common-helpers-to-decode-void-args-and-enco.patch new file mode 100644 index 00000000000..2f9b17abdce --- /dev/null +++ b/queue-5.10/nfsd-add-common-helpers-to-decode-void-args-and-enco.patch @@ -0,0 +1,407 @@ +From 34e09e5c9a1611aa6be5e62844ff84e408ee3701 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Nov 2020 14:48:29 -0500 
+Subject: NFSD: Add common helpers to decode void args and encode void results + +From: Chuck Lever + +[ Upstream commit 788f7183fba86b46074c16e7d57ea09302badff4 ] + +Start off the conversion to xdr_stream by de-duplicating the functions +that decode void arguments and encode void results. + +Signed-off-by: Chuck Lever +Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs2acl.c | 21 ++++----------------- + fs/nfsd/nfs3acl.c | 8 ++++---- + fs/nfsd/nfs3proc.c | 10 ++++------ + fs/nfsd/nfs3xdr.c | 11 ----------- + fs/nfsd/nfs4proc.c | 11 ++++------- + fs/nfsd/nfs4xdr.c | 12 ------------ + fs/nfsd/nfsd.h | 8 ++++++++ + fs/nfsd/nfsproc.c | 25 ++++++++++++------------- + fs/nfsd/nfssvc.c | 28 ++++++++++++++++++++++++++++ + fs/nfsd/nfsxdr.c | 10 ---------- + fs/nfsd/xdr.h | 2 -- + fs/nfsd/xdr3.h | 2 -- + fs/nfsd/xdr4.h | 2 -- + 13 files changed, 64 insertions(+), 86 deletions(-) + +diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c +index 6a900f770dd2..b0f66604532a 100644 +--- a/fs/nfsd/nfs2acl.c ++++ b/fs/nfsd/nfs2acl.c +@@ -185,10 +185,6 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp) + /* + * XDR decode functions + */ +-static int nfsaclsvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +-{ +- return 1; +-} + + static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) + { +@@ -255,15 +251,6 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) + * XDR encode functions + */ + +-/* +- * There must be an encoding function for void results so svc_process +- * will work properly. 
+- */ +-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +-{ +- return xdr_ressize_check(rqstp, p); +-} +- + /* GETACL */ + static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) + { +@@ -378,10 +365,10 @@ struct nfsd3_voidargs { int dummy; }; + static const struct svc_procedure nfsd_acl_procedures2[5] = { + [ACLPROC2_NULL] = { + .pc_func = nfsacld_proc_null, +- .pc_decode = nfsaclsvc_decode_voidarg, +- .pc_encode = nfsaclsvc_encode_voidres, +- .pc_argsize = sizeof(struct nfsd3_voidargs), +- .pc_ressize = sizeof(struct nfsd3_voidargs), ++ .pc_decode = nfssvc_decode_voidarg, ++ .pc_encode = nfssvc_encode_voidres, ++ .pc_argsize = sizeof(struct nfsd_voidargs), ++ .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, +diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c +index 34a394e50e1d..7c30876a31a1 100644 +--- a/fs/nfsd/nfs3acl.c ++++ b/fs/nfsd/nfs3acl.c +@@ -245,10 +245,10 @@ struct nfsd3_voidargs { int dummy; }; + static const struct svc_procedure nfsd_acl_procedures3[3] = { + [ACLPROC3_NULL] = { + .pc_func = nfsd3_proc_null, +- .pc_decode = nfs3svc_decode_voidarg, +- .pc_encode = nfs3svc_encode_voidres, +- .pc_argsize = sizeof(struct nfsd3_voidargs), +- .pc_ressize = sizeof(struct nfsd3_voidargs), ++ .pc_decode = nfssvc_decode_voidarg, ++ .pc_encode = nfssvc_encode_voidres, ++ .pc_argsize = sizeof(struct nfsd_voidargs), ++ .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, +diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c +index 981a4e4c9a3c..a4dfe8160d55 100644 +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -698,8 +698,6 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) + #define nfsd3_attrstatres nfsd3_attrstat + #define nfsd3_wccstatres nfsd3_attrstat + #define nfsd3_createres nfsd3_diropres +-#define nfsd3_voidres nfsd3_voidargs +-struct nfsd3_voidargs { int dummy; }; + + #define ST 1 /* status*/ + #define FH 17 /* 
filehandle with length */ +@@ -710,10 +708,10 @@ struct nfsd3_voidargs { int dummy; }; + static const struct svc_procedure nfsd_procedures3[22] = { + [NFS3PROC_NULL] = { + .pc_func = nfsd3_proc_null, +- .pc_decode = nfs3svc_decode_voidarg, +- .pc_encode = nfs3svc_encode_voidres, +- .pc_argsize = sizeof(struct nfsd3_voidargs), +- .pc_ressize = sizeof(struct nfsd3_voidres), ++ .pc_decode = nfssvc_decode_voidarg, ++ .pc_encode = nfssvc_encode_voidres, ++ .pc_argsize = sizeof(struct nfsd_voidargs), ++ .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, +diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c +index 716566da400e..9dc22d917bd2 100644 +--- a/fs/nfsd/nfs3xdr.c ++++ b/fs/nfsd/nfs3xdr.c +@@ -304,11 +304,6 @@ void fill_post_wcc(struct svc_fh *fhp) + /* + * XDR decode functions + */ +-int +-nfs3svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +-{ +- return 1; +-} + + int + nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) +@@ -642,12 +637,6 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) + * XDR encode functions + */ + +-int +-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +-{ +- return xdr_ressize_check(rqstp, p); +-} +- + /* GETATTR */ + int + nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) +diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c +index 9aeeb51e8c61..1acafc39f008 100644 +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -3293,16 +3293,13 @@ static const char *nfsd4_op_name(unsigned opnum) + return "unknown_operation"; + } + +-#define nfsd4_voidres nfsd4_voidargs +-struct nfsd4_voidargs { int dummy; }; +- + static const struct svc_procedure nfsd_procedures4[2] = { + [NFSPROC4_NULL] = { + .pc_func = nfsd4_proc_null, +- .pc_decode = nfs4svc_decode_voidarg, +- .pc_encode = nfs4svc_encode_voidres, +- .pc_argsize = sizeof(struct nfsd4_voidargs), +- .pc_ressize = sizeof(struct nfsd4_voidres), ++ .pc_decode = nfssvc_decode_voidarg, ++ .pc_encode = 
nfssvc_encode_voidres, ++ .pc_argsize = sizeof(struct nfsd_voidargs), ++ .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 1, + }, +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index cc605ee0b2fa..e7b891a19bf8 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -5263,12 +5263,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) + p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); + } + +-int +-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +-{ +- return xdr_ressize_check(rqstp, p); +-} +- + void nfsd4_release_compoundargs(struct svc_rqst *rqstp) + { + struct nfsd4_compoundargs *args = rqstp->rq_argp; +@@ -5286,12 +5280,6 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) + } + } + +-int +-nfs4svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +-{ +- return 1; +-} +- + int + nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) + { +diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h +index cb742e17e04a..7907de3f2ee6 100644 +--- a/fs/nfsd/nfsd.h ++++ b/fs/nfsd/nfsd.h +@@ -73,6 +73,14 @@ extern unsigned long nfsd_drc_mem_used; + + extern const struct seq_operations nfs_exports_op; + ++/* ++ * Common void argument and result helpers ++ */ ++struct nfsd_voidargs { }; ++struct nfsd_voidres { }; ++int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p); ++int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p); ++ + /* + * Function prototypes. + */ +diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c +index bbd01e8397f6..dbd8d3604653 100644 +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -609,7 +609,6 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) + * NFSv2 Server procedures. + * Only the results of non-idempotent operations are cached. 
+ */ +-struct nfsd_void { int dummy; }; + + #define ST 1 /* status */ + #define FH 8 /* filehandle */ +@@ -618,10 +617,10 @@ struct nfsd_void { int dummy; }; + static const struct svc_procedure nfsd_procedures2[18] = { + [NFSPROC_NULL] = { + .pc_func = nfsd_proc_null, +- .pc_decode = nfssvc_decode_void, +- .pc_encode = nfssvc_encode_void, +- .pc_argsize = sizeof(struct nfsd_void), +- .pc_ressize = sizeof(struct nfsd_void), ++ .pc_decode = nfssvc_decode_voidarg, ++ .pc_encode = nfssvc_encode_voidres, ++ .pc_argsize = sizeof(struct nfsd_voidargs), ++ .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 0, + }, +@@ -647,10 +646,10 @@ static const struct svc_procedure nfsd_procedures2[18] = { + }, + [NFSPROC_ROOT] = { + .pc_func = nfsd_proc_root, +- .pc_decode = nfssvc_decode_void, +- .pc_encode = nfssvc_encode_void, +- .pc_argsize = sizeof(struct nfsd_void), +- .pc_ressize = sizeof(struct nfsd_void), ++ .pc_decode = nfssvc_decode_voidarg, ++ .pc_encode = nfssvc_encode_voidres, ++ .pc_argsize = sizeof(struct nfsd_voidargs), ++ .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 0, + }, +@@ -685,10 +684,10 @@ static const struct svc_procedure nfsd_procedures2[18] = { + }, + [NFSPROC_WRITECACHE] = { + .pc_func = nfsd_proc_writecache, +- .pc_decode = nfssvc_decode_void, +- .pc_encode = nfssvc_encode_void, +- .pc_argsize = sizeof(struct nfsd_void), +- .pc_ressize = sizeof(struct nfsd_void), ++ .pc_decode = nfssvc_decode_voidarg, ++ .pc_encode = nfssvc_encode_voidres, ++ .pc_argsize = sizeof(struct nfsd_voidargs), ++ .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 0, + }, +diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c +index ad6fedf37a40..8b675e8e6a6e 100644 +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -1074,6 +1074,34 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + return 1; + } + ++/** ++ * nfssvc_decode_voidarg - Decode void 
arguments ++ * @rqstp: Server RPC transaction context ++ * @p: buffer containing arguments to decode ++ * ++ * Return values: ++ * %0: Arguments were not valid ++ * %1: Decoding was successful ++ */ ++int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) ++{ ++ return 1; ++} ++ ++/** ++ * nfssvc_encode_voidres - Encode void results ++ * @rqstp: Server RPC transaction context ++ * @p: buffer in which to encode results ++ * ++ * Return values: ++ * %0: Local error while encoding ++ * %1: Encoding was successful ++ */ ++int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) ++{ ++ return xdr_ressize_check(rqstp, p); ++} ++ + int nfsd_pool_stats_open(struct inode *inode, struct file *file) + { + int ret; +diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c +index 8a288c8fcd57..13df5464a087 100644 +--- a/fs/nfsd/nfsxdr.c ++++ b/fs/nfsd/nfsxdr.c +@@ -192,11 +192,6 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f + /* + * XDR decode functions + */ +-int +-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p) +-{ +- return xdr_argsize_check(rqstp, p); +-} + + int + nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) +@@ -423,11 +418,6 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) + /* + * XDR encode functions + */ +-int +-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p) +-{ +- return xdr_ressize_check(rqstp, p); +-} + + int + nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p) +diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h +index b8cc6a4b2e0e..edd87688ff86 100644 +--- a/fs/nfsd/xdr.h ++++ b/fs/nfsd/xdr.h +@@ -144,7 +144,6 @@ union nfsd_xdrstore { + #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) + + +-int nfssvc_decode_void(struct svc_rqst *, __be32 *); + int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *); + int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); + int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); +@@ -156,7 +155,6 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, 
__be32 *); + int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); + int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); + int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); +-int nfssvc_encode_void(struct svc_rqst *, __be32 *); + int nfssvc_encode_stat(struct svc_rqst *, __be32 *); + int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *); + int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); +diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h +index ae6fa6c9cb46..456fcd7a1038 100644 +--- a/fs/nfsd/xdr3.h ++++ b/fs/nfsd/xdr3.h +@@ -273,7 +273,6 @@ union nfsd3_xdrstore { + + #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) + +-int nfs3svc_decode_voidarg(struct svc_rqst *, __be32 *); + int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *); + int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); + int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); +@@ -290,7 +289,6 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); + int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); + int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); + int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); +-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *); + int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *); + int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); + int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *); +diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h +index 679d40af1bbb..37f89ad5e992 100644 +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -781,8 +781,6 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) + + + bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); +-int nfs4svc_decode_voidarg(struct svc_rqst *, __be32 *); +-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *); + int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *); + int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); + __be32 nfsd4_check_resp_size(struct 
nfsd4_compoundres *, u32); +-- +2.35.1 + diff --git a/queue-5.10/nfsd-fix-the-behavior-of-read-near-offset_max.patch b/queue-5.10/nfsd-fix-the-behavior-of-read-near-offset_max.patch new file mode 100644 index 00000000000..ef6f1b05d3d --- /dev/null +++ b/queue-5.10/nfsd-fix-the-behavior-of-read-near-offset_max.patch @@ -0,0 +1,123 @@ +From 3d69c488be74d18b9be2d42a67f4f2326a499362 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 4 Feb 2022 15:19:34 -0500 +Subject: NFSD: Fix the behavior of READ near OFFSET_MAX + +From: Chuck Lever + +[ Upstream commit 0cb4d23ae08c48f6bf3c29a8e5c4a74b8388b960 ] + +Dan Aloni reports: +> Due to commit 8cfb9015280d ("NFS: Always provide aligned buffers to +> the RPC read layers") on the client, a read of 0xfff is aligned up +> to server rsize of 0x1000. +> +> As a result, in a test where the server has a file of size +> 0x7fffffffffffffff, and the client tries to read from the offset +> 0x7ffffffffffff000, the read causes loff_t overflow in the server +> and it returns an NFS code of EINVAL to the client. The client as +> a result indefinitely retries the request. + +The Linux NFS client does not handle NFS?ERR_INVAL, even though all +NFS specifications permit servers to return that status code for a +READ. + +Instead of NFS?ERR_INVAL, have out-of-range READ requests succeed +and return a short result. Set the EOF flag in the result to prevent +the client from retrying the READ request. This behavior appears to +be consistent with Solaris NFS servers. + +Note that NFSv3 and NFSv4 use u64 offset values on the wire. These +must be converted to loff_t internally before use -- an implicit +type cast is not adequate for this purpose. Otherwise VFS checks +against sb->s_maxbytes do not work properly. 
+ +Reported-by: Dan Aloni +Cc: stable@vger.kernel.org +Signed-off-by: Chuck Lever +Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs3proc.c | 8 ++++++-- + fs/nfsd/nfs4proc.c | 8 ++++++-- + fs/nfsd/nfs4xdr.c | 8 ++------ + 3 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c +index 104e7d705ea8..60faa5b8eccf 100644 +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -148,13 +148,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + unsigned int len; + int v; + +- argp->count = min_t(u32, argp->count, max_blocksize); +- + dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", + SVCFH_fmt(&argp->fh), + (unsigned long) argp->count, + (unsigned long long) argp->offset); + ++ argp->count = min_t(u32, argp->count, max_blocksize); ++ if (argp->offset > (u64)OFFSET_MAX) ++ argp->offset = (u64)OFFSET_MAX; ++ if (argp->offset + argp->count > (u64)OFFSET_MAX) ++ argp->count = (u64)OFFSET_MAX - argp->offset; ++ + v = 0; + len = argp->count; + while (len > 0) { +diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c +index 5054dc66cbf9..363df0a795bc 100644 +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -772,12 +772,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + __be32 status; + + read->rd_nf = NULL; +- if (read->rd_offset >= OFFSET_MAX) +- return nfserr_inval; + + trace_nfsd_read_start(rqstp, &cstate->current_fh, + read->rd_offset, read->rd_length); + ++ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp)); ++ if (read->rd_offset > (u64)OFFSET_MAX) ++ read->rd_offset = (u64)OFFSET_MAX; ++ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX) ++ read->rd_length = (u64)OFFSET_MAX - read->rd_offset; ++ + /* + * If we do a zero copy read, then a client will see read data + * that reflects the state of the file *after* performing the +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index 
d0af93a0558f..930bed3e40a4 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -3754,10 +3754,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + } + xdr_commit_encode(xdr); + +- maxcount = svc_max_payload(resp->rqstp); +- maxcount = min_t(unsigned long, maxcount, ++ maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); +- maxcount = min_t(unsigned long, maxcount, read->rd_length); + + if (file->f_op->splice_read && + test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) +@@ -4585,10 +4583,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, + return nfserr_resource; + xdr_commit_encode(xdr); + +- maxcount = svc_max_payload(resp->rqstp); +- maxcount = min_t(unsigned long, maxcount, ++ maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); +- maxcount = min_t(unsigned long, maxcount, read->rd_length); + count = maxcount; + + eof = read->rd_offset >= i_size_read(file_inode(file)); +-- +2.35.1 + diff --git a/queue-5.10/nfsd-protect-against-send-buffer-overflow-in-nfsv2-r.patch b/queue-5.10/nfsd-protect-against-send-buffer-overflow-in-nfsv2-r.patch new file mode 100644 index 00000000000..e167699ef91 --- /dev/null +++ b/queue-5.10/nfsd-protect-against-send-buffer-overflow-in-nfsv2-r.patch @@ -0,0 +1,50 @@ +From 371805ae7d57acb15b4ae15945abde5c27336ac4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Sep 2022 15:10:18 -0400 +Subject: NFSD: Protect against send buffer overflow in NFSv2 READ + +From: Chuck Lever + +[ Upstream commit 401bc1f90874280a80b93f23be33a0e7e2d1f912 ] + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply at the same time. 
+ +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. + +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfsproc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c +index deaa34b89251..c540326c8e00 100644 +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -180,6 +180,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) + argp->count, argp->offset); + + argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + + v = 0; + len = argp->count; +-- +2.35.1 + diff --git a/queue-5.10/nfsd-protect-against-send-buffer-overflow-in-nfsv3-r.patch b/queue-5.10/nfsd-protect-against-send-buffer-overflow-in-nfsv3-r.patch new file mode 100644 index 00000000000..b5803398a5e --- /dev/null +++ b/queue-5.10/nfsd-protect-against-send-buffer-overflow-in-nfsv3-r.patch @@ -0,0 +1,60 @@ +From 4d327c12bb49db2ecc125a939bbc32a273a6607a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Sep 2022 15:10:24 -0400 +Subject: NFSD: Protect against send buffer overflow in NFSv3 READ + +From: Chuck Lever + +[ Upstream commit fa6be9cc6e80ec79892ddf08a8c10cabab9baf38 ] + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply at the same time. 
+ +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. + +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs3proc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c +index 60faa5b8eccf..84e700a54d01 100644 +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -144,7 +144,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + { + struct nfsd3_readargs *argp = rqstp->rq_argp; + struct nfsd3_readres *resp = rqstp->rq_resp; +- u32 max_blocksize = svc_max_payload(rqstp); + unsigned int len; + int v; + +@@ -153,7 +152,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + (unsigned long) argp->count, + (unsigned long long) argp->offset); + +- argp->count = min_t(u32, argp->count, max_blocksize); ++ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + if (argp->offset > (u64)OFFSET_MAX) + argp->offset = (u64)OFFSET_MAX; + if (argp->offset + argp->count > (u64)OFFSET_MAX) +-- +2.35.1 + diff --git a/queue-5.10/nfsd-replace-read-macros-in-nfsd4_decode_commit.patch b/queue-5.10/nfsd-replace-read-macros-in-nfsd4_decode_commit.patch new file mode 100644 index 00000000000..5db7821c37d --- /dev/null +++ b/queue-5.10/nfsd-replace-read-macros-in-nfsd4_decode_commit.patch @@ -0,0 +1,75 @@ +From add4e4ab8734bc7519f9c446cb6b5a4aa1bb5b63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Nov 2020 13:19:51 -0500 +Subject: NFSD: Replace READ* macros in 
nfsd4_decode_commit() + +From: Chuck Lever + +[ Upstream commit cbd9abb3706e96563b36af67595707a7054ab693 ] + +Signed-off-by: Chuck Lever +Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4xdr.c | 12 +++++------- + include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ + 2 files changed, 26 insertions(+), 7 deletions(-) + +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index 69233350b061..d0af93a0558f 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -568,13 +568,11 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) + static __be32 + nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) + { +- DECODE_HEAD; +- +- READ_BUF(12); +- p = xdr_decode_hyper(p, &commit->co_offset); +- commit->co_count = be32_to_cpup(p++); +- +- DECODE_TAIL; ++ if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) ++ return nfserr_bad_xdr; ++ if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) ++ return nfserr_bad_xdr; ++ return nfs_ok; + } + + static __be32 +diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h +index c03f7bf585c9..6b1757543747 100644 +--- a/include/linux/sunrpc/xdr.h ++++ b/include/linux/sunrpc/xdr.h +@@ -569,6 +569,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) + return 0; + } + ++/** ++ * xdr_stream_decode_u64 - Decode a 64-bit integer ++ * @xdr: pointer to xdr_stream ++ * @ptr: location to store 64-bit integer ++ * ++ * Return values: ++ * %0 on success ++ * %-EBADMSG on XDR buffer overflow ++ */ ++static inline ssize_t ++xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) ++{ ++ const size_t count = sizeof(*ptr); ++ __be32 *p = xdr_inline_decode(xdr, count); ++ ++ if (unlikely(!p)) ++ return -EBADMSG; ++ xdr_decode_hyper(p, ptr); ++ return 0; ++} ++ + /** + * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data + * @xdr: pointer to xdr_stream 
+-- +2.35.1 + diff --git a/queue-5.10/nfsd-replace-the-internals-of-the-read_buf-macro.patch b/queue-5.10/nfsd-replace-the-internals-of-the-read_buf-macro.patch new file mode 100644 index 00000000000..b51d22a0d6e --- /dev/null +++ b/queue-5.10/nfsd-replace-the-internals-of-the-read_buf-macro.patch @@ -0,0 +1,412 @@ +From fbbbdfeb61de7dfbaa5e5498af2254b49f0cb417 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Nov 2020 11:54:23 -0500 +Subject: NFSD: Replace the internals of the READ_BUF() macro + +From: Chuck Lever + +[ Upstream commit c1346a1216ab5cb04a265380ac9035d91b16b6d5 ] + +Convert the READ_BUF macro in nfs4xdr.c from open code to instead +use the new xdr_stream-style decoders already in use by the encode +side (and by the in-kernel NFS client implementation). Once this +conversion is done, each individual NFSv4 argument decoder can be +independently cleaned up to replace these macros with C code. + +Signed-off-by: Chuck Lever +Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4proc.c | 4 +- + fs/nfsd/nfs4xdr.c | 181 ++++++------------------------------- + fs/nfsd/xdr4.h | 10 +- + include/linux/sunrpc/xdr.h | 2 + + net/sunrpc/xdr.c | 45 +++++++++ + 5 files changed, 77 insertions(+), 165 deletions(-) + +diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c +index 1acafc39f008..5054dc66cbf9 100644 +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -1024,8 +1024,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + + write->wr_how_written = write->wr_stable_how; + +- nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, +- &write->wr_head, write->wr_buflen); ++ nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages, ++ write->wr_payload.head, write->wr_buflen); + WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); + + status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index 
e7b891a19bf8..69233350b061 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -129,90 +129,13 @@ xdr_error: \ + memcpy((x), p, nbytes); \ + p += XDR_QUADLEN(nbytes); \ + } while (0) +- +-/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */ +-#define READ_BUF(nbytes) do { \ +- if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \ +- p = argp->p; \ +- argp->p += XDR_QUADLEN(nbytes); \ +- } else if (!(p = read_buf(argp, nbytes))) { \ +- dprintk("NFSD: xdr error (%s:%d)\n", \ +- __FILE__, __LINE__); \ +- goto xdr_error; \ +- } \ +-} while (0) +- +-static void next_decode_page(struct nfsd4_compoundargs *argp) +-{ +- argp->p = page_address(argp->pagelist[0]); +- argp->pagelist++; +- if (argp->pagelen < PAGE_SIZE) { +- argp->end = argp->p + XDR_QUADLEN(argp->pagelen); +- argp->pagelen = 0; +- } else { +- argp->end = argp->p + (PAGE_SIZE>>2); +- argp->pagelen -= PAGE_SIZE; +- } +-} +- +-static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) +-{ +- /* We want more bytes than seem to be available. +- * Maybe we need a new page, maybe we have just run out +- */ +- unsigned int avail = (char *)argp->end - (char *)argp->p; +- __be32 *p; +- +- if (argp->pagelen == 0) { +- struct kvec *vec = &argp->rqstp->rq_arg.tail[0]; +- +- if (!argp->tail) { +- argp->tail = true; +- avail = vec->iov_len; +- argp->p = vec->iov_base; +- argp->end = vec->iov_base + avail; +- } +- +- if (avail < nbytes) +- return NULL; +- +- p = argp->p; +- argp->p += XDR_QUADLEN(nbytes); +- return p; +- } +- +- if (avail + argp->pagelen < nbytes) +- return NULL; +- if (avail + PAGE_SIZE < nbytes) /* need more than a page !! 
*/ +- return NULL; +- /* ok, we can do it with the current plus the next page */ +- if (nbytes <= sizeof(argp->tmp)) +- p = argp->tmp; +- else { +- kfree(argp->tmpp); +- p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL); +- if (!p) +- return NULL; +- +- } +- /* +- * The following memcpy is safe because read_buf is always +- * called with nbytes > avail, and the two cases above both +- * guarantee p points to at least nbytes bytes. +- */ +- memcpy(p, argp->p, avail); +- next_decode_page(argp); +- memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); +- argp->p += XDR_QUADLEN(nbytes - avail); +- return p; +-} +- +-static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp) +-{ +- unsigned int this = (char *)argp->end - (char *)argp->p; +- +- return this + argp->pagelen; +-} ++#define READ_BUF(nbytes) \ ++ do { \ ++ p = xdr_inline_decode(argp->xdr,\ ++ nbytes); \ ++ if (!p) \ ++ goto xdr_error; \ ++ } while (0) + + static int zero_clientid(clientid_t *clid) + { +@@ -259,44 +182,6 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) + return p; + } + +-static __be32 +-svcxdr_construct_vector(struct nfsd4_compoundargs *argp, struct kvec *head, +- struct page ***pagelist, u32 buflen) +-{ +- int avail; +- int len; +- int pages; +- +- /* Sorry .. no magic macros for this.. 
* +- * READ_BUF(write->wr_buflen); +- * SAVEMEM(write->wr_buf, write->wr_buflen); +- */ +- avail = (char *)argp->end - (char *)argp->p; +- if (avail + argp->pagelen < buflen) { +- dprintk("NFSD: xdr error (%s:%d)\n", +- __FILE__, __LINE__); +- return nfserr_bad_xdr; +- } +- head->iov_base = argp->p; +- head->iov_len = avail; +- *pagelist = argp->pagelist; +- +- len = XDR_QUADLEN(buflen) << 2; +- if (len >= avail) { +- len -= avail; +- +- pages = len >> PAGE_SHIFT; +- argp->pagelist += pages; +- argp->pagelen -= pages * PAGE_SIZE; +- len -= pages * PAGE_SIZE; +- +- next_decode_page(argp); +- } +- argp->p += XDR_QUADLEN(len); +- +- return 0; +-} +- + /** + * savemem - duplicate a chunk of memory for later processing + * @argp: NFSv4 compound argument structure to be freed with +@@ -396,7 +281,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, + READ_BUF(4); len += 4; + nace = be32_to_cpup(p++); + +- if (nace > compoundargs_bytes_left(argp)/20) ++ if (nace > xdr_stream_remaining(argp->xdr) / sizeof(struct nfs4_ace)) + /* + * Even with 4-byte names there wouldn't be + * space for that many aces; something fishy is +@@ -927,7 +812,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) + + static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) + { +- __be32 *p; ++ DECODE_HEAD; + + READ_BUF(4); + o->len = be32_to_cpup(p++); +@@ -937,9 +822,8 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne + + READ_BUF(o->len); + SAVEMEM(o->data, o->len); +- return nfs_ok; +-xdr_error: +- return nfserr_bad_xdr; ++ ++ DECODE_TAIL; + } + + static __be32 +@@ -1317,10 +1201,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) + goto xdr_error; + write->wr_buflen = be32_to_cpup(p++); + +- status = svcxdr_construct_vector(argp, &write->wr_head, +- &write->wr_pagelist, write->wr_buflen); +- if (status) +- return status; ++ if 
(!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen)) ++ goto xdr_error; + + DECODE_TAIL; + } +@@ -1889,13 +1771,14 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) + */ + + /* +- * Decode data into buffer. Uses head and pages constructed by +- * svcxdr_construct_vector. ++ * Decode data into buffer. + */ + static __be32 +-nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head, +- struct page **pages, char **bufp, u32 buflen) ++nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, ++ char **bufp, u32 buflen) + { ++ struct page **pages = xdr->pages; ++ struct kvec *head = xdr->head; + char *tmp, *dp; + u32 len; + +@@ -2010,8 +1893,6 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, + { + DECODE_HEAD; + u32 flags, maxcount, size; +- struct kvec head; +- struct page **pagelist; + + READ_BUF(4); + flags = be32_to_cpup(p++); +@@ -2034,12 +1915,12 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, + + setxattr->setxa_len = size; + if (size > 0) { +- status = svcxdr_construct_vector(argp, &head, &pagelist, size); +- if (status) +- return status; ++ struct xdr_buf payload; + +- status = nfsd4_vbuf_from_vector(argp, &head, pagelist, +- &setxattr->setxa_buf, size); ++ if (!xdr_stream_subsegment(argp->xdr, &payload, size)) ++ goto xdr_error; ++ status = nfsd4_vbuf_from_vector(argp, &payload, ++ &setxattr->setxa_buf, size); + } + + DECODE_TAIL; +@@ -5271,8 +5152,6 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) + kfree(args->ops); + args->ops = args->iops; + } +- kfree(args->tmpp); +- args->tmpp = NULL; + while (args->to_free) { + struct svcxdr_tmpbuf *tb = args->to_free; + args->to_free = tb->next; +@@ -5285,19 +5164,11 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) + { + struct nfsd4_compoundargs *args = rqstp->rq_argp; + +- if (rqstp->rq_arg.head[0].iov_len % 4) { +- /* client is nuts */ +- dprintk("%s: compound not properly padded! 
(peeraddr=%pISc xid=0x%x)", +- __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); +- return 0; +- } +- args->p = p; +- args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; +- args->pagelist = rqstp->rq_arg.pages; +- args->pagelen = rqstp->rq_arg.page_len; +- args->tail = false; ++ /* svcxdr_tmp_alloc */ + args->tmpp = NULL; + args->to_free = NULL; ++ ++ args->xdr = &rqstp->rq_arg_stream; + args->ops = args->iops; + args->rqstp = rqstp; + +diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h +index 37f89ad5e992..0eb13bd603ea 100644 +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -419,8 +419,7 @@ struct nfsd4_write { + u64 wr_offset; /* request */ + u32 wr_stable_how; /* request */ + u32 wr_buflen; /* request */ +- struct kvec wr_head; +- struct page ** wr_pagelist; /* request */ ++ struct xdr_buf wr_payload; /* request */ + + u32 wr_bytes_written; /* response */ + u32 wr_how_written; /* response */ +@@ -696,15 +695,10 @@ struct svcxdr_tmpbuf { + + struct nfsd4_compoundargs { + /* scratch variables for XDR decode */ +- __be32 * p; +- __be32 * end; +- struct page ** pagelist; +- int pagelen; +- bool tail; + __be32 tmp[8]; + __be32 * tmpp; ++ struct xdr_stream *xdr; + struct svcxdr_tmpbuf *to_free; +- + struct svc_rqst *rqstp; + + u32 taglen; +diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h +index 0c8cab6210b3..c03f7bf585c9 100644 +--- a/include/linux/sunrpc/xdr.h ++++ b/include/linux/sunrpc/xdr.h +@@ -252,6 +252,8 @@ extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); + extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); + extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); + extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); ++extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, ++ unsigned int len); + + /** + * xdr_set_scratch_buffer - Attach a 
scratch buffer for decoding data. +diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c +index 02adc5c7f034..722586696fad 100644 +--- a/net/sunrpc/xdr.c ++++ b/net/sunrpc/xdr.c +@@ -1412,6 +1412,51 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, + } + EXPORT_SYMBOL_GPL(xdr_buf_subsegment); + ++/** ++ * xdr_stream_subsegment - set @subbuf to a portion of @xdr ++ * @xdr: an xdr_stream set up for decoding ++ * @subbuf: the result buffer ++ * @nbytes: length of @xdr to extract, in bytes ++ * ++ * Sets up @subbuf to represent a portion of @xdr. The portion ++ * starts at the current offset in @xdr, and extends for a length ++ * of @nbytes. If this is successful, @xdr is advanced to the next ++ * position following that portion. ++ * ++ * Return values: ++ * %true: @subbuf has been initialized, and @xdr has been advanced. ++ * %false: a bounds error has occurred ++ */ ++bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, ++ unsigned int nbytes) ++{ ++ unsigned int remaining, offset, len; ++ ++ if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes)) ++ return false; ++ ++ if (subbuf->head[0].iov_len) ++ if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len)) ++ return false; ++ ++ remaining = subbuf->page_len; ++ offset = subbuf->page_base; ++ while (remaining) { ++ len = min_t(unsigned int, remaining, PAGE_SIZE) - offset; ++ ++ if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) ++ return false; ++ if (!__xdr_inline_decode(xdr, len)) ++ return false; ++ ++ remaining -= len; ++ offset = 0; ++ } ++ ++ return true; ++} ++EXPORT_SYMBOL_GPL(xdr_stream_subsegment); ++ + /** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed +-- +2.35.1 + diff --git a/queue-5.10/nfsd-update-getattr3args-decoder-to-use-struct-xdr_s.patch b/queue-5.10/nfsd-update-getattr3args-decoder-to-use-struct-xdr_s.patch new file mode 100644 index 00000000000..390213d6ab5 --- /dev/null +++ 
b/queue-5.10/nfsd-update-getattr3args-decoder-to-use-struct-xdr_s.patch @@ -0,0 +1,115 @@ +From f846eda22099bbc03b9706b9a0e4318b753d8077 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Oct 2020 14:30:02 -0400 +Subject: NFSD: Update GETATTR3args decoder to use struct xdr_stream + +From: Chuck Lever + +[ Upstream commit 9575363a9e4c8d7e2f9ba5e79884d623fff0be6f ] + +Signed-off-by: Chuck Lever +Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs3proc.c | 3 +-- + fs/nfsd/nfs3xdr.c | 31 +++++++++++++++++++++++++------ + fs/nfsd/xdr3.h | 2 +- + 3 files changed, 27 insertions(+), 9 deletions(-) + +diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c +index a4dfe8160d55..3d741ea482f4 100644 +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -688,7 +688,6 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) + * NFSv3 Server procedures. + * Only the results of non-idempotent operations are cached. + */ +-#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle + #define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat + #define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat + #define nfsd3_mkdirargs nfsd3_createargs +@@ -720,7 +719,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { + .pc_decode = nfs3svc_decode_fhandleargs, + .pc_encode = nfs3svc_encode_attrstatres, + .pc_release = nfs3svc_release_fhandle, +- .pc_argsize = sizeof(struct nfsd3_fhandleargs), ++ .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd3_attrstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, +diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c +index 9dc22d917bd2..ed500f254337 100644 +--- a/fs/nfsd/nfs3xdr.c ++++ b/fs/nfsd/nfs3xdr.c +@@ -29,8 +29,9 @@ static u32 nfs3_ftypes[] = { + + + /* +- * XDR functions for basic NFS types ++ * Basic NFSv3 data types (RFC 1813 Sections 2.5 and 2.6) + */ ++ + static __be32 * + encode_time3(__be32 *p, struct timespec64 
*time) + { +@@ -46,6 +47,26 @@ decode_time3(__be32 *p, struct timespec64 *time) + return p; + } + ++static bool ++svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) ++{ ++ __be32 *p; ++ u32 size; ++ ++ if (xdr_stream_decode_u32(xdr, &size) < 0) ++ return false; ++ if (size == 0 || size > NFS3_FHSIZE) ++ return false; ++ p = xdr_inline_decode(xdr, size); ++ if (!p) ++ return false; ++ fh_init(fhp, NFS3_FHSIZE); ++ fhp->fh_handle.fh_size = size; ++ memcpy(&fhp->fh_handle.fh_base, p, size); ++ ++ return true; ++} ++ + static __be32 * + decode_fh(__be32 *p, struct svc_fh *fhp) + { +@@ -306,14 +327,12 @@ void fill_post_wcc(struct svc_fh *fhp) + */ + + int +-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) ++nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) + { ++ struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct nfsd_fhandle *args = rqstp->rq_argp; + +- p = decode_fh(p, &args->fh); +- if (!p) +- return 0; +- return xdr_argsize_check(rqstp, p); ++ return svcxdr_decode_nfs_fh3(xdr, &args->fh); + } + + int +diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h +index 456fcd7a1038..62ea669768cf 100644 +--- a/fs/nfsd/xdr3.h ++++ b/fs/nfsd/xdr3.h +@@ -273,7 +273,7 @@ union nfsd3_xdrstore { + + #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) + +-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *); ++int nfs3svc_decode_fhandleargs(struct svc_rqst *, __be32 *); + int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); + int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); + int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *); +-- +2.35.1 + diff --git a/queue-5.10/nfsd-update-read3arg-decoder-to-use-struct-xdr_strea.patch b/queue-5.10/nfsd-update-read3arg-decoder-to-use-struct-xdr_strea.patch new file mode 100644 index 00000000000..2f66e4b812a --- /dev/null +++ b/queue-5.10/nfsd-update-read3arg-decoder-to-use-struct-xdr_strea.patch @@ -0,0 +1,127 @@ +From ec8c3337fcceb6bc1f0cbbd1ae44d5e419cbf171 Mon Sep 17 00:00:00 2001 
+From: Sasha Levin +Date: Tue, 20 Oct 2020 14:34:40 -0400 +Subject: NFSD: Update READ3arg decoder to use struct xdr_stream + +From: Chuck Lever + +[ Upstream commit be63bd2ac6bbf8c065a0ef6dfbea76934326c352 ] + +The code that sets up rq_vec is refactored so that it is now +adjacent to the nfsd_read() call site where it is used. + +Signed-off-by: Chuck Lever +Stable-dep-of: fa6be9cc6e80 ("NFSD: Protect against send buffer overflow in NFSv3 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs3proc.c | 23 ++++++++++++++++++----- + fs/nfsd/nfs3xdr.c | 28 +++++++--------------------- + fs/nfsd/xdr3.h | 1 - + 3 files changed, 25 insertions(+), 27 deletions(-) + +diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c +index 3d741ea482f4..104e7d705ea8 100644 +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -144,25 +144,38 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + { + struct nfsd3_readargs *argp = rqstp->rq_argp; + struct nfsd3_readres *resp = rqstp->rq_resp; +- u32 max_blocksize = svc_max_payload(rqstp); +- unsigned long cnt = min(argp->count, max_blocksize); ++ u32 max_blocksize = svc_max_payload(rqstp); ++ unsigned int len; ++ int v; ++ ++ argp->count = min_t(u32, argp->count, max_blocksize); + + dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", + SVCFH_fmt(&argp->fh), + (unsigned long) argp->count, + (unsigned long long) argp->offset); + ++ v = 0; ++ len = argp->count; ++ while (len > 0) { ++ struct page *page = *(rqstp->rq_next_page++); ++ ++ rqstp->rq_vec[v].iov_base = page_address(page); ++ rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); ++ len -= rqstp->rq_vec[v].iov_len; ++ v++; ++ } ++ + /* Obtain buffer pointer for payload. 
+ * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) + * + 1 (xdr opaque byte count) = 26 + */ +- resp->count = cnt; ++ resp->count = argp->count; + svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); + + fh_copy(&resp->fh, &argp->fh); + resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, +- rqstp->rq_vec, argp->vlen, &resp->count, +- &resp->eof); ++ rqstp->rq_vec, v, &resp->count, &resp->eof); + return rpc_success; + } + +diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c +index ed500f254337..0c51f241c047 100644 +--- a/fs/nfsd/nfs3xdr.c ++++ b/fs/nfsd/nfs3xdr.c +@@ -382,31 +382,17 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) + int + nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) + { ++ struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct nfsd3_readargs *args = rqstp->rq_argp; +- unsigned int len; +- int v; +- u32 max_blocksize = svc_max_payload(rqstp); + +- p = decode_fh(p, &args->fh); +- if (!p) ++ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) ++ return 0; ++ if (xdr_stream_decode_u64(xdr, &args->offset) < 0) ++ return 0; ++ if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return 0; +- p = xdr_decode_hyper(p, &args->offset); +- +- args->count = ntohl(*p++); +- len = min(args->count, max_blocksize); +- +- /* set up the kvec */ +- v=0; +- while (len > 0) { +- struct page *p = *(rqstp->rq_next_page++); + +- rqstp->rq_vec[v].iov_base = page_address(p); +- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); +- len -= rqstp->rq_vec[v].iov_len; +- v++; +- } +- args->vlen = v; +- return xdr_argsize_check(rqstp, p); ++ return 1; + } + + int +diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h +index 62ea669768cf..68cbb9e24afa 100644 +--- a/fs/nfsd/xdr3.h ++++ b/fs/nfsd/xdr3.h +@@ -32,7 +32,6 @@ struct nfsd3_readargs { + struct svc_fh fh; + __u64 offset; + __u32 count; +- int vlen; + }; + + struct nfsd3_writeargs { +-- +2.35.1 + diff --git 
a/queue-5.10/nfsd-update-the-nfsv2-getattr-argument-decoder-to-us.patch b/queue-5.10/nfsd-update-the-nfsv2-getattr-argument-decoder-to-us.patch new file mode 100644 index 00000000000..c92e58ae550 --- /dev/null +++ b/queue-5.10/nfsd-update-the-nfsv2-getattr-argument-decoder-to-us.patch @@ -0,0 +1,112 @@ +From 1e265741e8e5554e2fd1e4a42aa49e827a518da2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Oct 2020 12:14:23 -0400 +Subject: NFSD: Update the NFSv2 GETATTR argument decoder to use struct + xdr_stream + +From: Chuck Lever + +[ Upstream commit ebcd8e8b28535b643a4c06685bd363b3b73a96af ] + +Signed-off-by: Chuck Lever +Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfsproc.c | 4 ++-- + fs/nfsd/nfsxdr.c | 26 ++++++++++++++++++++------ + fs/nfsd/xdr.h | 2 +- + 3 files changed, 23 insertions(+), 9 deletions(-) + +diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c +index dbd8d3604653..5c187d3bcb57 100644 +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -626,7 +626,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { + }, + [NFSPROC_GETATTR] = { + .pc_func = nfsd_proc_getattr, +- .pc_decode = nfssvc_decode_fhandle, ++ .pc_decode = nfssvc_decode_fhandleargs, + .pc_encode = nfssvc_encode_attrstat, + .pc_release = nfssvc_release_attrstat, + .pc_argsize = sizeof(struct nfsd_fhandle), +@@ -776,7 +776,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { + }, + [NFSPROC_STATFS] = { + .pc_func = nfsd_proc_statfs, +- .pc_decode = nfssvc_decode_fhandle, ++ .pc_decode = nfssvc_decode_fhandleargs, + .pc_encode = nfssvc_encode_statfsres, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_statfsres), +diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c +index 13df5464a087..45d980d624f4 100644 +--- a/fs/nfsd/nfsxdr.c ++++ b/fs/nfsd/nfsxdr.c +@@ -23,8 +23,9 @@ static u32 nfs_ftypes[] = { + + + /* +- * XDR functions for basic NFS types ++ * 
Basic NFSv2 data types (RFC 1094 Section 2.3) + */ ++ + static __be32 * + decode_fh(__be32 *p, struct svc_fh *fhp) + { +@@ -37,6 +38,21 @@ decode_fh(__be32 *p, struct svc_fh *fhp) + return p + (NFS_FHSIZE >> 2); + } + ++static bool ++svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) ++{ ++ __be32 *p; ++ ++ p = xdr_inline_decode(xdr, NFS_FHSIZE); ++ if (!p) ++ return false; ++ fh_init(fhp, NFS_FHSIZE); ++ memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); ++ fhp->fh_handle.fh_size = NFS_FHSIZE; ++ ++ return true; ++} ++ + /* Helper function for NFSv2 ACL code */ + __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp) + { +@@ -194,14 +210,12 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f + */ + + int +-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) ++nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) + { ++ struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct nfsd_fhandle *args = rqstp->rq_argp; + +- p = decode_fh(p, &args->fh); +- if (!p) +- return 0; +- return xdr_argsize_check(rqstp, p); ++ return svcxdr_decode_fhandle(xdr, &args->fh); + } + + int +diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h +index edd87688ff86..50466ac6200c 100644 +--- a/fs/nfsd/xdr.h ++++ b/fs/nfsd/xdr.h +@@ -144,7 +144,7 @@ union nfsd_xdrstore { + #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) + + +-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *); ++int nfssvc_decode_fhandleargs(struct svc_rqst *, __be32 *); + int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); + int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); + int nfssvc_decode_readargs(struct svc_rqst *, __be32 *); +-- +2.35.1 + diff --git a/queue-5.10/nfsd-update-the-nfsv2-read-argument-decoder-to-use-s.patch b/queue-5.10/nfsd-update-the-nfsv2-read-argument-decoder-to-use-s.patch new file mode 100644 index 00000000000..6ea6a861816 --- /dev/null +++ b/queue-5.10/nfsd-update-the-nfsv2-read-argument-decoder-to-use-s.patch @@ -0,0 +1,141 @@ 
+From 8085ba35427e0fa7b6378cddbd1d5cc0e89c8b09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Oct 2020 12:15:51 -0400 +Subject: NFSD: Update the NFSv2 READ argument decoder to use struct xdr_stream + +From: Chuck Lever + +[ Upstream commit 8c293ef993c8df0b1bea9ecb0de6eb96dec3ac9d ] + +The code that sets up rq_vec is refactored so that it is now +adjacent to the nfsd_read() call site where it is used. + +Signed-off-by: Chuck Lever +Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfsproc.c | 32 ++++++++++++++++++-------------- + fs/nfsd/nfsxdr.c | 36 ++++++++++++------------------------ + fs/nfsd/xdr.h | 1 - + 3 files changed, 30 insertions(+), 39 deletions(-) + +diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c +index 5c187d3bcb57..deaa34b89251 100644 +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -171,32 +171,36 @@ nfsd_proc_read(struct svc_rqst *rqstp) + { + struct nfsd_readargs *argp = rqstp->rq_argp; + struct nfsd_readres *resp = rqstp->rq_resp; ++ unsigned int len; + u32 eof; ++ int v; + + dprintk("nfsd: READ %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->count, argp->offset); + ++ argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); ++ ++ v = 0; ++ len = argp->count; ++ while (len > 0) { ++ struct page *page = *(rqstp->rq_next_page++); ++ ++ rqstp->rq_vec[v].iov_base = page_address(page); ++ rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); ++ len -= rqstp->rq_vec[v].iov_len; ++ v++; ++ } ++ + /* Obtain buffer pointer for payload. 19 is 1 word for + * status, 17 words for fattr, and 1 word for the byte count. 
+ */ +- +- if (NFSSVC_MAXBLKSIZE_V2 < argp->count) { +- char buf[RPC_MAX_ADDRBUFLEN]; +- printk(KERN_NOTICE +- "oversized read request from %s (%d bytes)\n", +- svc_print_addr(rqstp, buf, sizeof(buf)), +- argp->count); +- argp->count = NFSSVC_MAXBLKSIZE_V2; +- } + svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); + + resp->count = argp->count; +- resp->status = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), +- argp->offset, +- rqstp->rq_vec, argp->vlen, +- &resp->count, +- &eof); ++ fh_copy(&resp->fh, &argp->fh); ++ resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, ++ rqstp->rq_vec, v, &resp->count, &eof); + if (resp->status == nfs_ok) + resp->status = fh_getattr(&resp->fh, &resp->stat); + else if (resp->status == nfserr_jukebox) +diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c +index 45d980d624f4..a44c42d35351 100644 +--- a/fs/nfsd/nfsxdr.c ++++ b/fs/nfsd/nfsxdr.c +@@ -246,33 +246,21 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) + int + nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) + { ++ struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct nfsd_readargs *args = rqstp->rq_argp; +- unsigned int len; +- int v; +- p = decode_fh(p, &args->fh); +- if (!p) +- return 0; ++ u32 totalcount; + +- args->offset = ntohl(*p++); +- len = args->count = ntohl(*p++); +- p++; /* totalcount - unused */ +- +- len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); ++ if (!svcxdr_decode_fhandle(xdr, &args->fh)) ++ return 0; ++ if (xdr_stream_decode_u32(xdr, &args->offset) < 0) ++ return 0; ++ if (xdr_stream_decode_u32(xdr, &args->count) < 0) ++ return 0; ++ /* totalcount is ignored */ ++ if (xdr_stream_decode_u32(xdr, &totalcount) < 0) ++ return 0; + +- /* set up somewhere to store response. 
+- * We take pages, put them on reslist and include in iovec +- */ +- v=0; +- while (len > 0) { +- struct page *p = *(rqstp->rq_next_page++); +- +- rqstp->rq_vec[v].iov_base = page_address(p); +- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); +- len -= rqstp->rq_vec[v].iov_len; +- v++; +- } +- args->vlen = v; +- return xdr_argsize_check(rqstp, p); ++ return 1; + } + + int +diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h +index 50466ac6200c..7c704fa3215e 100644 +--- a/fs/nfsd/xdr.h ++++ b/fs/nfsd/xdr.h +@@ -27,7 +27,6 @@ struct nfsd_readargs { + struct svc_fh fh; + __u32 offset; + __u32 count; +- int vlen; + }; + + struct nfsd_writeargs { +-- +2.35.1 + diff --git a/queue-5.10/nvme-hwmon-consistently-ignore-errors-from-nvme_hwmo.patch b/queue-5.10/nvme-hwmon-consistently-ignore-errors-from-nvme_hwmo.patch new file mode 100644 index 00000000000..01da9b071e8 --- /dev/null +++ b/queue-5.10/nvme-hwmon-consistently-ignore-errors-from-nvme_hwmo.patch @@ -0,0 +1,85 @@ +From 5d0a2f22b089bc5f655f8fe3365e7cb780b89ebe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 16:55:55 +0200 +Subject: nvme-hwmon: consistently ignore errors from nvme_hwmon_init + +From: Christoph Hellwig + +[ Upstream commit 6b8cf94005187952f794c0c4ed3920a1e8accfa3 ] + +An NVMe controller works perfectly fine even when the hwmon +initialization fails. Stop returning errors that do not come from a +controller reset from nvme_hwmon_init to handle this case consistently. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Guenter Roeck +Reviewed-by: Serge Semin +Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer") +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/core.c | 6 +++++- + drivers/nvme/host/hwmon.c | 13 ++++++++----- + 2 files changed, 13 insertions(+), 6 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 51e5c12988fe..3f106771d15b 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -3232,8 +3232,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) + return ret; + + if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { ++ /* ++ * Do not return errors unless we are in a controller reset, ++ * the controller works perfectly fine without hwmon. ++ */ + ret = nvme_hwmon_init(ctrl); +- if (ret < 0) ++ if (ret == -EINTR) + return ret; + } + +diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c +index 0a586d712920..23918bb7bdca 100644 +--- a/drivers/nvme/host/hwmon.c ++++ b/drivers/nvme/host/hwmon.c +@@ -230,7 +230,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) +- return 0; ++ return -ENOMEM; + + data->ctrl = ctrl; + mutex_init(&data->read_lock); +@@ -238,8 +238,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) + err = nvme_hwmon_get_smart_log(data); + if (err) { + dev_warn(dev, "Failed to read smart log (error %d)\n", err); +- kfree(data); +- return err; ++ goto err_free_data; + } + + hwmon = hwmon_device_register_with_info(dev, "nvme", +@@ -247,11 +246,15 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) + NULL); + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); +- kfree(data); +- return PTR_ERR(hwmon); ++ err = PTR_ERR(hwmon); ++ goto err_free_data; + } + ctrl->hwmon_device = hwmon; + return 0; ++ ++err_free_data: ++ kfree(data); ++ return err; + } + + void nvme_hwmon_exit(struct nvme_ctrl *ctrl) +-- +2.35.1 + diff --git 
a/queue-5.10/nvme-hwmon-kmalloc-the-nvme-smart-log-buffer.patch b/queue-5.10/nvme-hwmon-kmalloc-the-nvme-smart-log-buffer.patch new file mode 100644 index 00000000000..3aa9784eaae --- /dev/null +++ b/queue-5.10/nvme-hwmon-kmalloc-the-nvme-smart-log-buffer.patch @@ -0,0 +1,144 @@ +From b6006cb7a37dfc50438b101482cb35a2872bc944 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 17:33:52 +0200 +Subject: nvme-hwmon: kmalloc the NVME SMART log buffer + +From: Serge Semin + +[ Upstream commit c94b7f9bab22ac504f9153767676e659988575ad ] + +Recent commit 52fde2c07da6 ("nvme: set dma alignment to dword") has +caused a regression on our platform. + +It turned out that the nvme_get_log() method invocation caused the +nvme_hwmon_data structure instance corruption. In particular the +nvme_hwmon_data.ctrl pointer was overwritten either with zeros or with +garbage. After some research we discovered that the problem happened +even before the actual NVME DMA execution, but during the buffer mapping. +Since our platform is DMA-noncoherent, the mapping implied the cache-line +invalidations or write-backs depending on the DMA-direction parameter. +In case of the NVME SMART log getting the DMA was performed +from-device-to-memory, thus the cache-invalidation was activated during +the buffer mapping. Since the log-buffer isn't cache-line aligned, the +cache-invalidation caused the neighbour data to be discarded. The +neighbouring data turned to be the data surrounding the buffer in the +framework of the nvme_hwmon_data structure. + +In order to fix that we need to make sure that the whole log-buffer is +defined within the cache-line-aligned memory region so the +cache-invalidation procedure wouldn't involve the adjacent data. One of +the option to guarantee that is to kmalloc the DMA-buffer [1]. Seeing the +rest of the NVME core driver prefer that method it has been chosen to fix +this problem too. 
+ +Note after a deeper researches we found out that the denoted commit wasn't +a root cause of the problem. It just revealed the invalidity by activating +the DMA-based NVME SMART log getting performed in the framework of the +NVME hwmon driver. The problem was here since the initial commit of the +driver. + +[1] Documentation/core-api/dma-api-howto.rst + +Fixes: 400b6a7b13a3 ("nvme: Add hardware monitoring support") +Signed-off-by: Serge Semin +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/hwmon.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c +index 23918bb7bdca..9e6e56c20ec9 100644 +--- a/drivers/nvme/host/hwmon.c ++++ b/drivers/nvme/host/hwmon.c +@@ -12,7 +12,7 @@ + + struct nvme_hwmon_data { + struct nvme_ctrl *ctrl; +- struct nvme_smart_log log; ++ struct nvme_smart_log *log; + struct mutex read_lock; + }; + +@@ -60,14 +60,14 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, + static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data) + { + return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, +- NVME_CSI_NVM, &data->log, sizeof(data->log), 0); ++ NVME_CSI_NVM, data->log, sizeof(*data->log), 0); + } + + static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) + { + struct nvme_hwmon_data *data = dev_get_drvdata(dev); +- struct nvme_smart_log *log = &data->log; ++ struct nvme_smart_log *log = data->log; + int temp; + int err; + +@@ -163,7 +163,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data, + case hwmon_temp_max: + case hwmon_temp_min: + if ((!channel && data->ctrl->wctemp) || +- (channel && data->log.temp_sensor[channel - 1])) { ++ (channel && data->log->temp_sensor[channel - 1])) { + if (data->ctrl->quirks & + NVME_QUIRK_NO_TEMP_THRESH_CHANGE) + return 0444; +@@ -176,7 +176,7 @@ static umode_t 
nvme_hwmon_is_visible(const void *_data, + break; + case hwmon_temp_input: + case hwmon_temp_label: +- if (!channel || data->log.temp_sensor[channel - 1]) ++ if (!channel || data->log->temp_sensor[channel - 1]) + return 0444; + break; + default: +@@ -232,13 +232,19 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) + if (!data) + return -ENOMEM; + ++ data->log = kzalloc(sizeof(*data->log), GFP_KERNEL); ++ if (!data->log) { ++ err = -ENOMEM; ++ goto err_free_data; ++ } ++ + data->ctrl = ctrl; + mutex_init(&data->read_lock); + + err = nvme_hwmon_get_smart_log(data); + if (err) { + dev_warn(dev, "Failed to read smart log (error %d)\n", err); +- goto err_free_data; ++ goto err_free_log; + } + + hwmon = hwmon_device_register_with_info(dev, "nvme", +@@ -247,11 +253,13 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); + err = PTR_ERR(hwmon); +- goto err_free_data; ++ goto err_free_log; + } + ctrl->hwmon_device = hwmon; + return 0; + ++err_free_log: ++ kfree(data->log); + err_free_data: + kfree(data); + return err; +@@ -265,6 +273,7 @@ void nvme_hwmon_exit(struct nvme_ctrl *ctrl) + + hwmon_device_unregister(ctrl->hwmon_device); + ctrl->hwmon_device = NULL; ++ kfree(data->log); + kfree(data); + } + } +-- +2.35.1 + diff --git a/queue-5.10/nvme-hwmon-return-error-code-when-registration-fails.patch b/queue-5.10/nvme-hwmon-return-error-code-when-registration-fails.patch new file mode 100644 index 00000000000..012aae7a54c --- /dev/null +++ b/queue-5.10/nvme-hwmon-return-error-code-when-registration-fails.patch @@ -0,0 +1,38 @@ +From 9b1decb3ed2711a2dbb96720490578f9a9d6f236 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 12 Feb 2021 10:30:15 +0100 +Subject: nvme-hwmon: Return error code when registration fails + +From: Daniel Wagner + +[ Upstream commit 78570f8873c8cd44c12714c7fa7db2601ec5617d ] + +The hwmon pointer wont be NULL if the registration fails. 
Though the +exit code path will assign it to ctrl->hwmon_device. Later +nvme_hwmon_exit() will try to free the invalid pointer. Avoid this by +returning the error code from hwmon_device_register_with_info(). + +Fixes: ed7770f66286 ("nvme/hwmon: rework to avoid devm allocation") +Signed-off-by: Daniel Wagner +Signed-off-by: Christoph Hellwig +Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer") +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/hwmon.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c +index 8f9e96986780..0a586d712920 100644 +--- a/drivers/nvme/host/hwmon.c ++++ b/drivers/nvme/host/hwmon.c +@@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); + kfree(data); ++ return PTR_ERR(hwmon); + } + ctrl->hwmon_device = hwmon; + return 0; +-- +2.35.1 + diff --git a/queue-5.10/nvme-hwmon-rework-to-avoid-devm-allocation.patch b/queue-5.10/nvme-hwmon-rework-to-avoid-devm-allocation.patch new file mode 100644 index 00000000000..507657eff2c --- /dev/null +++ b/queue-5.10/nvme-hwmon-rework-to-avoid-devm-allocation.patch @@ -0,0 +1,134 @@ +From 10615e7e2ba8e3473dbb0063ad45919c1a4689fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Jan 2021 07:43:18 +0100 +Subject: nvme-hwmon: rework to avoid devm allocation + +From: Hannes Reinecke + +[ Upstream commit ed7770f6628691c13c9423bce7eee7cff2399c12 ] + +The original design to use device-managed resource allocation +doesn't really work as the NVMe controller has a vastly different +lifetime than the hwmon sysfs attributes, causing warning about +duplicate sysfs entries upon reconnection. +This patch reworks the hwmon allocation to avoid device-managed +resource allocation, and uses the NVMe controller as parent for +the sysfs attributes. 
+ +Cc: Guenter Roeck +Signed-off-by: Hannes Reinecke +Tested-by: Enzo Matsumiya +Tested-by: Daniel Wagner +Signed-off-by: Christoph Hellwig +Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer") +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/core.c | 1 + + drivers/nvme/host/hwmon.c | 31 +++++++++++++++++++++---------- + drivers/nvme/host/nvme.h | 8 ++++++++ + 3 files changed, 30 insertions(+), 10 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index e9c13804760e..51e5c12988fe 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -4485,6 +4485,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); + + void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) + { ++ nvme_hwmon_exit(ctrl); + nvme_fault_inject_fini(&ctrl->fault_inject); + dev_pm_qos_hide_latency_tolerance(ctrl->device); + cdev_device_del(&ctrl->cdev, ctrl->device); +diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c +index 552dbc04567b..8f9e96986780 100644 +--- a/drivers/nvme/host/hwmon.c ++++ b/drivers/nvme/host/hwmon.c +@@ -223,12 +223,12 @@ static const struct hwmon_chip_info nvme_hwmon_chip_info = { + + int nvme_hwmon_init(struct nvme_ctrl *ctrl) + { +- struct device *dev = ctrl->dev; ++ struct device *dev = ctrl->device; + struct nvme_hwmon_data *data; + struct device *hwmon; + int err; + +- data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); ++ data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return 0; + +@@ -237,19 +237,30 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) + + err = nvme_hwmon_get_smart_log(data); + if (err) { +- dev_warn(ctrl->device, +- "Failed to read smart log (error %d)\n", err); +- devm_kfree(dev, data); ++ dev_warn(dev, "Failed to read smart log (error %d)\n", err); ++ kfree(data); + return err; + } + +- hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data, +- &nvme_hwmon_chip_info, +- NULL); ++ hwmon = hwmon_device_register_with_info(dev, "nvme", ++ data, &nvme_hwmon_chip_info, ++ 
NULL); + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); +- devm_kfree(dev, data); ++ kfree(data); + } +- ++ ctrl->hwmon_device = hwmon; + return 0; + } ++ ++void nvme_hwmon_exit(struct nvme_ctrl *ctrl) ++{ ++ if (ctrl->hwmon_device) { ++ struct nvme_hwmon_data *data = ++ dev_get_drvdata(ctrl->hwmon_device); ++ ++ hwmon_device_unregister(ctrl->hwmon_device); ++ ctrl->hwmon_device = NULL; ++ kfree(data); ++ } ++} +diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h +index 58cf9e39d613..abae7ef2ac51 100644 +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -257,6 +257,9 @@ struct nvme_ctrl { + struct rw_semaphore namespaces_rwsem; + struct device ctrl_device; + struct device *device; /* char device */ ++#ifdef CONFIG_NVME_HWMON ++ struct device *hwmon_device; ++#endif + struct cdev cdev; + struct work_struct reset_work; + struct work_struct delete_work; +@@ -876,11 +879,16 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) + + #ifdef CONFIG_NVME_HWMON + int nvme_hwmon_init(struct nvme_ctrl *ctrl); ++void nvme_hwmon_exit(struct nvme_ctrl *ctrl); + #else + static inline int nvme_hwmon_init(struct nvme_ctrl *ctrl) + { + return 0; + } ++ ++static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl) ++{ ++} + #endif + + u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, +-- +2.35.1 + diff --git a/queue-5.10/perf-pmu-validate-raw-event-with-sysfs-exported-form.patch b/queue-5.10/perf-pmu-validate-raw-event-with-sysfs-exported-form.patch new file mode 100644 index 00000000000..d24700a1b6c --- /dev/null +++ b/queue-5.10/perf-pmu-validate-raw-event-with-sysfs-exported-form.patch @@ -0,0 +1,166 @@ +From 5ba80251870dc82e661ccc83e6d60223d07dd2a4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Mar 2021 13:11:38 +0800 +Subject: perf pmu: Validate raw event with sysfs exported format bits + +From: Jin Yao + +[ Upstream commit e40647762fb5881360874e08e03e972d58d63c42 ] + +A 
raw PMU event (eventsel+umask) in the form of rNNN is supported +by perf but lacks of checking for the validity of raw encoding. + +For example, bit 16 and bit 17 are not valid on KBL but perf doesn't +report warning when encoding with these bits. + +Before: + + # ./perf stat -e cpu/r031234/ -a -- sleep 1 + + Performance counter stats for 'system wide': + + 0 cpu/r031234/ + + 1.003798924 seconds time elapsed + +It may silently measure the wrong event! + +The kernel supported bits have been exported through +/sys/devices//format/. Perf collects the information to +'struct perf_pmu_format' and links it to 'pmu->format' list. + +The 'struct perf_pmu_format' has a bitmap which records the +valid bits for this format. For example, + + root@kbl-ppc:/sys/devices/cpu/format# cat umask + config:8-15 + +The valid bits (bit8-bit15) are recorded in bitmap of format 'umask'. + +We collect total valid bits of all formats, save to a local variable +'masks' and reverse it. Now '~masks' represents total invalid bits. + +bits = config & ~masks; + +The set bits in 'bits' indicate the invalid bits used in config. +Finally we use bitmap_scnprintf to report the invalid bits. + +Some architectures may not export supported bits through sysfs, +so if masks is 0, perf_pmu__warn_invalid_config directly returns. + +After: + +Single event without name: + + # ./perf stat -e cpu/r031234/ -a -- sleep 1 + WARNING: event 'N/A' not valid (bits 16-17 of config '31234' not supported by kernel)! + + Performance counter stats for 'system wide': + + 0 cpu/r031234/ + + 1.001597373 seconds time elapsed + +Multiple events with names: + + # ./perf stat -e cpu/rf01234,name=aaa/,cpu/r031234,name=bbb/ -a -- sleep 1 + WARNING: event 'aaa' not valid (bits 20,22 of config 'f01234' not supported by kernel)! + WARNING: event 'bbb' not valid (bits 16-17 of config '31234' not supported by kernel)! 
+ + Performance counter stats for 'system wide': + + 0 aaa + 0 bbb + + 1.001573787 seconds time elapsed + +Warnings are reported for invalid bits. + +Co-developed-by: Jiri Olsa +Signed-off-by: Jin Yao +Reviewed-by: Jiri Olsa +Cc: Alexander Shishkin +Cc: Andi Kleen +Cc: Jin Yao +Cc: Kan Liang +Cc: Peter Zijlstra +Link: http://lore.kernel.org/lkml/20210310051138.12154-1-yao.jin@linux.intel.com +Signed-off-by: Arnaldo Carvalho de Melo +Stable-dep-of: e552b7be12ed ("perf: Skip and warn on unknown format 'configN' attrs") +Signed-off-by: Sasha Levin +--- + tools/perf/util/parse-events.c | 3 +++ + tools/perf/util/pmu.c | 33 +++++++++++++++++++++++++++++++++ + tools/perf/util/pmu.h | 3 +++ + 3 files changed, 39 insertions(+) + +diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c +index 3a0a7930cd10..36969fc8f1fc 100644 +--- a/tools/perf/util/parse-events.c ++++ b/tools/perf/util/parse-events.c +@@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx, + struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : + cpu_list ? perf_cpu_map__new(cpu_list) : NULL; + ++ if (pmu && attr->type == PERF_TYPE_RAW) ++ perf_pmu__warn_invalid_config(pmu, attr->config, name); ++ + if (init_attr) + event_attr_init(attr); + +diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c +index d41caeb35cf6..349012f7defb 100644 +--- a/tools/perf/util/pmu.c ++++ b/tools/perf/util/pmu.c +@@ -1716,3 +1716,36 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) + + return nr_caps; + } ++ ++void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, ++ char *name) ++{ ++ struct perf_pmu_format *format; ++ __u64 masks = 0, bits; ++ char buf[100]; ++ unsigned int i; ++ ++ list_for_each_entry(format, &pmu->format, list) { ++ if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG) ++ continue; ++ ++ for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS) ++ masks |= 1ULL << i; ++ } ++ ++ /* ++ * Kernel doesn't export any valid format bits. 
++ */ ++ if (masks == 0) ++ return; ++ ++ bits = config & ~masks; ++ if (bits == 0) ++ return; ++ ++ bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf)); ++ ++ pr_warning("WARNING: event '%s' not valid (bits %s of config " ++ "'%llx' not supported by kernel)!\n", ++ name ?: "N/A", buf, config); ++} +diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h +index a64e9c9ce731..d9aa8c958d21 100644 +--- a/tools/perf/util/pmu.h ++++ b/tools/perf/util/pmu.h +@@ -120,4 +120,7 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval); + + int perf_pmu__caps_parse(struct perf_pmu *pmu); + ++void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, ++ char *name); ++ + #endif /* __PMU_H */ +-- +2.35.1 + diff --git a/queue-5.10/perf-skip-and-warn-on-unknown-format-confign-attrs.patch b/queue-5.10/perf-skip-and-warn-on-unknown-format-confign-attrs.patch new file mode 100644 index 00000000000..8de4a6b850e --- /dev/null +++ b/queue-5.10/perf-skip-and-warn-on-unknown-format-confign-attrs.patch @@ -0,0 +1,172 @@ +From bec74548fda891a38764e2bf08e0063c3f82dc33 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Oct 2022 14:12:35 -0500 +Subject: perf: Skip and warn on unknown format 'configN' attrs + +From: Rob Herring + +[ Upstream commit e552b7be12ed62357df84392efa525ecb01910fb ] + +If the kernel exposes a new perf_event_attr field in a format attr, perf +will return an error stating the specified PMU can't be found. For +example, a format attr with 'config3:0-63' causes an error as config3 is +unknown to perf. This causes a compatibility issue between a newer +kernel with older perf tool. + +Before this change with a kernel adding 'config3' I get: + + $ perf record -e arm_spe// -- true + event syntax error: 'arm_spe//' + \___ Cannot find PMU `arm_spe'. Missing kernel support? + Run 'perf list' for a list of valid events + + Usage: perf record [] [] + or: perf record [] -- [] + + -e, --event event selector. 
use 'perf list' to list + available events + +After this change, I get: + + $ perf record -e arm_spe// -- true + WARNING: 'arm_spe_0' format 'inv_event_filter' requires 'perf_event_attr::config3' which is not supported by this version of perf! + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 0.091 MB perf.data ] + +To support unknown configN formats, rework the YACC implementation to +pass any config[0-9]+ format to perf_pmu__new_format() to handle with a +warning. + +Reviewed-by: Namhyung Kim +Signed-off-by: Rob Herring +Tested-by: Leo Yan +Cc: Alexander Shishkin +Cc: Ingo Molnar +Cc: James Clark +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Peter Zijlstra +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220914-arm-perf-tool-spe1-2-v2-v4-1-83c098e6212e@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/parse-events.c | 3 +++ + tools/perf/util/pmu.c | 17 +++++++++++++++++ + tools/perf/util/pmu.h | 2 ++ + tools/perf/util/pmu.l | 2 -- + tools/perf/util/pmu.y | 15 ++++----------- + 5 files changed, 26 insertions(+), 13 deletions(-) + +diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c +index 36969fc8f1fc..c56a4d9c3be9 100644 +--- a/tools/perf/util/parse-events.c ++++ b/tools/perf/util/parse-events.c +@@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx, + struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : + cpu_list ? 
perf_cpu_map__new(cpu_list) : NULL; + ++ if (pmu) ++ perf_pmu__warn_invalid_formats(pmu); ++ + if (pmu && attr->type == PERF_TYPE_RAW) + perf_pmu__warn_invalid_config(pmu, attr->config, name); + +diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c +index 349012f7defb..ac45da0302a7 100644 +--- a/tools/perf/util/pmu.c ++++ b/tools/perf/util/pmu.c +@@ -862,6 +862,23 @@ static struct perf_pmu *pmu_lookup(const char *name) + return pmu; + } + ++void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) ++{ ++ struct perf_pmu_format *format; ++ ++ /* fake pmu doesn't have format list */ ++ if (pmu == &perf_pmu__fake) ++ return; ++ ++ list_for_each_entry(format, &pmu->format, list) ++ if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) { ++ pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'" ++ "which is not supported by this version of perf!\n", ++ pmu->name, format->name, format->value); ++ return; ++ } ++} ++ + static struct perf_pmu *pmu_find(const char *name) + { + struct perf_pmu *pmu; +diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h +index d9aa8c958d21..7d208b850769 100644 +--- a/tools/perf/util/pmu.h ++++ b/tools/perf/util/pmu.h +@@ -15,6 +15,7 @@ enum { + PERF_PMU_FORMAT_VALUE_CONFIG, + PERF_PMU_FORMAT_VALUE_CONFIG1, + PERF_PMU_FORMAT_VALUE_CONFIG2, ++ PERF_PMU_FORMAT_VALUE_CONFIG_END, + }; + + #define PERF_PMU_FORMAT_BITS 64 +@@ -122,5 +123,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu); + + void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, + char *name); ++void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); + + #endif /* __PMU_H */ +diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l +index a15d9fbd7c0e..58b4926cfaca 100644 +--- a/tools/perf/util/pmu.l ++++ b/tools/perf/util/pmu.l +@@ -27,8 +27,6 @@ num_dec [0-9]+ + + {num_dec} { return value(10); } + config { return PP_CONFIG; } +-config1 { return PP_CONFIG1; } +-config2 { return PP_CONFIG2; } + - { return '-'; } + : { return 
':'; } + , { return ','; } +diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y +index bfd7e8509869..283efe059819 100644 +--- a/tools/perf/util/pmu.y ++++ b/tools/perf/util/pmu.y +@@ -20,7 +20,7 @@ do { \ + + %} + +-%token PP_CONFIG PP_CONFIG1 PP_CONFIG2 ++%token PP_CONFIG + %token PP_VALUE PP_ERROR + %type PP_VALUE + %type bit_term +@@ -47,18 +47,11 @@ PP_CONFIG ':' bits + $3)); + } + | +-PP_CONFIG1 ':' bits ++PP_CONFIG PP_VALUE ':' bits + { + ABORT_ON(perf_pmu__new_format(format, name, +- PERF_PMU_FORMAT_VALUE_CONFIG1, +- $3)); +-} +-| +-PP_CONFIG2 ':' bits +-{ +- ABORT_ON(perf_pmu__new_format(format, name, +- PERF_PMU_FORMAT_VALUE_CONFIG2, +- $3)); ++ $2, ++ $4)); + } + + bits: +-- +2.35.1 + diff --git a/queue-5.10/revert-crypto-qat-reduce-size-of-mapped-region.patch b/queue-5.10/revert-crypto-qat-reduce-size-of-mapped-region.patch new file mode 100644 index 00000000000..5163584403f --- /dev/null +++ b/queue-5.10/revert-crypto-qat-reduce-size-of-mapped-region.patch @@ -0,0 +1,103 @@ +From 44dd123969ab1fda162e65b27c98f6cc86979678 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Sep 2022 11:49:13 +0100 +Subject: Revert "crypto: qat - reduce size of mapped region" + +From: Giovanni Cabiddu + +[ Upstream commit 9c5f21b198d259bfe1191b1fedf08e2eab15b33b ] + +This reverts commit e48767c17718067ba21fb2ef461779ec2506f845. + +In an attempt to resolve a set of warnings reported by the static +analyzer Smatch, the reverted commit improperly reduced the sizes of the +DMA mappings used for the input and output parameters for both RSA and +DH creating a mismatch (map size=8 bytes, unmap size=64 bytes). + +This issue is reported when CONFIG_DMA_API_DEBUG is selected, when the +crypto self test is run. The function dma_unmap_single() reports a +warning similar to the one below, saying that the `device driver frees +DMA memory with different size`. 
+ + DMA-API: 4xxx 0000:06:00.0: device driver frees DMA memory with different size [device address=0x0000000123206c80] [map size=8 bytes] [unmap size=64 bytes] + WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:973 check_unmap+0x3d0/0x8c0\ + ... + Call Trace: + + debug_dma_unmap_page+0x5c/0x60 + qat_dh_cb+0xd7/0x110 [intel_qat] + qat_alg_asym_callback+0x1a/0x30 [intel_qat] + adf_response_handler+0xbd/0x1a0 [intel_qat] + tasklet_action_common.constprop.0+0xcd/0xe0 + __do_softirq+0xf8/0x30c + __irq_exit_rcu+0xbf/0x140 + common_interrupt+0xb9/0xd0 + + + +The original commit was correct. + +Cc: +Reported-by: Herbert Xu +Signed-off-by: Giovanni Cabiddu +Signed-off-by: Herbert Xu +Signed-off-by: Sasha Levin +--- + drivers/crypto/qat/qat_common/qat_asym_algs.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c +index 2b1aca487fc3..846569ec9066 100644 +--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c ++++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c +@@ -326,13 +326,13 @@ static int qat_dh_compute_value(struct kpp_request *req) + qat_req->out.dh.out_tab[1] = 0; + /* Mapping in.in.b or in.in_g2.xa is the same */ + qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, +- sizeof(qat_req->in.dh.in.b), ++ sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, +- sizeof(qat_req->out.dh.r), ++ sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; +@@ -721,13 +721,13 @@ static int qat_rsa_enc(struct akcipher_request *req) + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, +- sizeof(qat_req->in.rsa.enc.m), ++ sizeof(struct qat_rsa_input_params), + 
DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, +- sizeof(qat_req->out.rsa.enc.c), ++ sizeof(struct qat_rsa_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; +@@ -869,13 +869,13 @@ static int qat_rsa_dec(struct akcipher_request *req) + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, +- sizeof(qat_req->in.rsa.dec.c), ++ sizeof(struct qat_rsa_input_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, +- sizeof(qat_req->out.rsa.dec.m), ++ sizeof(struct qat_rsa_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; +-- +2.35.1 + diff --git a/queue-5.10/riscv-add-machine-name-to-kernel-boot-log-and-stack-.patch b/queue-5.10/riscv-add-machine-name-to-kernel-boot-log-and-stack-.patch new file mode 100644 index 00000000000..750af27ea9a --- /dev/null +++ b/queue-5.10/riscv-add-machine-name-to-kernel-boot-log-and-stack-.patch @@ -0,0 +1,45 @@ +From f70267a06e7f8e62e815e2253de4b2ac7fa5a28b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Nov 2020 19:44:15 +0800 +Subject: riscv: Add machine name to kernel boot log and stack dump output + +From: Kefeng Wang + +[ Upstream commit 46ad48e8a28da7cc37a16c7e7fc632ecf906e4bf ] + +Add the machine name to kernel boot-up log, and install +the machine name to stack dump for DT boot mode. 
+ +Signed-off-by: Kefeng Wang +Reviewed-by: Atish Patra +Signed-off-by: Palmer Dabbelt +Stable-dep-of: 10f6913c548b ("riscv: always honor the CONFIG_CMDLINE_FORCE when parsing dtb") +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/setup.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c +index 117f3212a8e4..5d17d3ce36fd 100644 +--- a/arch/riscv/kernel/setup.c ++++ b/arch/riscv/kernel/setup.c +@@ -54,8 +54,15 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); + static void __init parse_dtb(void) + { + /* Early scan of device tree from init memory */ +- if (early_init_dt_scan(dtb_early_va)) ++ if (early_init_dt_scan(dtb_early_va)) { ++ const char *name = of_flat_dt_get_machine_name(); ++ ++ if (name) { ++ pr_info("Machine model: %s\n", name); ++ dump_stack_set_arch_desc("%s (DT)", name); ++ } + return; ++ } + + pr_err("No DTB passed to the kernel\n"); + #ifdef CONFIG_CMDLINE_FORCE +-- +2.35.1 + diff --git a/queue-5.10/riscv-always-honor-the-config_cmdline_force-when-par.patch b/queue-5.10/riscv-always-honor-the-config_cmdline_force-when-par.patch new file mode 100644 index 00000000000..1bc1fa5c727 --- /dev/null +++ b/queue-5.10/riscv-always-honor-the-config_cmdline_force-when-par.patch @@ -0,0 +1,57 @@ +From 2c967e87d16194adbf78dfc575762eb3f5d226e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Jul 2022 16:38:22 -0400 +Subject: riscv: always honor the CONFIG_CMDLINE_FORCE when parsing dtb +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Wenting Zhang + +[ Upstream commit 10f6913c548b32ecb73801a16b120e761c6957ea ] + +When CONFIG_CMDLINE_FORCE is enabled, cmdline provided by +CONFIG_CMDLINE are always used. This allows CONFIG_CMDLINE to be +used regardless of the result of device tree scanning. + +This especially fixes the case where a device tree without the +chosen node is supplied to the kernel. 
In such cases, +early_init_dt_scan would return true. But inside +early_init_dt_scan_chosen, the cmdline won't be updated as there +is no chosen node in the device tree. As a result, CONFIG_CMDLINE +is not copied into boot_command_line even if CONFIG_CMDLINE_FORCE +is enabled. This commit allows properly update boot_command_line +in this situation. + +Fixes: 8fd6e05c7463 ("arch: riscv: support kernel command line forcing when no DTB passed") +Signed-off-by: Wenting Zhang +Reviewed-by: Björn Töpel +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/PSBPR04MB399135DFC54928AB958D0638B1829@PSBPR04MB3991.apcprd04.prod.outlook.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/setup.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c +index 5d17d3ce36fd..cc85858f7fe8 100644 +--- a/arch/riscv/kernel/setup.c ++++ b/arch/riscv/kernel/setup.c +@@ -61,10 +61,10 @@ static void __init parse_dtb(void) + pr_info("Machine model: %s\n", name); + dump_stack_set_arch_desc("%s (DT)", name); + } +- return; ++ } else { ++ pr_err("No DTB passed to the kernel\n"); + } + +- pr_err("No DTB passed to the kernel\n"); + #ifdef CONFIG_CMDLINE_FORCE + strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + pr_info("Forcing kernel command line to: %s\n", boot_command_line); +-- +2.35.1 + diff --git a/queue-5.10/series b/queue-5.10/series index f0142d52a06..01f84e08a2e 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -22,3 +22,76 @@ block-wbt-remove-unnecessary-invoking-of-wbt_update_limits-in-wbt_init.patch blk-wbt-call-rq_qos_add-after-wb_normal-is-initialized.patch arm64-errata-remove-aes-hwcap-for-compat-tasks.patch r8152-add-pid-for-the-lenovo-onelink-dock.patch +btrfs-fix-processing-of-delayed-data-refs-during-bac.patch +btrfs-fix-processing-of-delayed-tree-block-refs-duri.patch +acpi-extlog-handle-multiple-records.patch 
+tipc-fix-recognition-of-trial-period.patch +tipc-fix-an-information-leak-in-tipc_topsrv_kern_sub.patch +i40e-fix-dma-mappings-leak.patch +hid-magicmouse-do-not-set-btn_mouse-on-double-report.patch +sfc-change-vf-mac-via-pf-as-first-preference-if-avai.patch +net-atm-fix-proc_mpc_write-incorrect-return-value.patch +net-phy-dp83867-extend-rx-strap-quirk-for-sgmii-mode.patch +tcp-add-num_closed_socks-to-struct-sock_reuseport.patch +udp-update-reuse-has_conns-under-reuseport_lock.patch +cifs-fix-xid-leak-in-cifs_copy_file_range.patch +cifs-fix-xid-leak-in-cifs_flock.patch +cifs-fix-xid-leak-in-cifs_ses_add_channel.patch +net-hsr-avoid-possible-null-deref-in-skb_clone.patch +ionic-catch-null-pointer-issue-on-reconfig.patch +libbpf-use-is_err_or_null-in-hashmap__free.patch +nvme-hwmon-rework-to-avoid-devm-allocation.patch +nvme-hwmon-return-error-code-when-registration-fails.patch +nvme-hwmon-consistently-ignore-errors-from-nvme_hwmo.patch +nvme-hwmon-kmalloc-the-nvme-smart-log-buffer.patch +net-sched-cake-fix-null-pointer-access-issue-when-ca.patch +net-sched-delete-duplicate-cleanup-of-backlog-and-ql.patch +net-sched-sfb-fix-null-pointer-access-issue-when-sfb.patch +sfc-include-vport_id-in-filter-spec-hash-and-equal.patch +net-hns-fix-possible-memory-leak-in-hnae_ae_register.patch +net-sched-fix-race-condition-in-qdisc_graft.patch +net-phy-dp83822-disable-mdi-crossover-status-change-.patch +iommu-vt-d-allow-nvs-regions-in-arch_rmrr_sanity_che.patch +iommu-vt-d-clean-up-si_domain-in-the-init_dmars-erro.patch +drm-virtio-use-appropriate-atomic-state-in-virtio_gp.patch +crypto-qat-reduce-size-of-mapped-region.patch +revert-crypto-qat-reduce-size-of-mapped-region.patch +usb-add-reset_resume-quirk-for-nvidia-jetson-devices.patch +kernfs-fix-use-after-free-in-__kernfs_remove.patch +sunrpc-add-xdr_set_scratch_page-and-xdr_reset_scratc.patch +sunrpc-prepare-for-xdr_stream-style-decoding-on-the-.patch +nfsd-add-common-helpers-to-decode-void-args-and-enco.patch 
+nfsd-update-the-nfsv2-getattr-argument-decoder-to-us.patch +nfsd-update-the-nfsv2-read-argument-decoder-to-use-s.patch +nfsd-protect-against-send-buffer-overflow-in-nfsv2-r.patch +nfsd-replace-the-internals-of-the-read_buf-macro.patch +nfsd-replace-read-macros-in-nfsd4_decode_commit.patch +nfsd-update-getattr3args-decoder-to-use-struct-xdr_s.patch +nfsd-update-read3arg-decoder-to-use-struct-xdr_strea.patch +nfsd-fix-the-behavior-of-read-near-offset_max.patch +nfsd-protect-against-send-buffer-overflow-in-nfsv3-r.patch +dmaengine-mxs-dma-remove-the-unused-.id_table.patch +dmaengine-mxs-use-platform_driver_register.patch +alsa-hda-realtek-fix-speakers-and-micmute-on-hp-855-.patch +writeback-don-t-warn-on-an-unregistered-bdi-in-__mar.patch +fs-correctly-document-the-inode-dirty-flags.patch +fs-don-t-call-dirty_inode-for-lazytime-timestamp-upd.patch +fs-pass-only-i_dirty_inode-flags-to-dirty_inode.patch +fs-clean-up-__mark_inode_dirty-a-bit.patch +writeback-cgroup-keep-list-of-inodes-attached-to-bdi.patch +writeback-avoid-skipping-inode-writeback.patch +writeback-fix-inode-i_io_list-not-be-protected-by-in.patch +fs-record-i_dirty_time-even-if-inode-already-has-i_d.patch +tracing-simplify-conditional-compilation-code-in-tra.patch +tracing-do-not-free-snapshot-if-tracer-is-on-cmdline.patch +xen-assume-xenfeat_gnttab_map_avail_bits-being-set-f.patch +xen-gntdev-accommodate-vma-splitting.patch +mmc-core-support-zeroout-using-trim-for-emmc.patch +mmc-core-add-sd-card-quirk-for-broken-discard.patch +mmc-sdhci-tegra-use-actual-clock-rate-for-sw-tuning-.patch +riscv-add-machine-name-to-kernel-boot-log-and-stack-.patch +riscv-always-honor-the-config_cmdline_force-when-par.patch +perf-pmu-validate-raw-event-with-sysfs-exported-form.patch +perf-skip-and-warn-on-unknown-format-confign-attrs.patch +fcntl-make-f_getown-ex-return-0-on-dead-owner-task.patch +fcntl-fix-potential-deadlocks-for-fown_struct.lock.patch diff --git 
a/queue-5.10/sfc-change-vf-mac-via-pf-as-first-preference-if-avai.patch b/queue-5.10/sfc-change-vf-mac-via-pf-as-first-preference-if-avai.patch new file mode 100644 index 00000000000..5cd62c03ba8 --- /dev/null +++ b/queue-5.10/sfc-change-vf-mac-via-pf-as-first-preference-if-avai.patch @@ -0,0 +1,128 @@ +From 5614b64913269b7e6ee3aab8e68784a56e2755c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Oct 2022 10:55:53 +0100 +Subject: sfc: Change VF mac via PF as first preference if available. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jonathan Cooper + +[ Upstream commit a8aed7b35becfd21f22a77c7014029ea837b018f ] + +Changing a VF's mac address through the VF (rather than via the PF) +fails with EPERM because the latter part of efx_ef10_set_mac_address +attempts to change the vport mac address list as the VF. +Even with this fixed it still fails with EBUSY because the vadaptor +is still assigned on the VF - the vadaptor reassignment must be within +a section where the VF has torn down its state. + +A major reason this has broken is because we have two functions that +ostensibly do the same thing - have a PF and VF cooperate to change a +VF mac address. Rather than do this, if we are changing the mac of a VF +that has a link to the PF in the same VM then simply call +sriov_set_vf_mac instead, which is a proven working function that does +that. + +If there is no PF available, or that fails non-fatally, then attempt to +change the VF's mac address as we would a PF, without updating the PF's +data. + +Test case: +Create a VF: + echo 1 > /sys/class/net//device/sriov_numvfs +Set the mac address of the VF directly: + ip link set addr 00:11:22:33:44:55 +Set the MAC address of the VF via the PF: + ip link set vf 0 mac 00:11:22:33:44:66 +Without this patch the last command will fail with ENOENT. 
+ +Signed-off-by: Jonathan Cooper +Reported-by: Íñigo Huguet +Fixes: 910c8789a777 ("set the MAC address using MC_CMD_VADAPTOR_SET_MAC") +Acked-by: Edward Cree +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/sfc/ef10.c | 58 ++++++++++++++------------------- + 1 file changed, 24 insertions(+), 34 deletions(-) + +diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c +index 5b7413305be6..eb1be7302082 100644 +--- a/drivers/net/ethernet/sfc/ef10.c ++++ b/drivers/net/ethernet/sfc/ef10.c +@@ -3255,6 +3255,30 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) + bool was_enabled = efx->port_enabled; + int rc; + ++#ifdef CONFIG_SFC_SRIOV ++ /* If this function is a VF and we have access to the parent PF, ++ * then use the PF control path to attempt to change the VF MAC address. ++ */ ++ if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { ++ struct efx_nic *efx_pf = pci_get_drvdata(efx->pci_dev->physfn); ++ struct efx_ef10_nic_data *nic_data = efx->nic_data; ++ u8 mac[ETH_ALEN]; ++ ++ /* net_dev->dev_addr can be zeroed by efx_net_stop in ++ * efx_ef10_sriov_set_vf_mac, so pass in a copy. 
++ */ ++ ether_addr_copy(mac, efx->net_dev->dev_addr); ++ ++ rc = efx_ef10_sriov_set_vf_mac(efx_pf, nic_data->vf_index, mac); ++ if (!rc) ++ return 0; ++ ++ netif_dbg(efx, drv, efx->net_dev, ++ "Updating VF mac via PF failed (%d), setting directly\n", ++ rc); ++ } ++#endif ++ + efx_device_detach_sync(efx); + efx_net_stop(efx->net_dev); + +@@ -3277,40 +3301,6 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) + efx_net_open(efx->net_dev); + efx_device_attach_if_not_resetting(efx); + +-#ifdef CONFIG_SFC_SRIOV +- if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { +- struct efx_ef10_nic_data *nic_data = efx->nic_data; +- struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; +- +- if (rc == -EPERM) { +- struct efx_nic *efx_pf; +- +- /* Switch to PF and change MAC address on vport */ +- efx_pf = pci_get_drvdata(pci_dev_pf); +- +- rc = efx_ef10_sriov_set_vf_mac(efx_pf, +- nic_data->vf_index, +- efx->net_dev->dev_addr); +- } else if (!rc) { +- struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); +- struct efx_ef10_nic_data *nic_data = efx_pf->nic_data; +- unsigned int i; +- +- /* MAC address successfully changed by VF (with MAC +- * spoofing) so update the parent PF if possible. 
+- */ +- for (i = 0; i < efx_pf->vf_count; ++i) { +- struct ef10_vf *vf = nic_data->vf + i; +- +- if (vf->efx == efx) { +- ether_addr_copy(vf->mac, +- efx->net_dev->dev_addr); +- return 0; +- } +- } +- } +- } else +-#endif + if (rc == -EPERM) { + netif_err(efx, drv, efx->net_dev, + "Cannot change MAC address; use sfboot to enable" +-- +2.35.1 + diff --git a/queue-5.10/sfc-include-vport_id-in-filter-spec-hash-and-equal.patch b/queue-5.10/sfc-include-vport_id-in-filter-spec-hash-and-equal.patch new file mode 100644 index 00000000000..5fd477037fe --- /dev/null +++ b/queue-5.10/sfc-include-vport_id-in-filter-spec-hash-and-equal.patch @@ -0,0 +1,70 @@ +From 62bb027c64c869690a4be307a2421d93aea96c68 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 10:28:41 +0100 +Subject: sfc: include vport_id in filter spec hash and equal() + +From: Pieter Jansen van Vuuren + +[ Upstream commit c2bf23e4a5af37a4d77901d9ff14c50a269f143d ] + +Filters on different vports are qualified by different implicit MACs and/or +VLANs, so shouldn't be considered equal even if their other match fields +are identical. 
+ +Fixes: 7c460d9be610 ("sfc: Extend and abstract efx_filter_spec to cover Huntington/EF10") +Co-developed-by: Edward Cree +Signed-off-by: Edward Cree +Signed-off-by: Pieter Jansen van Vuuren +Reviewed-by: Martin Habets +Link: https://lore.kernel.org/r/20221018092841.32206-1-pieter.jansen-van-vuuren@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/sfc/filter.h | 3 ++- + drivers/net/ethernet/sfc/rx_common.c | 10 +++++----- + 2 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h +index 40b2af8bfb81..2ac3c8f1b04b 100644 +--- a/drivers/net/ethernet/sfc/filter.h ++++ b/drivers/net/ethernet/sfc/filter.h +@@ -157,7 +157,8 @@ struct efx_filter_spec { + u32 flags:6; + u32 dmaq_id:12; + u32 rss_context; +- __be16 outer_vid __aligned(4); /* allow jhash2() of match values */ ++ u32 vport_id; ++ __be16 outer_vid; + __be16 inner_vid; + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; +diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c +index 2c09afac5beb..36b46ddb6710 100644 +--- a/drivers/net/ethernet/sfc/rx_common.c ++++ b/drivers/net/ethernet/sfc/rx_common.c +@@ -676,17 +676,17 @@ bool efx_filter_spec_equal(const struct efx_filter_spec *left, + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + +- return memcmp(&left->outer_vid, &right->outer_vid, ++ return memcmp(&left->vport_id, &right->vport_id, + sizeof(struct efx_filter_spec) - +- offsetof(struct efx_filter_spec, outer_vid)) == 0; ++ offsetof(struct efx_filter_spec, vport_id)) == 0; + } + + u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) + { +- BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); +- return jhash2((const u32 *)&spec->outer_vid, ++ BUILD_BUG_ON(offsetof(struct efx_filter_spec, vport_id) & 3); ++ return jhash2((const u32 *)&spec->vport_id, + (sizeof(struct efx_filter_spec) - +- offsetof(struct efx_filter_spec, 
outer_vid)) / 4, ++ offsetof(struct efx_filter_spec, vport_id)) / 4, + 0); + } + +-- +2.35.1 + diff --git a/queue-5.10/sunrpc-add-xdr_set_scratch_page-and-xdr_reset_scratc.patch b/queue-5.10/sunrpc-add-xdr_set_scratch_page-and-xdr_reset_scratc.patch new file mode 100644 index 00000000000..d7503513116 --- /dev/null +++ b/queue-5.10/sunrpc-add-xdr_set_scratch_page-and-xdr_reset_scratc.patch @@ -0,0 +1,309 @@ +From 520c12fe7527e8c6fa85982c46d0d079fa704a08 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Nov 2020 15:52:47 -0500 +Subject: SUNRPC: Add xdr_set_scratch_page() and xdr_reset_scratch_buffer() + +From: Chuck Lever + +[ Upstream commit 0ae4c3e8a64ace1b8d7de033b0751afe43024416 ] + +Clean up: De-duplicate some frequently-used code. + +Signed-off-by: Chuck Lever +Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ") +Signed-off-by: Sasha Levin +--- + fs/nfs/blocklayout/blocklayout.c | 2 +- + fs/nfs/blocklayout/dev.c | 2 +- + fs/nfs/dir.c | 2 +- + fs/nfs/filelayout/filelayout.c | 2 +- + fs/nfs/filelayout/filelayoutdev.c | 2 +- + fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- + fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- + fs/nfs/nfs42xdr.c | 2 +- + fs/nfs/nfs4xdr.c | 6 ++-- + fs/nfsd/nfs4proc.c | 2 +- + include/linux/sunrpc/xdr.h | 44 ++++++++++++++++++++++- + net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +- + net/sunrpc/xdr.c | 28 +++------------ + 13 files changed, 59 insertions(+), 39 deletions(-) + +diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c +index 08108b6d2fa1..3be6836074ae 100644 +--- a/fs/nfs/blocklayout/blocklayout.c ++++ b/fs/nfs/blocklayout/blocklayout.c +@@ -697,7 +697,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, + + xdr_init_decode_pages(&xdr, &buf, + lgr->layoutp->pages, lgr->layoutp->len); +- xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(&xdr, scratch); + + status = -EIO; + p = 
xdr_inline_decode(&xdr, 4); +diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c +index dec5880ac6de..acb1d22907da 100644 +--- a/fs/nfs/blocklayout/dev.c ++++ b/fs/nfs/blocklayout/dev.c +@@ -510,7 +510,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, + goto out; + + xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); +- xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(&xdr, scratch); + + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (!p) +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index 9f88ca7b2001..935029632d5f 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -576,7 +576,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en + goto out_nopages; + + xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); +- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(&stream, scratch); + + do { + if (entry->label) +diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c +index ae5ed3a07494..d2103852475f 100644 +--- a/fs/nfs/filelayout/filelayout.c ++++ b/fs/nfs/filelayout/filelayout.c +@@ -666,7 +666,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, + return -ENOMEM; + + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); +- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(&stream, scratch); + + /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), + * num_fh (4) */ +diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c +index d913e818858f..86c3f7e69ec4 100644 +--- a/fs/nfs/filelayout/filelayoutdev.c ++++ b/fs/nfs/filelayout/filelayoutdev.c +@@ -82,7 +82,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, + goto out_err; + + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); +- xdr_set_scratch_buffer(&stream, 
page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(&stream, scratch); + + /* Get the stripe count (number of stripe index) */ + p = xdr_inline_decode(&stream, 4); +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index a8a02081942d..0200d96b8d5b 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -378,7 +378,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, + + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, + lgr->layoutp->len); +- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(&stream, scratch); + + /* stripe unit and mirror_array_cnt */ + rc = -EIO; +diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c +index 1f12297109b4..bfa7202ca7be 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c ++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c +@@ -69,7 +69,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, + INIT_LIST_HEAD(&dsaddrs); + + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); +- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(&stream, scratch); + + /* multipath count */ + p = xdr_inline_decode(&stream, 4); +diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c +index f2248d9d4db5..df5bee2f505c 100644 +--- a/fs/nfs/nfs42xdr.c ++++ b/fs/nfs/nfs42xdr.c +@@ -1536,7 +1536,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, + struct compound_hdr hdr; + int status; + +- xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE); ++ xdr_set_scratch_page(xdr, res->scratch); + + status = decode_compound_hdr(xdr, &hdr); + if (status) +diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c +index e2f0e3446e22..f21dc4284468 100644 +--- a/fs/nfs/nfs4xdr.c ++++ b/fs/nfs/nfs4xdr.c +@@ -6406,10 +6406,8 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream 
*xdr, + struct compound_hdr hdr; + int status; + +- if (res->acl_scratch != NULL) { +- void *p = page_address(res->acl_scratch); +- xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); +- } ++ if (res->acl_scratch != NULL) ++ xdr_set_scratch_page(xdr, res->acl_scratch); + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; +diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c +index 735ee8a79870..9aeeb51e8c61 100644 +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2274,7 +2274,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; +- xdr->scratch.iov_len = 0; ++ xdr_reset_scratch_buffer(xdr); + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - rqstp->rq_auth_slack; +diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h +index 6d9d1520612b..0c8cab6210b3 100644 +--- a/include/linux/sunrpc/xdr.h ++++ b/include/linux/sunrpc/xdr.h +@@ -246,7 +246,6 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, + __be32 *p, struct rpc_rqst *rqst); + extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len); +-extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); + extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); + extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); + extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); +@@ -254,6 +253,49 @@ extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned in + extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); + extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); + ++/** ++ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. 
++ * @xdr: pointer to xdr_stream struct ++ * @buf: pointer to an empty buffer ++ * @buflen: size of 'buf' ++ * ++ * The scratch buffer is used when decoding from an array of pages. ++ * If an xdr_inline_decode() call spans across page boundaries, then ++ * we copy the data into the scratch buffer in order to allow linear ++ * access. ++ */ ++static inline void ++xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) ++{ ++ xdr->scratch.iov_base = buf; ++ xdr->scratch.iov_len = buflen; ++} ++ ++/** ++ * xdr_set_scratch_page - Attach a scratch buffer for decoding data ++ * @xdr: pointer to xdr_stream struct ++ * @page: an anonymous page ++ * ++ * See xdr_set_scratch_buffer(). ++ */ ++static inline void ++xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) ++{ ++ xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); ++} ++ ++/** ++ * xdr_reset_scratch_buffer - Clear scratch buffer information ++ * @xdr: pointer to xdr_stream struct ++ * ++ * See xdr_set_scratch_buffer(). 
++ */ ++static inline void ++xdr_reset_scratch_buffer(struct xdr_stream *xdr) ++{ ++ xdr_set_scratch_buffer(xdr, NULL, 0); ++} ++ + /** + * xdr_stream_remaining - Return the number of bytes remaining in the stream + * @xdr: pointer to struct xdr_stream +diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c +index 2ff7b7083eba..c636c648849b 100644 +--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c ++++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c +@@ -789,7 +789,7 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, + scratch = alloc_page(GFP_KERNEL); + if (!scratch) + return -ENOMEM; +- xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE); ++ xdr_set_scratch_page(xdr, scratch); + + /* res->status */ + err = gssx_dec_status(xdr, &res->status); +diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c +index d84bb5037bb5..02adc5c7f034 100644 +--- a/net/sunrpc/xdr.c ++++ b/net/sunrpc/xdr.c +@@ -669,7 +669,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, + struct kvec *iov = buf->head; + int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; + +- xdr_set_scratch_buffer(xdr, NULL, 0); ++ xdr_reset_scratch_buffer(xdr); + BUG_ON(scratch_len < 0); + xdr->buf = buf; + xdr->iov = iov; +@@ -713,7 +713,7 @@ inline void xdr_commit_encode(struct xdr_stream *xdr) + page = page_address(*xdr->page_ptr); + memcpy(xdr->scratch.iov_base, page, shift); + memmove(page, page + shift, (void *)xdr->p - page); +- xdr->scratch.iov_len = 0; ++ xdr_reset_scratch_buffer(xdr); + } + EXPORT_SYMBOL_GPL(xdr_commit_encode); + +@@ -743,8 +743,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, + * the "scratch" iov to track any temporarily unused fragment of + * space at the end of the previous buffer: + */ +- xdr->scratch.iov_base = xdr->p; +- xdr->scratch.iov_len = frag1bytes; ++ xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes); + p = page_address(*xdr->page_ptr); + /* + * Note this is where the next encode will 
start after we've +@@ -1056,8 +1055,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, + struct rpc_rqst *rqst) + { + xdr->buf = buf; +- xdr->scratch.iov_base = NULL; +- xdr->scratch.iov_len = 0; ++ xdr_reset_scratch_buffer(xdr); + xdr->nwords = XDR_QUADLEN(buf->len); + if (buf->head[0].iov_len != 0) + xdr_set_iov(xdr, buf->head, buf->len); +@@ -1105,24 +1103,6 @@ static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) + return p; + } + +-/** +- * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. +- * @xdr: pointer to xdr_stream struct +- * @buf: pointer to an empty buffer +- * @buflen: size of 'buf' +- * +- * The scratch buffer is used when decoding from an array of pages. +- * If an xdr_inline_decode() call spans across page boundaries, then +- * we copy the data into the scratch buffer in order to allow linear +- * access. +- */ +-void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +-{ +- xdr->scratch.iov_base = buf; +- xdr->scratch.iov_len = buflen; +-} +-EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); +- + static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) + { + __be32 *p; +-- +2.35.1 + diff --git a/queue-5.10/sunrpc-prepare-for-xdr_stream-style-decoding-on-the-.patch b/queue-5.10/sunrpc-prepare-for-xdr_stream-style-decoding-on-the-.patch new file mode 100644 index 00000000000..70895e879fa --- /dev/null +++ b/queue-5.10/sunrpc-prepare-for-xdr_stream-style-decoding-on-the-.patch @@ -0,0 +1,103 @@ +From ba564cda1b99d035764257f9c3fc615b81efb780 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Nov 2020 11:19:42 -0500 +Subject: SUNRPC: Prepare for xdr_stream-style decoding on the server-side + +From: Chuck Lever + +[ Upstream commit 5191955d6fc65e6d4efe8f4f10a6028298f57281 ] + +A "permanent" struct xdr_stream is allocated in struct svc_rqst so +that it is usable by all server-side decoders. 
A per-rqst scratch +buffer is also allocated to handle decoding XDR data items that +cross page boundaries. + +To demonstrate how it will be used, add the first call site for the +new svcxdr_init_decode() API. + +As an additional part of the overall conversion, add symbolic +constants for successful and failed XDR operations. Returning "0" is +overloaded. Sometimes it means something failed, but sometimes it +means success. To make it more clear when XDR decoding functions +succeed or fail, introduce symbolic constants. + +Signed-off-by: Chuck Lever +Stable-dep-of: 401bc1f90874 ("NFSD: Protect against send buffer overflow in NFSv2 READ") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfssvc.c | 2 ++ + include/linux/sunrpc/svc.h | 16 ++++++++++++++++ + net/sunrpc/svc.c | 5 +++++ + 3 files changed, 23 insertions(+) + +diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c +index 9323e30a7eaf..ad6fedf37a40 100644 +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -1019,6 +1019,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + * (necessary in the NFSv4.0 compound case) + */ + rqstp->rq_cachetype = proc->pc_cachetype; ++ ++ svcxdr_init_decode(rqstp); + if (!proc->pc_decode(rqstp, argv->iov_base)) + goto out_decode_err; + +diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h +index 386628b36bc7..6ce2e12589cb 100644 +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -247,6 +247,8 @@ struct svc_rqst { + + size_t rq_xprt_hlen; /* xprt header len */ + struct xdr_buf rq_arg; ++ struct xdr_stream rq_arg_stream; ++ struct page *rq_scratch_page; + struct xdr_buf rq_res; + struct page *rq_pages[RPCSVC_MAXPAGES + 1]; + struct page * *rq_respages; /* points into rq_pages */ +@@ -557,4 +559,18 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) + svc_reserve(rqstp, space + rqstp->rq_auth_slack); + } + ++/** ++ * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding ++ * @rqstp: controlling server RPC 
transaction context ++ * ++ */ ++static inline void svcxdr_init_decode(struct svc_rqst *rqstp) ++{ ++ struct xdr_stream *xdr = &rqstp->rq_arg_stream; ++ struct kvec *argv = rqstp->rq_arg.head; ++ ++ xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); ++ xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); ++} ++ + #endif /* SUNRPC_SVC_H */ +diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c +index d38788cd9433..bb55f124b62e 100644 +--- a/net/sunrpc/svc.c ++++ b/net/sunrpc/svc.c +@@ -614,6 +614,10 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) + rqstp->rq_server = serv; + rqstp->rq_pool = pool; + ++ rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0); ++ if (!rqstp->rq_scratch_page) ++ goto out_enomem; ++ + rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); + if (!rqstp->rq_argp) + goto out_enomem; +@@ -842,6 +846,7 @@ void + svc_rqst_free(struct svc_rqst *rqstp) + { + svc_release_buffer(rqstp); ++ put_page(rqstp->rq_scratch_page); + kfree(rqstp->rq_resp); + kfree(rqstp->rq_argp); + kfree(rqstp->rq_auth_data); +-- +2.35.1 + diff --git a/queue-5.10/tcp-add-num_closed_socks-to-struct-sock_reuseport.patch b/queue-5.10/tcp-add-num_closed_socks-to-struct-sock_reuseport.patch new file mode 100644 index 00000000000..e55bd1da35b --- /dev/null +++ b/queue-5.10/tcp-add-num_closed_socks-to-struct-sock_reuseport.patch @@ -0,0 +1,208 @@ +From b1cdd1c781f9b8335f6b5c708bd99b180896c5bb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 12 Jun 2021 21:32:15 +0900 +Subject: tcp: Add num_closed_socks to struct sock_reuseport. + +From: Kuniyuki Iwashima + +[ Upstream commit 5c040eaf5d1753aafe12989ca712175df0b9c436 ] + +As noted in the following commit, a closed listener has to hold the +reference to the reuseport group for socket migration. This patch adds a +field (num_closed_socks) to struct sock_reuseport to manage closed sockets +within the same reuseport group. 
Moreover, this and the following commits +introduce some helper functions to split socks[] into two sections and keep +TCP_LISTEN and TCP_CLOSE sockets in each section. Like a double-ended +queue, we will place TCP_LISTEN sockets from the front and TCP_CLOSE +sockets from the end. + + TCP_LISTEN----------> <-------TCP_CLOSE + +---+---+ --- +---+ --- +---+ --- +---+ + | 0 | 1 | ... | i | ... | j | ... | k | + +---+---+ --- +---+ --- +---+ --- +---+ + + i = num_socks - 1 + j = max_socks - num_closed_socks + k = max_socks - 1 + +This patch also extends reuseport_add_sock() and reuseport_grow() to +support num_closed_socks. + +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Daniel Borkmann +Reviewed-by: Eric Dumazet +Acked-by: Martin KaFai Lau +Link: https://lore.kernel.org/bpf/20210612123224.12525-3-kuniyu@amazon.co.jp +Stable-dep-of: 69421bf98482 ("udp: Update reuse->has_conns under reuseport_lock.") +Signed-off-by: Sasha Levin +--- + include/net/sock_reuseport.h | 5 ++- + net/core/sock_reuseport.c | 75 +++++++++++++++++++++++++++--------- + 2 files changed, 60 insertions(+), 20 deletions(-) + +diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h +index 505f1e18e9bf..0e558ca7afbf 100644 +--- a/include/net/sock_reuseport.h ++++ b/include/net/sock_reuseport.h +@@ -13,8 +13,9 @@ extern spinlock_t reuseport_lock; + struct sock_reuseport { + struct rcu_head rcu; + +- u16 max_socks; /* length of socks */ +- u16 num_socks; /* elements in socks */ ++ u16 max_socks; /* length of socks */ ++ u16 num_socks; /* elements in socks */ ++ u16 num_closed_socks; /* closed elements in socks */ + /* The last synq overflow event timestamp of this + * reuse->socks[] group. 
+ */ +diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c +index b065f0a103ed..f478c65a281b 100644 +--- a/net/core/sock_reuseport.c ++++ b/net/core/sock_reuseport.c +@@ -18,6 +18,49 @@ DEFINE_SPINLOCK(reuseport_lock); + + static DEFINE_IDA(reuseport_ida); + ++static int reuseport_sock_index(struct sock *sk, ++ const struct sock_reuseport *reuse, ++ bool closed) ++{ ++ int left, right; ++ ++ if (!closed) { ++ left = 0; ++ right = reuse->num_socks; ++ } else { ++ left = reuse->max_socks - reuse->num_closed_socks; ++ right = reuse->max_socks; ++ } ++ ++ for (; left < right; left++) ++ if (reuse->socks[left] == sk) ++ return left; ++ return -1; ++} ++ ++static void __reuseport_add_sock(struct sock *sk, ++ struct sock_reuseport *reuse) ++{ ++ reuse->socks[reuse->num_socks] = sk; ++ /* paired with smp_rmb() in reuseport_select_sock() */ ++ smp_wmb(); ++ reuse->num_socks++; ++} ++ ++static bool __reuseport_detach_sock(struct sock *sk, ++ struct sock_reuseport *reuse) ++{ ++ int i = reuseport_sock_index(sk, reuse, false); ++ ++ if (i == -1) ++ return false; ++ ++ reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; ++ reuse->num_socks--; ++ ++ return true; ++} ++ + static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) + { + unsigned int size = sizeof(struct sock_reuseport) + +@@ -72,9 +115,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany) + } + + reuse->reuseport_id = id; ++ reuse->bind_inany = bind_inany; + reuse->socks[0] = sk; + reuse->num_socks = 1; +- reuse->bind_inany = bind_inany; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + out: +@@ -98,6 +141,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) + return NULL; + + more_reuse->num_socks = reuse->num_socks; ++ more_reuse->num_closed_socks = reuse->num_closed_socks; + more_reuse->prog = reuse->prog; + more_reuse->reuseport_id = reuse->reuseport_id; + more_reuse->bind_inany = reuse->bind_inany; +@@ -105,9 +149,13 @@ static struct 
sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) + + memcpy(more_reuse->socks, reuse->socks, + reuse->num_socks * sizeof(struct sock *)); ++ memcpy(more_reuse->socks + ++ (more_reuse->max_socks - more_reuse->num_closed_socks), ++ reuse->socks + (reuse->max_socks - reuse->num_closed_socks), ++ reuse->num_closed_socks * sizeof(struct sock *)); + more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts); + +- for (i = 0; i < reuse->num_socks; ++i) ++ for (i = 0; i < reuse->max_socks; ++i) + rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, + more_reuse); + +@@ -158,7 +206,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany) + return -EBUSY; + } + +- if (reuse->num_socks == reuse->max_socks) { ++ if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) { + reuse = reuseport_grow(reuse); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); +@@ -166,10 +214,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany) + } + } + +- reuse->socks[reuse->num_socks] = sk; +- /* paired with smp_rmb() in reuseport_select_sock() */ +- smp_wmb(); +- reuse->num_socks++; ++ __reuseport_add_sock(sk, reuse); + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + spin_unlock_bh(&reuseport_lock); +@@ -183,7 +228,6 @@ EXPORT_SYMBOL(reuseport_add_sock); + void reuseport_detach_sock(struct sock *sk) + { + struct sock_reuseport *reuse; +- int i; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, +@@ -200,16 +244,11 @@ void reuseport_detach_sock(struct sock *sk) + bpf_sk_reuseport_detach(sk); + + rcu_assign_pointer(sk->sk_reuseport_cb, NULL); ++ __reuseport_detach_sock(sk, reuse); ++ ++ if (reuse->num_socks + reuse->num_closed_socks == 0) ++ call_rcu(&reuse->rcu, reuseport_free_rcu); + +- for (i = 0; i < reuse->num_socks; i++) { +- if (reuse->socks[i] == sk) { +- reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; +- reuse->num_socks--; +- if (reuse->num_socks == 0) +- 
call_rcu(&reuse->rcu, reuseport_free_rcu); +- break; +- } +- } + spin_unlock_bh(&reuseport_lock); + } + EXPORT_SYMBOL(reuseport_detach_sock); +@@ -274,7 +313,7 @@ struct sock *reuseport_select_sock(struct sock *sk, + prog = rcu_dereference(reuse->prog); + socks = READ_ONCE(reuse->num_socks); + if (likely(socks)) { +- /* paired with smp_wmb() in reuseport_add_sock() */ ++ /* paired with smp_wmb() in __reuseport_add_sock() */ + smp_rmb(); + + if (!prog || !skb) +-- +2.35.1 + diff --git a/queue-5.10/tipc-fix-an-information-leak-in-tipc_topsrv_kern_sub.patch b/queue-5.10/tipc-fix-an-information-leak-in-tipc_topsrv_kern_sub.patch new file mode 100644 index 00000000000..32487b6d79a --- /dev/null +++ b/queue-5.10/tipc-fix-an-information-leak-in-tipc_topsrv_kern_sub.patch @@ -0,0 +1,87 @@ +From 680f5683bb4055bc0d86d2aa1961ec9e1d8f728e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Oct 2022 17:25:14 +0200 +Subject: tipc: fix an information leak in tipc_topsrv_kern_subscr + +From: Alexander Potapenko + +[ Upstream commit 777ecaabd614d47c482a5c9031579e66da13989a ] + +Use a 8-byte write to initialize sub.usr_handle in +tipc_topsrv_kern_subscr(), otherwise four bytes remain uninitialized +when issuing setsockopt(..., SOL_TIPC, ...). +This resulted in an infoleak reported by KMSAN when the packet was +received: + + ===================================================== + BUG: KMSAN: kernel-infoleak in copyout+0xbc/0x100 lib/iov_iter.c:169 + instrument_copy_to_user ./include/linux/instrumented.h:121 + copyout+0xbc/0x100 lib/iov_iter.c:169 + _copy_to_iter+0x5c0/0x20a0 lib/iov_iter.c:527 + copy_to_iter ./include/linux/uio.h:176 + simple_copy_to_iter+0x64/0xa0 net/core/datagram.c:513 + __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:419 + skb_copy_datagram_iter+0x58/0x200 net/core/datagram.c:527 + skb_copy_datagram_msg ./include/linux/skbuff.h:3903 + packet_recvmsg+0x521/0x1e70 net/packet/af_packet.c:3469 + ____sys_recvmsg+0x2c4/0x810 net/socket.c:? 
+ ___sys_recvmsg+0x217/0x840 net/socket.c:2743 + __sys_recvmsg net/socket.c:2773 + __do_sys_recvmsg net/socket.c:2783 + __se_sys_recvmsg net/socket.c:2780 + __x64_sys_recvmsg+0x364/0x540 net/socket.c:2780 + do_syscall_x64 arch/x86/entry/common.c:50 + do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120 + + ... + + Uninit was stored to memory at: + tipc_sub_subscribe+0x42d/0xb50 net/tipc/subscr.c:156 + tipc_conn_rcv_sub+0x246/0x620 net/tipc/topsrv.c:375 + tipc_topsrv_kern_subscr+0x2e8/0x400 net/tipc/topsrv.c:579 + tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190 + tipc_sk_join+0x2a8/0x770 net/tipc/socket.c:3084 + tipc_setsockopt+0xae5/0xe40 net/tipc/socket.c:3201 + __sys_setsockopt+0x87f/0xdc0 net/socket.c:2252 + __do_sys_setsockopt net/socket.c:2263 + __se_sys_setsockopt net/socket.c:2260 + __x64_sys_setsockopt+0xe0/0x160 net/socket.c:2260 + do_syscall_x64 arch/x86/entry/common.c:50 + do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120 + + Local variable sub created at: + tipc_topsrv_kern_subscr+0x57/0x400 net/tipc/topsrv.c:562 + tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190 + + Bytes 84-87 of 88 are uninitialized + Memory access of size 88 starts at ffff88801ed57cd0 + Data copied to user address 0000000020000400 + ... + ===================================================== + +Signed-off-by: Alexander Potapenko +Fixes: 026321c6d056a5 ("tipc: rename tipc_server to tipc_topsrv") +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/tipc/topsrv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c +index 13f3143609f9..d9e2c0fea3f2 100644 +--- a/net/tipc/topsrv.c ++++ b/net/tipc/topsrv.c +@@ -568,7 +568,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = filter; +- *(u32 *)&sub.usr_handle = port; ++ *(u64 *)&sub.usr_handle = (u64)port; + + con = tipc_conn_alloc(tipc_topsrv(net)); + if (IS_ERR(con)) +-- +2.35.1 + diff --git a/queue-5.10/tipc-fix-recognition-of-trial-period.patch b/queue-5.10/tipc-fix-recognition-of-trial-period.patch new file mode 100644 index 00000000000..3013c2ece40 --- /dev/null +++ b/queue-5.10/tipc-fix-recognition-of-trial-period.patch @@ -0,0 +1,39 @@ +From c7e813435954cb70adcbcad1079b3ae4db834d85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Oct 2022 15:46:13 +1300 +Subject: tipc: Fix recognition of trial period + +From: Mark Tomlinson + +[ Upstream commit 28be7ca4fcfd69a2d52aaa331adbf9dbe91f9e6e ] + +The trial period exists until jiffies is after addr_trial_end. But as +jiffies will eventually overflow, just using time_after will eventually +give incorrect results. As the node address is set once the trial period +ends, this can be used to know that we are not in the trial period. + +Fixes: e415577f57f4 ("tipc: correct discovery message handling during address trial period") +Signed-off-by: Mark Tomlinson +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/tipc/discover.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/tipc/discover.c b/net/tipc/discover.c +index 14bc20604051..2ae268b67465 100644 +--- a/net/tipc/discover.c ++++ b/net/tipc/discover.c +@@ -147,8 +147,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, + { + struct net *net = d->net; + struct tipc_net *tn = tipc_net(net); +- bool trial = time_before(jiffies, tn->addr_trial_end); + u32 self = tipc_own_addr(net); ++ bool trial = time_before(jiffies, tn->addr_trial_end) && !self; + + if (mtyp == DSC_TRIAL_FAIL_MSG) { + if (!trial) +-- +2.35.1 + diff --git a/queue-5.10/tracing-do-not-free-snapshot-if-tracer-is-on-cmdline.patch b/queue-5.10/tracing-do-not-free-snapshot-if-tracer-is-on-cmdline.patch new file mode 100644 index 00000000000..ba79529791f --- /dev/null +++ b/queue-5.10/tracing-do-not-free-snapshot-if-tracer-is-on-cmdline.patch @@ -0,0 +1,87 @@ +From 9eeabe2096e18c6b3c110aec18bdcb17fcc3d8a5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Oct 2022 11:37:57 -0400 +Subject: tracing: Do not free snapshot if tracer is on cmdline + +From: Steven Rostedt (Google) + +[ Upstream commit a541a9559bb0a8ecc434de01d3e4826c32e8bb53 ] + +The ftrace_boot_snapshot and alloc_snapshot cmdline options allocate the +snapshot buffer at boot up for use later. The ftrace_boot_snapshot in +particular requires the snapshot to be allocated because it will take a +snapshot at the end of boot up allowing to see the traces that happened +during boot so that it's not lost when user space takes over. + +When a tracer is registered (started) there's a path that checks if it +requires the snapshot buffer or not, and if it does not and it was +allocated it will do a synchronization and free the snapshot buffer. + +This is only required if the previous tracer was using it for "max +latency" snapshots, as it needs to make sure all max snapshots are +complete before freeing. 
But this is only needed if the previous tracer +was using the snapshot buffer for latency (like irqoff tracer and +friends). But it does not make sense to free it, if the previous tracer +was not using it, and the snapshot was allocated by the cmdline +parameters. This basically takes away the point of allocating it in the +first place! + +Note, the allocated snapshot worked fine for just trace events, but fails +when a tracer is enabled on the cmdline. + +Further investigation, this goes back even further and it does not require +a tracer on the cmdline to fail. Simply enable snapshots and then enable a +tracer, and it will remove the snapshot. + +Link: https://lkml.kernel.org/r/20221005113757.041df7fe@gandalf.local.home + +Cc: Masami Hiramatsu +Cc: Andrew Morton +Cc: stable@vger.kernel.org +Fixes: 45ad21ca5530 ("tracing: Have trace_array keep track if snapshot buffer is allocated") +Reported-by: Ross Zwisler +Tested-by: Ross Zwisler +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 870033f9c198..b7cb9147f0c5 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6008,12 +6008,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + if (tr->current_trace->reset) + tr->current_trace->reset(tr); + ++#ifdef CONFIG_TRACER_MAX_TRACE ++ had_max_tr = tr->current_trace->use_max_tr; ++ + /* Current trace needs to be nop_trace before synchronize_rcu */ + tr->current_trace = &nop_trace; + +-#ifdef CONFIG_TRACER_MAX_TRACE +- had_max_tr = tr->allocated_snapshot; +- + if (had_max_tr && !t->use_max_tr) { + /* + * We need to make sure that the update_max_tr sees that +@@ -6026,11 +6026,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + free_snapshot(tr); + } + +- if (t->use_max_tr && !had_max_tr) { ++ if (t->use_max_tr && !tr->allocated_snapshot) { + ret = 
tracing_alloc_snapshot_instance(tr); + if (ret < 0) + goto out; + } ++#else ++ tr->current_trace = &nop_trace; + #endif + + if (t->init) { +-- +2.35.1 + diff --git a/queue-5.10/tracing-simplify-conditional-compilation-code-in-tra.patch b/queue-5.10/tracing-simplify-conditional-compilation-code-in-tra.patch new file mode 100644 index 00000000000..5475503d584 --- /dev/null +++ b/queue-5.10/tracing-simplify-conditional-compilation-code-in-tra.patch @@ -0,0 +1,41 @@ +From c7f70e344e8cc18a5dcfd489c091922085641031 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Jun 2022 22:06:13 +0800 +Subject: tracing: Simplify conditional compilation code in + tracing_set_tracer() + +From: sunliming + +[ Upstream commit f4b0d318097e45cbac5e14976f8bb56aa2cef504 ] + +Two conditional compilation directives "#ifdef CONFIG_TRACER_MAX_TRACE" +are used consecutively, and no other code in between. Simplify conditional +the compilation code and only use one "#ifdef CONFIG_TRACER_MAX_TRACE". + +Link: https://lkml.kernel.org/r/20220602140613.545069-1-sunliming@kylinos.cn + +Signed-off-by: sunliming +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: a541a9559bb0 ("tracing: Do not free snapshot if tracer is on cmdline") +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index a5245362ce7a..870033f9c198 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6025,9 +6025,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + synchronize_rcu(); + free_snapshot(tr); + } +-#endif + +-#ifdef CONFIG_TRACER_MAX_TRACE + if (t->use_max_tr && !had_max_tr) { + ret = tracing_alloc_snapshot_instance(tr); + if (ret < 0) +-- +2.35.1 + diff --git a/queue-5.10/udp-update-reuse-has_conns-under-reuseport_lock.patch b/queue-5.10/udp-update-reuse-has_conns-under-reuseport_lock.patch new file mode 100644 index 00000000000..67916d54c39 --- /dev/null +++ 
b/queue-5.10/udp-update-reuse-has_conns-under-reuseport_lock.patch @@ -0,0 +1,193 @@ +From 4b84c215237ebacc8ee55b1ab4c829614b619842 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Oct 2022 11:26:25 -0700 +Subject: udp: Update reuse->has_conns under reuseport_lock. + +From: Kuniyuki Iwashima + +[ Upstream commit 69421bf98482d089e50799f45e48b25ce4a8d154 ] + +When we call connect() for a UDP socket in a reuseport group, we have +to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel +could select an unconnected socket wrongly for packets sent to the +connected socket. + +However, the current way to set has_conns is illegal and possible to +trigger that problem. reuseport_has_conns() changes has_conns under +rcu_read_lock(), which upgrades the RCU reader to the updater. Then, +it must do the update under the updater's lock, reuseport_lock, but +it doesn't for now. + +For this reason, there is a race below where we fail to set has_conns +resulting in the wrong socket selection. To avoid the race, let's split +the reader and updater with proper locking. + + cpu1 cpu2 ++----+ +----+ + +__ip[46]_datagram_connect() reuseport_grow() +. . +|- reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size) +| . | +| |- rcu_read_lock() +| |- reuse = rcu_dereference(sk->sk_reuseport_cb) +| | +| | | /* reuse->has_conns == 0 here */ +| | |- more_reuse->has_conns = reuse->has_conns +| |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */ +| | | +| | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, +| | | more_reuse) +| `- rcu_read_unlock() `- kfree_rcu(reuse, rcu) +| +|- sk->sk_state = TCP_ESTABLISHED + +Note the likely(reuse) in reuseport_has_conns_set() is always true, +but we put the test there for ease of review. [0] + +For the record, usually, sk_reuseport_cb is changed under lock_sock(). +The only exception is reuseport_grow() & TCP reqsk migration case.
+ + 1) shutdown() TCP listener, which is moved into the latter part of + reuse->socks[] to migrate reqsk. + + 2) New listen() overflows reuse->socks[] and call reuseport_grow(). + + 3) reuse->max_socks overflows u16 with the new listener. + + 4) reuseport_grow() pops the old shutdown()ed listener from the array + and update its sk->sk_reuseport_cb as NULL without lock_sock(). + +shutdown()ed TCP sk->sk_reuseport_cb can be changed without lock_sock(), +but, reuseport_has_conns_set() is called only for UDP under lock_sock(), +so likely(reuse) never be false in reuseport_has_conns_set(). + +[0]: https://lore.kernel.org/netdev/CANn89iLja=eQHbsM_Ta2sQF0tOGU8vAGrh_izRuuHjuO1ouUag@mail.gmail.com/ + +Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20221014182625.89913-1-kuniyu@amazon.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/sock_reuseport.h | 11 +++++------ + net/core/sock_reuseport.c | 16 ++++++++++++++++ + net/ipv4/datagram.c | 2 +- + net/ipv4/udp.c | 2 +- + net/ipv6/datagram.c | 2 +- + net/ipv6/udp.c | 2 +- + 6 files changed, 25 insertions(+), 10 deletions(-) + +diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h +index 0e558ca7afbf..6348c6f26903 100644 +--- a/include/net/sock_reuseport.h ++++ b/include/net/sock_reuseport.h +@@ -39,21 +39,20 @@ extern struct sock *reuseport_select_sock(struct sock *sk, + extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); + extern int reuseport_detach_prog(struct sock *sk); + +-static inline bool reuseport_has_conns(struct sock *sk, bool set) ++static inline bool reuseport_has_conns(struct sock *sk) + { + struct sock_reuseport *reuse; + bool ret = false; + + rcu_read_lock(); + reuse = rcu_dereference(sk->sk_reuseport_cb); +- if (reuse) { +- if (set) +- reuse->has_conns = 1; +- ret = reuse->has_conns; +- } ++ if (reuse && reuse->has_conns) ++ ret = true; + 
rcu_read_unlock(); + + return ret; + } + ++void reuseport_has_conns_set(struct sock *sk); ++ + #endif /* _SOCK_REUSEPORT_H */ +diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c +index f478c65a281b..364cf6c6912b 100644 +--- a/net/core/sock_reuseport.c ++++ b/net/core/sock_reuseport.c +@@ -18,6 +18,22 @@ DEFINE_SPINLOCK(reuseport_lock); + + static DEFINE_IDA(reuseport_ida); + ++void reuseport_has_conns_set(struct sock *sk) ++{ ++ struct sock_reuseport *reuse; ++ ++ if (!rcu_access_pointer(sk->sk_reuseport_cb)) ++ return; ++ ++ spin_lock_bh(&reuseport_lock); ++ reuse = rcu_dereference_protected(sk->sk_reuseport_cb, ++ lockdep_is_held(&reuseport_lock)); ++ if (likely(reuse)) ++ reuse->has_conns = 1; ++ spin_unlock_bh(&reuseport_lock); ++} ++EXPORT_SYMBOL(reuseport_has_conns_set); ++ + static int reuseport_sock_index(struct sock *sk, + const struct sock_reuseport *reuse, + bool closed) +diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c +index 4a8550c49202..112c6e892d30 100644 +--- a/net/ipv4/datagram.c ++++ b/net/ipv4/datagram.c +@@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len + } + inet->inet_daddr = fl4->daddr; + inet->inet_dport = usin->sin_port; +- reuseport_has_conns(sk, true); ++ reuseport_has_conns_set(sk); + sk->sk_state = TCP_ESTABLISHED; + sk_set_txhash(sk); + inet->inet_id = prandom_u32(); +diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c +index 4446aa8237ff..b093daaa3deb 100644 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -446,7 +446,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + /* Fall back to scoring if group has connections */ +- if (result && !reuseport_has_conns(sk, false)) ++ if (result && !reuseport_has_conns(sk)) + return result; + + result = result ? 
: sk; +diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c +index 206f66310a88..f4559e5bc84b 100644 +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, + goto out; + } + +- reuseport_has_conns(sk, true); ++ reuseport_has_conns_set(sk); + sk->sk_state = TCP_ESTABLISHED; + sk_set_txhash(sk); + out: +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index 9b504bf49214..514e6a55959f 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -179,7 +179,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + /* Fall back to scoring if group has connections */ +- if (result && !reuseport_has_conns(sk, false)) ++ if (result && !reuseport_has_conns(sk)) + return result; + + result = result ? : sk; +-- +2.35.1 + diff --git a/queue-5.10/usb-add-reset_resume-quirk-for-nvidia-jetson-devices.patch b/queue-5.10/usb-add-reset_resume-quirk-for-nvidia-jetson-devices.patch new file mode 100644 index 00000000000..1e6f50562ff --- /dev/null +++ b/queue-5.10/usb-add-reset_resume-quirk-for-nvidia-jetson-devices.patch @@ -0,0 +1,54 @@ +From 15bd6fce2c8be3bc43ba3d6e682eb958e2ad5800 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Sep 2022 20:16:10 +0300 +Subject: USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM + +From: Hannu Hartikainen + +[ Upstream commit fc4ade55c617dc73c7e9756b57f3230b4ff24540 ] + +NVIDIA Jetson devices in Force Recovery mode (RCM) do not support +suspending, ie. flashing fails if the device has been suspended. The +devices are still visible in lsusb and seem to work otherwise, making +the issue hard to debug. This has been discovered in various forum +posts, eg. [1]. + +The patch has been tested on NVIDIA Jetson AGX Xavier, but I'm adding +all the Jetson models listed in [2] on the assumption that they all +behave similarly. 
+ +[1]: https://forums.developer.nvidia.com/t/flashing-not-working/72365 +[2]: https://docs.nvidia.com/jetson/archives/l4t-archived/l4t-3271/index.html#page/Tegra%20Linux%20Driver%20Package%20Development%20Guide/quick_start.html + +Signed-off-by: Hannu Hartikainen +Cc: stable # after 6.1-rc3 +Link: https://lore.kernel.org/r/20220919171610.30484-1-hannu@hrtk.in +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/core/quirks.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c +index 03473e20e218..eb3ea45d5d13 100644 +--- a/drivers/usb/core/quirks.c ++++ b/drivers/usb/core/quirks.c +@@ -388,6 +388,15 @@ static const struct usb_device_id usb_quirk_list[] = { + /* Kingston DataTraveler 3.0 */ + { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM }, + ++ /* NVIDIA Jetson devices in Force Recovery mode */ ++ { USB_DEVICE(0x0955, 0x7018), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7019), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7418), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7721), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7c18), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7e19), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7f21), .driver_info = USB_QUIRK_RESET_RESUME }, ++ + /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ + { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, + +-- +2.35.1 + diff --git a/queue-5.10/writeback-avoid-skipping-inode-writeback.patch b/queue-5.10/writeback-avoid-skipping-inode-writeback.patch new file mode 100644 index 00000000000..9286c51a9b3 --- /dev/null +++ b/queue-5.10/writeback-avoid-skipping-inode-writeback.patch @@ -0,0 +1,74 @@ +From c48b354b20bb7b07b2e92f5e3a77a4a22cec038e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 10:35:14 +0800 
+Subject: writeback: Avoid skipping inode writeback + +From: Jing Xia + +[ Upstream commit 846a3351ddfe4a86eede4bb26a205c3f38ef84d3 ] + +We have run into an issue that a task gets stuck in +balance_dirty_pages_ratelimited() when performing I/O stress testing. +The reason we observed is that an I_DIRTY_PAGES inode with lots +of dirty pages is in b_dirty_time list and standard background +writeback cannot writeback the inode. +After studying the relevant code, the following scenario may lead +to the issue: + +task1 task2 +----- ----- +fuse_flush + write_inode_now //in b_dirty_time + writeback_single_inode + __writeback_single_inode + fuse_write_end + filemap_dirty_folio + __xa_set_mark:PAGECACHE_TAG_DIRTY + lock inode->i_lock + if mapping tagged PAGECACHE_TAG_DIRTY + inode->i_state |= I_DIRTY_PAGES + unlock inode->i_lock + __mark_inode_dirty:I_DIRTY_PAGES + lock inode->i_lock + -was dirty,inode stays in + -b_dirty_time + unlock inode->i_lock + + if(!(inode->i_state & I_DIRTY_ALL)) + -not true,so nothing done + +This patch moves the dirty inode to b_dirty list when the inode +currently is not queued in b_io or b_more_io list at the end of +writeback_single_inode.
+ +Reviewed-by: Jan Kara +Reviewed-by: Christoph Hellwig +CC: stable@vger.kernel.org +Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") +Signed-off-by: Jing Xia +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220510023514.27399-1-jing.xia@unisoc.com +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 489514bcd7e1..645e3f6ffe44 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -1612,6 +1612,10 @@ static int writeback_single_inode(struct inode *inode, + */ + if (!(inode->i_state & I_DIRTY_ALL)) + inode_cgwb_move_to_attached(inode, wb); ++ else if (!(inode->i_state & I_SYNC_QUEUED) && ++ (inode->i_state & I_DIRTY)) ++ redirty_tail_locked(inode, wb); ++ + spin_unlock(&wb->list_lock); + inode_sync_complete(inode); + out: +-- +2.35.1 + diff --git a/queue-5.10/writeback-cgroup-keep-list-of-inodes-attached-to-bdi.patch b/queue-5.10/writeback-cgroup-keep-list-of-inodes-attached-to-bdi.patch new file mode 100644 index 00000000000..8ae3779898a --- /dev/null +++ b/queue-5.10/writeback-cgroup-keep-list-of-inodes-attached-to-bdi.patch @@ -0,0 +1,231 @@ +From 6ebfdd72f515b6ccb3f7f3900a4f12c715dd8290 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Jun 2021 19:35:53 -0700 +Subject: writeback, cgroup: keep list of inodes attached to bdi_writeback + +From: Roman Gushchin + +[ Upstream commit f3b6a6df38aa514d97e8c6fcc748be1d4142bec9 ] + +Currently there is no way to iterate over inodes attached to a specific +cgwb structure. It limits the ability to efficiently reclaim the +writeback structure itself and associated memory and block cgroup +structures without scanning all inodes belonging to a sb, which can be +prohibitively expensive. 
+ +While dirty/in-active-writeback an inode belongs to one of the +bdi_writeback's io lists: b_dirty, b_io, b_more_io and b_dirty_time. Once +cleaned up, it's removed from all io lists. So the inode->i_io_list can +be reused to maintain the list of inodes, attached to a bdi_writeback +structure. + +This patch introduces a new wb->b_attached list, which contains all inodes +which were dirty at least once and are attached to the given cgwb. Inodes +attached to the root bdi_writeback structures are never placed on such +list. The following patch will use this list to try to release cgwbs +structures more efficiently. + +Link: https://lkml.kernel.org/r/20210608230225.2078447-6-guro@fb.com +Signed-off-by: Roman Gushchin +Suggested-by: Jan Kara +Reviewed-by: Jan Kara +Acked-by: Tejun Heo +Acked-by: Dennis Zhou +Cc: Alexander Viro +Cc: Dave Chinner +Cc: Jan Kara +Cc: Jens Axboe +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 93 ++++++++++++++++++++------------ + include/linux/backing-dev-defs.h | 1 + + mm/backing-dev.c | 2 + + 3 files changed, 62 insertions(+), 34 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 71043e847e7c..489514bcd7e1 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -131,25 +131,6 @@ static bool inode_io_list_move_locked(struct inode *inode, + return false; + } + +-/** +- * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list +- * @inode: inode to be removed +- * @wb: bdi_writeback @inode is being removed from +- * +- * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and +- * clear %WB_has_dirty_io if all are empty afterwards. 
+- */ +-static void inode_io_list_del_locked(struct inode *inode, +- struct bdi_writeback *wb) +-{ +- assert_spin_locked(&wb->list_lock); +- assert_spin_locked(&inode->i_lock); +- +- inode->i_state &= ~I_SYNC_QUEUED; +- list_del_init(&inode->i_io_list); +- wb_io_lists_depopulated(wb); +-} +- + static void wb_wakeup(struct bdi_writeback *wb) + { + spin_lock_bh(&wb->work_lock); +@@ -278,6 +259,28 @@ void __inode_attach_wb(struct inode *inode, struct page *page) + } + EXPORT_SYMBOL_GPL(__inode_attach_wb); + ++/** ++ * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list ++ * @inode: inode of interest with i_lock held ++ * @wb: target bdi_writeback ++ * ++ * Remove the inode from wb's io lists and if necessarily put onto b_attached ++ * list. Only inodes attached to cgwb's are kept on this list. ++ */ ++static void inode_cgwb_move_to_attached(struct inode *inode, ++ struct bdi_writeback *wb) ++{ ++ assert_spin_locked(&wb->list_lock); ++ assert_spin_locked(&inode->i_lock); ++ ++ inode->i_state &= ~I_SYNC_QUEUED; ++ if (wb != &wb->bdi->wb) ++ list_move(&inode->i_io_list, &wb->b_attached); ++ else ++ list_del_init(&inode->i_io_list); ++ wb_io_lists_depopulated(wb); ++} ++ + /** + * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it + * @inode: inode of interest with i_lock held +@@ -419,21 +422,28 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) + wb_get(new_wb); + + /* +- * Transfer to @new_wb's IO list if necessary. The specific list +- * @inode was on is ignored and the inode is put on ->b_dirty which +- * is always correct including from ->b_dirty_time. The transfer +- * preserves @inode->dirtied_when ordering. ++ * Transfer to @new_wb's IO list if necessary. If the @inode is dirty, ++ * the specific list @inode was on is ignored and the @inode is put on ++ * ->b_dirty which is always correct including from ->b_dirty_time. ++ * The transfer preserves @inode->dirtied_when ordering. 
If the @inode ++ * was clean, it means it was on the b_attached list, so move it onto ++ * the b_attached list of @new_wb. + */ + if (!list_empty(&inode->i_io_list)) { +- struct inode *pos; +- +- inode_io_list_del_locked(inode, old_wb); + inode->i_wb = new_wb; +- list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) +- if (time_after_eq(inode->dirtied_when, +- pos->dirtied_when)) +- break; +- inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev); ++ ++ if (inode->i_state & I_DIRTY_ALL) { ++ struct inode *pos; ++ ++ list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) ++ if (time_after_eq(inode->dirtied_when, ++ pos->dirtied_when)) ++ break; ++ inode_io_list_move_locked(inode, new_wb, ++ pos->i_io_list.prev); ++ } else { ++ inode_cgwb_move_to_attached(inode, new_wb); ++ } + } else { + inode->i_wb = new_wb; + } +@@ -1030,6 +1040,17 @@ fs_initcall(cgroup_writeback_init); + static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } + static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } + ++static void inode_cgwb_move_to_attached(struct inode *inode, ++ struct bdi_writeback *wb) ++{ ++ assert_spin_locked(&wb->list_lock); ++ assert_spin_locked(&inode->i_lock); ++ ++ inode->i_state &= ~I_SYNC_QUEUED; ++ list_del_init(&inode->i_io_list); ++ wb_io_lists_depopulated(wb); ++} ++ + static struct bdi_writeback * + locked_inode_to_wb_and_lock_list(struct inode *inode) + __releases(&inode->i_lock) +@@ -1130,7 +1151,11 @@ void inode_io_list_del(struct inode *inode) + + wb = inode_to_wb_and_lock_list(inode); + spin_lock(&inode->i_lock); +- inode_io_list_del_locked(inode, wb); ++ ++ inode->i_state &= ~I_SYNC_QUEUED; ++ list_del_init(&inode->i_io_list); ++ wb_io_lists_depopulated(wb); ++ + spin_unlock(&inode->i_lock); + spin_unlock(&wb->list_lock); + } +@@ -1443,7 +1468,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, + inode->i_state &= ~I_SYNC_QUEUED; + } else { + /* The inode is clean. 
Remove from writeback lists. */ +- inode_io_list_del_locked(inode, wb); ++ inode_cgwb_move_to_attached(inode, wb); + } + } + +@@ -1586,7 +1611,7 @@ static int writeback_single_inode(struct inode *inode, + * touch it. See comment above for explanation. + */ + if (!(inode->i_state & I_DIRTY_ALL)) +- inode_io_list_del_locked(inode, wb); ++ inode_cgwb_move_to_attached(inode, wb); + spin_unlock(&wb->list_lock); + inode_sync_complete(inode); + out: +diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h +index fff9367a6348..e5dc238ebe4f 100644 +--- a/include/linux/backing-dev-defs.h ++++ b/include/linux/backing-dev-defs.h +@@ -154,6 +154,7 @@ struct bdi_writeback { + struct cgroup_subsys_state *blkcg_css; /* and blkcg */ + struct list_head memcg_node; /* anchored at memcg->cgwb_list */ + struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ ++ struct list_head b_attached; /* attached inodes, protected by list_lock */ + + union { + struct work_struct release_work; +diff --git a/mm/backing-dev.c b/mm/backing-dev.c +index ca770a783a9f..1c1b44fcaf7d 100644 +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -397,6 +397,7 @@ static void cgwb_release_workfn(struct work_struct *work) + fprop_local_destroy_percpu(&wb->memcg_completions); + percpu_ref_exit(&wb->refcnt); + wb_exit(wb); ++ WARN_ON_ONCE(!list_empty(&wb->b_attached)); + kfree_rcu(wb, rcu); + } + +@@ -473,6 +474,7 @@ static int cgwb_create(struct backing_dev_info *bdi, + + wb->memcg_css = memcg_css; + wb->blkcg_css = blkcg_css; ++ INIT_LIST_HEAD(&wb->b_attached); + INIT_WORK(&wb->release_work, cgwb_release_workfn); + set_bit(WB_registered, &wb->state); + +-- +2.35.1 + diff --git a/queue-5.10/writeback-don-t-warn-on-an-unregistered-bdi-in-__mar.patch b/queue-5.10/writeback-don-t-warn-on-an-unregistered-bdi-in-__mar.patch new file mode 100644 index 00000000000..ccfd10ecfdf --- /dev/null +++ b/queue-5.10/writeback-don-t-warn-on-an-unregistered-bdi-in-__mar.patch @@ -0,0 +1,41 @@ +From 
4854300901e994f74b1801192c89cbab97d1a9da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Sep 2020 14:26:13 +0200 +Subject: writeback: don't warn on an unregistered BDI in __mark_inode_dirty + +From: Christoph Hellwig + +[ Upstream commit f7387170339afb473a0d95b7732f904346f9795e ] + +BDIs get unregistered during device removal, and this WARN can be +trivially triggered by hot-removing a NVMe device while running fsx +It is otherwise harmless as we still hold a BDI reference, and the +writeback has been shut down already. + +Link: https://lore.kernel.org/r/20200928122613.434820-1-hch@lst.de +Signed-off-by: Christoph Hellwig +Signed-off-by: Jan Kara +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 46c15dd2405c..2011199476ea 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -2307,10 +2307,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) + + wb = locked_inode_to_wb_and_lock_list(inode); + +- WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) && +- !test_bit(WB_registered, &wb->state), +- "bdi-%s not registered\n", bdi_dev_name(wb->bdi)); +- + inode->dirtied_when = jiffies; + if (dirtytime) + inode->dirtied_time_when = jiffies; +-- +2.35.1 + diff --git a/queue-5.10/writeback-fix-inode-i_io_list-not-be-protected-by-in.patch b/queue-5.10/writeback-fix-inode-i_io_list-not-be-protected-by-in.patch new file mode 100644 index 00000000000..e96e2a80e94 --- /dev/null +++ b/queue-5.10/writeback-fix-inode-i_io_list-not-be-protected-by-in.patch @@ -0,0 +1,171 @@ +From a6d977e8d36035d60883d72a9a881a77520bfc03 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 May 2022 08:05:40 -0700 +Subject: writeback: Fix inode->i_io_list not be protected by inode->i_lock + error + +From: Jchao Sun + +[ Upstream commit 10e14073107dd0b6d97d9516a02845a8e501c2c9 
] + +Commit b35250c0816c ("writeback: Protect inode->i_io_list with +inode->i_lock") made inode->i_io_list not only protected by +wb->list_lock but also inode->i_lock, but inode_io_list_move_locked() +was missed. Add lock there and also update comment describing +things protected by inode->i_lock. This also fixes a race where +__mark_inode_dirty() could move inode under flush worker's hands +and thus sync(2) could miss writing some inodes. + +Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock") +Link: https://lore.kernel.org/r/20220524150540.12552-1-sunjunchao2870@gmail.com +CC: stable@vger.kernel.org +Signed-off-by: Jchao Sun +Signed-off-by: Jan Kara +Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 37 ++++++++++++++++++++++++++++--------- + fs/inode.c | 2 +- + 2 files changed, 29 insertions(+), 10 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 645e3f6ffe44..4c667662a4d9 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -120,6 +120,7 @@ static bool inode_io_list_move_locked(struct inode *inode, + struct list_head *head) + { + assert_spin_locked(&wb->list_lock); ++ assert_spin_locked(&inode->i_lock); + + list_move(&inode->i_io_list, head); + +@@ -1282,9 +1283,9 @@ static int move_expired_inodes(struct list_head *delaying_queue, + inode = wb_inode(delaying_queue->prev); + if (inode_dirtied_after(inode, dirtied_before)) + break; ++ spin_lock(&inode->i_lock); + list_move(&inode->i_io_list, &tmp); + moved++; +- spin_lock(&inode->i_lock); + inode->i_state |= I_SYNC_QUEUED; + spin_unlock(&inode->i_lock); + if (sb_is_blkdev_sb(inode->i_sb)) +@@ -1300,7 +1301,12 @@ static int move_expired_inodes(struct list_head *delaying_queue, + goto out; + } + +- /* Move inodes from one superblock together */ ++ /* ++ * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue', ++ * we don't take inode->i_lock here 
because it is just a pointless overhead. ++ * Inode is already marked as I_SYNC_QUEUED so writeback list handling is ++ * fully under our control. ++ */ + while (!list_empty(&tmp)) { + sb = wb_inode(tmp.prev)->i_sb; + list_for_each_prev_safe(pos, node, &tmp) { +@@ -1726,8 +1732,8 @@ static long writeback_sb_inodes(struct super_block *sb, + * We'll have another go at writing back this inode + * when we completed a full scan of b_io. + */ +- spin_unlock(&inode->i_lock); + requeue_io(inode, wb); ++ spin_unlock(&inode->i_lock); + trace_writeback_sb_inodes_requeue(inode); + continue; + } +@@ -2265,6 +2271,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) + { + struct super_block *sb = inode->i_sb; + int dirtytime = 0; ++ struct bdi_writeback *wb = NULL; + + trace_writeback_mark_inode_dirty(inode, flags); + +@@ -2316,6 +2323,17 @@ void __mark_inode_dirty(struct inode *inode, int flags) + inode->i_state &= ~I_DIRTY_TIME; + inode->i_state |= flags; + ++ /* ++ * Grab inode's wb early because it requires dropping i_lock and we ++ * need to make sure following checks happen atomically with dirty ++ * list handling so that we don't move inodes under flush worker's ++ * hands. ++ */ ++ if (!was_dirty) { ++ wb = locked_inode_to_wb_and_lock_list(inode); ++ spin_lock(&inode->i_lock); ++ } ++ + /* + * If the inode is queued for writeback by flush worker, just + * update its dirty state. Once the flush worker is done with +@@ -2323,7 +2341,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) + * list, based upon its state. 
+ */ + if (inode->i_state & I_SYNC_QUEUED) +- goto out_unlock_inode; ++ goto out_unlock; + + /* + * Only add valid (hashed) inodes to the superblock's +@@ -2331,22 +2349,19 @@ void __mark_inode_dirty(struct inode *inode, int flags) + */ + if (!S_ISBLK(inode->i_mode)) { + if (inode_unhashed(inode)) +- goto out_unlock_inode; ++ goto out_unlock; + } + if (inode->i_state & I_FREEING) +- goto out_unlock_inode; ++ goto out_unlock; + + /* + * If the inode was already on b_dirty/b_io/b_more_io, don't + * reposition it (that would break b_dirty time-ordering). + */ + if (!was_dirty) { +- struct bdi_writeback *wb; + struct list_head *dirty_list; + bool wakeup_bdi = false; + +- wb = locked_inode_to_wb_and_lock_list(inode); +- + inode->dirtied_when = jiffies; + if (dirtytime) + inode->dirtied_time_when = jiffies; +@@ -2360,6 +2375,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) + dirty_list); + + spin_unlock(&wb->list_lock); ++ spin_unlock(&inode->i_lock); + trace_writeback_dirty_inode_enqueue(inode); + + /* +@@ -2374,6 +2390,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) + return; + } + } ++out_unlock: ++ if (wb) ++ spin_unlock(&wb->list_lock); + out_unlock_inode: + spin_unlock(&inode->i_lock); + } +diff --git a/fs/inode.c b/fs/inode.c +index 9f49e0bdc2f7..51726f2ad994 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -28,7 +28,7 @@ + * Inode locking rules: + * + * inode->i_lock protects: +- * inode->i_state, inode->i_hash, __iget() ++ * inode->i_state, inode->i_hash, __iget(), inode->i_io_list + * Inode LRU list locks protect: + * inode->i_sb->s_inode_lru, inode->i_lru + * inode->i_sb->s_inode_list_lock protects: +-- +2.35.1 + diff --git a/queue-5.10/xen-assume-xenfeat_gnttab_map_avail_bits-being-set-f.patch b/queue-5.10/xen-assume-xenfeat_gnttab_map_avail_bits-being-set-f.patch new file mode 100644 index 00000000000..a2477147f74 --- /dev/null +++ b/queue-5.10/xen-assume-xenfeat_gnttab_map_avail_bits-being-set-f.patch @@ -0,0 +1,92 @@ +From 
d6435ea3208226d59ffeae497311f5ad7759d1d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Jul 2021 09:18:04 +0200 +Subject: xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests + +From: Juergen Gross + +[ Upstream commit 30dcc56bba911db561c35d4131baf983a41023f8 ] + +XENFEAT_gnttab_map_avail_bits is always set in Xen 4.0 and newer. +Remove coding assuming it might be zero. + +Signed-off-by: Juergen Gross +Acked-by: Peter Zijlstra (Intel) +Reviewed-by: Boris Ostrovsky +Link: https://lore.kernel.org/r/20210730071804.4302-4-jgross@suse.com +Signed-off-by: Juergen Gross +Stable-dep-of: 5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting") +Signed-off-by: Sasha Levin +--- + drivers/xen/gntdev.c | 36 ++---------------------------------- + 1 file changed, 2 insertions(+), 34 deletions(-) + +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c +index 54fee4087bf1..5dd9d1ac755e 100644 +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -289,20 +289,13 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) + { + struct gntdev_grant_map *map = data; + unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; +- int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; ++ int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte | ++ (1 << _GNTMAP_guest_avail0); + u64 pte_maddr; + + BUG_ON(pgnr >= map->count); + pte_maddr = arbitrary_virt_to_machine(pte).maddr; + +- /* +- * Set the PTE as special to force get_user_pages_fast() fall +- * back to the slow path. If this is not supported as part of +- * the grant map, it will be done afterwards. 
+- */ +- if (xen_feature(XENFEAT_gnttab_map_avail_bits)) +- flags |= (1 << _GNTMAP_guest_avail0); +- + gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, + map->grants[pgnr].ref, + map->grants[pgnr].domid); +@@ -311,14 +304,6 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) + return 0; + } + +-#ifdef CONFIG_X86 +-static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data) +-{ +- set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); +- return 0; +-} +-#endif +- + int gntdev_map_grant_pages(struct gntdev_grant_map *map) + { + size_t alloced = 0; +@@ -1102,23 +1087,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) + err = vm_map_pages_zero(vma, map->pages, map->count); + if (err) + goto out_put_map; +- } else { +-#ifdef CONFIG_X86 +- /* +- * If the PTEs were not made special by the grant map +- * hypercall, do so here. +- * +- * This is racy since the mapping is already visible +- * to userspace but userspace should be well-behaved +- * enough to not touch it until the mmap() call +- * returns. +- */ +- if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) { +- apply_to_page_range(vma->vm_mm, vma->vm_start, +- vma->vm_end - vma->vm_start, +- set_grant_ptes_as_special, NULL); +- } +-#endif + } + + return 0; +-- +2.35.1 + diff --git a/queue-5.10/xen-gntdev-accommodate-vma-splitting.patch b/queue-5.10/xen-gntdev-accommodate-vma-splitting.patch new file mode 100644 index 00000000000..0380c41e3ac --- /dev/null +++ b/queue-5.10/xen-gntdev-accommodate-vma-splitting.patch @@ -0,0 +1,270 @@ +From e4fb93409b5f43ec8c06d049b1901a2e4b1f2870 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 2 Oct 2022 18:20:06 -0400 +Subject: xen/gntdev: Accommodate VMA splitting + +From: M. 
Vefa Bicakci + +[ Upstream commit 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c ] + +Prior to this commit, the gntdev driver code did not handle the +following scenario correctly with paravirtualized (PV) Xen domains: + +* User process sets up a gntdev mapping composed of two grant mappings + (i.e., two pages shared by another Xen domain). +* User process munmap()s one of the pages. +* User process munmap()s the remaining page. +* User process exits. + +In the scenario above, the user process would cause the kernel to log +the following messages in dmesg for the first munmap(), and the second +munmap() call would result in similar log messages: + + BUG: Bad page map in process doublemap.test pte:... pmd:... + page:0000000057c97bff refcount:1 mapcount:-1 \ + mapping:0000000000000000 index:0x0 pfn:... + ... + page dumped because: bad pte + ... + file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0 + ... + Call Trace: + + dump_stack_lvl+0x46/0x5e + print_bad_pte.cold+0x66/0xb6 + unmap_page_range+0x7e5/0xdc0 + unmap_vmas+0x78/0xf0 + unmap_region+0xa8/0x110 + __do_munmap+0x1ea/0x4e0 + __vm_munmap+0x75/0x120 + __x64_sys_munmap+0x28/0x40 + do_syscall_64+0x38/0x90 + entry_SYSCALL_64_after_hwframe+0x61/0xcb + ... + +For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG) +would print out the following and trigger a general protection fault in +the affected Xen PV domain: + + (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... + (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... + +As of this writing, gntdev_grant_map structure's vma field (referred to +as map->vma below) is mainly used for checking the start and end +addresses of mappings. However, with split VMAs, these may change, and +there could be more than one VMA associated with a gntdev mapping. +Hence, remove the use of map->vma and rely on map->pages_vm_start for +the original start address and on (map->count << PAGE_SHIFT) for the +original mapping size. 
Let the invalidate() and find_special_page() +hooks use these. + +Also, given that there can be multiple VMAs associated with a gntdev +mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to +the end of gntdev_put_map, so that the MMU notifier is only removed +after the closing of the last remaining VMA. + +Finally, use an atomic to prevent inadvertent gntdev mapping re-use, +instead of using the map->live_grants atomic counter and/or the map->vma +pointer (the latter of which is now removed). This prevents the +userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the +same address range as a previously set up gntdev mapping. This scenario +can be summarized with the following call-trace, which was valid prior +to this commit: + + mmap + gntdev_mmap + mmap (repeat mmap with MAP_FIXED over the same address range) + gntdev_invalidate + unmap_grant_pages (sets 'being_removed' entries to true) + gnttab_unmap_refs_async + unmap_single_vma + gntdev_mmap (maps the shared pages again) + munmap + gntdev_invalidate + unmap_grant_pages + (no-op because 'being_removed' entries are true) + unmap_single_vma (For PV domains, Xen reports that a granted page + is being unmapped and triggers a general protection fault in the + affected domain, if Xen was built with CONFIG_DEBUG) + +The fix for this last scenario could be worth its own commit, but we +opted for a single commit, because removing the gntdev_grant_map +structure's vma field requires guarding the entry to gntdev_mmap(), and +the live_grants atomic counter is not sufficient on its own to prevent +the mmap() over a pre-existing mapping. + +Link: https://github.com/QubesOS/qubes-issues/issues/7631 +Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages") +Cc: stable@vger.kernel.org +Signed-off-by: M. 
Vefa Bicakci +Reviewed-by: Juergen Gross +Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com +Signed-off-by: Juergen Gross +Signed-off-by: Sasha Levin +--- + drivers/xen/gntdev-common.h | 3 +- + drivers/xen/gntdev.c | 58 ++++++++++++++++--------------------- + 2 files changed, 27 insertions(+), 34 deletions(-) + +diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h +index 40ef379c28ab..9c286b2a1900 100644 +--- a/drivers/xen/gntdev-common.h ++++ b/drivers/xen/gntdev-common.h +@@ -44,9 +44,10 @@ struct gntdev_unmap_notify { + }; + + struct gntdev_grant_map { ++ atomic_t in_use; + struct mmu_interval_notifier notifier; ++ bool notifier_init; + struct list_head next; +- struct vm_area_struct *vma; + int index; + int count; + int flags; +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c +index 5dd9d1ac755e..ff195b571763 100644 +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -276,6 +276,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) + */ + } + ++ if (use_ptemod && map->notifier_init) ++ mmu_interval_notifier_remove(&map->notifier); ++ + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { + notify_remote_via_evtchn(map->notify.event); + evtchn_put(map->notify.event); +@@ -288,7 +291,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) + static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) + { + struct gntdev_grant_map *map = data; +- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; ++ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT; + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte | + (1 << _GNTMAP_guest_avail0); + u64 pte_maddr; +@@ -478,11 +481,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma) + struct gntdev_priv *priv = file->private_data; + + pr_debug("gntdev_vma_close %p\n", vma); +- if (use_ptemod) { +- WARN_ON(map->vma != vma); +- 
mmu_interval_notifier_remove(&map->notifier); +- map->vma = NULL; +- } ++ + vma->vm_private_data = NULL; + gntdev_put_map(priv, map); + } +@@ -510,29 +509,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, + struct gntdev_grant_map *map = + container_of(mn, struct gntdev_grant_map, notifier); + unsigned long mstart, mend; ++ unsigned long map_start, map_end; + + if (!mmu_notifier_range_blockable(range)) + return false; + ++ map_start = map->pages_vm_start; ++ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT); ++ + /* + * If the VMA is split or otherwise changed the notifier is not + * updated, but we don't want to process VA's outside the modified + * VMA. FIXME: It would be much more understandable to just prevent + * modifying the VMA in the first place. + */ +- if (map->vma->vm_start >= range->end || +- map->vma->vm_end <= range->start) ++ if (map_start >= range->end || map_end <= range->start) + return true; + +- mstart = max(range->start, map->vma->vm_start); +- mend = min(range->end, map->vma->vm_end); ++ mstart = max(range->start, map_start); ++ mend = min(range->end, map_end); + pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", +- map->index, map->count, +- map->vma->vm_start, map->vma->vm_end, +- range->start, range->end, mstart, mend); +- unmap_grant_pages(map, +- (mstart - map->vma->vm_start) >> PAGE_SHIFT, +- (mend - mstart) >> PAGE_SHIFT); ++ map->index, map->count, map_start, map_end, ++ range->start, range->end, mstart, mend); ++ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT, ++ (mend - mstart) >> PAGE_SHIFT); + + return true; + } +@@ -1012,18 +1012,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) + return -EINVAL; + + pr_debug("map %d+%d at %lx (pgoff %lx)\n", +- index, count, vma->vm_start, vma->vm_pgoff); ++ index, count, vma->vm_start, vma->vm_pgoff); + + mutex_lock(&priv->lock); + map = gntdev_find_map_index(priv, index, count); + if (!map) + goto unlock_out; +- if 
(use_ptemod && map->vma) +- goto unlock_out; +- if (atomic_read(&map->live_grants)) { +- err = -EAGAIN; ++ if (!atomic_add_unless(&map->in_use, 1, 1)) + goto unlock_out; +- } ++ + refcount_inc(&map->users); + + vma->vm_ops = &gntdev_vmops; +@@ -1044,15 +1041,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) + map->flags |= GNTMAP_readonly; + } + ++ map->pages_vm_start = vma->vm_start; ++ + if (use_ptemod) { +- map->vma = vma; + err = mmu_interval_notifier_insert_locked( + &map->notifier, vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, &gntdev_mmu_ops); +- if (err) { +- map->vma = NULL; ++ if (err) + goto out_unlock_put; +- } ++ ++ map->notifier_init = true; + } + mutex_unlock(&priv->lock); + +@@ -1069,7 +1067,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) + */ + mmu_interval_read_begin(&map->notifier); + +- map->pages_vm_start = vma->vm_start; + err = apply_to_page_range(vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, + find_grant_ptes, map); +@@ -1098,13 +1095,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) + out_unlock_put: + mutex_unlock(&priv->lock); + out_put_map: +- if (use_ptemod) { ++ if (use_ptemod) + unmap_grant_pages(map, 0, map->count); +- if (map->vma) { +- mmu_interval_notifier_remove(&map->notifier); +- map->vma = NULL; +- } +- } + gntdev_put_map(priv, map); + return err; + } +-- +2.35.1 +