From 818e5a8f1919cbd5382ecc722a2f94c4fb966e73 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 7 Mar 2021 15:16:00 +0100 Subject: [PATCH] 5.11-stable patches added patches: alsa-hda-realtek-enable-headset-mic-of-acer-swift-with-alc256.patch alsa-usb-audio-allow-modifying-parameters-with-succeeding-hw_params-calls.patch alsa-usb-audio-don-t-abort-even-if-the-clock-rate-differs.patch alsa-usb-audio-drop-bogus-db-range-in-too-low-level.patch alsa-usb-audio-use-corsair-virtuoso-mapping-for-corsair-virtuoso-se.patch btrfs-avoid-double-put-of-block-group-when-emptying-cluster.patch btrfs-fix-race-between-extent-freeing-allocation-when-using-bitmaps.patch btrfs-fix-race-between-swap-file-activation-and-snapshot-creation.patch btrfs-fix-race-between-writes-to-swap-files-and-scrub.patch btrfs-fix-raid6-qstripe-kmap.patch btrfs-fix-spurious-free_space_tree-remount-warning.patch btrfs-fix-stale-data-exposure-after-cloning-a-hole-with-no_holes-enabled.patch btrfs-fix-warning-when-creating-a-directory-with-smack-enabled.patch btrfs-free-correct-amount-of-space-in-btrfs_delayed_inode_reserve_metadata.patch btrfs-tree-checker-do-not-error-out-if-extent-ref-hash-doesn-t-match.patch btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-quota-reservation-errors.patch btrfs-validate-qgroup-inherit-for-snap_create_v2-ioctl.patch dm-bufio-subtract-the-number-of-initial-sectors-in-dm_bufio_get_device_size.patch dm-verity-fix-fec-for-rs-roots-unaligned-to-block-size.patch drm-amd-pm-correct-arcturus-mmthm_baco_cntl-register-address.patch drm-amdgpu-disable-vcn-for-navi12-sku.patch drm-amdgpu-fix-parameter-error-of-rreg32_pcie-in-amdgpu_regs_pcie.patch drm-amdgpu-only-check-for-s0ix-if-amd_pmc-is-configured.patch io_uring-ignore-double-poll-add-on-the-same-waitqueue-head.patch pm-runtime-update-device-status-before-letting-suppliers-suspend.patch ring-buffer-force-before_stamp-and-write_stamp-to-be-different-on-discard.patch 
tpm-tpm_tis-decorate-tpm_get_timeouts-with-request_locality.patch tpm-tpm_tis-decorate-tpm_tis_gen_interrupt-with-request_locality.patch --- ...eadset-mic-of-acer-swift-with-alc256.patch | 64 +++++ ...ters-with-succeeding-hw_params-calls.patch | 71 ++++++ ...abort-even-if-the-clock-rate-differs.patch | 50 ++++ ...drop-bogus-db-range-in-too-low-level.patch | 51 ++++ ...uoso-mapping-for-corsair-virtuoso-se.patch | 42 ++++ ...of-block-group-when-emptying-cluster.patch | 53 ++++ ...reeing-allocation-when-using-bitmaps.patch | 79 ++++++ ...ile-activation-and-snapshot-creation.patch | 109 +++++++++ ...tween-writes-to-swap-files-and-scrub.patch | 228 ++++++++++++++++++ queue-5.11/btrfs-fix-raid6-qstripe-kmap.patch | 94 ++++++++ ...ious-free_space_tree-remount-warning.patch | 61 +++++ ...cloning-a-hole-with-no_holes-enabled.patch | 180 ++++++++++++++ ...ating-a-directory-with-smack-enabled.patch | 146 +++++++++++ ...btrfs_delayed_inode_reserve_metadata.patch | 37 +++ ...out-if-extent-ref-hash-doesn-t-match.patch | 91 +++++++ ...-in-case-of-quota-reservation-errors.patch | 40 +++ ...oup-inherit-for-snap_create_v2-ioctl.patch | 60 +++++ ...-sectors-in-dm_bufio_get_device_size.patch | 40 +++ ...for-rs-roots-unaligned-to-block-size.patch | 142 +++++++++++ ...rus-mmthm_baco_cntl-register-address.patch | 52 ++++ ...rm-amdgpu-disable-vcn-for-navi12-sku.patch | 42 ++++ ...r-of-rreg32_pcie-in-amdgpu_regs_pcie.patch | 40 +++ ...ck-for-s0ix-if-amd_pmc-is-configured.patch | 37 +++ ...-poll-add-on-the-same-waitqueue-head.patch | 121 ++++++++++ ...tus-before-letting-suppliers-suspend.patch | 122 ++++++++++ ...ite_stamp-to-be-different-on-discard.patch | 60 +++++ queue-5.11/series | 1 - ...m_get_timeouts-with-request_locality.patch | 66 +++++ ..._gen_interrupt-with-request_locality.patch | 69 ++++++ 29 files changed, 2247 insertions(+), 1 deletion(-) create mode 100644 queue-5.11/alsa-hda-realtek-enable-headset-mic-of-acer-swift-with-alc256.patch create mode 100644 
queue-5.11/alsa-usb-audio-allow-modifying-parameters-with-succeeding-hw_params-calls.patch create mode 100644 queue-5.11/alsa-usb-audio-don-t-abort-even-if-the-clock-rate-differs.patch create mode 100644 queue-5.11/alsa-usb-audio-drop-bogus-db-range-in-too-low-level.patch create mode 100644 queue-5.11/alsa-usb-audio-use-corsair-virtuoso-mapping-for-corsair-virtuoso-se.patch create mode 100644 queue-5.11/btrfs-avoid-double-put-of-block-group-when-emptying-cluster.patch create mode 100644 queue-5.11/btrfs-fix-race-between-extent-freeing-allocation-when-using-bitmaps.patch create mode 100644 queue-5.11/btrfs-fix-race-between-swap-file-activation-and-snapshot-creation.patch create mode 100644 queue-5.11/btrfs-fix-race-between-writes-to-swap-files-and-scrub.patch create mode 100644 queue-5.11/btrfs-fix-raid6-qstripe-kmap.patch create mode 100644 queue-5.11/btrfs-fix-spurious-free_space_tree-remount-warning.patch create mode 100644 queue-5.11/btrfs-fix-stale-data-exposure-after-cloning-a-hole-with-no_holes-enabled.patch create mode 100644 queue-5.11/btrfs-fix-warning-when-creating-a-directory-with-smack-enabled.patch create mode 100644 queue-5.11/btrfs-free-correct-amount-of-space-in-btrfs_delayed_inode_reserve_metadata.patch create mode 100644 queue-5.11/btrfs-tree-checker-do-not-error-out-if-extent-ref-hash-doesn-t-match.patch create mode 100644 queue-5.11/btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-quota-reservation-errors.patch create mode 100644 queue-5.11/btrfs-validate-qgroup-inherit-for-snap_create_v2-ioctl.patch create mode 100644 queue-5.11/dm-bufio-subtract-the-number-of-initial-sectors-in-dm_bufio_get_device_size.patch create mode 100644 queue-5.11/dm-verity-fix-fec-for-rs-roots-unaligned-to-block-size.patch create mode 100644 queue-5.11/drm-amd-pm-correct-arcturus-mmthm_baco_cntl-register-address.patch create mode 100644 queue-5.11/drm-amdgpu-disable-vcn-for-navi12-sku.patch create mode 100644 
queue-5.11/drm-amdgpu-fix-parameter-error-of-rreg32_pcie-in-amdgpu_regs_pcie.patch create mode 100644 queue-5.11/drm-amdgpu-only-check-for-s0ix-if-amd_pmc-is-configured.patch create mode 100644 queue-5.11/io_uring-ignore-double-poll-add-on-the-same-waitqueue-head.patch create mode 100644 queue-5.11/pm-runtime-update-device-status-before-letting-suppliers-suspend.patch create mode 100644 queue-5.11/ring-buffer-force-before_stamp-and-write_stamp-to-be-different-on-discard.patch create mode 100644 queue-5.11/tpm-tpm_tis-decorate-tpm_get_timeouts-with-request_locality.patch create mode 100644 queue-5.11/tpm-tpm_tis-decorate-tpm_tis_gen_interrupt-with-request_locality.patch diff --git a/queue-5.11/alsa-hda-realtek-enable-headset-mic-of-acer-swift-with-alc256.patch b/queue-5.11/alsa-hda-realtek-enable-headset-mic-of-acer-swift-with-alc256.patch new file mode 100644 index 00000000000..e259874723a --- /dev/null +++ b/queue-5.11/alsa-hda-realtek-enable-headset-mic-of-acer-swift-with-alc256.patch @@ -0,0 +1,64 @@ +From d0e185616a0331c87ce3aa1d7dfde8df39d6d002 Mon Sep 17 00:00:00 2001 +From: Chris Chiu +Date: Fri, 26 Feb 2021 09:04:40 +0800 +Subject: ALSA: hda/realtek: Enable headset mic of Acer SWIFT with ALC256 + +From: Chris Chiu + +commit d0e185616a0331c87ce3aa1d7dfde8df39d6d002 upstream. + +The Acer SWIFT Swift SF314-54/55 laptops with ALC256 cannot detect +both the headset mic and the internal mic. Introduce new fixup +to enable the jack sense and the headset mic. However, the internal +mic actually connects to Intel SST audio. It still needs Intel SST +support to make internal mic capture work. 
+ +Signed-off-by: Chris Chiu +Acked-by: Jian-Hong Pan +Cc: +Link: https://lore.kernel.org/r/20210226010440.8474-1-chris.chiu@canonical.com +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/pci/hda/patch_realtek.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -6408,6 +6408,7 @@ enum { + ALC236_FIXUP_DELL_AIO_HEADSET_MIC, + ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST, ++ ALC256_FIXUP_ACER_HEADSET_MIC, + }; + + static const struct hda_fixup alc269_fixups[] = { +@@ -7864,6 +7865,16 @@ static const struct hda_fixup alc269_fix + .chained = true, + .chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE, + }, ++ [ALC256_FIXUP_ACER_HEADSET_MIC] = { ++ .type = HDA_FIXUP_PINS, ++ .v.pins = (const struct hda_pintbl[]) { ++ { 0x19, 0x02a1113c }, /* use as headset mic, without its own jack detect */ ++ { 0x1a, 0x90a1092f }, /* use as internal mic */ ++ { } ++ }, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -7890,9 +7901,11 @@ static const struct snd_pci_quirk alc269 + SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), + SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), + SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), ++ SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), ++ SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", 
ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), diff --git a/queue-5.11/alsa-usb-audio-allow-modifying-parameters-with-succeeding-hw_params-calls.patch b/queue-5.11/alsa-usb-audio-allow-modifying-parameters-with-succeeding-hw_params-calls.patch new file mode 100644 index 00000000000..c9ebd687735 --- /dev/null +++ b/queue-5.11/alsa-usb-audio-allow-modifying-parameters-with-succeeding-hw_params-calls.patch @@ -0,0 +1,71 @@ +From 5f5e6a3e8b1df52f79122e447855cffbf1710540 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Sun, 28 Feb 2021 09:01:38 +0100 +Subject: ALSA: usb-audio: Allow modifying parameters with succeeding hw_params calls + +From: Takashi Iwai + +commit 5f5e6a3e8b1df52f79122e447855cffbf1710540 upstream. + +The recent fix for the hw constraints for implicit feedback streams +via commit e4ea77f8e53f ("ALSA: usb-audio: Always apply the hw +constraints for implicit fb sync") added the check of the matching +endpoints and whether those EPs are already opened. This is needed +and correct, per se, even for the normal streams without the implicit +feedback, as the endpoint setup is exclusive. + +However, it's reported that there seem applications that behave in +unexpected ways to update the hw_params without clearing the previous +setup via hw_free, and those hit a problem now: then hw_params is +called with still the previous EP setup kept, hence it's restricted +with the previous own setup. Although the obvious fix is to call +snd_pcm_hw_free() API in the application side, it's a kind of +unwelcome change. + +This patch tries to ease the situation: in the endpoint check, we add +a couple of more conditions and now skip the endpoint that is being +used only by the stream in question itself. 
That is, in addition to +the presence check of ep (ep->cur_audiofmt is non-NULL), when the +following conditions are met, we skip such an ep: +- ep->opened == 1, and +- ep->cur_audiofmt == subs->cur_audiofmt. + +subs->cur_audiofmt is non-NULL only if it's a re-setup of hw_params, +and ep->cur_audiofmt points to the currently set up parameters. So if +those match, it must be this stream itself. + +Fixes: e4ea77f8e53f ("ALSA: usb-audio: Always apply the hw constraints for implicit fb sync") +BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211941 +Cc: +Link: https://lore.kernel.org/r/20210228080138.9936-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/pcm.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/sound/usb/pcm.c ++++ b/sound/usb/pcm.c +@@ -845,13 +845,19 @@ get_sync_ep_from_substream(struct snd_us + + list_for_each_entry(fp, &subs->fmt_list, list) { + ep = snd_usb_get_endpoint(chip, fp->endpoint); +- if (ep && ep->cur_rate) +- return ep; ++ if (ep && ep->cur_audiofmt) { ++ /* if EP is already opened solely for this substream, ++ * we still allow us to change the parameter; otherwise ++ * this substream has to follow the existing parameter ++ */ ++ if (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1) ++ return ep; ++ } + if (!fp->implicit_fb) + continue; + /* for the implicit fb, check the sync ep as well */ + ep = snd_usb_get_endpoint(chip, fp->sync_ep); +- if (ep && ep->cur_rate) ++ if (ep && ep->cur_audiofmt) + return ep; + } + return NULL; diff --git a/queue-5.11/alsa-usb-audio-don-t-abort-even-if-the-clock-rate-differs.patch b/queue-5.11/alsa-usb-audio-don-t-abort-even-if-the-clock-rate-differs.patch new file mode 100644 index 00000000000..bfa9d47449b --- /dev/null +++ b/queue-5.11/alsa-usb-audio-don-t-abort-even-if-the-clock-rate-differs.patch @@ -0,0 +1,50 @@ +From dcf269b3f703f5dbc2101824d9dbe95feed87b3d Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Sat, 27 
Feb 2021 09:20:02 +0100 +Subject: ALSA: usb-audio: Don't abort even if the clock rate differs + +From: Takashi Iwai + +commit dcf269b3f703f5dbc2101824d9dbe95feed87b3d upstream. + +The commit 93db51d06b32 ("ALSA: usb-audio: Check valid altsetting at +parsing rates for UAC2/3") changed the behavior of the function +set_sample_rate_v2v3() slightly to treat the inconsistent sample rate +as an error. It was done by assumption that the sample rate +validation should have been done at the parser phase as implemented in +that patch. But the validation is later selectively enabled only for +certain devices as it causes a regression (the commit fe773b8711e3 +"ALSA: usb-audio: workaround for iface reset issue"), and now the +inconsistency surfaced as a fatal error while it worked in the past as +is, as reported for FiiO M3K DAC. + +For recovering from the regression, change set_sample_rate_v2v3() +again to ignore the sample rate difference as non-error. + +BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1182633 +Fixes: 93db51d06b32 ("ALSA: usb-audio: Check valid altsetting at parsing rates for UAC2/3") +Cc: +Link: https://lore.kernel.org/r/20210227082002.21185-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/clock.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/sound/usb/clock.c ++++ b/sound/usb/clock.c +@@ -646,10 +646,10 @@ static int set_sample_rate_v2v3(struct s + cur_rate = prev_rate; + + if (cur_rate != rate) { +- usb_audio_warn(chip, +- "%d:%d: freq mismatch (RO clock): req %d, clock runs @%d\n", +- fmt->iface, fmt->altsetting, rate, cur_rate); +- return -ENXIO; ++ usb_audio_dbg(chip, ++ "%d:%d: freq mismatch: req %d, clock runs @%d\n", ++ fmt->iface, fmt->altsetting, rate, cur_rate); ++ /* continue processing */ + } + + validation: diff --git a/queue-5.11/alsa-usb-audio-drop-bogus-db-range-in-too-low-level.patch b/queue-5.11/alsa-usb-audio-drop-bogus-db-range-in-too-low-level.patch new 
file mode 100644 index 00000000000..fc5e5fe4b58 --- /dev/null +++ b/queue-5.11/alsa-usb-audio-drop-bogus-db-range-in-too-low-level.patch @@ -0,0 +1,51 @@ +From 21cba9c5359dd9d1bffe355336cfec0b66d1ee52 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Sat, 27 Feb 2021 11:57:37 +0100 +Subject: ALSA: usb-audio: Drop bogus dB range in too low level + +From: Takashi Iwai + +commit 21cba9c5359dd9d1bffe355336cfec0b66d1ee52 upstream. + +Some USB audio firmware seem to report broken dB values for the volume +controls, and this screws up applications like PulseAudio who blindly +trusts the given data. For example, Edifier G2000 reports a PCM +volume from -128dB to -127dB, and this results in barely inaudible +sound. + +This patch adds a sort of sanity check at parsing the dB values in +USB-audio driver and disables the dB reporting if the range looks +bogus. Here, we assume -96dB as the bottom line of the max dB. + +Note that, if one can figure out that proper dB range later, it can be +patched in the mixer maps. 
+ +BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211929 +Cc: +Link: https://lore.kernel.org/r/20210227105737.3656-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/mixer.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/sound/usb/mixer.c ++++ b/sound/usb/mixer.c +@@ -1301,6 +1301,17 @@ no_res_check: + /* totally crap, return an error */ + return -EINVAL; + } ++ } else { ++ /* if the max volume is too low, it's likely a bogus range; ++ * here we use -96dB as the threshold ++ */ ++ if (cval->dBmax <= -9600) { ++ usb_audio_info(cval->head.mixer->chip, ++ "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n", ++ cval->head.id, mixer_ctrl_intf(cval->head.mixer), ++ cval->dBmin, cval->dBmax); ++ cval->dBmin = cval->dBmax = 0; ++ } + } + + return 0; diff --git a/queue-5.11/alsa-usb-audio-use-corsair-virtuoso-mapping-for-corsair-virtuoso-se.patch b/queue-5.11/alsa-usb-audio-use-corsair-virtuoso-mapping-for-corsair-virtuoso-se.patch new file mode 100644 index 00000000000..ebb9d26f91d --- /dev/null +++ b/queue-5.11/alsa-usb-audio-use-corsair-virtuoso-mapping-for-corsair-virtuoso-se.patch @@ -0,0 +1,42 @@ +From 11302bb69e72d0526bc626ee5c451a3d22cde904 Mon Sep 17 00:00:00 2001 +From: Andrea Fagiani +Date: Tue, 19 Jan 2021 08:47:44 +0000 +Subject: ALSA: usb-audio: use Corsair Virtuoso mapping for Corsair Virtuoso SE + +From: Andrea Fagiani + +commit 11302bb69e72d0526bc626ee5c451a3d22cde904 upstream. + +The Corsair Virtuoso SE RGB Wireless is a USB headset with a mic and a +sidetone feature. Assign the Corsair Virtuoso name map to the SE product +ids as well, in order to label its mixer appropriately and allow +userspace to pick the correct volume controls. 
+ +Signed-off-by: Andrea Fagiani +Cc: +Link: https://lore.kernel.org/r/40bbdf55-f854-e2ee-87b4-183e6451352c@gmail.com +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/mixer_maps.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/sound/usb/mixer_maps.c ++++ b/sound/usb/mixer_maps.c +@@ -537,6 +537,16 @@ static const struct usbmix_ctl_map usbmi + .map = bose_companion5_map, + }, + { ++ /* Corsair Virtuoso SE (wired mode) */ ++ .id = USB_ID(0x1b1c, 0x0a3d), ++ .map = corsair_virtuoso_map, ++ }, ++ { ++ /* Corsair Virtuoso SE (wireless mode) */ ++ .id = USB_ID(0x1b1c, 0x0a3e), ++ .map = corsair_virtuoso_map, ++ }, ++ { + /* Corsair Virtuoso (wired mode) */ + .id = USB_ID(0x1b1c, 0x0a41), + .map = corsair_virtuoso_map, diff --git a/queue-5.11/btrfs-avoid-double-put-of-block-group-when-emptying-cluster.patch b/queue-5.11/btrfs-avoid-double-put-of-block-group-when-emptying-cluster.patch new file mode 100644 index 00000000000..483076328e3 --- /dev/null +++ b/queue-5.11/btrfs-avoid-double-put-of-block-group-when-emptying-cluster.patch @@ -0,0 +1,53 @@ +From 95c85fba1f64c3249c67f0078a29f8a125078189 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 25 Jan 2021 16:42:35 -0500 +Subject: btrfs: avoid double put of block group when emptying cluster + +From: Josef Bacik + +commit 95c85fba1f64c3249c67f0078a29f8a125078189 upstream. + +It's wrong calling btrfs_put_block_group in +__btrfs_return_cluster_to_free_space if the block group passed is +different than the block group the cluster represents. As this means the +cluster doesn't have a reference to the passed block group. This results +in double put and a use-after-free bug. + +Fix this by simply bailing if the block group we passed in does not +match the block group on the cluster. 
+ +Fixes: fa9c0d795f7b ("Btrfs: rework allocation clustering") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +[ update changelog ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/free-space-cache.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -2708,8 +2708,10 @@ static void __btrfs_return_cluster_to_fr + struct rb_node *node; + + spin_lock(&cluster->lock); +- if (cluster->block_group != block_group) +- goto out; ++ if (cluster->block_group != block_group) { ++ spin_unlock(&cluster->lock); ++ return; ++ } + + cluster->block_group = NULL; + cluster->window_start = 0; +@@ -2747,8 +2749,6 @@ static void __btrfs_return_cluster_to_fr + entry->offset, &entry->offset_index, bitmap); + } + cluster->root = RB_ROOT; +- +-out: + spin_unlock(&cluster->lock); + btrfs_put_block_group(block_group); + } diff --git a/queue-5.11/btrfs-fix-race-between-extent-freeing-allocation-when-using-bitmaps.patch b/queue-5.11/btrfs-fix-race-between-extent-freeing-allocation-when-using-bitmaps.patch new file mode 100644 index 00000000000..47ca87d8985 --- /dev/null +++ b/queue-5.11/btrfs-fix-race-between-extent-freeing-allocation-when-using-bitmaps.patch @@ -0,0 +1,79 @@ +From 3c17916510428dbccdf657de050c34e208347089 Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Mon, 8 Feb 2021 10:26:54 +0200 +Subject: btrfs: fix race between extent freeing/allocation when using bitmaps + +From: Nikolay Borisov + +commit 3c17916510428dbccdf657de050c34e208347089 upstream. + +During allocation the allocator will try to allocate an extent using +cluster policy. Once the current cluster is exhausted it will remove the +entry under btrfs_free_cluster::lock and subsequently acquire +btrfs_free_space_ctl::tree_lock to dispose of the already-deleted entry +and adjust btrfs_free_space_ctl::total_bitmap. 
This poses a problem +because there exists a race condition between removing the entry under +one lock and doing the necessary accounting holding a different lock +since extent freeing only uses the 2nd lock. This can result in the +following situation: + +T1: T2: +btrfs_alloc_from_cluster insert_into_bitmap + if (entry->bytes == 0) if (block_group && !list_empty(&block_group->cluster_list)) { + rb_erase(entry) + + spin_unlock(&cluster->lock); + (total_bitmaps is still 4) spin_lock(&cluster->lock); + root> + spin_lock(&ctl->tree_lock); + recalculate_thresholds + +To fix this ensure that once depleted, the cluster entry is deleted when +both cluster lock and tree locks are held in the allocator (T1), this +ensures that even if there is a race with a concurrent +insert_into_bitmap call it will correctly find the entry in the cluster +and add the new space to it. + +CC: # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Nikolay Borisov +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/free-space-cache.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -3028,8 +3028,6 @@ u64 btrfs_alloc_from_cluster(struct btrf + entry->bytes -= bytes; + } + +- if (entry->bytes == 0) +- rb_erase(&entry->offset_index, &cluster->root); + break; + } + out: +@@ -3046,7 +3044,10 @@ out: + ctl->free_space -= bytes; + if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) + ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; ++ ++ spin_lock(&cluster->lock); + if (entry->bytes == 0) { ++ rb_erase(&entry->offset_index, &cluster->root); + ctl->free_extents--; + if (entry->bitmap) { + kmem_cache_free(btrfs_free_space_bitmap_cachep, +@@ -3059,6 +3060,7 @@ out: + kmem_cache_free(btrfs_free_space_cachep, entry); + } + ++ spin_unlock(&cluster->lock); + spin_unlock(&ctl->tree_lock); + + return ret; diff --git 
a/queue-5.11/btrfs-fix-race-between-swap-file-activation-and-snapshot-creation.patch b/queue-5.11/btrfs-fix-race-between-swap-file-activation-and-snapshot-creation.patch new file mode 100644 index 00000000000..307c0aa34cb --- /dev/null +++ b/queue-5.11/btrfs-fix-race-between-swap-file-activation-and-snapshot-creation.patch @@ -0,0 +1,109 @@ +From dd0734f2a866f9d619d4abf97c3d71bcdee40ea9 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 5 Feb 2021 12:55:38 +0000 +Subject: btrfs: fix race between swap file activation and snapshot creation + +From: Filipe Manana + +commit dd0734f2a866f9d619d4abf97c3d71bcdee40ea9 upstream. + +When creating a snapshot we check if the current number of swap files, in +the root, is non-zero, and if it is, we error out and warn that we can not +create the snapshot because there are active swap files. + +However this is racy because when a task started activation of a swap +file, another task might have started already snapshot creation and might +have seen the counter for the number of swap files as zero. This means +that after the swap file is activated we may end up with a snapshot of the +same root successfully created, and therefore when the first write to the +swap file happens it has to fall back into COW mode, which should never +happen for active swap files. + +Basically what can happen is: + +1) Task A starts snapshot creation and enters ioctl.c:create_snapshot(). + There it sees that root->nr_swapfiles has a value of 0 so it continues; + +2) Task B enters btrfs_swap_activate(). It is not aware that another task + started snapshot creation but it did not finish yet. 
It increments + root->nr_swapfiles from 0 to 1; + +3) Task B checks that the file meets all requirements to be an active + swap file - it has NOCOW set, there are no snapshots for the inode's + root at the moment, no file holes, no reflinked extents, etc; + +4) Task B returns success and now the file is an active swap file; + +5) Task A commits the transaction to create the snapshot and finishes. + The swap file's extents are now shared between the original root and + the snapshot; + +6) A write into an extent of the swap file is attempted - there is a + snapshot of the file's root, so we fall back to COW mode and therefore + the physical location of the extent changes on disk. + +So fix this by taking the snapshot lock during swap file activation before +locking the extent range, as that is the order in which we lock these +during buffered writes. + +Fixes: ed46ff3d42378 ("Btrfs: support swap files") +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Anand Jain +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -10099,7 +10099,8 @@ static int btrfs_swap_activate(struct sw + sector_t *span) + { + struct inode *inode = file_inode(file); +- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; ++ struct btrfs_root *root = BTRFS_I(inode)->root; ++ struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_state *cached_state = NULL; + struct extent_map *em = NULL; +@@ -10150,13 +10151,27 @@ static int btrfs_swap_activate(struct sw + "cannot activate swapfile while exclusive operation is running"); + return -EBUSY; + } ++ ++ /* ++ * Prevent snapshot creation while we are activating the swap file. ++ * We do not want to race with snapshot creation. 
If snapshot creation ++ * already started before we bumped nr_swapfiles from 0 to 1 and ++ * completes before the first write into the swap file after it is ++ * activated, than that write would fallback to COW. ++ */ ++ if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { ++ btrfs_exclop_finish(fs_info); ++ btrfs_warn(fs_info, ++ "cannot activate swapfile because snapshot creation is in progress"); ++ return -EINVAL; ++ } + /* + * Snapshots can create extents which require COW even if NODATACOW is + * set. We use this counter to prevent snapshots. We must increment it + * before walking the extents because we don't want a concurrent + * snapshot to run after we've already checked the extents. + */ +- atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); ++ atomic_inc(&root->nr_swapfiles); + + isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); + +@@ -10302,6 +10317,8 @@ out: + if (ret) + btrfs_swap_deactivate(file); + ++ btrfs_drew_write_unlock(&root->snapshot_lock); ++ + btrfs_exclop_finish(fs_info); + + if (ret) diff --git a/queue-5.11/btrfs-fix-race-between-writes-to-swap-files-and-scrub.patch b/queue-5.11/btrfs-fix-race-between-writes-to-swap-files-and-scrub.patch new file mode 100644 index 00000000000..97ab5f2d455 --- /dev/null +++ b/queue-5.11/btrfs-fix-race-between-writes-to-swap-files-and-scrub.patch @@ -0,0 +1,228 @@ +From 195a49eaf655eb914896c92cecd96bc863c9feb3 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 5 Feb 2021 12:55:37 +0000 +Subject: btrfs: fix race between writes to swap files and scrub + +From: Filipe Manana + +commit 195a49eaf655eb914896c92cecd96bc863c9feb3 upstream. + +When we active a swap file, at btrfs_swap_activate(), we acquire the +exclusive operation lock to prevent the physical location of the swap +file extents to be changed by operations such as balance and device +replace/resize/remove. 
We also call there can_nocow_extent() which, +among other things, checks if the block group of a swap file extent is +currently RO, and if it is we can not use the extent, since a write +into it would result in COWing the extent. + +However we have no protection against a scrub operation running after we +activate the swap file, which can result in the swap file extents to be +COWed while the scrub is running and operating on the respective block +group, because scrub turns a block group into RO before it processes it +and then back again to RW mode after processing it. That means an attempt +to write into a swap file extent while scrub is processing the respective +block group, will result in COWing the extent, changing its physical +location on disk. + +Fix this by making sure that block groups that have extents that are used +by active swap files can not be turned into RO mode, therefore making it +not possible for a scrub to turn them into RO mode. When a scrub finds a +block group that can not be turned to RO due to the existence of extents +used by swap files, it proceeds to the next block group and logs a warning +message that mentions the block group was skipped due to active swap +files - this is the same approach we currently use for balance. 
+ +Fixes: ed46ff3d42378 ("Btrfs: support swap files") +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Anand Jain +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/block-group.c | 33 ++++++++++++++++++++++++++++++++- + fs/btrfs/block-group.h | 9 +++++++++ + fs/btrfs/ctree.h | 5 +++++ + fs/btrfs/inode.c | 19 ++++++++++++++++++- + fs/btrfs/scrub.c | 9 ++++++++- + 5 files changed, 72 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -1150,6 +1150,11 @@ static int inc_block_group_ro(struct btr + spin_lock(&sinfo->lock); + spin_lock(&cache->lock); + ++ if (cache->swap_extents) { ++ ret = -ETXTBSY; ++ goto out; ++ } ++ + if (cache->ro) { + cache->ro++; + ret = 0; +@@ -2253,7 +2258,7 @@ again: + } + + ret = inc_block_group_ro(cache, 0); +- if (!do_chunk_alloc) ++ if (!do_chunk_alloc || ret == -ETXTBSY) + goto unlock_out; + if (!ret) + goto out; +@@ -2262,6 +2267,8 @@ again: + if (ret < 0) + goto out; + ret = inc_block_group_ro(cache, 0); ++ if (ret == -ETXTBSY) ++ goto unlock_out; + out: + if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { + alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); +@@ -3345,6 +3352,7 @@ int btrfs_free_block_groups(struct btrfs + ASSERT(list_empty(&block_group->io_list)); + ASSERT(list_empty(&block_group->bg_list)); + ASSERT(refcount_read(&block_group->refs) == 1); ++ ASSERT(block_group->swap_extents == 0); + btrfs_put_block_group(block_group); + + spin_lock(&info->block_group_cache_lock); +@@ -3411,3 +3419,26 @@ void btrfs_unfreeze_block_group(struct b + __btrfs_remove_free_space_cache(block_group->free_space_ctl); + } + } ++ ++bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg) ++{ ++ bool ret = true; ++ ++ spin_lock(&bg->lock); ++ if (bg->ro) ++ ret = false; ++ else ++ bg->swap_extents++; ++ spin_unlock(&bg->lock); ++ ++ return ret; ++} ++ ++void btrfs_dec_block_group_swap_extents(struct 
btrfs_block_group *bg, int amount) ++{ ++ spin_lock(&bg->lock); ++ ASSERT(!bg->ro); ++ ASSERT(bg->swap_extents >= amount); ++ bg->swap_extents -= amount; ++ spin_unlock(&bg->lock); ++} +--- a/fs/btrfs/block-group.h ++++ b/fs/btrfs/block-group.h +@@ -181,6 +181,12 @@ struct btrfs_block_group { + */ + int needs_free_space; + ++ /* ++ * Number of extents in this block group used for swap files. ++ * All accesses protected by the spinlock 'lock'. ++ */ ++ int swap_extents; ++ + /* Record locked full stripes for RAID5/6 block group */ + struct btrfs_full_stripe_locks_tree full_stripe_locks_root; + }; +@@ -301,4 +307,7 @@ int btrfs_rmap_block(struct btrfs_fs_inf + u64 physical, u64 **logical, int *naddrs, int *stripe_len); + #endif + ++bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg); ++void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount); ++ + #endif /* BTRFS_BLOCK_GROUP_H */ +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -523,6 +523,11 @@ struct btrfs_swapfile_pin { + * points to a struct btrfs_device. + */ + bool is_block_group; ++ /* ++ * Only used when 'is_block_group' is true and it is the number of ++ * extents used by a swapfile for this block group ('ptr' field). 
++ */ ++ int bg_extent_count; + }; + + bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9993,6 +9993,7 @@ static int btrfs_add_swapfile_pin(struct + sp->ptr = ptr; + sp->inode = inode; + sp->is_block_group = is_block_group; ++ sp->bg_extent_count = 1; + + spin_lock(&fs_info->swapfile_pins_lock); + p = &fs_info->swapfile_pins.rb_node; +@@ -10006,6 +10007,8 @@ static int btrfs_add_swapfile_pin(struct + (sp->ptr == entry->ptr && sp->inode > entry->inode)) { + p = &(*p)->rb_right; + } else { ++ if (is_block_group) ++ entry->bg_extent_count++; + spin_unlock(&fs_info->swapfile_pins_lock); + kfree(sp); + return 1; +@@ -10031,8 +10034,11 @@ static void btrfs_free_swapfile_pins(str + sp = rb_entry(node, struct btrfs_swapfile_pin, node); + if (sp->inode == inode) { + rb_erase(&sp->node, &fs_info->swapfile_pins); +- if (sp->is_block_group) ++ if (sp->is_block_group) { ++ btrfs_dec_block_group_swap_extents(sp->ptr, ++ sp->bg_extent_count); + btrfs_put_block_group(sp->ptr); ++ } + kfree(sp); + } + node = next; +@@ -10246,6 +10252,17 @@ static int btrfs_swap_activate(struct sw + ret = -EINVAL; + goto out; + } ++ ++ if (!btrfs_inc_block_group_swap_extents(bg)) { ++ btrfs_warn(fs_info, ++ "block group for swapfile at %llu is read-only%s", ++ bg->start, ++ atomic_read(&fs_info->scrubs_running) ? ++ " (scrub running)" : ""); ++ btrfs_put_block_group(bg); ++ ret = -EINVAL; ++ goto out; ++ } + + ret = btrfs_add_swapfile_pin(inode, bg, true); + if (ret) { +--- a/fs/btrfs/scrub.c ++++ b/fs/btrfs/scrub.c +@@ -3630,6 +3630,13 @@ int scrub_enumerate_chunks(struct scrub_ + * commit_transactions. 
+ */ + ro_set = 0; ++ } else if (ret == -ETXTBSY) { ++ btrfs_warn(fs_info, ++ "skipping scrub of block group %llu due to active swapfile", ++ cache->start); ++ scrub_pause_off(fs_info); ++ ret = 0; ++ goto skip_unfreeze; + } else { + btrfs_warn(fs_info, + "failed setting block group ro: %d", ret); +@@ -3719,7 +3726,7 @@ int scrub_enumerate_chunks(struct scrub_ + } else { + spin_unlock(&cache->lock); + } +- ++skip_unfreeze: + btrfs_unfreeze_block_group(cache); + btrfs_put_block_group(cache); + if (ret) diff --git a/queue-5.11/btrfs-fix-raid6-qstripe-kmap.patch b/queue-5.11/btrfs-fix-raid6-qstripe-kmap.patch new file mode 100644 index 00000000000..154e51a5426 --- /dev/null +++ b/queue-5.11/btrfs-fix-raid6-qstripe-kmap.patch @@ -0,0 +1,94 @@ +From d70cef0d46729808dc53f145372c02b145c92604 Mon Sep 17 00:00:00 2001 +From: Ira Weiny +Date: Wed, 27 Jan 2021 22:15:03 -0800 +Subject: btrfs: fix raid6 qstripe kmap + +From: Ira Weiny + +commit d70cef0d46729808dc53f145372c02b145c92604 upstream. + +When a qstripe is required an extra page is allocated and mapped. There +were 3 problems: + +1) There is no corresponding call of kunmap() for the qstripe page. +2) There is no reason to map the qstripe page more than once if the + number of bits set in rbio->dbitmap is greater than one. +3) There is no reason to map the parity page and unmap it each time + through the loop. + +The page memory can continue to be reused with a single mapping on each +iteration by raid6_call.gen_syndrome() without remapping. So map the +page for the duration of the loop. + +Similarly, improve the algorithm by mapping the parity page just 1 time. 
+ +Fixes: 5a6ac9eacb49 ("Btrfs, raid56: support parity scrub on raid56") +CC: stable@vger.kernel.org # 4.4.x: c17af96554a8: btrfs: raid56: simplify tracking of Q stripe presence +CC: stable@vger.kernel.org # 4.4.x +Signed-off-by: Ira Weiny +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/raid56.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/fs/btrfs/raid56.c ++++ b/fs/btrfs/raid56.c +@@ -2363,16 +2363,21 @@ static noinline void finish_parity_scrub + SetPageUptodate(p_page); + + if (has_qstripe) { ++ /* RAID6, allocate and map temp space for the Q stripe */ + q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!q_page) { + __free_page(p_page); + goto cleanup; + } + SetPageUptodate(q_page); ++ pointers[rbio->real_stripes - 1] = kmap(q_page); + } + + atomic_set(&rbio->error, 0); + ++ /* Map the parity stripe just once */ ++ pointers[nr_data] = kmap(p_page); ++ + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { + struct page *p; + void *parity; +@@ -2382,16 +2387,8 @@ static noinline void finish_parity_scrub + pointers[stripe] = kmap(p); + } + +- /* then add the parity stripe */ +- pointers[stripe++] = kmap(p_page); +- + if (has_qstripe) { +- /* +- * raid6, add the qstripe and call the +- * library function to fill in our p/q +- */ +- pointers[stripe++] = kmap(q_page); +- ++ /* RAID6, call the library function to fill in our P/Q */ + raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, + pointers); + } else { +@@ -2412,12 +2409,14 @@ static noinline void finish_parity_scrub + + for (stripe = 0; stripe < nr_data; stripe++) + kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); +- kunmap(p_page); + } + ++ kunmap(p_page); + __free_page(p_page); +- if (q_page) ++ if (q_page) { ++ kunmap(q_page); + __free_page(q_page); ++ } + + writeback: + /* diff --git a/queue-5.11/btrfs-fix-spurious-free_space_tree-remount-warning.patch 
b/queue-5.11/btrfs-fix-spurious-free_space_tree-remount-warning.patch new file mode 100644 index 00000000000..2fd59dc410e --- /dev/null +++ b/queue-5.11/btrfs-fix-spurious-free_space_tree-remount-warning.patch @@ -0,0 +1,61 @@ +From c55a4319c4f2c3ba0a385b1ebc454fa283cfe920 Mon Sep 17 00:00:00 2001 +From: Boris Burkov +Date: Tue, 23 Feb 2021 10:22:32 -0800 +Subject: btrfs: fix spurious free_space_tree remount warning + +From: Boris Burkov + +commit c55a4319c4f2c3ba0a385b1ebc454fa283cfe920 upstream. + +The intended logic of the check is to catch cases where the desired +free_space_tree setting doesn't match the mounted setting, and the +remount is anything but ro->rw. However, it makes the mistake of +checking equality on a masked integer (btrfs_test_opt) against a boolean +(btrfs_fs_compat_ro). + +If you run the reproducer: + $ mount -o space_cache=v2 dev mnt + $ mount -o remount,ro mnt + +you would expect no warning, because the remount is not attempting to +change the free space tree setting, but we do see the warning. + +To fix this, add explicit bool type casts to the condition. 
+ +I tested a variety of transitions: +sudo mount -o space_cache=v2 /dev/vg0/lv0 mnt/lol +(fst enabled) +mount -o remount,ro mnt/lol +(no warning, no fst change) +sudo mount -o remount,rw,space_cache=v1,clear_cache +(no warning, ro->rw) +sudo mount -o remount,rw,space_cache=v2 mnt +(warning, rw->rw with change) +sudo mount -o remount,ro mnt +(no warning, no fst change) +sudo mount -o remount,rw,space_cache=v2 mnt +(no warning, no fst change) + +Reported-by: Chris Murphy +CC: stable@vger.kernel.org # 5.11 +Signed-off-by: Boris Burkov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/super.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -1919,8 +1919,8 @@ static int btrfs_remount(struct super_bl + btrfs_resize_thread_pool(fs_info, + fs_info->thread_pool_size, old_thread_pool_size); + +- if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) != +- btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && ++ if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != ++ (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + (!sb_rdonly(sb) || (*flags & SB_RDONLY))) { + btrfs_warn(fs_info, + "remount supports changing free space tree only from ro to rw"); diff --git a/queue-5.11/btrfs-fix-stale-data-exposure-after-cloning-a-hole-with-no_holes-enabled.patch b/queue-5.11/btrfs-fix-stale-data-exposure-after-cloning-a-hole-with-no_holes-enabled.patch new file mode 100644 index 00000000000..613f09107e7 --- /dev/null +++ b/queue-5.11/btrfs-fix-stale-data-exposure-after-cloning-a-hole-with-no_holes-enabled.patch @@ -0,0 +1,180 @@ +From 3660d0bcdb82807d434da9d2e57d88b37331182d Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 16 Feb 2021 11:09:25 +0000 +Subject: btrfs: fix stale data exposure after cloning a hole with NO_HOLES enabled + +From: Filipe Manana + +commit 3660d0bcdb82807d434da9d2e57d88b37331182d upstream. 
+ +When using the NO_HOLES feature, if we clone a file range that spans only +a hole into a range that is at or beyond the current i_size of the +destination file, we end up not setting the full sync runtime flag on the +inode. As a result, if we then fsync the destination file and have a power +failure, after log replay we can end up exposing stale data instead of +having a hole for that range. + +The conditions for this to happen are the following: + +1) We have a file with a size of, for example, 1280K; + +2) There is a written (non-prealloc) extent for the file range from 1024K + to 1280K with a length of 256K; + +3) This particular file extent layout is durably persisted, so that the + existing superblock persisted on disk points to a subvolume root where + the file has that exact file extent layout and state; + +4) The file is truncated to a smaller size, to an offset lower than the + start offset of its last extent, for example to 800K. The truncate sets + the full sync runtime flag on the inode; + +6) Fsync the file to log it and clear the full sync runtime flag; + +7) Clone a region that covers only a hole (implicit hole due to NO_HOLES) + into the file with a destination offset that starts at or beyond the + 256K file extent item we had - for example to offset 1024K; + +8) Since the clone operation does not find extents in the source range, + we end up in the if branch at the bottom of btrfs_clone() where we + punch a hole for the file range starting at offset 1024K by calling + btrfs_replace_file_extents(). 
There we end up not setting the full + sync flag on the inode, because we don't know we are being called in + a clone context (and not fallocate's punch hole operation), and + neither do we create an extent map to represent a hole because the + requested range is beyond eof; + +9) A further fsync to the file will be a fast fsync, since the clone + operation did not set the full sync flag, and therefore it relies on + modified extent maps to correctly log the file layout. But since + it does not find any extent map marking the range from 1024K (the + previous eof) to the new eof, it does not log a file extent item + for that range representing the hole; + +10) After a power failure no hole for the range starting at 1024K is + punched and we end up exposing stale data from the old 256K extent. + +Turning this into exact steps: + + $ mkfs.btrfs -f -O no-holes /dev/sdi + $ mount /dev/sdi /mnt + + # Create our test file with 3 extents of 256K and a 256K hole at offset + # 256K. The file has a size of 1280K. + $ xfs_io -f -s \ + -c "pwrite -S 0xab -b 256K 0 256K" \ + -c "pwrite -S 0xcd -b 256K 512K 256K" \ + -c "pwrite -S 0xef -b 256K 768K 256K" \ + -c "pwrite -S 0x73 -b 256K 1024K 256K" \ + /mnt/foobar + + # Make sure it's durably persisted. We want the last committed super + # block to point to this particular file extent layout. + sync + + # Now truncate our file to a smaller size, falling within a position of + # the second extent. This sets the full sync runtime flag on the inode. + # Then fsync the file to log it and clear the full sync flag from the + # inode. The third extent is no longer part of the file and therefore + # it is not logged. + $ xfs_io -c "truncate 800K" -c "fsync" /mnt/foobar + + # Now do a clone operation that only clones the hole and sets back the + # file size to match the size it had before the truncate operation + # (1280K).
+ $ xfs_io \ + -c "reflink /mnt/foobar 256K 1024K 256K" \ + -c "fsync" \ + /mnt/foobar + + # File data before power failure: + $ od -A d -t x1 /mnt/foobar + 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab + * + 0262144 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * + 0524288 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd + * + 0786432 ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef + * + 0819200 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * + 1310720 + + <power failure> + + # Mount the fs again to replay the log tree. + $ mount /dev/sdi /mnt + + # File data after power failure: + $ od -A d -t x1 /mnt/foobar + 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab + * + 0262144 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * + 0524288 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd + * + 0786432 ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef + * + 0819200 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * + 1048576 73 73 73 73 73 73 73 73 73 73 73 73 73 73 73 73 + * + 1310720 + +The range from 1024K to 1280K should correspond to a hole but instead it +points to stale data, to the 256K extent that should not exist after the +truncate operation. + +The issue does not exist when not using NO_HOLES, because for that case +we use file extent items to represent holes, these are found and copied +during the loop that iterates over extents at btrfs_clone(), and that +causes btrfs_replace_file_extents() to be called with a non-NULL +extent_info argument and therefore set the full sync runtime flag on the +inode. + +So fix this by making the code that deals with a trailing hole during +cloning, at btrfs_clone(), to set the full sync flag on the inode, if the +range starts at or beyond the current i_size. + +A test case for fstests will follow soon. + +Backporting notes: for kernel 5.4 the change goes to ioctl.c into +btrfs_clone before the last call to btrfs_punch_hole_range.
+ +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/reflink.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +--- a/fs/btrfs/reflink.c ++++ b/fs/btrfs/reflink.c +@@ -550,6 +550,24 @@ process_slot: + */ + btrfs_release_path(path); + ++ /* ++ * When using NO_HOLES and we are cloning a range that covers ++ * only a hole (no extents) into a range beyond the current ++ * i_size, punching a hole in the target range will not create ++ * an extent map defining a hole, because the range starts at or ++ * beyond current i_size. If the file previously had an i_size ++ * greater than the new i_size set by this clone operation, we ++ * need to make sure the next fsync is a full fsync, so that it ++ * detects and logs a hole covering a range from the current ++ * i_size to the new i_size. If the clone range covers extents, ++ * besides a hole, then we know the full sync flag was already ++ * set by previous calls to btrfs_replace_file_extents() that ++ * replaced file extent items. ++ */ ++ if (last_dest_end >= i_size_read(inode)) ++ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, ++ &BTRFS_I(inode)->runtime_flags); ++ + ret = btrfs_replace_file_extents(inode, path, last_dest_end, + destoff + len - 1, NULL, &trans); + if (ret) diff --git a/queue-5.11/btrfs-fix-warning-when-creating-a-directory-with-smack-enabled.patch b/queue-5.11/btrfs-fix-warning-when-creating-a-directory-with-smack-enabled.patch new file mode 100644 index 00000000000..58e5c019ef6 --- /dev/null +++ b/queue-5.11/btrfs-fix-warning-when-creating-a-directory-with-smack-enabled.patch @@ -0,0 +1,146 @@ +From fd57a98d6f0c98fa295813087f13afb26c224e73 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 26 Feb 2021 17:51:44 +0000 +Subject: btrfs: fix warning when creating a directory with smack enabled + +From: Filipe Manana + +commit fd57a98d6f0c98fa295813087f13afb26c224e73 upstream. 
+ +When we have smack enabled, during the creation of a directory smack may +attempt to add a "smack transmute" xattr on the inode, which results in +the following warning and trace: + + WARNING: CPU: 3 PID: 2548 at fs/btrfs/transaction.c:537 start_transaction+0x489/0x4f0 + Modules linked in: nft_objref nf_conntrack_netbios_ns (...) + CPU: 3 PID: 2548 Comm: mkdir Not tainted 5.9.0-rc2smack+ #81 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 + RIP: 0010:start_transaction+0x489/0x4f0 + Code: e9 be fc ff ff (...) + RSP: 0018:ffffc90001887d10 EFLAGS: 00010202 + RAX: ffff88816f1e0000 RBX: 0000000000000201 RCX: 0000000000000003 + RDX: 0000000000000201 RSI: 0000000000000002 RDI: ffff888177849000 + RBP: ffff888177849000 R08: 0000000000000001 R09: 0000000000000004 + R10: ffffffff825e8f7a R11: 0000000000000003 R12: ffffffffffffffe2 + R13: 0000000000000000 R14: ffff88803d884270 R15: ffff8881680d8000 + FS: 00007f67317b8440(0000) GS:ffff88817bcc0000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f67247a22a8 CR3: 000000004bfbc002 CR4: 0000000000370ee0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + ? slab_free_freelist_hook+0xea/0x1b0 + ? trace_hardirqs_on+0x1c/0xe0 + btrfs_setxattr_trans+0x3c/0xf0 + __vfs_setxattr+0x63/0x80 + smack_d_instantiate+0x2d3/0x360 + security_d_instantiate+0x29/0x40 + d_instantiate_new+0x38/0x90 + btrfs_mkdir+0x1cf/0x1e0 + vfs_mkdir+0x14f/0x200 + do_mkdirat+0x6d/0x110 + do_syscall_64+0x2d/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x7f673196ae6b + Code: 8b 05 11 (...) 
+ RSP: 002b:00007ffc3c679b18 EFLAGS: 00000246 ORIG_RAX: 0000000000000053 + RAX: ffffffffffffffda RBX: 00000000000001ff RCX: 00007f673196ae6b + RDX: 0000000000000000 RSI: 00000000000001ff RDI: 00007ffc3c67a30d + RBP: 00007ffc3c67a30d R08: 00000000000001ff R09: 0000000000000000 + R10: 000055d3e39fe930 R11: 0000000000000246 R12: 0000000000000000 + R13: 00007ffc3c679cd8 R14: 00007ffc3c67a30d R15: 00007ffc3c679ce0 + irq event stamp: 11029 + hardirqs last enabled at (11037): [] console_unlock+0x486/0x670 + hardirqs last disabled at (11044): [] console_unlock+0xa1/0x670 + softirqs last enabled at (8864): [] asm_call_on_stack+0xf/0x20 + softirqs last disabled at (8851): [] asm_call_on_stack+0xf/0x20 + +This happens because at btrfs_mkdir() we call d_instantiate_new() while +holding a transaction handle, which results in the following call chain: + + btrfs_mkdir() + trans = btrfs_start_transaction(root, 5); + + d_instantiate_new() + smack_d_instantiate() + __vfs_setxattr() + btrfs_setxattr_trans() + btrfs_start_transaction() + start_transaction() + WARN_ON() + --> a transaction start has TRANS_EXTWRITERS + set in its type + h->orig_rsv = h->block_rsv + h->block_rsv = NULL + + btrfs_end_transaction(trans) + +Besides the warning triggered at start_transaction, we set the handle's +block_rsv to NULL which may cause some surprises later on. + +So fix this by making btrfs_setxattr_trans() not start a transaction when +we already have a handle on one, stored in current->journal_info, and use +that handle. We are good to use the handle because at btrfs_mkdir() we did +reserve space for the xattr and the inode item.
+ +Reported-by: Casey Schaufler +CC: stable@vger.kernel.org # 5.4+ +Acked-by: Casey Schaufler +Tested-by: Casey Schaufler +Link: https://lore.kernel.org/linux-btrfs/434d856f-bd7b-4889-a6ec-e81aaebfa735@schaufler-ca.com/ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/xattr.c | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *i + { + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; ++ const bool start_trans = (current->journal_info == NULL); + int ret; + +- trans = btrfs_start_transaction(root, 2); +- if (IS_ERR(trans)) +- return PTR_ERR(trans); ++ if (start_trans) { ++ /* ++ * 1 unit for inserting/updating/deleting the xattr ++ * 1 unit for the inode item update ++ */ ++ trans = btrfs_start_transaction(root, 2); ++ if (IS_ERR(trans)) ++ return PTR_ERR(trans); ++ } else { ++ /* ++ * This can happen when smack is enabled and a directory is being ++ * created. It happens through d_instantiate_new(), which calls ++ * smack_d_instantiate(), which in turn calls __vfs_setxattr() to ++ * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the ++ * inode. We have already reserved space for the xattr and inode ++ * update at btrfs_mkdir(), so just use the transaction handle. ++ * We don't join or start a transaction, as that will reset the ++ * block_rsv of the handle and trigger a warning for the start ++ * case. 
++ */ ++ ASSERT(strncmp(name, XATTR_SECURITY_PREFIX, ++ XATTR_SECURITY_PREFIX_LEN) == 0); ++ trans = current->journal_info; ++ } + + ret = btrfs_setxattr(trans, inode, name, value, size, flags); + if (ret) +@@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *i + ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); + BUG_ON(ret); + out: +- btrfs_end_transaction(trans); ++ if (start_trans) ++ btrfs_end_transaction(trans); + return ret; + } + diff --git a/queue-5.11/btrfs-free-correct-amount-of-space-in-btrfs_delayed_inode_reserve_metadata.patch b/queue-5.11/btrfs-free-correct-amount-of-space-in-btrfs_delayed_inode_reserve_metadata.patch new file mode 100644 index 00000000000..b6b54b03130 --- /dev/null +++ b/queue-5.11/btrfs-free-correct-amount-of-space-in-btrfs_delayed_inode_reserve_metadata.patch @@ -0,0 +1,37 @@ +From 0f9c03d824f6f522d3bc43629635c9765546ebc5 Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Mon, 22 Feb 2021 18:40:42 +0200 +Subject: btrfs: free correct amount of space in btrfs_delayed_inode_reserve_metadata + +From: Nikolay Borisov + +commit 0f9c03d824f6f522d3bc43629635c9765546ebc5 upstream. + +Following commit f218ea6c4792 ("btrfs: delayed-inode: Remove wrong +qgroup meta reservation calls") this function now reserves num_bytes, +rather than the fixed amount of nodesize. As such this requires the +same amount to be freed in case of failure. Fix this by adjusting +the amount we are freeing. 
+ +Fixes: f218ea6c4792 ("btrfs: delayed-inode: Remove wrong qgroup meta reservation calls") +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Qu Wenruo +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/delayed-inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -649,7 +649,7 @@ static int btrfs_delayed_inode_reserve_m + btrfs_ino(inode), + num_bytes, 1); + } else { +- btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); ++ btrfs_qgroup_free_meta_prealloc(root, num_bytes); + } + return ret; + } diff --git a/queue-5.11/btrfs-tree-checker-do-not-error-out-if-extent-ref-hash-doesn-t-match.patch b/queue-5.11/btrfs-tree-checker-do-not-error-out-if-extent-ref-hash-doesn-t-match.patch new file mode 100644 index 00000000000..b9f5de0b1a2 --- /dev/null +++ b/queue-5.11/btrfs-tree-checker-do-not-error-out-if-extent-ref-hash-doesn-t-match.patch @@ -0,0 +1,91 @@ +From 1119a72e223f3073a604f8fccb3a470ccd8a4416 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Tue, 16 Feb 2021 15:43:22 -0500 +Subject: btrfs: tree-checker: do not error out if extent ref hash doesn't match +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Josef Bacik + +commit 1119a72e223f3073a604f8fccb3a470ccd8a4416 upstream. + +The tree checker checks the extent ref hash at read and write time to +make sure we do not corrupt the file system. Generally extent +references go inline, but if we have enough of them we need to make an +item, which looks like + +key.objectid = <bytenr> +key.type = <BTRFS_EXTENT_DATA_REF_KEY> +key.offset = hash(tree, owner, offset) + +However if key.offset collides with an unrelated extent reference we'll +simply key.offset++ until we get something that doesn't collide. +Obviously this doesn't match at tree checker time, and thus we error +while writing out the transaction.
This is relatively easy to +reproduce, simply do something like the following + + xfs_io -f -c "pwrite 0 1M" file + offset=2 + + for i in {0..10000} + do + xfs_io -c "reflink file 0 ${offset}M 1M" file + offset=$(( offset + 2 )) + done + + xfs_io -c "reflink file 0 17999258914816 1M" file + xfs_io -c "reflink file 0 35998517829632 1M" file + xfs_io -c "reflink file 0 53752752058368 1M" file + + btrfs filesystem sync + +And the sync will error out because we'll abort the transaction. The +magic values above are used because they generate hash collisions with +the first file in the main subvol. + +The fix for this is to remove the hash value check from tree checker, as +we have no idea which offset ours should belong to. + +Reported-by: Tuomas Lähdekorpi +Fixes: 0785a9aacf9d ("btrfs: tree-checker: Add EXTENT_DATA_REF check") +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +[ add comment] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 16 ++++------------ + 1 file changed, 4 insertions(+), 12 deletions(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct + return -EUCLEAN; + } + for (; ptr < end; ptr += sizeof(*dref)) { +- u64 root_objectid; +- u64 owner; + u64 offset; +- u64 hash; + ++ /* ++ * We cannot check the extent_data_ref hash due to possible ++ * overflow from the leaf due to hash collisions. 
++ */ + dref = (struct btrfs_extent_data_ref *)ptr; +- root_objectid = btrfs_extent_data_ref_root(leaf, dref); +- owner = btrfs_extent_data_ref_objectid(leaf, dref); + offset = btrfs_extent_data_ref_offset(leaf, dref); +- hash = hash_extent_data_ref(root_objectid, owner, offset); +- if (unlikely(hash != key->offset)) { +- extent_err(leaf, slot, +- "invalid extent data ref hash, item has 0x%016llx key has 0x%016llx", +- hash, key->offset); +- return -EUCLEAN; +- } + if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { + extent_err(leaf, slot, + "invalid extent data backref offset, have %llu expect aligned to %u", diff --git a/queue-5.11/btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-quota-reservation-errors.patch b/queue-5.11/btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-quota-reservation-errors.patch new file mode 100644 index 00000000000..d19cae4e608 --- /dev/null +++ b/queue-5.11/btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-quota-reservation-errors.patch @@ -0,0 +1,40 @@ +From 4f6a49de64fd1b1dba5229c02047376da7cf24fd Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Tue, 23 Feb 2021 15:20:42 +0200 +Subject: btrfs: unlock extents in btrfs_zero_range in case of quota reservation errors + +From: Nikolay Borisov + +commit 4f6a49de64fd1b1dba5229c02047376da7cf24fd upstream. + +If btrfs_qgroup_reserve_data returns an error (i.e quota limit reached) +the handling logic directly goes to the 'out' label without first +unlocking the extent range between lockstart, lockend. This results in +deadlocks as other processes try to lock the same extent. 
+ +Fixes: a7f8b1c2ac21 ("btrfs: file: reserve qgroup space after the hole punch range is locked") +CC: stable@vger.kernel.org # 5.10+ +Reviewed-by: Qu Wenruo +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/file.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -3264,8 +3264,11 @@ reserve_space: + goto out; + ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, + alloc_start, bytes_to_reserve); +- if (ret) ++ if (ret) { ++ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, ++ lockend, &cached_state); + goto out; ++ } + ret = btrfs_prealloc_file_range(inode, mode, alloc_start, + alloc_end - alloc_start, + i_blocksize(inode), diff --git a/queue-5.11/btrfs-validate-qgroup-inherit-for-snap_create_v2-ioctl.patch b/queue-5.11/btrfs-validate-qgroup-inherit-for-snap_create_v2-ioctl.patch new file mode 100644 index 00000000000..db91a12abbc --- /dev/null +++ b/queue-5.11/btrfs-validate-qgroup-inherit-for-snap_create_v2-ioctl.patch @@ -0,0 +1,60 @@ +From 5011c5a663b9c6d6aff3d394f11049b371199627 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Wed, 17 Feb 2021 09:04:34 +0300 +Subject: btrfs: validate qgroup inherit for SNAP_CREATE_V2 ioctl + +From: Dan Carpenter + +commit 5011c5a663b9c6d6aff3d394f11049b371199627 upstream. + +The problem is we're copying "inherit" from user space but we don't +necessarily know that we're copying enough data for a 64 byte +struct. Then the next problem is that 'inherit' has a variable size +array at the end, and we have to verify that array is the size we +expected. 
+ +Fixes: 6f72c7e20dba ("Btrfs: add qgroup inheritance") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Dan Carpenter +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1926,7 +1926,10 @@ static noinline int btrfs_ioctl_snap_cre + if (vol_args->flags & BTRFS_SUBVOL_RDONLY) + readonly = true; + if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { +- if (vol_args->size > PAGE_SIZE) { ++ u64 nums; ++ ++ if (vol_args->size < sizeof(*inherit) || ++ vol_args->size > PAGE_SIZE) { + ret = -EINVAL; + goto free_args; + } +@@ -1935,6 +1938,20 @@ static noinline int btrfs_ioctl_snap_cre + ret = PTR_ERR(inherit); + goto free_args; + } ++ ++ if (inherit->num_qgroups > PAGE_SIZE || ++ inherit->num_ref_copies > PAGE_SIZE || ++ inherit->num_excl_copies > PAGE_SIZE) { ++ ret = -EINVAL; ++ goto free_inherit; ++ } ++ ++ nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + ++ 2 * inherit->num_excl_copies; ++ if (vol_args->size != struct_size(inherit, qgroups, nums)) { ++ ret = -EINVAL; ++ goto free_inherit; ++ } + } + + ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd, diff --git a/queue-5.11/dm-bufio-subtract-the-number-of-initial-sectors-in-dm_bufio_get_device_size.patch b/queue-5.11/dm-bufio-subtract-the-number-of-initial-sectors-in-dm_bufio_get_device_size.patch new file mode 100644 index 00000000000..e0df7405706 --- /dev/null +++ b/queue-5.11/dm-bufio-subtract-the-number-of-initial-sectors-in-dm_bufio_get_device_size.patch @@ -0,0 +1,40 @@ +From a14e5ec66a7a66e57b24e2469f9212a78460207e Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 23 Feb 2021 21:21:20 +0100 +Subject: dm bufio: subtract the number of initial sectors in dm_bufio_get_device_size + +From: Mikulas Patocka + +commit a14e5ec66a7a66e57b24e2469f9212a78460207e upstream. 
+ +dm_bufio_get_device_size returns the device size in blocks. Before +returning the value, we must subtract the number of starting +sectors. The number of starting sectors may not be divisible by block +size. + +Note that currently, no target is using dm_bufio_set_sector_offset and +dm_bufio_get_device_size simultaneously, so this change has no effect. +However, an upcoming dm-verity-fec fix needs this change. + +Signed-off-by: Mikulas Patocka +Reviewed-by: Milan Broz +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-bufio.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/md/dm-bufio.c ++++ b/drivers/md/dm-bufio.c +@@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_siz + sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) + { + sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT; ++ if (s >= c->start) ++ s -= c->start; ++ else ++ s = 0; + if (likely(c->sectors_per_block_bits >= 0)) + s >>= c->sectors_per_block_bits; + else diff --git a/queue-5.11/dm-verity-fix-fec-for-rs-roots-unaligned-to-block-size.patch b/queue-5.11/dm-verity-fix-fec-for-rs-roots-unaligned-to-block-size.patch new file mode 100644 index 00000000000..6d8b8ee8c0c --- /dev/null +++ b/queue-5.11/dm-verity-fix-fec-for-rs-roots-unaligned-to-block-size.patch @@ -0,0 +1,142 @@ +From df7b59ba9245c4a3115ebaa905e3e5719a3810da Mon Sep 17 00:00:00 2001 +From: Milan Broz +Date: Tue, 23 Feb 2021 21:21:21 +0100 +Subject: dm verity: fix FEC for RS roots unaligned to block size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Milan Broz + +commit df7b59ba9245c4a3115ebaa905e3e5719a3810da upstream. + +Optional Forward Error Correction (FEC) code in dm-verity uses +Reed-Solomon code and should support roots from 2 to 24. + +The error correction parity bytes (of roots lengths per RS block) are +stored on a separate device in sequence without any padding. 
+ +Currently, to access FEC device, the dm-verity-fec code uses dm-bufio +client with block size set to verity data block (usually 4096 or 512 +bytes). + +Because this block size is not divisible by some (most!) of the roots +supported lengths, data repair cannot work for partially stored parity +bytes. + +This fix changes FEC device dm-bufio block size to "roots << SECTOR_SHIFT" +where we can be sure that the full parity data is always available. +(There cannot be partial FEC blocks because parity must cover whole +sectors.) + +Because the optional FEC starting offset could be unaligned to this +new block size, we have to use dm_bufio_set_sector_offset() to +configure it. + +The problem is easily reproduced using veritysetup, e.g. for roots=13: + + # create verity device with RS FEC + dd if=/dev/urandom of=data.img bs=4096 count=8 status=none + veritysetup format data.img hash.img --fec-device=fec.img --fec-roots=13 | awk '/^Root hash/{ print $3 }' >roothash + + # create an erasure that should be always repairable with this roots setting + dd if=/dev/zero of=data.img conv=notrunc bs=1 count=8 seek=4088 status=none + + # try to read it through dm-verity + veritysetup open data.img test hash.img --fec-device=fec.img --fec-roots=13 $(cat roothash) + dd if=/dev/mapper/test of=/dev/null bs=4096 status=noxfer + # wait for possible recursive recovery in kernel + udevadm settle + veritysetup close test + +With this fix, errors are properly repaired. + device-mapper: verity-fec: 7:1: FEC 0: corrected 8 errors + ... + +Without it, FEC code usually ends on unrecoverable failure in RS decoder: + device-mapper: verity-fec: 7:1: FEC 0: failed to correct: -74 + ... + +This problem is present in all kernels since the FEC code's +introduction (kernel 4.5). + +It is thought that this problem is not visible in Android ecosystem +because it always uses a default RS roots=2. 
+ +Depends-on: a14e5ec66a7a ("dm bufio: subtract the number of initial sectors in dm_bufio_get_device_size") +Signed-off-by: Milan Broz +Tested-by: Jérôme Carretero +Reviewed-by: Sami Tolvanen +Cc: stable@vger.kernel.org # 4.5+ +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-verity-fec.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +--- a/drivers/md/dm-verity-fec.c ++++ b/drivers/md/dm-verity-fec.c +@@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_veri + static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, + unsigned *offset, struct dm_buffer **buf) + { +- u64 position, block; ++ u64 position, block, rem; + u8 *res; + + position = (index + rsb) * v->fec->roots; +- block = position >> v->data_dev_block_bits; +- *offset = (unsigned)(position - (block << v->data_dev_block_bits)); ++ block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem); ++ *offset = (unsigned)rem; + +- res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); ++ res = dm_bufio_read(v->fec->bufio, block, buf); + if (IS_ERR(res)) { + DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", + v->data_dev->name, (unsigned long long)rsb, +- (unsigned long long)(v->fec->start + block), +- PTR_ERR(res)); ++ (unsigned long long)block, PTR_ERR(res)); + *buf = NULL; + } + +@@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_ver + + /* read the next block when we run out of parity bytes */ + offset += v->fec->roots; +- if (offset >= 1 << v->data_dev_block_bits) { ++ if (offset >= v->fec->roots << SECTOR_SHIFT) { + dm_bufio_release(buf); + + par = fec_read_parity(v, rsb, block_offset, &offset, &buf); +@@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v) + { + struct dm_verity_fec *f = v->fec; + struct dm_target *ti = v->ti; +- u64 hash_blocks; ++ u64 hash_blocks, fec_blocks; + int ret; + + if (!verity_fec_is_enabled(v)) { +@@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity 
*v) + } + + f->bufio = dm_bufio_client_create(f->dev->bdev, +- 1 << v->data_dev_block_bits, ++ f->roots << SECTOR_SHIFT, + 1, 0, NULL, NULL); + if (IS_ERR(f->bufio)) { + ti->error = "Cannot initialize FEC bufio client"; + return PTR_ERR(f->bufio); + } + +- if (dm_bufio_get_device_size(f->bufio) < +- ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) { ++ dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT)); ++ ++ fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT); ++ if (dm_bufio_get_device_size(f->bufio) < fec_blocks) { + ti->error = "FEC device is too small"; + return -E2BIG; + } diff --git a/queue-5.11/drm-amd-pm-correct-arcturus-mmthm_baco_cntl-register-address.patch b/queue-5.11/drm-amd-pm-correct-arcturus-mmthm_baco_cntl-register-address.patch new file mode 100644 index 00000000000..e987ae9b356 --- /dev/null +++ b/queue-5.11/drm-amd-pm-correct-arcturus-mmthm_baco_cntl-register-address.patch @@ -0,0 +1,52 @@ +From 6efda1671312e8432216ee8b106e71fa3102e1d3 Mon Sep 17 00:00:00 2001 +From: Evan Quan +Date: Fri, 19 Feb 2021 16:18:47 +0800 +Subject: drm/amd/pm: correct Arcturus mmTHM_BACO_CNTL register address + +From: Evan Quan + +commit 6efda1671312e8432216ee8b106e71fa3102e1d3 upstream. + +Arcturus has a different register address from other SMU V11 +ASICs. 
+ +Signed-off-by: Evan Quan +Acked-by: Guchun Chen +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +@@ -78,6 +78,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish + #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xC000 + #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0xE + ++#define mmTHM_BACO_CNTL_ARCT 0xA7 ++#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0 ++ + static int link_width[] = {0, 1, 2, 4, 8, 12, 16}; + static int link_speed[] = {25, 50, 80, 160}; + +@@ -1581,9 +1584,15 @@ int smu_v11_0_baco_set_state(struct smu_ + break; + default: + if (!ras || !ras->supported) { +- data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); +- data |= 0x80000000; +- WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); ++ if (adev->asic_type == CHIP_ARCTURUS) { ++ data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT); ++ data |= 0x80000000; ++ WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT, data); ++ } else { ++ data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); ++ data |= 0x80000000; ++ WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); ++ } + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0, NULL); + } else { diff --git a/queue-5.11/drm-amdgpu-disable-vcn-for-navi12-sku.patch b/queue-5.11/drm-amdgpu-disable-vcn-for-navi12-sku.patch new file mode 100644 index 00000000000..33a063f6201 --- /dev/null +++ b/queue-5.11/drm-amdgpu-disable-vcn-for-navi12-sku.patch @@ -0,0 +1,42 @@ +From 0c61ac8134ffc851681ce5d4bd60d97c3d5aed27 Mon Sep 17 00:00:00 2001 +From: "Asher.Song" +Date: Wed, 24 Feb 2021 18:41:34 +0800 +Subject: drm/amdgpu:disable VCN for Navi12 SKU + +From: Asher.Song + +commit 0c61ac8134ffc851681ce5d4bd60d97c3d5aed27 upstream. + +Navi12 0x7360/C7 SKU has no video support, so remove it. 
+ +Reviewed-by: Guchun Chen +Signed-off-by: Asher.Song +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/nv.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/nv.c ++++ b/drivers/gpu/drm/amd/amdgpu/nv.c +@@ -498,7 +498,8 @@ static bool nv_is_headless_sku(struct pc + { + if ((pdev->device == 0x731E && + (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || +- (pdev->device == 0x7340 && pdev->revision == 0xC9)) ++ (pdev->device == 0x7340 && pdev->revision == 0xC9) || ++ (pdev->device == 0x7360 && pdev->revision == 0xC7)) + return true; + return false; + } +@@ -568,7 +569,8 @@ int nv_set_ip_blocks(struct amdgpu_devic + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); +- amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); ++ if (!nv_is_headless_sku(adev->pdev)) ++ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); + break; diff --git a/queue-5.11/drm-amdgpu-fix-parameter-error-of-rreg32_pcie-in-amdgpu_regs_pcie.patch b/queue-5.11/drm-amdgpu-fix-parameter-error-of-rreg32_pcie-in-amdgpu_regs_pcie.patch new file mode 100644 index 00000000000..c1b2a59a86a --- /dev/null +++ b/queue-5.11/drm-amdgpu-fix-parameter-error-of-rreg32_pcie-in-amdgpu_regs_pcie.patch @@ -0,0 +1,40 @@ +From 1aa46901ee51c1c5779b3b239ea0374a50c6d9ff Mon Sep 17 00:00:00 2001 +From: Kevin Wang +Date: Tue, 2 Mar 2021 15:54:00 +0800 +Subject: drm/amdgpu: fix parameter error of RREG32_PCIE() in amdgpu_regs_pcie + +From: Kevin Wang + +commit 1aa46901ee51c1c5779b3b239ea0374a50c6d9ff upstream. 
+ +The register offset does not need to be divided by 4 when passed to RREG32_PCIE(). + +Signed-off-by: Kevin Wang +Reviewed-by: Lijo Lazar +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +@@ -356,7 +356,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_ + while (size) { + uint32_t value; + +- value = RREG32_PCIE(*pos >> 2); ++ value = RREG32_PCIE(*pos); + r = put_user(value, (uint32_t *)buf); + if (r) { + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); +@@ -423,7 +423,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_ + return r; + } + +- WREG32_PCIE(*pos >> 2, value); ++ WREG32_PCIE(*pos, value); + + result += 4; + buf += 4; diff --git a/queue-5.11/drm-amdgpu-only-check-for-s0ix-if-amd_pmc-is-configured.patch b/queue-5.11/drm-amdgpu-only-check-for-s0ix-if-amd_pmc-is-configured.patch new file mode 100644 index 00000000000..61dff8c3880 --- /dev/null +++ b/queue-5.11/drm-amdgpu-only-check-for-s0ix-if-amd_pmc-is-configured.patch @@ -0,0 +1,37 @@ +From 31ada99bdd1b4d6b80462eeb87d383f374409e2a Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Thu, 25 Feb 2021 10:21:49 -0500 +Subject: drm/amdgpu: Only check for S0ix if AMD_PMC is configured + +From: Alex Deucher + +commit 31ada99bdd1b4d6b80462eeb87d383f374409e2a upstream. + +The S0ix check only makes sense if the AMD PMC driver is +present. We need to use the legacy S3 paths when the +PMC driver is not present. 
+ +Reviewed-by: Prike Liang +Reviewed-by: Rajneesh Bhardwaj +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +@@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_devi + */ + bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) + { ++#if defined(CONFIG_AMD_PMC) + if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { + if (adev->flags & AMD_IS_APU) + return true; + } +- ++#endif + return false; + } diff --git a/queue-5.11/io_uring-ignore-double-poll-add-on-the-same-waitqueue-head.patch b/queue-5.11/io_uring-ignore-double-poll-add-on-the-same-waitqueue-head.patch new file mode 100644 index 00000000000..e6b70908d69 --- /dev/null +++ b/queue-5.11/io_uring-ignore-double-poll-add-on-the-same-waitqueue-head.patch @@ -0,0 +1,121 @@ +From 1c3b3e6527e57156bf4082f11c2151957560fe6a Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 28 Feb 2021 16:07:30 -0700 +Subject: io_uring: ignore double poll add on the same waitqueue head + +From: Jens Axboe + +commit 1c3b3e6527e57156bf4082f11c2151957560fe6a upstream. 
+ +syzbot reports a deadlock, attempting to lock the same spinlock twice: + +============================================ +WARNING: possible recursive locking detected +5.11.0-syzkaller #0 Not tainted +-------------------------------------------- +swapper/1/0 is trying to acquire lock: +ffff88801b2b1130 (&runtime->sleep){..-.}-{2:2}, at: spin_lock include/linux/spinlock.h:354 [inline] +ffff88801b2b1130 (&runtime->sleep){..-.}-{2:2}, at: io_poll_double_wake+0x25f/0x6a0 fs/io_uring.c:4960 + +but task is already holding lock: +ffff88801b2b3130 (&runtime->sleep){..-.}-{2:2}, at: __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:137 + +other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&runtime->sleep); + lock(&runtime->sleep); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + +2 locks held by swapper/1/0: + #0: ffff888147474908 (&group->lock){..-.}-{2:2}, at: _snd_pcm_stream_lock_irqsave+0x9f/0xd0 sound/core/pcm_native.c:170 + #1: ffff88801b2b3130 (&runtime->sleep){..-.}-{2:2}, at: __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:137 + +stack backtrace: +CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.11.0-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:79 [inline] + dump_stack+0xfa/0x151 lib/dump_stack.c:120 + print_deadlock_bug kernel/locking/lockdep.c:2829 [inline] + check_deadlock kernel/locking/lockdep.c:2872 [inline] + validate_chain kernel/locking/lockdep.c:3661 [inline] + __lock_acquire.cold+0x14c/0x3b4 kernel/locking/lockdep.c:4900 + lock_acquire kernel/locking/lockdep.c:5510 [inline] + lock_acquire+0x1ab/0x730 kernel/locking/lockdep.c:5475 + __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] + _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151 + spin_lock include/linux/spinlock.h:354 [inline] + io_poll_double_wake+0x25f/0x6a0 fs/io_uring.c:4960 + 
__wake_up_common+0x147/0x650 kernel/sched/wait.c:108 + __wake_up_common_lock+0xd0/0x130 kernel/sched/wait.c:138 + snd_pcm_update_state+0x46a/0x540 sound/core/pcm_lib.c:203 + snd_pcm_update_hw_ptr0+0xa75/0x1a50 sound/core/pcm_lib.c:464 + snd_pcm_period_elapsed+0x160/0x250 sound/core/pcm_lib.c:1805 + dummy_hrtimer_callback+0x94/0x1b0 sound/drivers/dummy.c:378 + __run_hrtimer kernel/time/hrtimer.c:1519 [inline] + __hrtimer_run_queues+0x609/0xe40 kernel/time/hrtimer.c:1583 + hrtimer_run_softirq+0x17b/0x360 kernel/time/hrtimer.c:1600 + __do_softirq+0x29b/0x9f6 kernel/softirq.c:345 + invoke_softirq kernel/softirq.c:221 [inline] + __irq_exit_rcu kernel/softirq.c:422 [inline] + irq_exit_rcu+0x134/0x200 kernel/softirq.c:434 + sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1100 + + asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:632 +RIP: 0010:native_save_fl arch/x86/include/asm/irqflags.h:29 [inline] +RIP: 0010:arch_local_save_flags arch/x86/include/asm/irqflags.h:70 [inline] +RIP: 0010:arch_irqs_disabled arch/x86/include/asm/irqflags.h:137 [inline] +RIP: 0010:acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline] +RIP: 0010:acpi_idle_do_entry+0x1c9/0x250 drivers/acpi/processor_idle.c:516 +Code: dd 38 6e f8 84 db 75 ac e8 54 32 6e f8 e8 0f 1c 74 f8 e9 0c 00 00 00 e8 45 32 6e f8 0f 00 2d 4e 4a c5 00 e8 39 32 6e f8 fb f4 <9c> 5b 81 e3 00 02 00 00 fa 31 ff 48 89 de e8 14 3a 6e f8 48 85 db +RSP: 0018:ffffc90000d47d18 EFLAGS: 00000293 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: ffff8880115c3780 RSI: ffffffff89052537 RDI: 0000000000000000 +RBP: ffff888141127064 R08: 0000000000000001 R09: 0000000000000001 +R10: ffffffff81794168 R11: 0000000000000000 R12: 0000000000000001 +R13: ffff888141127000 R14: ffff888141127064 R15: ffff888143331804 + acpi_idle_enter+0x361/0x500 drivers/acpi/processor_idle.c:647 + cpuidle_enter_state+0x1b1/0xc80 drivers/cpuidle/cpuidle.c:237 + cpuidle_enter+0x4a/0xa0 
drivers/cpuidle/cpuidle.c:351 + call_cpuidle kernel/sched/idle.c:158 [inline] + cpuidle_idle_call kernel/sched/idle.c:239 [inline] + do_idle+0x3e1/0x590 kernel/sched/idle.c:300 + cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:397 + start_secondary+0x274/0x350 arch/x86/kernel/smpboot.c:272 + secondary_startup_64_no_verify+0xb0/0xbb + +which is due to the driver doing poll_wait() twice on the same +wait_queue_head. That is perfectly valid, but from checking the rest +of the kernel tree, it's the only driver that does this. + +We can handle this just fine, we just need to ignore the second addition +as we'll get woken just fine on the first one. + +Cc: stable@vger.kernel.org # 5.8+ +Fixes: 18bceab101ad ("io_uring: allow POLL_ADD with double poll_wait() users") +Reported-by: syzbot+28abd693db9e92c160d8@syzkaller.appspotmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -5316,6 +5316,9 @@ static void __io_queue_proc(struct io_po + pt->error = -EINVAL; + return; + } ++ /* double add on the same waitqueue head, ignore */ ++ if (poll->head == head) ++ return; + poll = kmalloc(sizeof(*poll), GFP_ATOMIC); + if (!poll) { + pt->error = -ENOMEM; diff --git a/queue-5.11/pm-runtime-update-device-status-before-letting-suppliers-suspend.patch b/queue-5.11/pm-runtime-update-device-status-before-letting-suppliers-suspend.patch new file mode 100644 index 00000000000..13b52e98e90 --- /dev/null +++ b/queue-5.11/pm-runtime-update-device-status-before-letting-suppliers-suspend.patch @@ -0,0 +1,122 @@ +From 44cc89f764646b2f1f2ea5d1a08b230131707851 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Thu, 25 Feb 2021 19:23:27 +0100 +Subject: PM: runtime: Update device status before letting suppliers suspend + +From: Rafael J. Wysocki + +commit 44cc89f764646b2f1f2ea5d1a08b230131707851 upstream. 
+ +Because the PM-runtime status of the device is not updated in +__rpm_callback(), attempts to suspend the suppliers of the given +device triggered by rpm_put_suppliers() called by it may fail. + +Fix this by making __rpm_callback() update the device's status to +RPM_SUSPENDED before calling rpm_put_suppliers() if the current +status of the device is RPM_SUSPENDING and the callback just invoked +by it has returned 0 (success). + +While at it, modify the code in __rpm_callback() to always check +the device's PM-runtime status under its PM lock. + +Link: https://lore.kernel.org/linux-pm/CAPDyKFqm06KDw_p8WXsM4dijDbho4bb6T4k50UqqvR1_COsp8g@mail.gmail.com/ +Fixes: 21d5c57b3726 ("PM / runtime: Use device links") +Reported-by: Elaine Zhang +Diagnosed-by: Ulf Hansson +Signed-off-by: Rafael J. Wysocki +Tested-by: Elaine Zhang +Reviewed-by: Ulf Hansson +Cc: 4.10+ # 4.10+ +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/power/runtime.c | 62 +++++++++++++++++++++++++------------------ + 1 file changed, 37 insertions(+), 25 deletions(-) + +--- a/drivers/base/power/runtime.c ++++ b/drivers/base/power/runtime.c +@@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct dev + static int __rpm_callback(int (*cb)(struct device *), struct device *dev) + __releases(&dev->power.lock) __acquires(&dev->power.lock) + { +- int retval, idx; + bool use_links = dev->power.links_count > 0; ++ bool get = false; ++ int retval, idx; ++ bool put; + + if (dev->power.irq_safe) { + spin_unlock(&dev->power.lock); ++ } else if (!use_links) { ++ spin_unlock_irq(&dev->power.lock); + } else { ++ get = dev->power.runtime_status == RPM_RESUMING; ++ + spin_unlock_irq(&dev->power.lock); + +- /* +- * Resume suppliers if necessary. +- * +- * The device's runtime PM status cannot change until this +- * routine returns, so it is safe to read the status outside of +- * the lock. 
+- */ +- if (use_links && dev->power.runtime_status == RPM_RESUMING) { ++ /* Resume suppliers if necessary. */ ++ if (get) { + idx = device_links_read_lock(); + + retval = rpm_get_suppliers(dev); +@@ -355,24 +355,36 @@ static int __rpm_callback(int (*cb)(stru + + if (dev->power.irq_safe) { + spin_lock(&dev->power.lock); +- } else { +- /* +- * If the device is suspending and the callback has returned +- * success, drop the usage counters of the suppliers that have +- * been reference counted on its resume. +- * +- * Do that if resume fails too. +- */ +- if (use_links +- && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) +- || (dev->power.runtime_status == RPM_RESUMING && retval))) { +- idx = device_links_read_lock(); ++ return retval; ++ } + +- fail: +- rpm_put_suppliers(dev); ++ spin_lock_irq(&dev->power.lock); + +- device_links_read_unlock(idx); +- } ++ if (!use_links) ++ return retval; ++ ++ /* ++ * If the device is suspending and the callback has returned success, ++ * drop the usage counters of the suppliers that have been reference ++ * counted on its resume. ++ * ++ * Do that if the resume fails too. 
++ */ ++ put = dev->power.runtime_status == RPM_SUSPENDING && !retval; ++ if (put) ++ __update_runtime_status(dev, RPM_SUSPENDED); ++ else ++ put = get && retval; ++ ++ if (put) { ++ spin_unlock_irq(&dev->power.lock); ++ ++ idx = device_links_read_lock(); ++ ++fail: ++ rpm_put_suppliers(dev); ++ ++ device_links_read_unlock(idx); + + spin_lock_irq(&dev->power.lock); + } diff --git a/queue-5.11/ring-buffer-force-before_stamp-and-write_stamp-to-be-different-on-discard.patch b/queue-5.11/ring-buffer-force-before_stamp-and-write_stamp-to-be-different-on-discard.patch new file mode 100644 index 00000000000..cd664f070c6 --- /dev/null +++ b/queue-5.11/ring-buffer-force-before_stamp-and-write_stamp-to-be-different-on-discard.patch @@ -0,0 +1,60 @@ +From 6f6be606e763f2da9fc21de00538c97fe4ca1492 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 3 Mar 2021 18:03:52 -0500 +Subject: ring-buffer: Force before_stamp and write_stamp to be different on discard + +From: Steven Rostedt (VMware) + +commit 6f6be606e763f2da9fc21de00538c97fe4ca1492 upstream. + +Part of the logic of the new time stamp code depends on the before_stamp and +the write_stamp to be different if the write_stamp does not match the last +event on the buffer, as it will be used to calculate the delta of the next +event written on the buffer. + +The discard logic depends on this, as the next event to come in needs to +inject a full timestamp as it can not rely on the last event timestamp in +the buffer because it is unknown due to events after it being discarded. But +by changing the write_stamp back to the time before it, it forces the next +event to use a full time stamp, instead of relying on it. + +The issue came when a full time stamp was used for the event, and +rb_time_delta() returns zero in that case. The update to the write_stamp +(which subtracts delta) made it not change. 
Then when the event is removed +from the buffer, because the before_stamp and write_stamp still match, the +next event written would calculate its delta from the write_stamp, but that +would be wrong as the write_stamp is of the time of the event that was +discarded. + +In the case that the delta change being made to write_stamp is zero, set the +before_stamp to zero as well, and this will force the next event to inject a +full timestamp and not use the current write_stamp. + +Cc: stable@vger.kernel.org +Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -2822,6 +2822,17 @@ rb_try_to_discard(struct ring_buffer_per + return 0; + + /* ++ * It's possible that the event time delta is zero ++ * (has the same time stamp as the previous event) ++ * in which case write_stamp and before_stamp could ++ * be the same. In such a case, force before_stamp ++ * to be different than write_stamp. It doesn't ++ * matter what it is, as long as its different. 
++ */ ++ if (!delta) ++ rb_time_set(&cpu_buffer->before_stamp, 0); ++ ++ /* + * If an event were to come in now, it would see that the + * write_stamp and the before_stamp are different, and assume + * that this event just added itself before updating diff --git a/queue-5.11/series b/queue-5.11/series index a7eb0d6763a..855083d1dfd 100644 --- a/queue-5.11/series +++ b/queue-5.11/series @@ -14,7 +14,6 @@ btrfs-tree-checker-do-not-error-out-if-extent-ref-hash-doesn-t-match.patch btrfs-fix-race-between-extent-freeing-allocation-when-using-bitmaps.patch btrfs-validate-qgroup-inherit-for-snap_create_v2-ioctl.patch btrfs-free-correct-amount-of-space-in-btrfs_delayed_inode_reserve_metadata.patch -btrfs-don-t-flush-from-btrfs_delayed_inode_reserve_metadata.patch btrfs-fix-spurious-free_space_tree-remount-warning.patch btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-quota-reservation-errors.patch btrfs-fix-warning-when-creating-a-directory-with-smack-enabled.patch diff --git a/queue-5.11/tpm-tpm_tis-decorate-tpm_get_timeouts-with-request_locality.patch b/queue-5.11/tpm-tpm_tis-decorate-tpm_get_timeouts-with-request_locality.patch new file mode 100644 index 00000000000..80246c06fbc --- /dev/null +++ b/queue-5.11/tpm-tpm_tis-decorate-tpm_get_timeouts-with-request_locality.patch @@ -0,0 +1,66 @@ +From a5665ec2affdba21bff3b0d4d3aed83b3951e8ff Mon Sep 17 00:00:00 2001 +From: Jarkko Sakkinen +Date: Sat, 20 Feb 2021 00:55:59 +0200 +Subject: tpm, tpm_tis: Decorate tpm_get_timeouts() with request_locality() + +From: Jarkko Sakkinen + +commit a5665ec2affdba21bff3b0d4d3aed83b3951e8ff upstream. 
+ +This is shown with Samsung Chromebook Pro (Caroline) with TPM 1.2 +(SLB 9670): + +[ 4.324298] TPM returned invalid status +[ 4.324806] WARNING: CPU: 2 PID: 1 at drivers/char/tpm/tpm_tis_core.c:275 tpm_tis_status+0x86/0x8f + +Background +========== + +TCG PC Client Platform TPM Profile (PTP) Specification, paragraph 6.1 FIFO +Interface Locality Usage per Register, Table 39 Register Behavior Based on +Locality Setting for FIFO - a read attempt to TPM_STS_x Registers returns +0xFF in case of lack of locality. + +The fix +======= + +Decorate tpm_get_timeouts() with request_locality() and release_locality(). + +Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") +Cc: James Bottomley +Cc: Guenter Roeck +Cc: Laurent Bigonville +Cc: stable@vger.kernel.org +Reported-by: Lukasz Majczak +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm_tis_core.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/drivers/char/tpm/tpm_tis_core.c ++++ b/drivers/char/tpm/tpm_tis_core.c +@@ -1029,11 +1029,21 @@ int tpm_tis_core_init(struct device *dev + init_waitqueue_head(&priv->read_queue); + init_waitqueue_head(&priv->int_queue); + if (irq != -1) { +- /* Before doing irq testing issue a command to the TPM in polling mode ++ /* ++ * Before doing irq testing issue a command to the TPM in polling mode + * to make sure it works. May as well use that command to set the + * proper timeouts for the driver. 
+ */ +- if (tpm_get_timeouts(chip)) { ++ ++ rc = request_locality(chip, 0); ++ if (rc < 0) ++ goto out_err; ++ ++ rc = tpm_get_timeouts(chip); ++ ++ release_locality(chip, 0); ++ ++ if (rc) { + dev_err(dev, "Could not get TPM timeouts and durations\n"); + rc = -ENODEV; + goto out_err; diff --git a/queue-5.11/tpm-tpm_tis-decorate-tpm_tis_gen_interrupt-with-request_locality.patch b/queue-5.11/tpm-tpm_tis-decorate-tpm_tis_gen_interrupt-with-request_locality.patch new file mode 100644 index 00000000000..c4091991590 --- /dev/null +++ b/queue-5.11/tpm-tpm_tis-decorate-tpm_tis_gen_interrupt-with-request_locality.patch @@ -0,0 +1,69 @@ +From d53a6adfb553969809eb2b736a976ebb5146cd95 Mon Sep 17 00:00:00 2001 +From: Lukasz Majczak +Date: Tue, 16 Feb 2021 10:17:49 +0200 +Subject: tpm, tpm_tis: Decorate tpm_tis_gen_interrupt() with request_locality() + +From: Lukasz Majczak + +commit d53a6adfb553969809eb2b736a976ebb5146cd95 upstream. + +This is shown with Samsung Chromebook Pro (Caroline) with TPM 1.2 +(SLB 9670): + +[ 4.324298] TPM returned invalid status +[ 4.324806] WARNING: CPU: 2 PID: 1 at drivers/char/tpm/tpm_tis_core.c:275 tpm_tis_status+0x86/0x8f + +Background +========== + +TCG PC Client Platform TPM Profile (PTP) Specification, paragraph 6.1 FIFO +Interface Locality Usage per Register, Table 39 Register Behavior Based on +Locality Setting for FIFO - a read attempt to TPM_STS_x Registers returns +0xFF in case of lack of locality. + +The fix +======= + +Decorate tpm_tis_gen_interrupt() with request_locality() and +release_locality(). 
+ +Cc: Laurent Bigonville +Cc: James Bottomley +Cc: Guenter Roeck +Cc: stable@vger.kernel.org +Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") +Signed-off-by: Lukasz Majczak +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm_tis_core.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +--- a/drivers/char/tpm/tpm_tis_core.c ++++ b/drivers/char/tpm/tpm_tis_core.c +@@ -707,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct + const char *desc = "attempting to generate an interrupt"; + u32 cap2; + cap_t cap; ++ int ret; + ++ /* TPM 2.0 */ + if (chip->flags & TPM_CHIP_FLAG_TPM2) + return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc); +- else +- return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, +- 0); ++ ++ /* TPM 1.2 */ ++ ret = request_locality(chip, 0); ++ if (ret < 0) ++ return ret; ++ ++ ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0); ++ ++ release_locality(chip, 0); ++ ++ return ret; + } + + /* Register the IRQ and issue a command that will cause an interrupt. If an -- 2.47.3