From: Greg Kroah-Hartman Date: Mon, 23 Dec 2019 17:20:45 +0000 (-0500) Subject: 5.4-stable patches X-Git-Tag: v4.14.161~42 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=852620e164662ba5ac079b9e314fd9154e570e70;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: alsa-hda-ca0132-avoid-endless-loop.patch alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch alsa-hda-ca0132-keep-power-on-during-processing-dsp-response.patch alsa-pcm-avoid-possible-info-leaks-from-pcm-stream-buffers.patch btrfs-abort-transaction-after-failed-inode-updates-in-create_subvol.patch btrfs-do-not-call-synchronize_srcu-in-inode_tree_del.patch btrfs-do-not-leak-reloc-root-if-we-fail-to-read-the-fs-root.patch btrfs-don-t-double-lock-the-subvol_sem-for-rename-exchange.patch btrfs-fix-missing-data-checksums-after-replaying-a-log-tree.patch btrfs-fix-removal-logic-of-the-tree-mod-log-that-leads-to-use-after-free-issues.patch btrfs-handle-enoent-in-btrfs_uuid_tree_iterate.patch btrfs-make-tree-checker-detect-checksum-items-with-overlapping-ranges.patch btrfs-return-error-pointer-from-alloc_test_extent_buffer.patch btrfs-send-remove-warn_on-for-readonly-mount.patch btrfs-skip-log-replay-on-orphaned-roots.patch --- diff --git a/queue-5.4/alsa-hda-ca0132-avoid-endless-loop.patch b/queue-5.4/alsa-hda-ca0132-avoid-endless-loop.patch new file mode 100644 index 00000000000..4cc0dc80fb7 --- /dev/null +++ b/queue-5.4/alsa-hda-ca0132-avoid-endless-loop.patch @@ -0,0 +1,42 @@ +From cb04fc3b6b076f67d228a0b7d096c69ad486c09c Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Fri, 13 Dec 2019 09:51:10 +0100 +Subject: ALSA: hda/ca0132 - Avoid endless loop + +From: Takashi Iwai + +commit cb04fc3b6b076f67d228a0b7d096c69ad486c09c upstream. + +Introduce a timeout to dspio_clear_response_queue() so that it won't +be caught in an endless loop even if the hardware doesn't respond +properly. 
+ +Fixes: a73d511c4867 ("ALSA: hda/ca0132: Add unsol handler for DSP and jack detection") +Cc: +Link: https://lore.kernel.org/r/20191213085111.22855-3-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/patch_ca0132.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/sound/pci/hda/patch_ca0132.c ++++ b/sound/pci/hda/patch_ca0132.c +@@ -1809,13 +1809,14 @@ struct scp_msg { + + static void dspio_clear_response_queue(struct hda_codec *codec) + { ++ unsigned long timeout = jiffies + msecs_to_jiffies(1000); + unsigned int dummy = 0; +- int status = -1; ++ int status; + + /* clear all from the response queue */ + do { + status = dspio_read(codec, &dummy); +- } while (status == 0); ++ } while (status == 0 && time_before(jiffies, timeout)); + } + + static int dspio_get_response_data(struct hda_codec *codec) diff --git a/queue-5.4/alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch b/queue-5.4/alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch new file mode 100644 index 00000000000..ae236fa3dd8 --- /dev/null +++ b/queue-5.4/alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch @@ -0,0 +1,65 @@ +From 42fb6b1d41eb5905d77c06cad2e87b70289bdb76 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Fri, 13 Dec 2019 09:51:11 +0100 +Subject: ALSA: hda/ca0132 - Fix work handling in delayed HP detection + +From: Takashi Iwai + +commit 42fb6b1d41eb5905d77c06cad2e87b70289bdb76 upstream. + +CA0132 has the delayed HP jack detection code that is invoked from the +unsol handler, but it does a few weird things: it contains the cancel +of a work inside the work handler, and yet it misses the cancel-sync +call at (runtime-)suspend. This patch addresses those issues. 
+ +Fixes: 15c2b3cc09a3 ("ALSA: hda/ca0132 - Fix possible workqueue stall") +Cc: +Link: https://lore.kernel.org/r/20191213085111.22855-4-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/patch_ca0132.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/sound/pci/hda/patch_ca0132.c ++++ b/sound/pci/hda/patch_ca0132.c +@@ -7607,11 +7607,10 @@ static void hp_callback(struct hda_codec + /* Delay enabling the HP amp, to let the mic-detection + * state machine run. + */ +- cancel_delayed_work(&spec->unsol_hp_work); +- schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); + tbl = snd_hda_jack_tbl_get(codec, cb->nid); + if (tbl) + tbl->block_report = 1; ++ schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); + } + + static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb) +@@ -8457,12 +8456,25 @@ static void ca0132_reboot_notify(struct + codec->patch_ops.free(codec); + } + ++#ifdef CONFIG_PM ++static int ca0132_suspend(struct hda_codec *codec) ++{ ++ struct ca0132_spec *spec = codec->spec; ++ ++ cancel_delayed_work_sync(&spec->unsol_hp_work); ++ return 0; ++} ++#endif ++ + static const struct hda_codec_ops ca0132_patch_ops = { + .build_controls = ca0132_build_controls, + .build_pcms = ca0132_build_pcms, + .init = ca0132_init, + .free = ca0132_free, + .unsol_event = snd_hda_jack_unsol_event, ++#ifdef CONFIG_PM ++ .suspend = ca0132_suspend, ++#endif + .reboot_notify = ca0132_reboot_notify, + }; + diff --git a/queue-5.4/alsa-hda-ca0132-keep-power-on-during-processing-dsp-response.patch b/queue-5.4/alsa-hda-ca0132-keep-power-on-during-processing-dsp-response.patch new file mode 100644 index 00000000000..1766d1c39a5 --- /dev/null +++ b/queue-5.4/alsa-hda-ca0132-keep-power-on-during-processing-dsp-response.patch @@ -0,0 +1,41 @@ +From 377bc0cfabce0244632dada19060839ced4e6949 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Fri, 13 Dec 2019 
09:51:09 +0100 +Subject: ALSA: hda/ca0132 - Keep power on during processing DSP response + +From: Takashi Iwai + +commit 377bc0cfabce0244632dada19060839ced4e6949 upstream. + +We need to keep power on while processing the DSP response via unsol +event. Each snd_hda_codec_read() call does the power management, so +it should work normally, but still it's safer to keep the power up for +the whole function. + +Fixes: a73d511c4867 ("ALSA: hda/ca0132: Add unsol handler for DSP and jack detection") +Cc: +Link: https://lore.kernel.org/r/20191213085111.22855-2-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/patch_ca0132.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/sound/pci/hda/patch_ca0132.c ++++ b/sound/pci/hda/patch_ca0132.c +@@ -7588,12 +7588,14 @@ static void ca0132_process_dsp_response( + struct ca0132_spec *spec = codec->spec; + + codec_dbg(codec, "ca0132_process_dsp_response\n"); ++ snd_hda_power_up_pm(codec); + if (spec->wait_scp) { + if (dspio_get_response_data(codec) >= 0) + spec->wait_scp = 0; + } + + dspio_clear_response_queue(codec); ++ snd_hda_power_down_pm(codec); + } + + static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) diff --git a/queue-5.4/alsa-pcm-avoid-possible-info-leaks-from-pcm-stream-buffers.patch b/queue-5.4/alsa-pcm-avoid-possible-info-leaks-from-pcm-stream-buffers.patch new file mode 100644 index 00000000000..3d221d558cb --- /dev/null +++ b/queue-5.4/alsa-pcm-avoid-possible-info-leaks-from-pcm-stream-buffers.patch @@ -0,0 +1,43 @@ +From add9d56d7b3781532208afbff5509d7382fb6efe Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Wed, 11 Dec 2019 16:57:42 +0100 +Subject: ALSA: pcm: Avoid possible info leaks from PCM stream buffers + +From: Takashi Iwai + +commit add9d56d7b3781532208afbff5509d7382fb6efe upstream. 
+ +The current PCM code doesn't initialize explicitly the buffers +allocated for PCM streams, hence it might leak some uninitialized +kernel data or previous stream contents by mmapping or reading the +buffer before actually starting the stream. + +Since this is a common problem, this patch simply adds the clearance +of the buffer data at hw_params callback. Although this does only +zero-clear no matter which format is used, which doesn't mean the +silence for some formats, but it should be OK because the intention is +just to clear the previous data on the buffer. + +Reported-by: Lionel Koenig +Cc: +Link: https://lore.kernel.org/r/20191211155742.3213-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/core/pcm_native.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/sound/core/pcm_native.c ++++ b/sound/core/pcm_native.c +@@ -705,6 +705,10 @@ static int snd_pcm_hw_params(struct snd_ + while (runtime->boundary * 2 <= LONG_MAX - runtime->buffer_size) + runtime->boundary *= 2; + ++ /* clear the buffer for avoiding possible kernel info leaks */ ++ if (runtime->dma_area && !substream->ops->copy_user) ++ memset(runtime->dma_area, 0, runtime->dma_bytes); ++ + snd_pcm_timer_resolution_change(substream); + snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); + diff --git a/queue-5.4/btrfs-abort-transaction-after-failed-inode-updates-in-create_subvol.patch b/queue-5.4/btrfs-abort-transaction-after-failed-inode-updates-in-create_subvol.patch new file mode 100644 index 00000000000..191db8cc85a --- /dev/null +++ b/queue-5.4/btrfs-abort-transaction-after-failed-inode-updates-in-create_subvol.patch @@ -0,0 +1,46 @@ +From c7e54b5102bf3614cadb9ca32d7be73bad6cecf0 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 6 Dec 2019 09:37:15 -0500 +Subject: btrfs: abort transaction after failed inode updates in create_subvol + +From: Josef Bacik + +commit c7e54b5102bf3614cadb9ca32d7be73bad6cecf0 upstream. 
+ +We can just abort the transaction here, and in fact do that for every +other failure in this function except these two cases. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -705,11 +705,17 @@ static noinline int create_subvol(struct + + btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2); + ret = btrfs_update_inode(trans, root, dir); +- BUG_ON(ret); ++ if (ret) { ++ btrfs_abort_transaction(trans, ret); ++ goto fail; ++ } + + ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid, + btrfs_ino(BTRFS_I(dir)), index, name, namelen); +- BUG_ON(ret); ++ if (ret) { ++ btrfs_abort_transaction(trans, ret); ++ goto fail; ++ } + + ret = btrfs_uuid_tree_add(trans, root_item->uuid, + BTRFS_UUID_KEY_SUBVOL, objectid); diff --git a/queue-5.4/btrfs-do-not-call-synchronize_srcu-in-inode_tree_del.patch b/queue-5.4/btrfs-do-not-call-synchronize_srcu-in-inode_tree_del.patch new file mode 100644 index 00000000000..6f96b641c25 --- /dev/null +++ b/queue-5.4/btrfs-do-not-call-synchronize_srcu-in-inode_tree_del.patch @@ -0,0 +1,65 @@ +From f72ff01df9cf5db25c76674cac16605992d15467 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Tue, 19 Nov 2019 13:59:35 -0500 +Subject: btrfs: do not call synchronize_srcu() in inode_tree_del + +From: Josef Bacik + +commit f72ff01df9cf5db25c76674cac16605992d15467 upstream. + +Testing with the new fsstress uncovered a pretty nasty deadlock with +lookup and snapshot deletion. 
+ +Process A +unlink + -> final iput + -> inode_tree_del + -> synchronize_srcu(subvol_srcu) + +Process B +btrfs_lookup <- srcu_read_lock() acquired here + -> btrfs_iget + -> find inode that has I_FREEING set + -> __wait_on_freeing_inode() + +We're holding the srcu_read_lock() while doing the iget in order to make +sure our fs root doesn't go away, and then we are waiting for the inode +to finish freeing. However because the free'ing process is doing a +synchronize_srcu() we deadlock. + +Fix this by dropping the synchronize_srcu() in inode_tree_del(). We +don't need people to stop accessing the fs root at this point, we're +only adding our empty root to the dead roots list. + +A larger much more invasive fix is forthcoming to address how we deal +with fs roots, but this fixes the immediate problem. + +Fixes: 76dda93c6ae2 ("Btrfs: add snapshot/subvolume destroy ioctl") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -5697,7 +5697,6 @@ static void inode_tree_add(struct inode + + static void inode_tree_del(struct inode *inode) + { +- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; + int empty = 0; + +@@ -5710,7 +5709,6 @@ static void inode_tree_del(struct inode + spin_unlock(&root->inode_lock); + + if (empty && btrfs_root_refs(&root->root_item) == 0) { +- synchronize_srcu(&fs_info->subvol_srcu); + spin_lock(&root->inode_lock); + empty = RB_EMPTY_ROOT(&root->inode_tree); + spin_unlock(&root->inode_lock); diff --git a/queue-5.4/btrfs-do-not-leak-reloc-root-if-we-fail-to-read-the-fs-root.patch b/queue-5.4/btrfs-do-not-leak-reloc-root-if-we-fail-to-read-the-fs-root.patch new file mode 100644 index 00000000000..a2dcd21a0eb --- /dev/null +++ 
b/queue-5.4/btrfs-do-not-leak-reloc-root-if-we-fail-to-read-the-fs-root.patch @@ -0,0 +1,37 @@ +From ca1aa2818a53875cfdd175fb5e9a2984e997cce9 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 6 Dec 2019 09:37:18 -0500 +Subject: btrfs: do not leak reloc root if we fail to read the fs root + +From: Josef Bacik + +commit ca1aa2818a53875cfdd175fb5e9a2984e997cce9 upstream. + +If we fail to read the fs root corresponding with a reloc root we'll +just break out and free the reloc roots. But we remove our current +reloc_root from this list higher up, which means we'll leak this +reloc_root. Fix this by adding ourselves back to the reloc_roots list +so we are properly cleaned up. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/relocation.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -4555,6 +4555,7 @@ int btrfs_recover_relocation(struct btrf + fs_root = read_fs_root(fs_info, reloc_root->root_key.offset); + if (IS_ERR(fs_root)) { + err = PTR_ERR(fs_root); ++ list_add_tail(&reloc_root->root_list, &reloc_roots); + goto out_free; + } + diff --git a/queue-5.4/btrfs-don-t-double-lock-the-subvol_sem-for-rename-exchange.patch b/queue-5.4/btrfs-don-t-double-lock-the-subvol_sem-for-rename-exchange.patch new file mode 100644 index 00000000000..b949c6cece2 --- /dev/null +++ b/queue-5.4/btrfs-don-t-double-lock-the-subvol_sem-for-rename-exchange.patch @@ -0,0 +1,49 @@ +From 943eb3bf25f4a7b745dd799e031be276aa104d82 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Tue, 19 Nov 2019 13:59:20 -0500 +Subject: btrfs: don't double lock the subvol_sem for rename exchange + +From: Josef Bacik + +commit 943eb3bf25f4a7b745dd799e031be276aa104d82 upstream. + +If we're rename exchanging two subvols we'll try to lock this lock +twice, which is bad. 
Just lock once if either of the ino's are subvols. + +Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9535,9 +9535,8 @@ static int btrfs_rename_exchange(struct + btrfs_init_log_ctx(&ctx_dest, new_inode); + + /* close the race window with snapshot create/destroy ioctl */ +- if (old_ino == BTRFS_FIRST_FREE_OBJECTID) +- down_read(&fs_info->subvol_sem); +- if (new_ino == BTRFS_FIRST_FREE_OBJECTID) ++ if (old_ino == BTRFS_FIRST_FREE_OBJECTID || ++ new_ino == BTRFS_FIRST_FREE_OBJECTID) + down_read(&fs_info->subvol_sem); + + /* +@@ -9771,9 +9770,8 @@ out_fail: + ret = ret ? ret : ret2; + } + out_notrans: +- if (new_ino == BTRFS_FIRST_FREE_OBJECTID) +- up_read(&fs_info->subvol_sem); +- if (old_ino == BTRFS_FIRST_FREE_OBJECTID) ++ if (new_ino == BTRFS_FIRST_FREE_OBJECTID || ++ old_ino == BTRFS_FIRST_FREE_OBJECTID) + up_read(&fs_info->subvol_sem); + + ASSERT(list_empty(&ctx_root.list)); diff --git a/queue-5.4/btrfs-fix-missing-data-checksums-after-replaying-a-log-tree.patch b/queue-5.4/btrfs-fix-missing-data-checksums-after-replaying-a-log-tree.patch new file mode 100644 index 00000000000..bb63dcc57c6 --- /dev/null +++ b/queue-5.4/btrfs-fix-missing-data-checksums-after-replaying-a-log-tree.patch @@ -0,0 +1,266 @@ +From 40e046acbd2f369cfbf93c3413639c66514cec2d Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 5 Dec 2019 16:58:30 +0000 +Subject: Btrfs: fix missing data checksums after replaying a log tree + +From: Filipe Manana + +commit 40e046acbd2f369cfbf93c3413639c66514cec2d upstream. 
+ +When logging a file that has shared extents (reflinked with other files or +with itself), we can end up logging multiple checksum items that cover +overlapping ranges. This confuses the search for checksums at log replay +time causing some checksums to never be added to the fs/subvolume tree. + +Consider the following example of a file that shares the same extent at +offsets 0 and 256Kb: + + [ bytenr 13893632, offset 64Kb, len 64Kb ] + 0 64Kb + + [ bytenr 13631488, offset 64Kb, len 192Kb ] + 64Kb 256Kb + + [ bytenr 13893632, offset 0, len 256Kb ] + 256Kb 512Kb + +When logging the inode, at tree-log.c:copy_items(), when processing the +file extent item at offset 0, we log a checksum item covering the range +13959168 to 14024704, which corresponds to 13893632 + 64Kb and 13893632 + +64Kb + 64Kb, respectively. + +Later when processing the extent item at offset 256K, we log the checksums +for the range from 13893632 to 14155776 (which corresponds to 13893632 + +256Kb). These checksums get merged with the checksum item for the range +from 13631488 to 13893632 (13631488 + 256Kb), logged by a previous fsync. +So after this we get the two following checksum items in the log tree: + + (...) + item 6 key (EXTENT_CSUM EXTENT_CSUM 13631488) itemoff 3095 itemsize 512 + range start 13631488 end 14155776 length 524288 + item 7 key (EXTENT_CSUM EXTENT_CSUM 13959168) itemoff 3031 itemsize 64 + range start 13959168 end 14024704 length 65536 + +The first one covers the range from the second one, they overlap. + +So far this does not cause a problem after replaying the log, because +when replaying the file extent item for offset 256K, we copy all the +checksums for the extent 13893632 from the log tree to the fs/subvolume +tree, since searching for a checksum item for bytenr 13893632 leaves us +at the first checksum item, which covers the whole range of the extent. 
+ +However if we write 64Kb to file offset 256Kb for example, we will +not be able to find and copy the checksums for the last 128Kb of the +extent at bytenr 13893632, referenced by the file range 384Kb to 512Kb. + +After writing 64Kb into file offset 256Kb we get the following extent +layout for our file: + + [ bytenr 13893632, offset 64K, len 64Kb ] + 0 64Kb + + [ bytenr 13631488, offset 64Kb, len 192Kb ] + 64Kb 256Kb + + [ bytenr 14155776, offset 0, len 64Kb ] + 256Kb 320Kb + + [ bytenr 13893632, offset 64Kb, len 192Kb ] + 320Kb 512Kb + +After fsync'ing the file, if we have a power failure and then mount +the filesystem to replay the log, the following happens: + +1) When replaying the file extent item for file offset 320Kb, we + lookup for the checksums for the extent range from 13959168 + (13893632 + 64Kb) to 14155776 (13893632 + 256Kb), through a call + to btrfs_lookup_csums_range(); + +2) btrfs_lookup_csums_range() finds the checksum item that starts + precisely at offset 13959168 (item 7 in the log tree, shown before); + +3) However that checksum item only covers 64Kb of data, and not 192Kb + of data; + +4) As a result only the checksums for the first 64Kb of data referenced + by the file extent item are found and copied to the fs/subvolume tree. + The remaining 128Kb of data, file range 384Kb to 512Kb, doesn't get + the corresponding data checksums found and copied to the fs/subvolume + tree. + +5) After replaying the log userspace will not be able to read the file + range from 384Kb to 512Kb, because the checksums are missing and + resulting in an -EIO error. 
+ +The following steps reproduce this scenario: + + $ mkfs.btrfs -f /dev/sdc + $ mount /dev/sdc /mnt/sdc + + $ xfs_io -f -c "pwrite -S 0xa3 0 256K" /mnt/sdc/foobar + $ xfs_io -c "fsync" /mnt/sdc/foobar + $ xfs_io -c "pwrite -S 0xc7 256K 256K" /mnt/sdc/foobar + + $ xfs_io -c "reflink /mnt/sdc/foobar 320K 0 64K" /mnt/sdc/foobar + $ xfs_io -c "fsync" /mnt/sdc/foobar + + $ xfs_io -c "pwrite -S 0xe5 256K 64K" /mnt/sdc/foobar + $ xfs_io -c "fsync" /mnt/sdc/foobar + + + + $ mount /dev/sdc /mnt/sdc + $ md5sum /mnt/sdc/foobar + md5sum: /mnt/sdc/foobar: Input/output error + + $ dmesg | tail + [165305.003464] BTRFS info (device sdc): no csum found for inode 257 start 401408 + [165305.004014] BTRFS info (device sdc): no csum found for inode 257 start 405504 + [165305.004559] BTRFS info (device sdc): no csum found for inode 257 start 409600 + [165305.005101] BTRFS info (device sdc): no csum found for inode 257 start 413696 + [165305.005627] BTRFS info (device sdc): no csum found for inode 257 start 417792 + [165305.006134] BTRFS info (device sdc): no csum found for inode 257 start 421888 + [165305.006625] BTRFS info (device sdc): no csum found for inode 257 start 425984 + [165305.007278] BTRFS info (device sdc): no csum found for inode 257 start 430080 + [165305.008248] BTRFS warning (device sdc): csum failed root 5 ino 257 off 393216 csum 0x1337385e expected csum 0x00000000 mirror 1 + [165305.009550] BTRFS warning (device sdc): csum failed root 5 ino 257 off 393216 csum 0x1337385e expected csum 0x00000000 mirror 1 + +Fix this simply by deleting first any checksums, from the log tree, for the +range of the extent we are logging at copy_items(). This ensures we do not +get checksum items in the log tree that have overlapping ranges. + +This is a long time issue that has been present since we have the clone +(and deduplication) ioctl, and can happen both when an extent is shared +between different files and within the same file. + +A test case for fstests follows soon. 
+ +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.h | 2 +- + fs/btrfs/extent-tree.c | 7 ++++--- + fs/btrfs/file-item.c | 7 +++++-- + fs/btrfs/tree-log.c | 29 ++++++++++++++++++++++++++--- + 4 files changed, 36 insertions(+), 9 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -2785,7 +2785,7 @@ struct btrfs_inode_extref *btrfs_find_na + /* file-item.c */ + struct btrfs_dio_private; + int btrfs_del_csums(struct btrfs_trans_handle *trans, +- struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); ++ struct btrfs_root *root, u64 bytenr, u64 len); + blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, + u8 *dst); + blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -1848,8 +1848,8 @@ static int cleanup_ref_head(struct btrfs + btrfs_pin_extent(fs_info, head->bytenr, + head->num_bytes, 1); + if (head->is_data) { +- ret = btrfs_del_csums(trans, fs_info, head->bytenr, +- head->num_bytes); ++ ret = btrfs_del_csums(trans, fs_info->csum_root, ++ head->bytenr, head->num_bytes); + } + } + +@@ -3155,7 +3155,8 @@ static int __btrfs_free_extent(struct bt + btrfs_release_path(path); + + if (is_data) { +- ret = btrfs_del_csums(trans, info, bytenr, num_bytes); ++ ret = btrfs_del_csums(trans, info->csum_root, bytenr, ++ num_bytes); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; +--- a/fs/btrfs/file-item.c ++++ b/fs/btrfs/file-item.c +@@ -590,9 +590,9 @@ static noinline void truncate_one_csum(s + * range of bytes. 
+ */ + int btrfs_del_csums(struct btrfs_trans_handle *trans, +- struct btrfs_fs_info *fs_info, u64 bytenr, u64 len) ++ struct btrfs_root *root, u64 bytenr, u64 len) + { +- struct btrfs_root *root = fs_info->csum_root; ++ struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_path *path; + struct btrfs_key key; + u64 end_byte = bytenr + len; +@@ -602,6 +602,9 @@ int btrfs_del_csums(struct btrfs_trans_h + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + int blocksize_bits = fs_info->sb->s_blocksize_bits; + ++ ASSERT(root == fs_info->csum_root || ++ root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); ++ + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -808,7 +808,8 @@ static noinline int replay_one_extent(st + struct btrfs_ordered_sum, + list); + if (!ret) +- ret = btrfs_del_csums(trans, fs_info, ++ ret = btrfs_del_csums(trans, ++ fs_info->csum_root, + sums->bytenr, + sums->len); + if (!ret) +@@ -3927,6 +3928,28 @@ static int log_inode_item(struct btrfs_t + return 0; + } + ++static int log_csums(struct btrfs_trans_handle *trans, ++ struct btrfs_root *log_root, ++ struct btrfs_ordered_sum *sums) ++{ ++ int ret; ++ ++ /* ++ * Due to extent cloning, we might have logged a csum item that covers a ++ * subrange of a cloned extent, and later we can end up logging a csum ++ * item for a larger subrange of the same extent or the entire range. ++ * This would leave csum items in the log tree that cover the same range ++ * and break the searches for checksums in the log tree, resulting in ++ * some checksums missing in the fs/subvolume tree. So just delete (or ++ * trim and adjust) any existing csum items in the log for this range. 
++ */ ++ ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len); ++ if (ret) ++ return ret; ++ ++ return btrfs_csum_file_blocks(trans, log_root, sums); ++} ++ + static noinline int copy_items(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *dst_path, +@@ -4072,7 +4095,7 @@ static noinline int copy_items(struct bt + struct btrfs_ordered_sum, + list); + if (!ret) +- ret = btrfs_csum_file_blocks(trans, log, sums); ++ ret = log_csums(trans, log, sums); + list_del(&sums->list); + kfree(sums); + } +@@ -4292,7 +4315,7 @@ static int log_extent_csums(struct btrfs + struct btrfs_ordered_sum, + list); + if (!ret) +- ret = btrfs_csum_file_blocks(trans, log_root, sums); ++ ret = log_csums(trans, log_root, sums); + list_del(&sums->list); + kfree(sums); + } diff --git a/queue-5.4/btrfs-fix-removal-logic-of-the-tree-mod-log-that-leads-to-use-after-free-issues.patch b/queue-5.4/btrfs-fix-removal-logic-of-the-tree-mod-log-that-leads-to-use-after-free-issues.patch new file mode 100644 index 00000000000..118290323b8 --- /dev/null +++ b/queue-5.4/btrfs-fix-removal-logic-of-the-tree-mod-log-that-leads-to-use-after-free-issues.patch @@ -0,0 +1,112 @@ +From 6609fee8897ac475378388238456c84298bff802 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 6 Dec 2019 12:27:39 +0000 +Subject: Btrfs: fix removal logic of the tree mod log that leads to use-after-free issues + +From: Filipe Manana + +commit 6609fee8897ac475378388238456c84298bff802 upstream. + +When a tree mod log user no longer needs to use the tree it calls +btrfs_put_tree_mod_seq() to remove itself from the list of users and +delete all no longer used elements of the tree's red black tree, which +should be all elements with a sequence number less then our equals to +the caller's sequence number. 
However the logic is broken because it +can delete and free elements from the red black tree that have a +sequence number greater than the caller's sequence number: + +1) At a point in time we have sequence numbers 1, 2, 3 and 4 in the + tree mod log; + +2) The task which got assigned the sequence number 1 calls + btrfs_put_tree_mod_seq(); + +3) Sequence number 1 is deleted from the list of sequence numbers; + +4) The current minimum sequence number is computed to be the sequence + number 2; + +5) A task using sequence number 2 is at tree_mod_log_rewind() and gets + a pointer to one of its elements from the red black tree through + a call to tree_mod_log_search(); + +6) The task with sequence number 1 iterates the red black tree of tree + modification elements and deletes (and frees) all elements with a + sequence number less than or equal to 2 (the computed minimum sequence + number) - it ends up only leaving elements with sequence numbers of 3 + and 4; + +7) The task with sequence number 2 now uses the pointer to its element, + already freed by the other task, at __tree_mod_log_rewind(), resulting + in a use-after-free issue. When CONFIG_DEBUG_PAGEALLOC=y it produces + a trace like the following: + + [16804.546854] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI + [16804.547451] CPU: 0 PID: 28257 Comm: pool Tainted: G W 5.4.0-rc8-btrfs-next-51 #1 + [16804.548059] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 + [16804.548666] RIP: 0010:rb_next+0x16/0x50 + (...) 
+ [16804.550581] RSP: 0018:ffffb948418ef9b0 EFLAGS: 00010202 + [16804.551227] RAX: 6b6b6b6b6b6b6b6b RBX: ffff90e0247f6600 RCX: 6b6b6b6b6b6b6b6b + [16804.551873] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff90e0247f6600 + [16804.552504] RBP: ffff90dffe0d4688 R08: 0000000000000001 R09: 0000000000000000 + [16804.553136] R10: ffff90dffa4a0040 R11: 0000000000000000 R12: 000000000000002e + [16804.553768] R13: ffff90e0247f6600 R14: 0000000000001663 R15: ffff90dff77862b8 + [16804.554399] FS: 00007f4b197ae700(0000) GS:ffff90e036a00000(0000) knlGS:0000000000000000 + [16804.555039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [16804.555683] CR2: 00007f4b10022000 CR3: 00000002060e2004 CR4: 00000000003606f0 + [16804.556336] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [16804.556968] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [16804.557583] Call Trace: + [16804.558207] __tree_mod_log_rewind+0xbf/0x280 [btrfs] + [16804.558835] btrfs_search_old_slot+0x105/0xd00 [btrfs] + [16804.559468] resolve_indirect_refs+0x1eb/0xc70 [btrfs] + [16804.560087] ? free_extent_buffer.part.19+0x5a/0xc0 [btrfs] + [16804.560700] find_parent_nodes+0x388/0x1120 [btrfs] + [16804.561310] btrfs_check_shared+0x115/0x1c0 [btrfs] + [16804.561916] ? extent_fiemap+0x59d/0x6d0 [btrfs] + [16804.562518] extent_fiemap+0x59d/0x6d0 [btrfs] + [16804.563112] ? __might_fault+0x11/0x90 + [16804.563706] do_vfs_ioctl+0x45a/0x700 + [16804.564299] ksys_ioctl+0x70/0x80 + [16804.564885] ? trace_hardirqs_off_thunk+0x1a/0x20 + [16804.565461] __x64_sys_ioctl+0x16/0x20 + [16804.566020] do_syscall_64+0x5c/0x250 + [16804.566580] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [16804.567153] RIP: 0033:0x7f4b1ba2add7 + (...) 
+ [16804.568907] RSP: 002b:00007f4b197adc88 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 + [16804.569513] RAX: ffffffffffffffda RBX: 00007f4b100210d8 RCX: 00007f4b1ba2add7 + [16804.570133] RDX: 00007f4b100210d8 RSI: 00000000c020660b RDI: 0000000000000003 + [16804.570726] RBP: 000055de05a6cfe0 R08: 0000000000000000 R09: 00007f4b197add44 + [16804.571314] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4b197add48 + [16804.571905] R13: 00007f4b197add40 R14: 00007f4b100210d0 R15: 00007f4b197add50 + (...) + [16804.575623] ---[ end trace 87317359aad4ba50 ]--- + +Fix this by making btrfs_put_tree_mod_seq() skip deletion of elements that +have a sequence number equals to the computed minimum sequence number, and +not just elements with a sequence number greater then that minimum. + +Fixes: bd989ba359f2ac ("Btrfs: add tree modification log functions") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -383,7 +383,7 @@ void btrfs_put_tree_mod_seq(struct btrfs + for (node = rb_first(tm_root); node; node = next) { + next = rb_next(node); + tm = rb_entry(node, struct tree_mod_elem, node); +- if (tm->seq > min_seq) ++ if (tm->seq >= min_seq) + continue; + rb_erase(node, tm_root); + kfree(tm); diff --git a/queue-5.4/btrfs-handle-enoent-in-btrfs_uuid_tree_iterate.patch b/queue-5.4/btrfs-handle-enoent-in-btrfs_uuid_tree_iterate.patch new file mode 100644 index 00000000000..b42d32e20ce --- /dev/null +++ b/queue-5.4/btrfs-handle-enoent-in-btrfs_uuid_tree_iterate.patch @@ -0,0 +1,37 @@ +From 714cd3e8cba6841220dce9063a7388a81de03825 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 6 Dec 2019 11:39:00 -0500 +Subject: btrfs: handle ENOENT in btrfs_uuid_tree_iterate + +From: Josef Bacik + +commit 714cd3e8cba6841220dce9063a7388a81de03825 
upstream. + +If we get an -ENOENT back from btrfs_uuid_iter_rem when iterating the +uuid tree we'll just continue and do btrfs_next_item(). However we've +done a btrfs_release_path() at this point and no longer have a valid +path. So increment the key and go back and do a normal search. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/uuid-tree.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/uuid-tree.c ++++ b/fs/btrfs/uuid-tree.c +@@ -324,6 +324,8 @@ again_search_slot: + } + if (ret < 0 && ret != -ENOENT) + goto out; ++ key.offset++; ++ goto again_search_slot; + } + item_size -= sizeof(subid_le); + offset += sizeof(subid_le); diff --git a/queue-5.4/btrfs-make-tree-checker-detect-checksum-items-with-overlapping-ranges.patch b/queue-5.4/btrfs-make-tree-checker-detect-checksum-items-with-overlapping-ranges.patch new file mode 100644 index 00000000000..b22ddac8bda --- /dev/null +++ b/queue-5.4/btrfs-make-tree-checker-detect-checksum-items-with-overlapping-ranges.patch @@ -0,0 +1,73 @@ +From ad1d8c439978ede77cbf73cbdd11bafe810421a5 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 2 Dec 2019 11:01:03 +0000 +Subject: Btrfs: make tree checker detect checksum items with overlapping ranges + +From: Filipe Manana + +commit ad1d8c439978ede77cbf73cbdd11bafe810421a5 upstream. + +Having checksum items, either on the checksums tree or in a log tree, that +represent ranges that overlap each other is a sign of a corruption. Such +case confuses the checksum lookup code and can result in not being able to +find checksums or find stale checksums. + +So add a check for such case. 
+ +This is motivated by a recent fix for a case where a log tree had checksum +items covering ranges that overlap each other due to extent cloning, and +resulted in missing checksums after replaying the log tree. It also helps +detect past issues such as stale and outdated checksums due to overlapping, +commit 27b9a8122ff71a ("Btrfs: fix csum tree corruption, duplicate and +outdated checksums"). + +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-checker.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -243,7 +243,7 @@ static int check_extent_data_item(struct + } + + static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key, +- int slot) ++ int slot, struct btrfs_key *prev_key) + { + struct btrfs_fs_info *fs_info = leaf->fs_info; + u32 sectorsize = fs_info->sectorsize; +@@ -267,6 +267,20 @@ static int check_csum_item(struct extent + btrfs_item_size_nr(leaf, slot), csumsize); + return -EUCLEAN; + } ++ if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) { ++ u64 prev_csum_end; ++ u32 prev_item_size; ++ ++ prev_item_size = btrfs_item_size_nr(leaf, slot - 1); ++ prev_csum_end = (prev_item_size / csumsize) * sectorsize; ++ prev_csum_end += prev_key->offset; ++ if (prev_csum_end > key->offset) { ++ generic_err(leaf, slot - 1, ++"csum end range (%llu) goes beyond the start range (%llu) of the next csum item", ++ prev_csum_end, key->offset); ++ return -EUCLEAN; ++ } ++ } + return 0; + } + +@@ -1239,7 +1253,7 @@ static int check_leaf_item(struct extent + ret = check_extent_data_item(leaf, key, slot, prev_key); + break; + case BTRFS_EXTENT_CSUM_KEY: +- ret = check_csum_item(leaf, key, slot); ++ ret = check_csum_item(leaf, key, slot, prev_key); + break; + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: diff --git 
a/queue-5.4/btrfs-return-error-pointer-from-alloc_test_extent_buffer.patch b/queue-5.4/btrfs-return-error-pointer-from-alloc_test_extent_buffer.patch new file mode 100644 index 00000000000..c3f16edb262 --- /dev/null +++ b/queue-5.4/btrfs-return-error-pointer-from-alloc_test_extent_buffer.patch @@ -0,0 +1,75 @@ +From b6293c821ea8fa2a631a2112cd86cd435effeb8b Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 3 Dec 2019 14:24:58 +0300 +Subject: btrfs: return error pointer from alloc_test_extent_buffer + +From: Dan Carpenter + +commit b6293c821ea8fa2a631a2112cd86cd435effeb8b upstream. + +Callers of alloc_test_extent_buffer have not correctly interpreted the +return value as error pointer, as alloc_test_extent_buffer should behave +as alloc_extent_buffer. The self-tests were unaffected but +btrfs_find_create_tree_block could call both functions and that would +cause problems up in the call chain. + +Fixes: faa2dbf004e8 ("Btrfs: add sanity tests for new qgroup accounting code") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Dan Carpenter +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent_io.c | 6 ++++-- + fs/btrfs/tests/free-space-tree-tests.c | 4 ++-- + fs/btrfs/tests/qgroup-tests.c | 4 ++-- + 3 files changed, 8 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -5066,12 +5066,14 @@ struct extent_buffer *alloc_test_extent_ + return eb; + eb = alloc_dummy_extent_buffer(fs_info, start); + if (!eb) +- return NULL; ++ return ERR_PTR(-ENOMEM); + eb->fs_info = fs_info; + again: + ret = radix_tree_preload(GFP_NOFS); +- if (ret) ++ if (ret) { ++ exists = ERR_PTR(ret); + goto free_eb; ++ } + spin_lock(&fs_info->buffer_lock); + ret = radix_tree_insert(&fs_info->buffer_radix, + start >> PAGE_SHIFT, eb); +--- a/fs/btrfs/tests/free-space-tree-tests.c ++++ b/fs/btrfs/tests/free-space-tree-tests.c +@@ -463,9 +463,9 @@ static int run_test(test_func_t test_fun + 
root->fs_info->tree_root = root; + + root->node = alloc_test_extent_buffer(root->fs_info, nodesize); +- if (!root->node) { ++ if (IS_ERR(root->node)) { + test_std_err(TEST_ALLOC_EXTENT_BUFFER); +- ret = -ENOMEM; ++ ret = PTR_ERR(root->node); + goto out; + } + btrfs_set_header_level(root->node, 0); +--- a/fs/btrfs/tests/qgroup-tests.c ++++ b/fs/btrfs/tests/qgroup-tests.c +@@ -484,9 +484,9 @@ int btrfs_test_qgroups(u32 sectorsize, u + * *cough*backref walking code*cough* + */ + root->node = alloc_test_extent_buffer(root->fs_info, nodesize); +- if (!root->node) { ++ if (IS_ERR(root->node)) { + test_err("couldn't allocate dummy buffer"); +- ret = -ENOMEM; ++ ret = PTR_ERR(root->node); + goto out; + } + btrfs_set_header_level(root->node, 0); diff --git a/queue-5.4/btrfs-send-remove-warn_on-for-readonly-mount.patch b/queue-5.4/btrfs-send-remove-warn_on-for-readonly-mount.patch new file mode 100644 index 00000000000..cb3f39e5ed1 --- /dev/null +++ b/queue-5.4/btrfs-send-remove-warn_on-for-readonly-mount.patch @@ -0,0 +1,98 @@ +From fbd542971aa1e9ec33212afe1d9b4f1106cd85a1 Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Thu, 5 Dec 2019 19:39:07 +0800 +Subject: btrfs: send: remove WARN_ON for readonly mount + +From: Anand Jain + +commit fbd542971aa1e9ec33212afe1d9b4f1106cd85a1 upstream. + +We log warning if root::orphan_cleanup_state is not set to +ORPHAN_CLEANUP_DONE in btrfs_ioctl_send(). However if the filesystem is +mounted as readonly we skip the orphan item cleanup during the lookup +and root::orphan_cleanup_state remains at the init state 0 instead of +ORPHAN_CLEANUP_DONE (2). So during send in btrfs_ioctl_send() we hit the +warning as below. 
+ + WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); + +WARNING: CPU: 0 PID: 2616 at /Volumes/ws/btrfs-devel/fs/btrfs/send.c:7090 btrfs_ioctl_send+0xb2f/0x18c0 [btrfs] +:: +RIP: 0010:btrfs_ioctl_send+0xb2f/0x18c0 [btrfs] +:: +Call Trace: +:: +_btrfs_ioctl_send+0x7b/0x110 [btrfs] +btrfs_ioctl+0x150a/0x2b00 [btrfs] +:: +do_vfs_ioctl+0xa9/0x620 +? __fget+0xac/0xe0 +ksys_ioctl+0x60/0x90 +__x64_sys_ioctl+0x16/0x20 +do_syscall_64+0x49/0x130 +entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Reproducer: + mkfs.btrfs -fq /dev/sdb + mount /dev/sdb /btrfs + btrfs subvolume create /btrfs/sv1 + btrfs subvolume snapshot -r /btrfs/sv1 /btrfs/ss1 + umount /btrfs + mount -o ro /dev/sdb /btrfs + btrfs send /btrfs/ss1 -f /tmp/f + +The warning exists because having orphan inodes could confuse send and +cause it to fail or produce incorrect streams. The two cases that would +cause such send failures, which are already fixed are: + +1) Inodes that were unlinked - these are orphanized and remain with a + link count of 0. These caused send operations to fail because it + expected to always find at least one path for an inode. However this + is no longer a problem since send is now able to deal with such + inodes since commit 46b2f4590aab ("Btrfs: fix send failure when root + has deleted files still open") and treats them as having been + completely removed (the state after an orphan cleanup is performed). + +2) Inodes that were in the process of being truncated. These resulted in + send not knowing about the truncation and potentially issue write + operations full of zeroes for the range from the new file size to the + old file size. This is no longer a problem because we no longer + create orphan items for truncation since commit f7e9e8fc792f ("Btrfs: + stop creating orphan items for truncate"). + +As such before these commits, the WARN_ON here provided a clue in case +something went wrong. 
Instead of being a warning against the +root::orphan_cleanup_state value, it could have been more accurate by +checking if there were actually any orphan items, and then issue a +warning only if any exists, but that would be more expensive to check. +Since orphanized inodes no longer cause problems for send, just remove +the warning. + +Reported-by: Christoph Anton Mitterer +Link: https://lore.kernel.org/linux-btrfs/21cb5e8d059f6e1496a903fa7bfc0a297e2f5370.camel@scientia.net/ +CC: stable@vger.kernel.org # 4.19+ +Suggested-by: Filipe Manana +Reviewed-by: Filipe Manana +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/send.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -7076,12 +7076,6 @@ long btrfs_ioctl_send(struct file *mnt_f + spin_unlock(&send_root->root_item_lock); + + /* +- * This is done when we lookup the root, it should already be complete +- * by the time we get here. +- */ +- WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); +- +- /* + * Userspace tools do the checks and warn the user if it's + * not RO. + */ diff --git a/queue-5.4/btrfs-skip-log-replay-on-orphaned-roots.patch b/queue-5.4/btrfs-skip-log-replay-on-orphaned-roots.patch new file mode 100644 index 00000000000..822359331b5 --- /dev/null +++ b/queue-5.4/btrfs-skip-log-replay-on-orphaned-roots.patch @@ -0,0 +1,78 @@ +From 9bc574de590510eff899c3ca8dbaf013566b5efe Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 6 Dec 2019 09:37:17 -0500 +Subject: btrfs: skip log replay on orphaned roots + +From: Josef Bacik + +commit 9bc574de590510eff899c3ca8dbaf013566b5efe upstream. + +My fsstress modifications coupled with generic/475 uncovered a failure +to mount and replay the log if we hit an orphaned root. We do not want +to replay the log for an orphan root, but it's completely legitimate to +have an orphaned root with a log attached. 
Fix this by simply skipping +replaying the log. We still need to pin its root node so that we do +not overwrite it while replaying other logs, as we re-read the log root +at every stage of the replay. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-log.c | 23 +++++++++++++++++++++-- + 1 file changed, 21 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -6337,9 +6337,28 @@ again: + wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); + if (IS_ERR(wc.replay_dest)) { + ret = PTR_ERR(wc.replay_dest); ++ ++ /* ++ * We didn't find the subvol, likely because it was ++ * deleted. This is ok, simply skip this log and go to ++ * the next one. ++ * ++ * We need to exclude the root because we can't have ++ * other log replays overwriting this log as we'll read ++ * it back in a few more times. This will keep our ++ * block from being modified, and we'll just bail for ++ * each subsequent pass. 
++ */ ++ if (ret == -ENOENT) ++ ret = btrfs_pin_extent_for_log_replay(fs_info, ++ log->node->start, ++ log->node->len); + free_extent_buffer(log->node); + free_extent_buffer(log->commit_root); + kfree(log); ++ ++ if (!ret) ++ goto next; + btrfs_handle_fs_error(fs_info, ret, + "Couldn't read target root for tree log recovery."); + goto error; +@@ -6371,7 +6390,6 @@ again: + &root->highest_objectid); + } + +- key.offset = found_key.offset - 1; + wc.replay_dest->log_root = NULL; + free_extent_buffer(log->node); + free_extent_buffer(log->commit_root); +@@ -6379,9 +6397,10 @@ again: + + if (ret) + goto error; +- ++next: + if (found_key.offset == 0) + break; ++ key.offset = found_key.offset - 1; + } + btrfs_release_path(path); + diff --git a/queue-5.4/series b/queue-5.4/series index f4e7a8ab922..a0677d1bf89 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -1,3 +1,18 @@ +btrfs-don-t-double-lock-the-subvol_sem-for-rename-exchange.patch +btrfs-do-not-call-synchronize_srcu-in-inode_tree_del.patch +btrfs-make-tree-checker-detect-checksum-items-with-overlapping-ranges.patch +btrfs-return-error-pointer-from-alloc_test_extent_buffer.patch +btrfs-fix-missing-data-checksums-after-replaying-a-log-tree.patch +btrfs-send-remove-warn_on-for-readonly-mount.patch +btrfs-abort-transaction-after-failed-inode-updates-in-create_subvol.patch +btrfs-skip-log-replay-on-orphaned-roots.patch +btrfs-do-not-leak-reloc-root-if-we-fail-to-read-the-fs-root.patch +btrfs-handle-enoent-in-btrfs_uuid_tree_iterate.patch +btrfs-fix-removal-logic-of-the-tree-mod-log-that-leads-to-use-after-free-issues.patch +alsa-pcm-avoid-possible-info-leaks-from-pcm-stream-buffers.patch +alsa-hda-ca0132-keep-power-on-during-processing-dsp-response.patch +alsa-hda-ca0132-avoid-endless-loop.patch +alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch drm-vc4-vc4_hdmi-fill-in-connector-info.patch drm-virtio-switch-virtio_gpu_wait_ioctl-to-gem-helpe.patch drm-mst-fix-query_payload-ack-reply-struct.patch