--- /dev/null
+From b86652be7c83f70bf406bed18ecf55adb9bfb91b Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 10:52:45 -0400
+Subject: btrfs: fix error handling in btrfs_del_csums
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit b86652be7c83f70bf406bed18ecf55adb9bfb91b upstream.
+
+Error injection stress would sometimes fail with checksums on disk that
+did not have a corresponding extent. This occurred because the pattern
+in btrfs_del_csums was
+
+ while (1) {
+ ret = btrfs_search_slot();
+ if (ret < 0)
+ break;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+
+If we got an error from btrfs_search_slot we'd clear the error because
+we were breaking instead of goto out. Instead of using goto out, simply
+handle the cases where we may leave a random value in ret, and get rid
+of the
+
+ ret = 0;
+out:
+
+pattern and simply allow break to have the proper error reporting. With
+this fix we properly abort the transaction and do not commit thinking we
+successfully deleted the csum.
+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file-item.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -599,7 +599,7 @@ int btrfs_del_csums(struct btrfs_trans_h
+ u64 end_byte = bytenr + len;
+ u64 csum_end;
+ struct extent_buffer *leaf;
+- int ret;
++ int ret = 0;
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ int blocksize_bits = fs_info->sb->s_blocksize_bits;
+
+@@ -618,6 +618,7 @@ int btrfs_del_csums(struct btrfs_trans_h
+ path->leave_spinning = 1;
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0) {
++ ret = 0;
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+@@ -674,7 +675,7 @@ int btrfs_del_csums(struct btrfs_trans_h
+ ret = btrfs_del_items(trans, root, path,
+ path->slots[0], del_nr);
+ if (ret)
+- goto out;
++ break;
+ if (key.offset == bytenr)
+ break;
+ } else if (key.offset < bytenr && csum_end > end_byte) {
+@@ -718,8 +719,9 @@ int btrfs_del_csums(struct btrfs_trans_h
+ ret = btrfs_split_item(trans, root, path, &key, offset);
+ if (ret && ret != -EAGAIN) {
+ btrfs_abort_transaction(trans, ret);
+- goto out;
++ break;
+ }
++ ret = 0;
+
+ key.offset = end_byte - 1;
+ } else {
+@@ -729,8 +731,6 @@ int btrfs_del_csums(struct btrfs_trans_h
+ }
+ btrfs_release_path(path);
+ }
+- ret = 0;
+-out:
+ btrfs_free_path(path);
+ return ret;
+ }
--- /dev/null
+From ea7036de0d36c4e6c9508f68789e9567d514333a Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 24 May 2021 11:35:53 +0100
+Subject: btrfs: fix fsync failure and transaction abort after writes to prealloc extents
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit ea7036de0d36c4e6c9508f68789e9567d514333a upstream.
+
+When doing a series of partial writes to different ranges of preallocated
+extents with transaction commits and fsyncs in between, we can end up with
+checksum items that have overlapping ranges in a log tree. This causes an
+fsync to fail with -EIO and abort the transaction, turning the filesystem
+to RO mode, when syncing the log.
+
+For this to happen, we need to have a full fsync of a file following one
+or more fast fsyncs.
+
+The following example reproduces the problem and explains how it happens:
+
+ $ mkfs.btrfs -f /dev/sdc
+ $ mount /dev/sdc /mnt
+
+ # Create our test file with 2 preallocated extents. Leave a 1M hole
+ # between them to ensure that we get two file extent items that will
+ # never be merged into a single one. The extents are contiguous on disk,
+ # which will later result in the checksums for their data to be merged
+ # into a single checksum item in the csums btree.
+ #
+ $ xfs_io -f \
+ -c "falloc 0 1M" \
+ -c "falloc 3M 3M" \
+ /mnt/foobar
+
+ # Now write to the second extent and leave only 1M of it as unwritten,
+ # which corresponds to the file range [4M, 5M[.
+ #
+ # Then fsync the file to flush delalloc and to clear full sync flag from
+ # the inode, so that a future fsync will use the fast code path.
+ #
+ # After the writeback triggered by the fsync we have 3 file extent items
+ # that point to the second extent we previously allocated:
+ #
+ # 1) One file extent item of type BTRFS_FILE_EXTENT_REG that covers the
+ # file range [3M, 4M[
+ #
+ # 2) One file extent item of type BTRFS_FILE_EXTENT_PREALLOC that covers
+ # the file range [4M, 5M[
+ #
+ # 3) One file extent item of type BTRFS_FILE_EXTENT_REG that covers the
+ # file range [5M, 6M[
+ #
+ # All these file extent items have a generation of 6, which is the ID of
+ # the transaction where they were created. The split of the original file
+ # extent item is done at btrfs_mark_extent_written() when ordered extents
+ # complete for the file ranges [3M, 4M[ and [5M, 6M[.
+ #
+ $ xfs_io -c "pwrite -S 0xab 3M 1M" \
+ -c "pwrite -S 0xef 5M 1M" \
+ -c "fsync" \
+ /mnt/foobar
+
+ # Commit the current transaction. This wipes out the log tree created by
+ # the previous fsync.
+ sync
+
+ # Now write to the unwritten range of the second extent we allocated,
+ # corresponding to the file range [4M, 5M[, and fsync the file, which
+ # triggers the fast fsync code path.
+ #
+ # The fast fsync code path sees that there is a new extent map covering
+ # the file range [4M, 5M[ and therefore it will log a checksum item
+ # covering the range [1M, 2M[ of the second extent we allocated.
+ #
+ # Also, after the fsync finishes we no longer have the 3 file extent
+ # items that pointed to 3 sections of the second extent we allocated.
+ # Instead we end up with a single file extent item pointing to the whole
+ # extent, with a type of BTRFS_FILE_EXTENT_REG and a generation of 7 (the
+ # current transaction ID). This is due to the file extent item merging we
+ # do when completing ordered extents into ranges that point to unwritten
+ # (preallocated) extents. This merging is done at
+ # btrfs_mark_extent_written().
+ #
+ $ xfs_io -c "pwrite -S 0xcd 4M 1M" \
+ -c "fsync" \
+ /mnt/foobar
+
+ # Now do some write to our file outside the range of the second extent
+ # that we allocated with fallocate() and truncate the file size from 6M
+ # down to 5M.
+ #
+ # The truncate operation sets the full sync runtime flag on the inode,
+ # forcing the next fsync to use the slow code path. It also changes the
+ # length of the second file extent item so that it represents the file
+ # range [3M, 5M[ and not the range [3M, 6M[ anymore.
+ #
+ # Finally fsync the file. Since this is a fsync that triggers the slow
+ # code path, it will remove all items associated to the inode from the
+ # log tree and then it will scan for file extent items in the
+ # fs/subvolume tree that have a generation matching the current
+ # transaction ID, which is 7. This means it will log 2 file extent
+ # items:
+ #
+ # 1) One for the first extent we allocated, covering the file range
+ # [0, 1M[
+ #
+ # 2) Another for the first 2M of the second extent we allocated,
+ # covering the file range [3M, 5M[
+ #
+ # When logging the first file extent item we log a single checksum item
+ # that has all the checksums for the entire extent.
+ #
+ # When logging the second file extent item, we also lookup for the
+ # checksums that are associated with the range [0, 2M[ of the second
+ # extent we allocated (file range [3M, 5M[), and then we log them with
+ # btrfs_csum_file_blocks(). However that results in ending up with a log
+ # that has two checksum items with ranges that overlap:
+ #
+ # 1) One for the range [1M, 2M[ of the second extent we allocated,
+ # corresponding to the file range [4M, 5M[, which we logged in the
+ # previous fsync that used the fast code path;
+ #
+ # 2) One for the ranges [0, 1M[ and [0, 2M[ of the first and second
+ # extents, respectively, corresponding to the file ranges [0, 1M[
+ # and [3M, 5M[. This one was added during this last fsync that uses
+ # the slow code path and overlaps with the previous one logged by
+ # the previous fast fsync.
+ #
+ # This happens because when logging the checksums for the second
+ # extent, we notice they start at an offset that matches the end of the
+ # checksums item that we logged for the first extent, and because both
+ # extents are contiguous on disk, btrfs_csum_file_blocks() decides to
+ # extend that existing checksums item and append the checksums for the
+ # second extent to this item. The end result is we end up with two
+ # checksum items in the log tree that have overlapping ranges, as
+ # listed before, resulting in the fsync to fail with -EIO and aborting
+ # the transaction, turning the filesystem into RO mode.
+ #
+ $ xfs_io -c "pwrite -S 0xff 0 1M" \
+ -c "truncate 5M" \
+ -c "fsync" \
+ /mnt/foobar
+ fsync: Input/output error
+
+After running the example, dmesg/syslog shows the tree checker complained
+about the checksum items with overlapping ranges and we aborted the
+transaction:
+
+ $ dmesg
+ (...)
+ [756289.557487] BTRFS critical (device sdc): corrupt leaf: root=18446744073709551610 block=30720000 slot=5, csum end range (16777216) goes beyond the start range (15728640) of the next csum item
+ [756289.560583] BTRFS info (device sdc): leaf 30720000 gen 7 total ptrs 7 free space 11677 owner 18446744073709551610
+ [756289.562435] BTRFS info (device sdc): refs 2 lock_owner 0 current 2303929
+ [756289.563654] item 0 key (257 1 0) itemoff 16123 itemsize 160
+ [756289.564649] inode generation 6 size 5242880 mode 100600
+ [756289.565636] item 1 key (257 12 256) itemoff 16107 itemsize 16
+ [756289.566694] item 2 key (257 108 0) itemoff 16054 itemsize 53
+ [756289.567725] extent data disk bytenr 13631488 nr 1048576
+ [756289.568697] extent data offset 0 nr 1048576 ram 1048576
+ [756289.569689] item 3 key (257 108 1048576) itemoff 16001 itemsize 53
+ [756289.570682] extent data disk bytenr 0 nr 0
+ [756289.571363] extent data offset 0 nr 2097152 ram 2097152
+ [756289.572213] item 4 key (257 108 3145728) itemoff 15948 itemsize 53
+ [756289.573246] extent data disk bytenr 14680064 nr 3145728
+ [756289.574121] extent data offset 0 nr 2097152 ram 3145728
+ [756289.574993] item 5 key (18446744073709551606 128 13631488) itemoff 12876 itemsize 3072
+ [756289.576113] item 6 key (18446744073709551606 128 15728640) itemoff 11852 itemsize 1024
+ [756289.577286] BTRFS error (device sdc): block=30720000 write time tree block corruption detected
+ [756289.578644] ------------[ cut here ]------------
+ [756289.579376] WARNING: CPU: 0 PID: 2303929 at fs/btrfs/disk-io.c:465 csum_one_extent_buffer+0xed/0x100 [btrfs]
+ [756289.580857] Modules linked in: btrfs dm_zero dm_dust loop dm_snapshot (...)
+ [756289.591534] CPU: 0 PID: 2303929 Comm: xfs_io Tainted: G W 5.12.0-rc8-btrfs-next-87 #1
+ [756289.592580] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
+ [756289.594161] RIP: 0010:csum_one_extent_buffer+0xed/0x100 [btrfs]
+ [756289.595122] Code: 5d c3 e8 76 60 (...)
+ [756289.597509] RSP: 0018:ffffb51b416cb898 EFLAGS: 00010282
+ [756289.598142] RAX: 0000000000000000 RBX: fffff02b8a365bc0 RCX: 0000000000000000
+ [756289.598970] RDX: 0000000000000000 RSI: ffffffffa9112421 RDI: 00000000ffffffff
+ [756289.599798] RBP: ffffa06500880000 R08: 0000000000000000 R09: 0000000000000000
+ [756289.600619] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
+ [756289.601456] R13: ffffa0652b1d8980 R14: ffffa06500880000 R15: 0000000000000000
+ [756289.602278] FS: 00007f08b23c9800(0000) GS:ffffa0682be00000(0000) knlGS:0000000000000000
+ [756289.603217] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [756289.603892] CR2: 00005652f32d0138 CR3: 000000025d616003 CR4: 0000000000370ef0
+ [756289.604725] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [756289.605563] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [756289.606400] Call Trace:
+ [756289.606704] btree_csum_one_bio+0x244/0x2b0 [btrfs]
+ [756289.607313] btrfs_submit_metadata_bio+0xb7/0x100 [btrfs]
+ [756289.608040] submit_one_bio+0x61/0x70 [btrfs]
+ [756289.608587] btree_write_cache_pages+0x587/0x610 [btrfs]
+ [756289.609258] ? free_debug_processing+0x1d5/0x240
+ [756289.609812] ? __module_address+0x28/0xf0
+ [756289.610298] ? lock_acquire+0x1a0/0x3e0
+ [756289.610754] ? lock_acquired+0x19f/0x430
+ [756289.611220] ? lock_acquire+0x1a0/0x3e0
+ [756289.611675] do_writepages+0x43/0xf0
+ [756289.612101] ? __filemap_fdatawrite_range+0xa4/0x100
+ [756289.612800] __filemap_fdatawrite_range+0xc5/0x100
+ [756289.613393] btrfs_write_marked_extents+0x68/0x160 [btrfs]
+ [756289.614085] btrfs_sync_log+0x21c/0xf20 [btrfs]
+ [756289.614661] ? finish_wait+0x90/0x90
+ [756289.615096] ? __mutex_unlock_slowpath+0x45/0x2a0
+ [756289.615661] ? btrfs_log_inode_parent+0x3c9/0xdc0 [btrfs]
+ [756289.616338] ? lock_acquire+0x1a0/0x3e0
+ [756289.616801] ? lock_acquired+0x19f/0x430
+ [756289.617284] ? lock_acquire+0x1a0/0x3e0
+ [756289.617750] ? lock_release+0x214/0x470
+ [756289.618221] ? lock_acquired+0x19f/0x430
+ [756289.618704] ? dput+0x20/0x4a0
+ [756289.619079] ? dput+0x20/0x4a0
+ [756289.619452] ? lockref_put_or_lock+0x9/0x30
+ [756289.619969] ? lock_release+0x214/0x470
+ [756289.620445] ? lock_release+0x214/0x470
+ [756289.620924] ? lock_release+0x214/0x470
+ [756289.621415] btrfs_sync_file+0x46a/0x5b0 [btrfs]
+ [756289.621982] do_fsync+0x38/0x70
+ [756289.622395] __x64_sys_fsync+0x10/0x20
+ [756289.622907] do_syscall_64+0x33/0x80
+ [756289.623438] entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [756289.624063] RIP: 0033:0x7f08b27fbb7b
+ [756289.624588] Code: 0f 05 48 3d 00 (...)
+ [756289.626760] RSP: 002b:00007ffe2583f940 EFLAGS: 00000293 ORIG_RAX: 000000000000004a
+ [756289.627639] RAX: ffffffffffffffda RBX: 00005652f32cd0f0 RCX: 00007f08b27fbb7b
+ [756289.628464] RDX: 00005652f32cbca0 RSI: 00005652f32cd110 RDI: 0000000000000003
+ [756289.629323] RBP: 00005652f32cd110 R08: 0000000000000000 R09: 00007f08b28c4be0
+ [756289.630172] R10: fffffffffffff39a R11: 0000000000000293 R12: 0000000000000001
+ [756289.631007] R13: 00005652f32cd0f0 R14: 0000000000000001 R15: 00005652f32cc480
+ [756289.631819] irq event stamp: 0
+ [756289.632188] hardirqs last enabled at (0): [<0000000000000000>] 0x0
+ [756289.632911] hardirqs last disabled at (0): [<ffffffffa7e97c29>] copy_process+0x879/0x1cc0
+ [756289.633893] softirqs last enabled at (0): [<ffffffffa7e97c29>] copy_process+0x879/0x1cc0
+ [756289.634871] softirqs last disabled at (0): [<0000000000000000>] 0x0
+ [756289.635606] ---[ end trace 0a039fdc16ff3fef ]---
+ [756289.636179] BTRFS: error (device sdc) in btrfs_sync_log:3136: errno=-5 IO failure
+ [756289.637082] BTRFS info (device sdc): forced readonly
+
+Having checksum items covering ranges that overlap is dangerous as in some
+cases it can lead to having extent ranges for which we miss checksums
+after log replay or getting the wrong checksum item. There were some fixes
+in the past for bugs that resulted in this problem, and were explained and
+fixed by the following commits:
+
+ 27b9a8122ff71a ("Btrfs: fix csum tree corruption, duplicate and outdated checksums")
+ b84b8390d6009c ("Btrfs: fix file read corruption after extent cloning and fsync")
+ 40e046acbd2f36 ("Btrfs: fix missing data checksums after replaying a log tree")
+ e289f03ea79bbc ("btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents")
+
+Fix the issue by making btrfs_csum_file_blocks() take into account the
+start offset of the next checksum item when it decides to extend an
+existing checksum item, so that it never extends the checksum to end at a
+range that goes beyond the start range of the next checksum item.
+
+When we can not access the next checksum item without releasing the path,
+simply drop the optimization of extending the previous checksum item and
+fallback to inserting a new checksum item - this happens rarely and the
+optimization is not significant enough for a log tree in order to justify
+the extra complexity, as it would only save a few bytes (the size of a
+struct btrfs_item) of leaf space.
+
+This behaviour is only needed when inserting into a log tree because
+for the regular checksums tree we never have a case where we try to
+insert a range of checksums that overlap with a range that was previously
+inserted.
+
+A test case for fstests will follow soon.
+
+Reported-by: Philipp Fent <fent@in.tum.de>
+Link: https://lore.kernel.org/linux-btrfs/93c4600e-5263-5cba-adf0-6f47526e7561@in.tum.de/
+CC: stable@vger.kernel.org # 5.4+
+Tested-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file-item.c | 98 +++++++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 76 insertions(+), 22 deletions(-)
+
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -735,6 +735,37 @@ int btrfs_del_csums(struct btrfs_trans_h
+ return ret;
+ }
+
++static int find_next_csum_offset(struct btrfs_root *root,
++ struct btrfs_path *path,
++ u64 *next_offset)
++{
++ const u32 nritems = btrfs_header_nritems(path->nodes[0]);
++ struct btrfs_key found_key;
++ int slot = path->slots[0] + 1;
++ int ret;
++
++ if (nritems == 0 || slot >= nritems) {
++ ret = btrfs_next_leaf(root, path);
++ if (ret < 0) {
++ return ret;
++ } else if (ret > 0) {
++ *next_offset = (u64)-1;
++ return 0;
++ }
++ slot = path->slots[0];
++ }
++
++ btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
++
++ if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
++ found_key.type != BTRFS_EXTENT_CSUM_KEY)
++ *next_offset = (u64)-1;
++ else
++ *next_offset = found_key.offset;
++
++ return 0;
++}
++
+ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_ordered_sum *sums)
+@@ -750,7 +781,6 @@ int btrfs_csum_file_blocks(struct btrfs_
+ u64 total_bytes = 0;
+ u64 csum_offset;
+ u64 bytenr;
+- u32 nritems;
+ u32 ins_size;
+ int index = 0;
+ int found_next;
+@@ -793,26 +823,10 @@ again:
+ goto insert;
+ }
+ } else {
+- int slot = path->slots[0] + 1;
+- /* we didn't find a csum item, insert one */
+- nritems = btrfs_header_nritems(path->nodes[0]);
+- if (!nritems || (path->slots[0] >= nritems - 1)) {
+- ret = btrfs_next_leaf(root, path);
+- if (ret < 0) {
+- goto out;
+- } else if (ret > 0) {
+- found_next = 1;
+- goto insert;
+- }
+- slot = path->slots[0];
+- }
+- btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+- if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+- found_key.type != BTRFS_EXTENT_CSUM_KEY) {
+- found_next = 1;
+- goto insert;
+- }
+- next_offset = found_key.offset;
++ /* We didn't find a csum item, insert one. */
++ ret = find_next_csum_offset(root, path, &next_offset);
++ if (ret < 0)
++ goto out;
+ found_next = 1;
+ goto insert;
+ }
+@@ -860,8 +874,48 @@ again:
+ tmp = sums->len - total_bytes;
+ tmp >>= fs_info->sb->s_blocksize_bits;
+ WARN_ON(tmp < 1);
++ extend_nr = max_t(int, 1, tmp);
++
++ /*
++ * A log tree can already have checksum items with a subset of
++ * the checksums we are trying to log. This can happen after
++ * doing a sequence of partial writes into prealloc extents and
++ * fsyncs in between, with a full fsync logging a larger subrange
++ * of an extent for which a previous fast fsync logged a smaller
++ * subrange. And this happens in particular due to merging file
++ * extent items when we complete an ordered extent for a range
++ * covered by a prealloc extent - this is done at
++ * btrfs_mark_extent_written().
++ *
++ * So if we try to extend the previous checksum item, which has
++ * a range that ends at the start of the range we want to insert,
++ * make sure we don't extend beyond the start offset of the next
++ * checksum item. If we are at the last item in the leaf, then
++ * forget the optimization of extending and add a new checksum
++ * item - it is not worth the complexity of releasing the path,
++ * getting the first key for the next leaf, repeat the btree
++ * search, etc, because log trees are temporary anyway and it
++ * would only save a few bytes of leaf space.
++ */
++ if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
++ if (path->slots[0] + 1 >=
++ btrfs_header_nritems(path->nodes[0])) {
++ ret = find_next_csum_offset(root, path, &next_offset);
++ if (ret < 0)
++ goto out;
++ found_next = 1;
++ goto insert;
++ }
++
++ ret = find_next_csum_offset(root, path, &next_offset);
++ if (ret < 0)
++ goto out;
++
++ tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
++ if (tmp <= INT_MAX)
++ extend_nr = min_t(int, extend_nr, tmp);
++ }
+
+- extend_nr = max_t(int, 1, (int)tmp);
+ diff = (csum_offset + extend_nr) * csum_size;
+ diff = min(diff,
+ MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
--- /dev/null
+From 011b28acf940eb61c000059dd9e2cfcbf52ed96b Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 13:13:15 -0400
+Subject: btrfs: fixup error handling in fixup_inode_link_counts
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 011b28acf940eb61c000059dd9e2cfcbf52ed96b upstream.
+
+This function has the following pattern
+
+ while (1) {
+ ret = whatever();
+ if (ret)
+ goto out;
+ }
+ ret = 0
+out:
+ return ret;
+
+However in several places in this while loop we simply break; when there's
+a problem, thus clearing the return value, and in one case we do a
+return -EIO, and leak the memory for the path.
+
+Fix this by re-arranging the loop to deal with ret == 1 coming from
+btrfs_search_slot, and then simply delete the
+
+ ret = 0;
+out:
+
+bit so everybody can break if there is an error, which will allow for
+proper error handling to occur.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1775,6 +1775,7 @@ static noinline int fixup_inode_link_cou
+ break;
+
+ if (ret == 1) {
++ ret = 0;
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+@@ -1787,17 +1788,19 @@ static noinline int fixup_inode_link_cou
+
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+- goto out;
++ break;
+
+ btrfs_release_path(path);
+ inode = read_one_inode(root, key.offset);
+- if (!inode)
+- return -EIO;
++ if (!inode) {
++ ret = -EIO;
++ break;
++ }
+
+ ret = fixup_inode_link_count(trans, root, inode);
+ iput(inode);
+ if (ret)
+- goto out;
++ break;
+
+ /*
+ * fixup on a directory may create new entries,
+@@ -1806,8 +1809,6 @@ static noinline int fixup_inode_link_cou
+ */
+ key.offset = (u64)-1;
+ }
+- ret = 0;
+-out:
+ btrfs_release_path(path);
+ return ret;
+ }
--- /dev/null
+From d61bec08b904cf171835db98168f82bc338e92e4 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 09:38:27 -0400
+Subject: btrfs: mark ordered extent and inode with error if we fail to finish
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit d61bec08b904cf171835db98168f82bc338e92e4 upstream.
+
+While doing error injection testing I saw that sometimes we'd get an
+abort that wouldn't stop the current transaction commit from completing.
+This abort was coming from finish ordered IO, but at this point in the
+transaction commit we should have gotten an error and stopped.
+
+It turns out the abort came from finish ordered io while trying to write
+out the free space cache. It occurred to me that any failure inside of
+finish_ordered_io isn't actually raised to the person doing the writing,
+so we could have any number of failures in this path and think the
+ordered extent completed successfully and the inode was fine.
+
+Fix this by marking the ordered extent with BTRFS_ORDERED_IOERR, and
+marking the mapping of the inode with mapping_set_error, so any callers
+that simply call fdatawait will also get the error.
+
+With this we're seeing the IO error on the free space inode when we fail
+to do the finish_ordered_io.
+
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3359,6 +3359,18 @@ out:
+ if (ret || truncated) {
+ u64 start, end;
+
++ /*
++ * If we failed to finish this ordered extent for any reason we
++ * need to make sure BTRFS_ORDERED_IOERR is set on the ordered
++ * extent, and mark the inode with the error if it wasn't
++ * already set. Any error during writeback would have already
++ * set the mapping error, so we need to set it if we're the ones
++ * marking this ordered extent as failed.
++ */
++ if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
++ &ordered_extent->flags))
++ mapping_set_error(ordered_extent->inode->i_mapping, -EIO);
++
+ if (truncated)
+ start = ordered_extent->file_offset + logical_len;
+ else
--- /dev/null
+From 856bd270dc4db209c779ce1e9555c7641ffbc88e Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 10:52:46 -0400
+Subject: btrfs: return errors from btrfs_del_csums in cleanup_ref_head
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 856bd270dc4db209c779ce1e9555c7641ffbc88e upstream.
+
+We are unconditionally returning 0 in cleanup_ref_head, despite the fact
+that btrfs_del_csums could fail. We need to return the error so the
+transaction gets aborted properly, fix this by returning ret from
+btrfs_del_csums in cleanup_ref_head.
+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1879,7 +1879,7 @@ static int cleanup_ref_head(struct btrfs
+ trace_run_delayed_ref_head(fs_info, head, 0);
+ btrfs_delayed_ref_unlock(head);
+ btrfs_put_delayed_ref_head(head);
+- return 0;
++ return ret;
+ }
+
+ static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
--- /dev/null
+From dce3d8e1d070900e0feeb06787a319ff9379212c Mon Sep 17 00:00:00 2001
+From: Luben Tuikov <luben.tuikov@amd.com>
+Date: Wed, 12 May 2021 12:33:23 -0400
+Subject: drm/amdgpu: Don't query CE and UE errors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Luben Tuikov <luben.tuikov@amd.com>
+
+commit dce3d8e1d070900e0feeb06787a319ff9379212c upstream.
+
+On QUERY2 IOCTL don't query counts of correctable
+and uncorrectable errors, since when RAS is
+enabled and supported on Vega20 server boards,
+this takes an insurmountably long time, in O(n^3),
+which slows the system down to the point of it
+being unusable when we have GUI up.
+
+Fixes: ae363a212b14 ("drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2")
+Cc: Alexander Deucher <Alexander.Deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
+Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 16 ----------------
+ 1 file changed, 16 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+@@ -351,7 +351,6 @@ static int amdgpu_ctx_query2(struct amdg
+ {
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+- unsigned long ras_counter;
+
+ if (!fpriv)
+ return -EINVAL;
+@@ -376,21 +375,6 @@ static int amdgpu_ctx_query2(struct amdg
+ if (atomic_read(&ctx->guilty))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
+
+- /*query ue count*/
+- ras_counter = amdgpu_ras_query_error_count(adev, false);
+- /*ras counter is monotonic increasing*/
+- if (ras_counter != ctx->ras_counter_ue) {
+- out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
+- ctx->ras_counter_ue = ras_counter;
+- }
+-
+- /*query ce count*/
+- ras_counter = amdgpu_ras_query_error_count(adev, true);
+- if (ras_counter != ctx->ras_counter_ce) {
+- out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
+- ctx->ras_counter_ce = ras_counter;
+- }
+-
+ mutex_unlock(&mgr->lock);
+ return 0;
+ }
--- /dev/null
+From 07438603a07e52f1c6aa731842bd298d2725b7be Mon Sep 17 00:00:00 2001
+From: Nirmoy Das <nirmoy.das@amd.com>
+Date: Fri, 28 May 2021 16:54:16 +0200
+Subject: drm/amdgpu: make sure we unpin the UVD BO
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Nirmoy Das <nirmoy.das@amd.com>
+
+commit 07438603a07e52f1c6aa731842bd298d2725b7be upstream.
+
+Releasing pinned BOs is illegal now. UVD 6 was missing from:
+commit 2f40801dc553 ("drm/amdgpu: make sure we unpin the UVD BO")
+
+Fixes: 2f40801dc553 ("drm/amdgpu: make sure we unpin the UVD BO")
+Cc: stable@vger.kernel.org
+Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+@@ -354,6 +354,7 @@ static int uvd_v6_0_enc_ring_test_ib(str
+
+ error:
+ dma_fence_put(fence);
++ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+ return r;
--- /dev/null
+From 4ac06a1e013cf5fdd963317ffd3b968560f33bba Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 09:21:38 +0200
+Subject: nfc: fix NULL ptr dereference in llcp_sock_getname() after failed connect
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+commit 4ac06a1e013cf5fdd963317ffd3b968560f33bba upstream.
+
+It's possible to trigger a NULL pointer dereference by a local unprivileged
+user, when calling getsockname() after a failed bind() (e.g. the bind
+fails because LLCP_SAP_MAX is used as SAP):
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ CPU: 1 PID: 426 Comm: llcp_sock_getna Not tainted 5.13.0-rc2-next-20210521+ #9
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014
+ Call Trace:
+ llcp_sock_getname+0xb1/0xe0
+ __sys_getpeername+0x95/0xc0
+ ? lockdep_hardirqs_on_prepare+0xd5/0x180
+ ? syscall_enter_from_user_mode+0x1c/0x40
+ __x64_sys_getpeername+0x11/0x20
+ do_syscall_64+0x36/0x70
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+This can be reproduced with Syzkaller C repro (bind followed by
+getpeername):
+https://syzkaller.appspot.com/x/repro.c?x=14def446e00000
+
+Cc: <stable@vger.kernel.org>
+Fixes: d646960f7986 ("NFC: Initial LLCP support")
+Reported-by: syzbot+80fb126e7f7d8b1a5914@syzkaller.appspotmail.com
+Reported-by: butt3rflyh4ck <butterflyhuangxx@gmail.com>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Link: https://lore.kernel.org/r/20210531072138.5219-1-krzysztof.kozlowski@canonical.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/nfc/llcp_sock.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/nfc/llcp_sock.c
++++ b/net/nfc/llcp_sock.c
+@@ -110,6 +110,7 @@ static int llcp_sock_bind(struct socket
+ if (!llcp_sock->service_name) {
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
++ llcp_sock->dev = NULL;
+ ret = -ENOMEM;
+ goto put_dev;
+ }
+@@ -119,6 +120,7 @@ static int llcp_sock_bind(struct socket
+ llcp_sock->local = NULL;
+ kfree(llcp_sock->service_name);
+ llcp_sock->service_name = NULL;
++ llcp_sock->dev = NULL;
+ ret = -EADDRINUSE;
+ goto put_dev;
+ }
--- /dev/null
+From 6bba4471f0cc1296fe3c2089b9e52442d3074b2e Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Fri, 4 Jun 2021 20:01:42 -0700
+Subject: ocfs2: fix data corruption by fallocate
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 6bba4471f0cc1296fe3c2089b9e52442d3074b2e upstream.
+
+When fallocate punches holes beyond the inode size and the original isize
+is in the middle of the last cluster, the part from isize to the end of
+the cluster is zeroed with a buffer write. At that time isize is not yet
+updated to match the new size, so if writeback kicks in, it will invoke
+ocfs2_writepage()->block_write_full_page(), where the pages beyond the
+inode size will be dropped. That causes file corruption. Fix this by
+zeroing out the eof blocks when extending the inode size.
+
+Running the following command with qemu-img 4.2.1 can easily produce a
+corrupted converted image file.
+
+ qemu-img convert -p -t none -T none -f qcow2 $qcow_image \
+ -O qcow2 -o compat=1.1 $qcow_image.conv
+
+The usage of fallocate in qemu is like this: it first punches holes beyond
+the inode size, then extends the inode size.
+
+ fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2276196352, 65536) = 0
+ fallocate(11, 0, 2276196352, 65536) = 0
+
+v1: https://www.spinics.net/lists/linux-fsdevel/msg193999.html
+v2: https://lore.kernel.org/linux-fsdevel/20210525093034.GB4112@quack2.suse.cz/T/
+
+Link: https://lkml.kernel.org/r/20210528210648.9124-1-junxiao.bi@oracle.com
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 50 insertions(+), 5 deletions(-)
+
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1856,6 +1856,45 @@ out:
+ }
+
+ /*
++ * zero out partial blocks of one cluster.
++ *
++ * start: file offset where zero starts, will be made upper block aligned.
++ * len: it will be trimmed to the end of current cluster if "start + len"
++ * is bigger than it.
++ */
++static int ocfs2_zeroout_partial_cluster(struct inode *inode,
++ u64 start, u64 len)
++{
++ int ret;
++ u64 start_block, end_block, nr_blocks;
++ u64 p_block, offset;
++ u32 cluster, p_cluster, nr_clusters;
++ struct super_block *sb = inode->i_sb;
++ u64 end = ocfs2_align_bytes_to_clusters(sb, start);
++
++ if (start + len < end)
++ end = start + len;
++
++ start_block = ocfs2_blocks_for_bytes(sb, start);
++ end_block = ocfs2_blocks_for_bytes(sb, end);
++ nr_blocks = end_block - start_block;
++ if (!nr_blocks)
++ return 0;
++
++ cluster = ocfs2_bytes_to_clusters(sb, start);
++ ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
++ &nr_clusters, NULL);
++ if (ret)
++ return ret;
++ if (!p_cluster)
++ return 0;
++
++ offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
++ p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
++ return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
++}
++
++/*
+ * Parts of this function taken from xfs_change_file_space()
+ */
+ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
+@@ -1865,7 +1904,7 @@ static int __ocfs2_change_file_space(str
+ {
+ int ret;
+ s64 llen;
+- loff_t size;
++ loff_t size, orig_isize;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct buffer_head *di_bh = NULL;
+ handle_t *handle;
+@@ -1896,6 +1935,7 @@ static int __ocfs2_change_file_space(str
+ goto out_inode_unlock;
+ }
+
++ orig_isize = i_size_read(inode);
+ switch (sr->l_whence) {
+ case 0: /*SEEK_SET*/
+ break;
+@@ -1903,7 +1943,7 @@ static int __ocfs2_change_file_space(str
+ sr->l_start += f_pos;
+ break;
+ case 2: /*SEEK_END*/
+- sr->l_start += i_size_read(inode);
++ sr->l_start += orig_isize;
+ break;
+ default:
+ ret = -EINVAL;
+@@ -1957,6 +1997,14 @@ static int __ocfs2_change_file_space(str
+ default:
+ ret = -EINVAL;
+ }
++
++ /* zeroout eof blocks in the cluster. */
++ if (!ret && change_size && orig_isize < size) {
++ ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
++ size - orig_isize);
++ if (!ret)
++ i_size_write(inode, size);
++ }
+ up_write(&OCFS2_I(inode)->ip_alloc_sem);
+ if (ret) {
+ mlog_errno(ret);
+@@ -1973,9 +2021,6 @@ static int __ocfs2_change_file_space(str
+ goto out_inode_unlock;
+ }
+
+- if (change_size && i_size_read(inode) < size)
+- i_size_write(inode, size);
+-
+ inode->i_ctime = inode->i_mtime = current_time(inode);
+ ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
+ if (ret < 0)
ext4-fix-bug-on-in-ext4_es_cache_extent-as-ext4_split_extent_at-failed.patch
usb-dwc2-fix-build-in-periphal-only-mode.patch
pid-take-a-reference-when-initializing-cad_pid.patch
+ocfs2-fix-data-corruption-by-fallocate.patch
+nfc-fix-null-ptr-dereference-in-llcp_sock_getname-after-failed-connect.patch
+drm-amdgpu-don-t-query-ce-and-ue-errors.patch
+drm-amdgpu-make-sure-we-unpin-the-uvd-bo.patch
+x86-apic-mark-_all_-legacy-interrupts-when-io-apic-is-missing.patch
+btrfs-mark-ordered-extent-and-inode-with-error-if-we-fail-to-finish.patch
+btrfs-fix-error-handling-in-btrfs_del_csums.patch
+btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch
+btrfs-fix-fsync-failure-and-transaction-abort-after-writes-to-prealloc-extents.patch
+btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch
--- /dev/null
+From 7d65f9e80646c595e8c853640a9d0768a33e204c Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 25 May 2021 13:08:41 +0200
+Subject: x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 7d65f9e80646c595e8c853640a9d0768a33e204c upstream.
+
+PIC interrupts do not support affinity setting and they can end up on
+any online CPU. Therefore, it's required to mark the associated vectors
+as system-wide reserved. Otherwise, the corresponding irq descriptors
+are copied to the secondary CPUs but the vectors are not marked as
+assigned or reserved. This works correctly for the IO/APIC case.
+
+When the IO/APIC is disabled via config, kernel command line or lack of
+enumeration then all legacy interrupts are routed through the PIC, but
+nothing marks them as system-wide reserved vectors.
+
+As a consequence, a subsequent allocation on a secondary CPU can result in
+allocating one of these vectors, which triggers the BUG() in
+apic_update_vector() because the interrupt descriptor slot is not empty.
+
+Imran tried to work around that by marking those interrupts as allocated
+when a CPU comes online. But that's wrong in case that the IO/APIC is
+available and one of the legacy interrupts, e.g. IRQ0, has been switched to
+PIC mode because then marking them as allocated will fail as they are
+already marked as system vectors.
+
+Stay consistent and update the legacy vectors after attempting IO/APIC
+initialization and mark them as system vectors in case that no IO/APIC is
+available.
+
+Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment")
+Reported-by: Imran Khan <imran.f.khan@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210519233928.2157496-1-imran.f.khan@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/apic.h | 1 +
+ arch/x86/kernel/apic/apic.c | 1 +
+ arch/x86/kernel/apic/vector.c | 20 ++++++++++++++++++++
+ 3 files changed, 22 insertions(+)
+
+--- a/arch/x86/include/asm/apic.h
++++ b/arch/x86/include/asm/apic.h
+@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(
+ extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+ extern void lapic_assign_system_vectors(void);
+ extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
++extern void lapic_update_legacy_vectors(void);
+ extern void lapic_online(void);
+ extern void lapic_offline(void);
+ extern bool apic_needs_pit(void);
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -2579,6 +2579,7 @@ static void __init apic_bsp_setup(bool u
+ end_local_APIC_setup();
+ irq_remap_enable_fault_handling();
+ setup_IO_APIC();
++ lapic_update_legacy_vectors();
+ }
+
+ #ifdef CONFIG_UP_LATE_INIT
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -680,6 +680,26 @@ void lapic_assign_legacy_vector(unsigned
+ irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
+ }
+
++void __init lapic_update_legacy_vectors(void)
++{
++ unsigned int i;
++
++ if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
++ return;
++
++ /*
++ * If the IO/APIC is disabled via config, kernel command line or
++ * lack of enumeration then all legacy interrupts are routed
++ * through the PIC. Make sure that they are marked as legacy
++ * vectors. PIC_CASCADE_IR has already been marked in
++ * lapic_assign_system_vectors().
++ */
++ for (i = 0; i < nr_legacy_irqs(); i++) {
++ if (i != PIC_CASCADE_IR)
++ lapic_assign_legacy_vector(i, true);
++ }
++}
++
+ void __init lapic_assign_system_vectors(void)
+ {
+ unsigned int i, vector = 0;