4.19-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 8 Jun 2021 14:05:32 +0000 (16:05 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 8 Jun 2021 14:05:32 +0000 (16:05 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 8 Jun 2021 14:05:32 +0000 (16:05 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 8 Jun 2021 14:05:32 +0000 (16:05 +0200)
diff --git a/queue-4.19/btrfs-fix-error-handling-in-btrfs_del_csums.patch b/queue-4.19/btrfs-fix-error-handling-in-btrfs_del_csums.patch

new file mode 100644 (file)

index 0000000..cbfd14f
--- /dev/null
+++ b/queue-4.19/btrfs-fix-error-handling-in-btrfs_del_csums.patch
@@ -0,0 +1,93 @@
+From b86652be7c83f70bf406bed18ecf55adb9bfb91b Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 10:52:45 -0400
+Subject: btrfs: fix error handling in btrfs_del_csums
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit b86652be7c83f70bf406bed18ecf55adb9bfb91b upstream.
+
+Error injection stress would sometimes fail with checksums on disk that
+did not have a corresponding extent.  This occurred because the pattern
+in btrfs_del_csums was
+
+       while (1) {
+               ret = btrfs_search_slot();
+               if (ret < 0)
+                       break;
+       }
+       ret = 0;
+out:
+       btrfs_free_path(path);
+       return ret;
+
+If we got an error from btrfs_search_slot we'd clear the error because
+we were breaking instead of goto out.  Instead of using goto out, simply
+handle the cases where we may leave a random value in ret, and get rid
+of the
+
+       ret = 0;
+out:
+
+pattern and simply allow break to have the proper error reporting.  With
+this fix we properly abort the transaction and do not commit thinking we
+successfully deleted the csum.
+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file-item.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -586,7 +586,7 @@ int btrfs_del_csums(struct btrfs_trans_h
+       u64 end_byte = bytenr + len;
+       u64 csum_end;
+       struct extent_buffer *leaf;
+-      int ret;
++      int ret = 0;
+       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+       int blocksize_bits = fs_info->sb->s_blocksize_bits;
+ 
+@@ -605,6 +605,7 @@ int btrfs_del_csums(struct btrfs_trans_h
+               path->leave_spinning = 1;
+               ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+               if (ret > 0) {
++                      ret = 0;
+                       if (path->slots[0] == 0)
+                               break;
+                       path->slots[0]--;
+@@ -661,7 +662,7 @@ int btrfs_del_csums(struct btrfs_trans_h
+                       ret = btrfs_del_items(trans, root, path,
+                                             path->slots[0], del_nr);
+                       if (ret)
+-                              goto out;
++                              break;
+                       if (key.offset == bytenr)
+                               break;
+               } else if (key.offset < bytenr && csum_end > end_byte) {
+@@ -705,8 +706,9 @@ int btrfs_del_csums(struct btrfs_trans_h
+                       ret = btrfs_split_item(trans, root, path, &key, offset);
+                       if (ret && ret != -EAGAIN) {
+                               btrfs_abort_transaction(trans, ret);
+-                              goto out;
++                              break;
+                       }
++                      ret = 0;
+ 
+                       key.offset = end_byte - 1;
+               } else {
+@@ -716,8 +718,6 @@ int btrfs_del_csums(struct btrfs_trans_h
+               }
+               btrfs_release_path(path);
+       }
+-      ret = 0;
+-out:
+       btrfs_free_path(path);
+       return ret;
+ }
diff --git a/queue-4.19/btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch b/queue-4.19/btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch

new file mode 100644 (file)

index 0000000..f57052c
--- /dev/null
+++ b/queue-4.19/btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch
@@ -0,0 +1,85 @@
+From 011b28acf940eb61c000059dd9e2cfcbf52ed96b Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 13:13:15 -0400
+Subject: btrfs: fixup error handling in fixup_inode_link_counts
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 011b28acf940eb61c000059dd9e2cfcbf52ed96b upstream.
+
+This function has the following pattern
+
+       while (1) {
+               ret = whatever();
+               if (ret)
+                       goto out;
+       }
+       ret = 0;
+out:
+       return ret;
+
+However several places in this while loop we simply break; when there's
+a problem, thus clearing the return value, and in one case we do a
+return -EIO, and leak the memory for the path.
+
+Fix this by re-arranging the loop to deal with ret == 1 coming from
+btrfs_search_slot, and then simply delete the
+
+       ret = 0;
+out:
+
+bit so everybody can break if there is an error, which will allow for
+proper error handling to occur.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1699,6 +1699,7 @@ static noinline int fixup_inode_link_cou
+                       break;
+ 
+               if (ret == 1) {
++                      ret = 0;
+                       if (path->slots[0] == 0)
+                               break;
+                       path->slots[0]--;
+@@ -1711,17 +1712,19 @@ static noinline int fixup_inode_link_cou
+ 
+               ret = btrfs_del_item(trans, root, path);
+               if (ret)
+-                      goto out;
++                      break;
+ 
+               btrfs_release_path(path);
+               inode = read_one_inode(root, key.offset);
+-              if (!inode)
+-                      return -EIO;
++              if (!inode) {
++                      ret = -EIO;
++                      break;
++              }
+ 
+               ret = fixup_inode_link_count(trans, root, inode);
+               iput(inode);
+               if (ret)
+-                      goto out;
++                      break;
+ 
+               /*
+                * fixup on a directory may create new entries,
+@@ -1730,8 +1733,6 @@ static noinline int fixup_inode_link_cou
+                */
+               key.offset = (u64)-1;
+       }
+-      ret = 0;
+-out:
+       btrfs_release_path(path);
+       return ret;
+ }
diff --git a/queue-4.19/btrfs-mark-ordered-extent-and-inode-with-error-if-we-fail-to-finish.patch b/queue-4.19/btrfs-mark-ordered-extent-and-inode-with-error-if-we-fail-to-finish.patch

new file mode 100644 (file)

index 0000000..1b074ed
--- /dev/null
+++ b/queue-4.19/btrfs-mark-ordered-extent-and-inode-with-error-if-we-fail-to-finish.patch
@@ -0,0 +1,57 @@
+From d61bec08b904cf171835db98168f82bc338e92e4 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 09:38:27 -0400
+Subject: btrfs: mark ordered extent and inode with error if we fail to finish
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit d61bec08b904cf171835db98168f82bc338e92e4 upstream.
+
+While doing error injection testing I saw that sometimes we'd get an
+abort that wouldn't stop the current transaction commit from completing.
+This abort was coming from finish ordered IO, but at this point in the
+transaction commit we should have gotten an error and stopped.
+
+It turns out the abort came from finish ordered io while trying to write
+out the free space cache.  It occurred to me that any failure inside of
+finish_ordered_io isn't actually raised to the person doing the writing,
+so we could have any number of failures in this path and think the
+ordered extent completed successfully and the inode was fine.
+
+Fix this by marking the ordered extent with BTRFS_ORDERED_IOERR, and
+marking the mapping of the inode with mapping_set_error, so any callers
+that simply call fdatawait will also get the error.
+
+With this we're seeing the IO error on the free space inode when we fail
+to do the finish_ordered_io.
+
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3162,6 +3162,18 @@ out:
+       if (ret || truncated) {
+               u64 start, end;
+ 
++              /*
++               * If we failed to finish this ordered extent for any reason we
++               * need to make sure BTRFS_ORDERED_IOERR is set on the ordered
++               * extent, and mark the inode with the error if it wasn't
++               * already set.  Any error during writeback would have already
++               * set the mapping error, so we need to set it if we're the ones
++               * marking this ordered extent as failed.
++               */
++              if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
++                                           &ordered_extent->flags))
++                      mapping_set_error(ordered_extent->inode->i_mapping, -EIO);
++
+               if (truncated)
+                       start = ordered_extent->file_offset + logical_len;
+               else
diff --git a/queue-4.19/btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch b/queue-4.19/btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch

new file mode 100644 (file)

index 0000000..8037cfb
--- /dev/null
+++ b/queue-4.19/btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch
@@ -0,0 +1,35 @@
+From 856bd270dc4db209c779ce1e9555c7641ffbc88e Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 19 May 2021 10:52:46 -0400
+Subject: btrfs: return errors from btrfs_del_csums in cleanup_ref_head
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 856bd270dc4db209c779ce1e9555c7641ffbc88e upstream.
+
+We are unconditionally returning 0 in cleanup_ref_head, despite the fact
+that btrfs_del_csums could fail.  We need to return the error so the
+transaction gets aborted properly, fix this by returning ret from
+btrfs_del_csums in cleanup_ref_head.
+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2501,7 +2501,7 @@ static int cleanup_ref_head(struct btrfs
+                                     head->qgroup_reserved);
+       btrfs_delayed_ref_unlock(head);
+       btrfs_put_delayed_ref_head(head);
+-      return 0;
++      return ret;
+ }
+ 
+ /*
diff --git a/queue-4.19/nfc-fix-null-ptr-dereference-in-llcp_sock_getname-after-failed-connect.patch b/queue-4.19/nfc-fix-null-ptr-dereference-in-llcp_sock_getname-after-failed-connect.patch

new file mode 100644 (file)

index 0000000..f752597
--- /dev/null
+++ b/queue-4.19/nfc-fix-null-ptr-dereference-in-llcp_sock_getname-after-failed-connect.patch
@@ -0,0 +1,59 @@
+From 4ac06a1e013cf5fdd963317ffd3b968560f33bba Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 09:21:38 +0200
+Subject: nfc: fix NULL ptr dereference in llcp_sock_getname() after failed connect
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+commit 4ac06a1e013cf5fdd963317ffd3b968560f33bba upstream.
+
+It's possible to trigger NULL pointer dereference by local unprivileged
+user, when calling getsockname() after failed bind() (e.g. the bind
+fails because LLCP_SAP_MAX used as SAP):
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000000
+  CPU: 1 PID: 426 Comm: llcp_sock_getna Not tainted 5.13.0-rc2-next-20210521+ #9
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014
+  Call Trace:
+   llcp_sock_getname+0xb1/0xe0
+   __sys_getpeername+0x95/0xc0
+   ? lockdep_hardirqs_on_prepare+0xd5/0x180
+   ? syscall_enter_from_user_mode+0x1c/0x40
+   __x64_sys_getpeername+0x11/0x20
+   do_syscall_64+0x36/0x70
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+This can be reproduced with Syzkaller C repro (bind followed by
+getpeername):
+https://syzkaller.appspot.com/x/repro.c?x=14def446e00000
+
+Cc: <stable@vger.kernel.org>
+Fixes: d646960f7986 ("NFC: Initial LLCP support")
+Reported-by: syzbot+80fb126e7f7d8b1a5914@syzkaller.appspotmail.com
+Reported-by: butt3rflyh4ck <butterflyhuangxx@gmail.com>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Link: https://lore.kernel.org/r/20210531072138.5219-1-krzysztof.kozlowski@canonical.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/nfc/llcp_sock.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/nfc/llcp_sock.c
++++ b/net/nfc/llcp_sock.c
+@@ -122,6 +122,7 @@ static int llcp_sock_bind(struct socket
+       if (!llcp_sock->service_name) {
+               nfc_llcp_local_put(llcp_sock->local);
+               llcp_sock->local = NULL;
++              llcp_sock->dev = NULL;
+               ret = -ENOMEM;
+               goto put_dev;
+       }
+@@ -131,6 +132,7 @@ static int llcp_sock_bind(struct socket
+               llcp_sock->local = NULL;
+               kfree(llcp_sock->service_name);
+               llcp_sock->service_name = NULL;
++              llcp_sock->dev = NULL;
+               ret = -EADDRINUSE;
+               goto put_dev;
+       }
diff --git a/queue-4.19/ocfs2-fix-data-corruption-by-fallocate.patch b/queue-4.19/ocfs2-fix-data-corruption-by-fallocate.patch

new file mode 100644 (file)

index 0000000..2e458c7
--- /dev/null
+++ b/queue-4.19/ocfs2-fix-data-corruption-by-fallocate.patch
@@ -0,0 +1,148 @@
+From 6bba4471f0cc1296fe3c2089b9e52442d3074b2e Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Fri, 4 Jun 2021 20:01:42 -0700
+Subject: ocfs2: fix data corruption by fallocate
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 6bba4471f0cc1296fe3c2089b9e52442d3074b2e upstream.
+
+When fallocate punches holes out of inode size, if original isize is in
+the middle of last cluster, then the part from isize to the end of the
+cluster will be zeroed with buffer write, at that time isize is not yet
+updated to match the new size, if writeback is kicked in, it will invoke
+ocfs2_writepage()->block_write_full_page() where the pages out of inode
+size will be dropped.  That will cause file corruption.  Fix this by
+zero out eof blocks when extending the inode size.
+
+Running the following command with qemu-image 4.2.1 can get a corrupted
+converted image file easily.
+
+    qemu-img convert -p -t none -T none -f qcow2 $qcow_image \
+             -O qcow2 -o compat=1.1 $qcow_image.conv
+
+The usage of fallocate in qemu is like this: it first punches holes out
+of inode size, then extends the inode size.
+
+    fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2276196352, 65536) = 0
+    fallocate(11, 0, 2276196352, 65536) = 0
+
+v1: https://www.spinics.net/lists/linux-fsdevel/msg193999.html
+v2: https://lore.kernel.org/linux-fsdevel/20210525093034.GB4112@quack2.suse.cz/T/
+
+Link: https://lkml.kernel.org/r/20210528210648.9124-1-junxiao.bi@oracle.com
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/file.c |   55 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 50 insertions(+), 5 deletions(-)
+
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1864,6 +1864,45 @@ out:
+ }
+ 
+ /*
++ * zero out partial blocks of one cluster.
++ *
++ * start: file offset where zero starts, will be made upper block aligned.
++ * len: it will be trimmed to the end of current cluster if "start + len"
++ *      is bigger than it.
++ */
++static int ocfs2_zeroout_partial_cluster(struct inode *inode,
++                                      u64 start, u64 len)
++{
++      int ret;
++      u64 start_block, end_block, nr_blocks;
++      u64 p_block, offset;
++      u32 cluster, p_cluster, nr_clusters;
++      struct super_block *sb = inode->i_sb;
++      u64 end = ocfs2_align_bytes_to_clusters(sb, start);
++
++      if (start + len < end)
++              end = start + len;
++
++      start_block = ocfs2_blocks_for_bytes(sb, start);
++      end_block = ocfs2_blocks_for_bytes(sb, end);
++      nr_blocks = end_block - start_block;
++      if (!nr_blocks)
++              return 0;
++
++      cluster = ocfs2_bytes_to_clusters(sb, start);
++      ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
++                              &nr_clusters, NULL);
++      if (ret)
++              return ret;
++      if (!p_cluster)
++              return 0;
++
++      offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
++      p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
++      return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
++}
++
++/*
+  * Parts of this function taken from xfs_change_file_space()
+  */
+ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
+@@ -1873,7 +1912,7 @@ static int __ocfs2_change_file_space(str
+ {
+       int ret;
+       s64 llen;
+-      loff_t size;
++      loff_t size, orig_isize;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       struct buffer_head *di_bh = NULL;
+       handle_t *handle;
+@@ -1904,6 +1943,7 @@ static int __ocfs2_change_file_space(str
+               goto out_inode_unlock;
+       }
+ 
++      orig_isize = i_size_read(inode);
+       switch (sr->l_whence) {
+       case 0: /*SEEK_SET*/
+               break;
+@@ -1911,7 +1951,7 @@ static int __ocfs2_change_file_space(str
+               sr->l_start += f_pos;
+               break;
+       case 2: /*SEEK_END*/
+-              sr->l_start += i_size_read(inode);
++              sr->l_start += orig_isize;
+               break;
+       default:
+               ret = -EINVAL;
+@@ -1965,6 +2005,14 @@ static int __ocfs2_change_file_space(str
+       default:
+               ret = -EINVAL;
+       }
++
++      /* zeroout eof blocks in the cluster. */
++      if (!ret && change_size && orig_isize < size) {
++              ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
++                                      size - orig_isize);
++              if (!ret)
++                      i_size_write(inode, size);
++      }
+       up_write(&OCFS2_I(inode)->ip_alloc_sem);
+       if (ret) {
+               mlog_errno(ret);
+@@ -1981,9 +2029,6 @@ static int __ocfs2_change_file_space(str
+               goto out_inode_unlock;
+       }
+ 
+-      if (change_size && i_size_read(inode) < size)
+-              i_size_write(inode, size);
+-
+       inode->i_ctime = inode->i_mtime = current_time(inode);
+       ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
+       if (ret < 0)
diff --git a/queue-4.19/series b/queue-4.19/series

index eed2296fafc30fd57e89db8d58b0db0e47d0ee57..f8b4ffc2ad2c0002b1ec246d545435bb4d40fe80 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -29,3 +29,10 @@ arm-dts-imx6q-dhcom-add-pu-vdd1p1-vdd2p5-regulators.patch
  ext4-fix-bug-on-in-ext4_es_cache_extent-as-ext4_split_extent_at-failed.patch
  usb-dwc2-fix-build-in-periphal-only-mode.patch
  pid-take-a-reference-when-initializing-cad_pid.patch
+ocfs2-fix-data-corruption-by-fallocate.patch
+nfc-fix-null-ptr-dereference-in-llcp_sock_getname-after-failed-connect.patch
+x86-apic-mark-_all_-legacy-interrupts-when-io-apic-is-missing.patch
+btrfs-mark-ordered-extent-and-inode-with-error-if-we-fail-to-finish.patch
+btrfs-fix-error-handling-in-btrfs_del_csums.patch
+btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch
+btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch
diff --git a/queue-4.19/x86-apic-mark-_all_-legacy-interrupts-when-io-apic-is-missing.patch b/queue-4.19/x86-apic-mark-_all_-legacy-interrupts-when-io-apic-is-missing.patch

new file mode 100644 (file)

index 0000000..925b87c
--- /dev/null
+++ b/queue-4.19/x86-apic-mark-_all_-legacy-interrupts-when-io-apic-is-missing.patch
@@ -0,0 +1,95 @@
+From 7d65f9e80646c595e8c853640a9d0768a33e204c Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 25 May 2021 13:08:41 +0200
+Subject: x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 7d65f9e80646c595e8c853640a9d0768a33e204c upstream.
+
+PIC interrupts do not support affinity setting and they can end up on
+any online CPU. Therefore, it's required to mark the associated vectors
+as system-wide reserved. Otherwise, the corresponding irq descriptors
+are copied to the secondary CPUs but the vectors are not marked as
+assigned or reserved. This works correctly for the IO/APIC case.
+
+When the IO/APIC is disabled via config, kernel command line or lack of
+enumeration then all legacy interrupts are routed through the PIC, but
+nothing marks them as system-wide reserved vectors.
+
+As a consequence, a subsequent allocation on a secondary CPU can result in
+allocating one of these vectors, which triggers the BUG() in
+apic_update_vector() because the interrupt descriptor slot is not empty.
+
+Imran tried to work around that by marking those interrupts as allocated
+when a CPU comes online. But that's wrong in case that the IO/APIC is
+available and one of the legacy interrupts, e.g. IRQ0, has been switched to
+PIC mode because then marking them as allocated will fail as they are
+already marked as system vectors.
+
+Stay consistent and update the legacy vectors after attempting IO/APIC
+initialization and mark them as system vectors in case that no IO/APIC is
+available.
+
+Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment")
+Reported-by: Imran Khan <imran.f.khan@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210519233928.2157496-1-imran.f.khan@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/apic.h   |    1 +
+ arch/x86/kernel/apic/apic.c   |    1 +
+ arch/x86/kernel/apic/vector.c |   20 ++++++++++++++++++++
+ 3 files changed, 22 insertions(+)
+
+--- a/arch/x86/include/asm/apic.h
++++ b/arch/x86/include/asm/apic.h
+@@ -172,6 +172,7 @@ static inline int apic_is_clustered_box(
+ extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+ extern void lapic_assign_system_vectors(void);
+ extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
++extern void lapic_update_legacy_vectors(void);
+ extern void lapic_online(void);
+ extern void lapic_offline(void);
+ 
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -2507,6 +2507,7 @@ void __init apic_bsp_setup(bool upmode)
+       end_local_APIC_setup();
+       irq_remap_enable_fault_handling();
+       setup_IO_APIC();
++      lapic_update_legacy_vectors();
+ }
+ 
+ #ifdef CONFIG_UP_LATE_INIT
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -682,6 +682,26 @@ void lapic_assign_legacy_vector(unsigned
+       irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
+ }
+ 
++void __init lapic_update_legacy_vectors(void)
++{
++      unsigned int i;
++
++      if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
++              return;
++
++      /*
++       * If the IO/APIC is disabled via config, kernel command line or
++       * lack of enumeration then all legacy interrupts are routed
++       * through the PIC. Make sure that they are marked as legacy
+       * vectors. PIC_CASCADE_IR has already been marked in
++       * lapic_assign_system_vectors().
++       */
++      for (i = 0; i < nr_legacy_irqs(); i++) {
++              if (i != PIC_CASCADE_IR)
++                      lapic_assign_legacy_vector(i, true);
++      }
++}
++
+ void __init lapic_assign_system_vectors(void)
+ {
+       unsigned int i, vector = 0;
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 8 Jun 2021 14:05:32 +0000 (16:05 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 8 Jun 2021 14:05:32 +0000 (16:05 +0200)
queue-4.19/btrfs-fix-error-handling-in-btrfs_del_csums.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/btrfs-mark-ordered-extent-and-inode-with-error-if-we-fail-to-finish.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/nfc-fix-null-ptr-dereference-in-llcp_sock_getname-after-failed-connect.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/ocfs2-fix-data-corruption-by-fallocate.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series		patch \| blob \| blame \| history
queue-4.19/x86-apic-mark-_all_-legacy-interrupts-when-io-apic-is-missing.patch	[new file with mode: 0644]	patch \| blob