git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.16-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Mar 2022 14:09:05 +0000 (15:09 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Mar 2022 14:09:05 +0000 (15:09 +0100)
added patches:
arm-fix-thumb2-regression-with-spectre-bhb.patch
net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch
virtio-acknowledge-all-features-before-access.patch
virtio-unexport-virtio_finalize_features.patch
watch_queue-fix-filter-limit-check.patch
watch_queue-fix-lack-of-barrier-sync-lock-between-post-and-read.patch
watch_queue-fix-the-alloc-bitmap-size-to-reflect-notes-allocated.patch
watch_queue-fix-to-always-request-a-pow-of-2-pipe-ring-size.patch
watch_queue-fix-to-release-page-in-release.patch
watch_queue-free-the-alloc-bitmap-when-the-watch_queue-is-torn-down.patch
watch_queue-make-comment-about-setting-defunct-more-accurate.patch
watch_queue-pipe-free-watchqueue-state-after-clearing-pipe-ring.patch

13 files changed:
queue-5.16/arm-fix-thumb2-regression-with-spectre-bhb.patch [new file with mode: 0644]
queue-5.16/net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch [new file with mode: 0644]
queue-5.16/series
queue-5.16/virtio-acknowledge-all-features-before-access.patch [new file with mode: 0644]
queue-5.16/virtio-unexport-virtio_finalize_features.patch [new file with mode: 0644]
queue-5.16/watch_queue-fix-filter-limit-check.patch [new file with mode: 0644]
queue-5.16/watch_queue-fix-lack-of-barrier-sync-lock-between-post-and-read.patch [new file with mode: 0644]
queue-5.16/watch_queue-fix-the-alloc-bitmap-size-to-reflect-notes-allocated.patch [new file with mode: 0644]
queue-5.16/watch_queue-fix-to-always-request-a-pow-of-2-pipe-ring-size.patch [new file with mode: 0644]
queue-5.16/watch_queue-fix-to-release-page-in-release.patch [new file with mode: 0644]
queue-5.16/watch_queue-free-the-alloc-bitmap-when-the-watch_queue-is-torn-down.patch [new file with mode: 0644]
queue-5.16/watch_queue-make-comment-about-setting-defunct-more-accurate.patch [new file with mode: 0644]
queue-5.16/watch_queue-pipe-free-watchqueue-state-after-clearing-pipe-ring.patch [new file with mode: 0644]

diff --git a/queue-5.16/arm-fix-thumb2-regression-with-spectre-bhb.patch b/queue-5.16/arm-fix-thumb2-regression-with-spectre-bhb.patch
new file mode 100644 (file)
index 0000000..22548fb
--- /dev/null
@@ -0,0 +1,37 @@
+From 6c7cb60bff7aec24b834343ff433125f469886a3 Mon Sep 17 00:00:00 2001
+From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
+Date: Fri, 11 Mar 2022 17:13:17 +0000
+Subject: ARM: fix Thumb2 regression with Spectre BHB
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit 6c7cb60bff7aec24b834343ff433125f469886a3 upstream.
+
+When building for Thumb2, the vectors make use of a local label. Sadly,
+the Spectre BHB code also uses a local label with the same number which
+results in the Thumb2 reference pointing at the wrong place. Fix this
+by changing the number used for the Spectre BHB local label.
+
+Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround")
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/kernel/entry-armv.S |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/kernel/entry-armv.S
++++ b/arch/arm/kernel/entry-armv.S
+@@ -1040,9 +1040,9 @@ vector_bhb_loop8_\name:
+       @ bhb workaround
+       mov     r0, #8
+-1:    b       . + 4
++3:    b       . + 4
+       subs    r0, r0, #1
+-      bne     1b
++      bne     3b
+       dsb
+       isb
+       b       2b
diff --git a/queue-5.16/net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch b/queue-5.16/net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch
new file mode 100644 (file)
index 0000000..ea406af
--- /dev/null
@@ -0,0 +1,35 @@
+From 39bab83b119faac4bf7f07173a42ed35be95147e Mon Sep 17 00:00:00 2001
+From: Dima Chumak <dchumak@nvidia.com>
+Date: Mon, 17 Jan 2022 15:32:16 +0200
+Subject: net/mlx5: Fix offloading with ESWITCH_IPV4_TTL_MODIFY_ENABLE
+
+From: Dima Chumak <dchumak@nvidia.com>
+
+commit 39bab83b119faac4bf7f07173a42ed35be95147e upstream.
+
+Only prio 1 is supported for nic mode when there is no ignore flow level
+support in firmware. But for switchdev mode, which supports fixed number
+of statically pre-allocated prios, this restriction is not relevant so
+it can be relaxed.
+
+Fixes: d671e109bd85 ("net/mlx5: Fix tc max supported prio for nic mode")
+Signed-off-by: Dima Chumak <dchumak@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -121,9 +121,6 @@ u32 mlx5_chains_get_nf_ft_chain(struct m
+ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+ {
+-      if (!mlx5_chains_prios_supported(chains))
+-              return 1;
+-
+       if (mlx5_chains_ignore_flow_level_supported(chains))
+               return UINT_MAX;
diff --git a/queue-5.16/series b/queue-5.16/series
index a7a949960794215548b6d0585c399da5633bef85..9619417a2e672dd8526219473f9180a2a32789b7 100644 (file)
@@ -98,3 +98,15 @@ arm64-dts-marvell-armada-37xx-remap-io-space-to-bus-address-0x0.patch
 arm64-ensure-execute-only-permissions-are-not-allowed-without-epan.patch
 arm64-kasan-fix-include-error-in-mte-functions.patch
 swiotlb-rework-fix-info-leak-with-dma_from_device.patch
+virtio-unexport-virtio_finalize_features.patch
+virtio-acknowledge-all-features-before-access.patch
+net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch
+arm-fix-thumb2-regression-with-spectre-bhb.patch
+watch_queue-fix-filter-limit-check.patch
+watch_queue-pipe-free-watchqueue-state-after-clearing-pipe-ring.patch
+watch_queue-fix-to-release-page-in-release.patch
+watch_queue-fix-to-always-request-a-pow-of-2-pipe-ring-size.patch
+watch_queue-fix-the-alloc-bitmap-size-to-reflect-notes-allocated.patch
+watch_queue-free-the-alloc-bitmap-when-the-watch_queue-is-torn-down.patch
+watch_queue-fix-lack-of-barrier-sync-lock-between-post-and-read.patch
+watch_queue-make-comment-about-setting-defunct-more-accurate.patch
diff --git a/queue-5.16/virtio-acknowledge-all-features-before-access.patch b/queue-5.16/virtio-acknowledge-all-features-before-access.patch
new file mode 100644 (file)
index 0000000..b907478
--- /dev/null
@@ -0,0 +1,141 @@
+From 4fa59ede95195f267101a1b8916992cf3f245cdb Mon Sep 17 00:00:00 2001
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Fri, 14 Jan 2022 14:58:41 -0500
+Subject: virtio: acknowledge all features before access
+
+From: Michael S. Tsirkin <mst@redhat.com>
+
+commit 4fa59ede95195f267101a1b8916992cf3f245cdb upstream.
+
+The feature negotiation was designed in a way that
+makes it possible for devices to know which config
+fields will be accessed by drivers.
+
+This is broken since commit 404123c2db79 ("virtio: allow drivers to
+validate features") with fallout in at least block and net.  We have a
+partial work-around in commit 2f9a174f918e ("virtio: write back
+F_VERSION_1 before validate") which at least lets devices find out which
+format config space should have, but this is a partial fix: guests
+should not access config space without acknowledging features since
+otherwise we'll never be able to change the config space format.
+
+To fix, split finalize_features from virtio_finalize_features and
+call finalize_features with all feature bits before validation,
+and then - if validation changed any bits - once again after.
+
+Since virtio_finalize_features no longer writes out features
+rename it to virtio_features_ok - since that is what it does:
+checks that features are ok with the device.
+
+As a side effect, this also reduces the amount of hypervisor accesses -
+we now only acknowledge features once unless we are clearing any
+features when validating (which is uncommon).
+
+IIRC, I think that this was more or less always the intent in the spec, but
+unfortunately the way the spec is worded does not say this explicitly. I
+plan to address this at the spec level, too.
+
+Acked-by: Jason Wang <jasowang@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: 404123c2db79 ("virtio: allow drivers to validate features")
+Fixes: 2f9a174f918e ("virtio: write back F_VERSION_1 before validate")
+Cc: "Halil Pasic" <pasic@linux.ibm.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/virtio/virtio.c       |   39 ++++++++++++++++++++++-----------------
+ include/linux/virtio_config.h |    3 ++-
+ 2 files changed, 24 insertions(+), 18 deletions(-)
+
+--- a/drivers/virtio/virtio.c
++++ b/drivers/virtio/virtio.c
+@@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_dev
+ }
+ EXPORT_SYMBOL_GPL(virtio_add_status);
+-static int virtio_finalize_features(struct virtio_device *dev)
++/* Do some validation, then set FEATURES_OK */
++static int virtio_features_ok(struct virtio_device *dev)
+ {
+-      int ret = dev->config->finalize_features(dev);
+       unsigned status;
++      int ret;
+       might_sleep();
+-      if (ret)
+-              return ret;
+       ret = arch_has_restricted_virtio_memory_access();
+       if (ret) {
+@@ -238,17 +237,6 @@ static int virtio_dev_probe(struct devic
+               driver_features_legacy = driver_features;
+       }
+-      /*
+-       * Some devices detect legacy solely via F_VERSION_1. Write
+-       * F_VERSION_1 to force LE config space accesses before FEATURES_OK for
+-       * these when needed.
+-       */
+-      if (drv->validate && !virtio_legacy_is_little_endian()
+-                        && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) {
+-              dev->features = BIT_ULL(VIRTIO_F_VERSION_1);
+-              dev->config->finalize_features(dev);
+-      }
+-
+       if (device_features & (1ULL << VIRTIO_F_VERSION_1))
+               dev->features = driver_features & device_features;
+       else
+@@ -259,13 +247,26 @@ static int virtio_dev_probe(struct devic
+               if (device_features & (1ULL << i))
+                       __virtio_set_bit(dev, i);
++      err = dev->config->finalize_features(dev);
++      if (err)
++              goto err;
++
+       if (drv->validate) {
++              u64 features = dev->features;
++
+               err = drv->validate(dev);
+               if (err)
+                       goto err;
++
++              /* Did validation change any features? Then write them again. */
++              if (features != dev->features) {
++                      err = dev->config->finalize_features(dev);
++                      if (err)
++                              goto err;
++              }
+       }
+-      err = virtio_finalize_features(dev);
++      err = virtio_features_ok(dev);
+       if (err)
+               goto err;
+@@ -489,7 +490,11 @@ int virtio_device_restore(struct virtio_
+       /* We have a driver! */
+       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+-      ret = virtio_finalize_features(dev);
++      ret = dev->config->finalize_features(dev);
++      if (ret)
++              goto err;
++
++      ret = virtio_features_ok(dev);
+       if (ret)
+               goto err;
+--- a/include/linux/virtio_config.h
++++ b/include/linux/virtio_config.h
+@@ -64,8 +64,9 @@ struct virtio_shm_region {
+  *    Returns the first 64 feature bits (all we currently need).
+  * @finalize_features: confirm what device features we'll be using.
+  *    vdev: the virtio_device
+- *    This gives the final feature bits for the device: it can change
++ *    This sends the driver feature bits to the device: it can change
+  *    the dev->feature bits if it wants.
++ * Note: despite the name this can be called any number of times.
+  *    Returns 0 on success or error status
+  * @bus_name: return the bus name associated with the device (optional)
+  *    vdev: the virtio_device
diff --git a/queue-5.16/virtio-unexport-virtio_finalize_features.patch b/queue-5.16/virtio-unexport-virtio_finalize_features.patch
new file mode 100644 (file)
index 0000000..a9f03ef
--- /dev/null
@@ -0,0 +1,50 @@
+From 838d6d3461db0fdbf33fc5f8a69c27b50b4a46da Mon Sep 17 00:00:00 2001
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Fri, 14 Jan 2022 14:56:15 -0500
+Subject: virtio: unexport virtio_finalize_features
+
+From: Michael S. Tsirkin <mst@redhat.com>
+
+commit 838d6d3461db0fdbf33fc5f8a69c27b50b4a46da upstream.
+
+virtio_finalize_features is only used internally within virtio.
+No reason to export it.
+
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/virtio/virtio.c |    3 +--
+ include/linux/virtio.h  |    1 -
+ 2 files changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/virtio/virtio.c
++++ b/drivers/virtio/virtio.c
+@@ -166,7 +166,7 @@ void virtio_add_status(struct virtio_dev
+ }
+ EXPORT_SYMBOL_GPL(virtio_add_status);
+-int virtio_finalize_features(struct virtio_device *dev)
++static int virtio_finalize_features(struct virtio_device *dev)
+ {
+       int ret = dev->config->finalize_features(dev);
+       unsigned status;
+@@ -202,7 +202,6 @@ int virtio_finalize_features(struct virt
+       }
+       return 0;
+ }
+-EXPORT_SYMBOL_GPL(virtio_finalize_features);
+ static int virtio_dev_probe(struct device *_d)
+ {
+--- a/include/linux/virtio.h
++++ b/include/linux/virtio.h
+@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev
+ void virtio_break_device(struct virtio_device *dev);
+ void virtio_config_changed(struct virtio_device *dev);
+-int virtio_finalize_features(struct virtio_device *dev);
+ #ifdef CONFIG_PM_SLEEP
+ int virtio_device_freeze(struct virtio_device *dev);
+ int virtio_device_restore(struct virtio_device *dev);
diff --git a/queue-5.16/watch_queue-fix-filter-limit-check.patch b/queue-5.16/watch_queue-fix-filter-limit-check.patch
new file mode 100644 (file)
index 0000000..51ca005
--- /dev/null
@@ -0,0 +1,101 @@
+From c993ee0f9f81caf5767a50d1faeba39a0dc82af2 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:23:31 +0000
+Subject: watch_queue: Fix filter limit check
+
+From: David Howells <dhowells@redhat.com>
+
+commit c993ee0f9f81caf5767a50d1faeba39a0dc82af2 upstream.
+
+In watch_queue_set_filter(), there are a couple of places where we check
+that the filter type value does not exceed what the type_filter bitmap
+can hold.  One place calculates the number of bits by:
+
+   if (tf[i].type >= sizeof(wfilter->type_filter) * 8)
+
+which is fine, but the second does:
+
+   if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG)
+
+which is not.  This can lead to a couple of out-of-bounds writes due to
+a too-large type:
+
+ (1) __set_bit() on wfilter->type_filter
+ (2) Writing more elements in wfilter->filters[] than we allocated.
+
+Fix this by just using the proper WATCH_TYPE__NR instead, which is the
+number of types we actually know about.
+
+The bug may cause an oops looking something like:
+
+  BUG: KASAN: slab-out-of-bounds in watch_queue_set_filter+0x659/0x740
+  Write of size 4 at addr ffff88800d2c66bc by task watch_queue_oob/611
+  ...
+  Call Trace:
+   <TASK>
+   dump_stack_lvl+0x45/0x59
+   print_address_description.constprop.0+0x1f/0x150
+   ...
+   kasan_report.cold+0x7f/0x11b
+   ...
+   watch_queue_set_filter+0x659/0x740
+   ...
+   __x64_sys_ioctl+0x127/0x190
+   do_syscall_64+0x43/0x90
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+  Allocated by task 611:
+   kasan_save_stack+0x1e/0x40
+   __kasan_kmalloc+0x81/0xa0
+   watch_queue_set_filter+0x23a/0x740
+   __x64_sys_ioctl+0x127/0x190
+   do_syscall_64+0x43/0x90
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+  The buggy address belongs to the object at ffff88800d2c66a0
+   which belongs to the cache kmalloc-32 of size 32
+  The buggy address is located 28 bytes inside of
+   32-byte region [ffff88800d2c66a0, ffff88800d2c66c0)
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/watch_queue.h |    3 ++-
+ kernel/watch_queue.c        |    4 ++--
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+--- a/include/linux/watch_queue.h
++++ b/include/linux/watch_queue.h
+@@ -28,7 +28,8 @@ struct watch_type_filter {
+ struct watch_filter {
+       union {
+               struct rcu_head rcu;
+-              unsigned long   type_filter[2]; /* Bitmask of accepted types */
++              /* Bitmask of accepted types */
++              DECLARE_BITMAP(type_filter, WATCH_TYPE__NR);
+       };
+       u32                     nr_filters;     /* Number of filters */
+       struct watch_type_filter filters[];
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -320,7 +320,7 @@ long watch_queue_set_filter(struct pipe_
+                   tf[i].info_mask & WATCH_INFO_LENGTH)
+                       goto err_filter;
+               /* Ignore any unknown types */
+-              if (tf[i].type >= sizeof(wfilter->type_filter) * 8)
++              if (tf[i].type >= WATCH_TYPE__NR)
+                       continue;
+               nr_filter++;
+       }
+@@ -336,7 +336,7 @@ long watch_queue_set_filter(struct pipe_
+       q = wfilter->filters;
+       for (i = 0; i < filter.nr_filters; i++) {
+-              if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG)
++              if (tf[i].type >= WATCH_TYPE__NR)
+                       continue;
+               q->type                 = tf[i].type;
diff --git a/queue-5.16/watch_queue-fix-lack-of-barrier-sync-lock-between-post-and-read.patch b/queue-5.16/watch_queue-fix-lack-of-barrier-sync-lock-between-post-and-read.patch
new file mode 100644 (file)
index 0000000..cedb4f1
--- /dev/null
@@ -0,0 +1,55 @@
+From 2ed147f015af2b48f41c6f0b6746aa9ea85c19f3 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:24:36 +0000
+Subject: watch_queue: Fix lack of barrier/sync/lock between post and read
+
+From: David Howells <dhowells@redhat.com>
+
+commit 2ed147f015af2b48f41c6f0b6746aa9ea85c19f3 upstream.
+
+There's nothing to synchronise post_one_notification() versus
+pipe_read().  Whilst posting is done under pipe->rd_wait.lock, the
+reader only takes pipe->mutex which cannot bar notification posting as
+that may need to be made from contexts that cannot sleep.
+
+Fix this by setting pipe->head with a barrier in post_one_notification()
+and reading pipe->head with a barrier in pipe_read().
+
+If that's not sufficient, the rd_wait.lock will need to be taken,
+possibly in a ->confirm() op so that it only applies to notifications.
+The lock would, however, have to be dropped before copy_page_to_iter()
+is invoked.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/pipe.c            |    3 ++-
+ kernel/watch_queue.c |    2 +-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/pipe.c
++++ b/fs/pipe.c
+@@ -252,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov
+        */
+       was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+       for (;;) {
+-              unsigned int head = pipe->head;
++              /* Read ->head with a barrier vs post_one_notification() */
++              unsigned int head = smp_load_acquire(&pipe->head);
+               unsigned int tail = pipe->tail;
+               unsigned int mask = pipe->ring_size - 1;
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -113,7 +113,7 @@ static bool post_one_notification(struct
+       buf->offset = offset;
+       buf->len = len;
+       buf->flags = PIPE_BUF_FLAG_WHOLE;
+-      pipe->head = head + 1;
++      smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
+       if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
+               spin_unlock_irq(&pipe->rd_wait.lock);
diff --git a/queue-5.16/watch_queue-fix-the-alloc-bitmap-size-to-reflect-notes-allocated.patch b/queue-5.16/watch_queue-fix-the-alloc-bitmap-size-to-reflect-notes-allocated.patch
new file mode 100644 (file)
index 0000000..d430c7c
--- /dev/null
@@ -0,0 +1,45 @@
+From 3b4c0371928c17af03e8397ac842346624017ce6 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:24:22 +0000
+Subject: watch_queue: Fix the alloc bitmap size to reflect notes allocated
+
+From: David Howells <dhowells@redhat.com>
+
+commit 3b4c0371928c17af03e8397ac842346624017ce6 upstream.
+
+Currently, watch_queue_set_size() sets the number of notes available in
+wqueue->nr_notes according to the number of notes allocated, but sets
+the size of the bitmap to the unrounded number of notes originally asked
+for.
+
+Fix this by setting the bitmap size to the number of notes we're
+actually going to make available (ie. the number allocated).
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/watch_queue.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -244,6 +244,7 @@ long watch_queue_set_size(struct pipe_in
+               goto error;
+       }
++      nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
+       ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes));
+       if (ret < 0)
+               goto error;
+@@ -269,7 +270,7 @@ long watch_queue_set_size(struct pipe_in
+       wqueue->notes = pages;
+       wqueue->notes_bitmap = bitmap;
+       wqueue->nr_pages = nr_pages;
+-      wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
++      wqueue->nr_notes = nr_notes;
+       return 0;
+ error_p:
diff --git a/queue-5.16/watch_queue-fix-to-always-request-a-pow-of-2-pipe-ring-size.patch b/queue-5.16/watch_queue-fix-to-always-request-a-pow-of-2-pipe-ring-size.patch
new file mode 100644 (file)
index 0000000..483fee6
--- /dev/null
@@ -0,0 +1,41 @@
+From 96a4d8912b28451cd62825fd7caa0e66e091d938 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:24:08 +0000
+Subject: watch_queue: Fix to always request a pow-of-2 pipe ring size
+
+From: David Howells <dhowells@redhat.com>
+
+commit 96a4d8912b28451cd62825fd7caa0e66e091d938 upstream.
+
+The pipe ring size must always be a power of 2 as the head and tail
+pointers are masked off by AND'ing with the size of the ring - 1.
+watch_queue_set_size(), however, lets you specify any number of notes
+between 1 and 511.  This number is passed through to pipe_resize_ring()
+without checking/forcing its alignment.
+
+Fix this by rounding the number of slots required up to the nearest
+power of two.  The request is meant to guarantee that at least that many
+notifications can be generated before the queue is full, so rounding
+down isn't an option, but, alternatively, it may be better to give an
+error if we aren't allowed to allocate that much ring space.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/watch_queue.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -244,7 +244,7 @@ long watch_queue_set_size(struct pipe_in
+               goto error;
+       }
+-      ret = pipe_resize_ring(pipe, nr_notes);
++      ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes));
+       if (ret < 0)
+               goto error;
diff --git a/queue-5.16/watch_queue-fix-to-release-page-in-release.patch b/queue-5.16/watch_queue-fix-to-release-page-in-release.patch
new file mode 100644 (file)
index 0000000..3974396
--- /dev/null
@@ -0,0 +1,36 @@
+From c1853fbadcba1497f4907971e7107888e0714c81 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:23:46 +0000
+Subject: watch_queue: Fix to release page in ->release()
+
+From: David Howells <dhowells@redhat.com>
+
+commit c1853fbadcba1497f4907971e7107888e0714c81 upstream.
+
+When a pipe ring descriptor points to a notification message, the
+refcount on the backing page is incremented by the generic get function,
+but the release function, which marks the bitmap, doesn't drop the page
+ref.
+
+Fix this by calling generic_pipe_buf_release() at the end of
+watch_queue_pipe_buf_release().
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/watch_queue.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -54,6 +54,7 @@ static void watch_queue_pipe_buf_release
+       bit += page->index;
+       set_bit(bit, wqueue->notes_bitmap);
++      generic_pipe_buf_release(pipe, buf);
+ }
+ // No try_steal function => no stealing
diff --git a/queue-5.16/watch_queue-free-the-alloc-bitmap-when-the-watch_queue-is-torn-down.patch b/queue-5.16/watch_queue-free-the-alloc-bitmap-when-the-watch_queue-is-torn-down.patch
new file mode 100644 (file)
index 0000000..5108d10
--- /dev/null
@@ -0,0 +1,31 @@
+From 7ea1a0124b6da246b5bc8c66cddaafd36acf3ecb Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:24:29 +0000
+Subject: watch_queue: Free the alloc bitmap when the watch_queue is torn down
+
+From: David Howells <dhowells@redhat.com>
+
+commit 7ea1a0124b6da246b5bc8c66cddaafd36acf3ecb upstream.
+
+Free the watch_queue note allocation bitmap when the watch_queue is
+destroyed.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/watch_queue.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -373,6 +373,7 @@ static void __put_watch_queue(struct kre
+       for (i = 0; i < wqueue->nr_pages; i++)
+               __free_page(wqueue->notes[i]);
++      bitmap_free(wqueue->notes_bitmap);
+       wfilter = rcu_access_pointer(wqueue->filter);
+       if (wfilter)
diff --git a/queue-5.16/watch_queue-make-comment-about-setting-defunct-more-accurate.patch b/queue-5.16/watch_queue-make-comment-about-setting-defunct-more-accurate.patch
new file mode 100644 (file)
index 0000000..bcefa25
--- /dev/null
@@ -0,0 +1,37 @@
+From 4edc0760412b0c4ecefc7e02cb855b310b122825 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:24:47 +0000
+Subject: watch_queue: Make comment about setting ->defunct more accurate
+
+From: David Howells <dhowells@redhat.com>
+
+commit 4edc0760412b0c4ecefc7e02cb855b310b122825 upstream.
+
+watch_queue_clear() has a comment stating that setting ->defunct to true
+prevents new additions as well as preventing notifications.  Whilst
+the latter is true, the first bit is superfluous since at the time this
+function is called, the pipe cannot be accessed to add new event
+sources.
+
+Remove the "new additions" bit from the comment.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/watch_queue.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -569,7 +569,7 @@ void watch_queue_clear(struct watch_queu
+       rcu_read_lock();
+       spin_lock_bh(&wqueue->lock);
+-      /* Prevent new additions and prevent notifications from happening */
++      /* Prevent new notifications from being stored. */
+       wqueue->defunct = true;
+       while (!hlist_empty(&wqueue->watches)) {
diff --git a/queue-5.16/watch_queue-pipe-free-watchqueue-state-after-clearing-pipe-ring.patch b/queue-5.16/watch_queue-pipe-free-watchqueue-state-after-clearing-pipe-ring.patch
new file mode 100644 (file)
index 0000000..5bbdc31
--- /dev/null
@@ -0,0 +1,53 @@
+From db8facfc9fafacefe8a835416a6b77c838088f8b Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 11 Mar 2022 13:23:38 +0000
+Subject: watch_queue, pipe: Free watchqueue state after clearing pipe ring
+
+From: David Howells <dhowells@redhat.com>
+
+commit db8facfc9fafacefe8a835416a6b77c838088f8b upstream.
+
+In free_pipe_info(), free the watchqueue state after clearing the pipe
+ring as each pipe ring descriptor has a release function, and in the
+case of a notification message, this is watch_queue_pipe_buf_release()
+which tries to mark the allocation bitmap that was previously released.
+
+Fix this by moving the put of the pipe's ref on the watch queue to after
+the ring has been cleared.  We still need to call watch_queue_clear()
+before doing that to make sure that the pipe is disconnected from any
+notification sources first.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/pipe.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/pipe.c
++++ b/fs/pipe.c
+@@ -830,10 +830,8 @@ void free_pipe_info(struct pipe_inode_in
+       int i;
+ #ifdef CONFIG_WATCH_QUEUE
+-      if (pipe->watch_queue) {
++      if (pipe->watch_queue)
+               watch_queue_clear(pipe->watch_queue);
+-              put_watch_queue(pipe->watch_queue);
+-      }
+ #endif
+       (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
+@@ -843,6 +841,10 @@ void free_pipe_info(struct pipe_inode_in
+               if (buf->ops)
+                       pipe_buf_release(pipe, buf);
+       }
++#ifdef CONFIG_WATCH_QUEUE
++      if (pipe->watch_queue)
++              put_watch_queue(pipe->watch_queue);
++#endif
+       if (pipe->tmp_page)
+               __free_page(pipe->tmp_page);
+       kfree(pipe->bufs);