git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jul 2024 08:32:10 +0000 (10:32 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jul 2024 08:32:10 +0000 (10:32 +0200)
added patches:
af_packet-handle-outgoing-vlan-packets-without-hardware-offloading.patch
ata-libata-scsi-honor-the-d_sense-bit-for-ck_cond-1-and-no-error.patch
char-tpm-fix-possible-memory-leak-in-tpm_bios_measurements_open.patch
drm-gma500-fix-null-pointer-dereference-in-cdv_intel_lvds_get_modes.patch
drm-gma500-fix-null-pointer-dereference-in-psb_intel_lvds_get_modes.patch
dt-bindings-thermal-correct-thermal-zone-node-name-limit.patch
ext2-verify-bitmap-and-itable-block-numbers-before-using-them.patch
fuse-verify-g-u-id-mount-options-correctly.patch
hfs-fix-to-initialize-fields-of-hfs_inode_info-after-hfs_alloc_inode.patch
ipv6-take-care-of-scope-when-choosing-the-src-addr.patch
landlock-don-t-lose-track-of-restrictions-on-cred_transfer.patch
media-venus-fix-use-after-free-in-vdec_close.patch
mm-hugetlb-fix-possible-recursive-locking-detected-warning.patch
mm-mmap_lock-replace-get_memcg_path_buf-with-on-stack-buffer.patch
net-netconsole-disable-target-before-netpoll-cleanup.patch
sched-fair-set_load_weight-must-also-call-reweight_task-for-sched_idle-tasks.patch
tick-broadcast-make-takeover-of-broadcast-hrtimer-reliable.patch

18 files changed:
queue-5.15/af_packet-handle-outgoing-vlan-packets-without-hardware-offloading.patch [new file with mode: 0644]
queue-5.15/ata-libata-scsi-honor-the-d_sense-bit-for-ck_cond-1-and-no-error.patch [new file with mode: 0644]
queue-5.15/char-tpm-fix-possible-memory-leak-in-tpm_bios_measurements_open.patch [new file with mode: 0644]
queue-5.15/drm-gma500-fix-null-pointer-dereference-in-cdv_intel_lvds_get_modes.patch [new file with mode: 0644]
queue-5.15/drm-gma500-fix-null-pointer-dereference-in-psb_intel_lvds_get_modes.patch [new file with mode: 0644]
queue-5.15/dt-bindings-thermal-correct-thermal-zone-node-name-limit.patch [new file with mode: 0644]
queue-5.15/ext2-verify-bitmap-and-itable-block-numbers-before-using-them.patch [new file with mode: 0644]
queue-5.15/fuse-verify-g-u-id-mount-options-correctly.patch [new file with mode: 0644]
queue-5.15/hfs-fix-to-initialize-fields-of-hfs_inode_info-after-hfs_alloc_inode.patch [new file with mode: 0644]
queue-5.15/ipv6-take-care-of-scope-when-choosing-the-src-addr.patch [new file with mode: 0644]
queue-5.15/landlock-don-t-lose-track-of-restrictions-on-cred_transfer.patch [new file with mode: 0644]
queue-5.15/media-venus-fix-use-after-free-in-vdec_close.patch [new file with mode: 0644]
queue-5.15/mm-hugetlb-fix-possible-recursive-locking-detected-warning.patch [new file with mode: 0644]
queue-5.15/mm-mmap_lock-replace-get_memcg_path_buf-with-on-stack-buffer.patch [new file with mode: 0644]
queue-5.15/net-netconsole-disable-target-before-netpoll-cleanup.patch [new file with mode: 0644]
queue-5.15/sched-fair-set_load_weight-must-also-call-reweight_task-for-sched_idle-tasks.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/tick-broadcast-make-takeover-of-broadcast-hrtimer-reliable.patch [new file with mode: 0644]

diff --git a/queue-5.15/af_packet-handle-outgoing-vlan-packets-without-hardware-offloading.patch b/queue-5.15/af_packet-handle-outgoing-vlan-packets-without-hardware-offloading.patch
new file mode 100644 (file)
index 0000000..a5059a4
--- /dev/null
@@ -0,0 +1,173 @@
+From 79eecf631c14e7f4057186570ac20e2cfac3802e Mon Sep 17 00:00:00 2001
+From: Chengen Du <chengen.du@canonical.com>
+Date: Sat, 13 Jul 2024 19:47:35 +0800
+Subject: af_packet: Handle outgoing VLAN packets without hardware offloading
+
+From: Chengen Du <chengen.du@canonical.com>
+
+commit 79eecf631c14e7f4057186570ac20e2cfac3802e upstream.
+
+The issue initially stems from libpcap. The ethertype will be overwritten
+as the VLAN TPID if the network interface lacks hardware VLAN offloading.
+In the outbound packet path, if hardware VLAN offloading is unavailable,
+the VLAN tag is inserted into the payload but then cleared from the sk_buff
+struct. Consequently, this can lead to a false negative when checking for
+the presence of a VLAN tag, causing the packet sniffing outcome to lack
+VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing
+tool may be unable to parse packets as expected.
+
+The TCI-TPID is missing because the prb_fill_vlan_info() function does not
+modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the
+payload and not in the sk_buff struct. The skb_vlan_tag_present() function
+only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header
+is stripped, preventing the packet capturing tool from determining the
+correct TCI-TPID value. Additionally, the protocol in SLL is incorrect,
+which means the packet capturing tool cannot parse the L3 header correctly.
+
+Link: https://github.com/the-tcpdump-group/libpcap/issues/1105
+Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@canonical.com/T/#u
+Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Chengen Du <chengen.du@canonical.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240713114735.62360-1-chengen.du@canonical.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   86 +++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 84 insertions(+), 2 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -505,6 +505,61 @@ static void *packet_current_frame(struct
+       return packet_lookup_frame(po, rb, rb->head, status);
+ }
++static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev)
++{
++      u8 *skb_orig_data = skb->data;
++      int skb_orig_len = skb->len;
++      struct vlan_hdr vhdr, *vh;
++      unsigned int header_len;
++
++      if (!dev)
++              return 0;
++
++      /* In the SOCK_DGRAM scenario, skb data starts at the network
++       * protocol, which is after the VLAN headers. The outer VLAN
++       * header is at the hard_header_len offset in non-variable
++       * length link layer headers. If it's a VLAN device, the
++       * min_header_len should be used to exclude the VLAN header
++       * size.
++       */
++      if (dev->min_header_len == dev->hard_header_len)
++              header_len = dev->hard_header_len;
++      else if (is_vlan_dev(dev))
++              header_len = dev->min_header_len;
++      else
++              return 0;
++
++      skb_push(skb, skb->data - skb_mac_header(skb));
++      vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr);
++      if (skb_orig_data != skb->data) {
++              skb->data = skb_orig_data;
++              skb->len = skb_orig_len;
++      }
++      if (unlikely(!vh))
++              return 0;
++
++      return ntohs(vh->h_vlan_TCI);
++}
++
++static __be16 vlan_get_protocol_dgram(struct sk_buff *skb)
++{
++      __be16 proto = skb->protocol;
++
++      if (unlikely(eth_type_vlan(proto))) {
++              u8 *skb_orig_data = skb->data;
++              int skb_orig_len = skb->len;
++
++              skb_push(skb, skb->data - skb_mac_header(skb));
++              proto = __vlan_get_protocol(skb, proto, NULL);
++              if (skb_orig_data != skb->data) {
++                      skb->data = skb_orig_data;
++                      skb->len = skb_orig_len;
++              }
++      }
++
++      return proto;
++}
++
+ static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
+ {
+       del_timer_sync(&pkc->retire_blk_timer);
+@@ -974,10 +1029,16 @@ static void prb_clear_rxhash(struct tpac
+ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
+                       struct tpacket3_hdr *ppd)
+ {
++      struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc);
++
+       if (skb_vlan_tag_present(pkc->skb)) {
+               ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
+               ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
+               ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++      } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) {
++              ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev);
++              ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol);
++              ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+       } else {
+               ppd->hv1.tp_vlan_tci = 0;
+               ppd->hv1.tp_vlan_tpid = 0;
+@@ -2393,6 +2454,10 @@ static int tpacket_rcv(struct sk_buff *s
+                       h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
+                       h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
+                       status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++              } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
++                      h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev);
++                      h.h2->tp_vlan_tpid = ntohs(skb->protocol);
++                      status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+               } else {
+                       h.h2->tp_vlan_tci = 0;
+                       h.h2->tp_vlan_tpid = 0;
+@@ -2422,7 +2487,8 @@ static int tpacket_rcv(struct sk_buff *s
+       sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
+       sll->sll_family = AF_PACKET;
+       sll->sll_hatype = dev->type;
+-      sll->sll_protocol = skb->protocol;
++      sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
++              vlan_get_protocol_dgram(skb) : skb->protocol;
+       sll->sll_pkttype = skb->pkt_type;
+       if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
+               sll->sll_ifindex = orig_dev->ifindex;
+@@ -3447,7 +3513,8 @@ static int packet_recvmsg(struct socket
+               /* Original length was stored in sockaddr_ll fields */
+               origlen = PACKET_SKB_CB(skb)->sa.origlen;
+               sll->sll_family = AF_PACKET;
+-              sll->sll_protocol = skb->protocol;
++              sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
++                      vlan_get_protocol_dgram(skb) : skb->protocol;
+       }
+       sock_recv_ts_and_drops(msg, sk, skb);
+@@ -3502,6 +3569,21 @@ static int packet_recvmsg(struct socket
+                       aux.tp_vlan_tci = skb_vlan_tag_get(skb);
+                       aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
+                       aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++              } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
++                      struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
++                      struct net_device *dev;
++
++                      rcu_read_lock();
++                      dev = dev_get_by_index_rcu(sock_net(sk), sll->sll_ifindex);
++                      if (dev) {
++                              aux.tp_vlan_tci = vlan_get_tci(skb, dev);
++                              aux.tp_vlan_tpid = ntohs(skb->protocol);
++                              aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++                      } else {
++                              aux.tp_vlan_tci = 0;
++                              aux.tp_vlan_tpid = 0;
++                      }
++                      rcu_read_unlock();
+               } else {
+                       aux.tp_vlan_tci = 0;
+                       aux.tp_vlan_tpid = 0;
diff --git a/queue-5.15/ata-libata-scsi-honor-the-d_sense-bit-for-ck_cond-1-and-no-error.patch b/queue-5.15/ata-libata-scsi-honor-the-d_sense-bit-for-ck_cond-1-and-no-error.patch
new file mode 100644 (file)
index 0000000..19e670d
--- /dev/null
@@ -0,0 +1,65 @@
+From 28ab9769117ca944cb6eb537af5599aa436287a4 Mon Sep 17 00:00:00 2001
+From: Igor Pylypiv <ipylypiv@google.com>
+Date: Tue, 2 Jul 2024 02:47:31 +0000
+Subject: ata: libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error
+
+From: Igor Pylypiv <ipylypiv@google.com>
+
+commit 28ab9769117ca944cb6eb537af5599aa436287a4 upstream.
+
+SAT-5 revision 8 specification removed the text about the ANSI INCITS
+431-2007 compliance which was requiring SCSI/ATA Translation (SAT) to
+return descriptor format sense data for the ATA PASS-THROUGH commands
+regardless of the setting of the D_SENSE bit.
+
+Let's honor the D_SENSE bit for ATA PASS-THROUGH commands while
+generating the "ATA PASS-THROUGH INFORMATION AVAILABLE" sense data.
+
+SAT-5 revision 7
+================
+
+12.2.2.8 Fixed format sense data
+
+Table 212 shows the fields returned in the fixed format sense data
+(see SPC-5) for ATA PASS-THROUGH commands. SATLs compliant with ANSI
+INCITS 431-2007, SCSI/ATA Translation (SAT) return descriptor format
+sense data for the ATA PASS-THROUGH commands regardless of the setting
+of the D_SENSE bit.
+
+SAT-5 revision 8
+================
+
+12.2.2.8 Fixed format sense data
+
+Table 211 shows the fields returned in the fixed format sense data
+(see SPC-5) for ATA PASS-THROUGH commands.
+
+Cc: stable@vger.kernel.org # 4.19+
+Reported-by: Niklas Cassel <cassel@kernel.org>
+Closes: https://lore.kernel.org/linux-ide/Zn1WUhmLglM4iais@ryzen.lan
+Reviewed-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Igor Pylypiv <ipylypiv@google.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Link: https://lore.kernel.org/r/20240702024735.1152293-4-ipylypiv@google.com
+Signed-off-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-scsi.c |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -872,11 +872,8 @@ static void ata_gen_passthru_sense(struc
+                                  &sense_key, &asc, &ascq, verbose);
+               ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
+       } else {
+-              /*
+-               * ATA PASS-THROUGH INFORMATION AVAILABLE
+-               * Always in descriptor format sense.
+-               */
+-              scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
++              /* ATA PASS-THROUGH INFORMATION AVAILABLE */
++              ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D);
+       }
+       if ((cmd->sense_buffer[0] & 0x7f) >= 0x72) {
diff --git a/queue-5.15/char-tpm-fix-possible-memory-leak-in-tpm_bios_measurements_open.patch b/queue-5.15/char-tpm-fix-possible-memory-leak-in-tpm_bios_measurements_open.patch
new file mode 100644 (file)
index 0000000..441ad32
--- /dev/null
@@ -0,0 +1,35 @@
+From 5d8e2971e817bb64225fc0b6327a78752f58a9aa Mon Sep 17 00:00:00 2001
+From: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+Date: Thu, 27 Jun 2024 15:31:09 +0900
+Subject: char: tpm: Fix possible memory leak in tpm_bios_measurements_open()
+
+From: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+
+commit 5d8e2971e817bb64225fc0b6327a78752f58a9aa upstream.
+
+In tpm_bios_measurements_open(), get_device() is called on the device
+embedded in struct tpm_chip. In the error path, however, put_device() is
+not called. This results in a reference count leak, which prevents the
+device from being properly released. This commit makes sure to call
+put_device() when the seq_open() call fails.
+
+Cc: stable@vger.kernel.org # +v4.18
+Fixes: 9b01b5356629 ("tpm: Move shared eventlog functions to common.c")
+Signed-off-by: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/eventlog/common.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/char/tpm/eventlog/common.c
++++ b/drivers/char/tpm/eventlog/common.c
+@@ -47,6 +47,8 @@ static int tpm_bios_measurements_open(st
+       if (!err) {
+               seq = file->private_data;
+               seq->private = chip;
++      } else {
++              put_device(&chip->dev);
+       }
+       return err;
diff --git a/queue-5.15/drm-gma500-fix-null-pointer-dereference-in-cdv_intel_lvds_get_modes.patch b/queue-5.15/drm-gma500-fix-null-pointer-dereference-in-cdv_intel_lvds_get_modes.patch
new file mode 100644 (file)
index 0000000..c6982c1
--- /dev/null
@@ -0,0 +1,35 @@
+From cb520c3f366c77e8d69e4e2e2781a8ce48d98e79 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make24@iscas.ac.cn>
+Date: Tue, 9 Jul 2024 19:33:11 +0800
+Subject: drm/gma500: fix null pointer dereference in cdv_intel_lvds_get_modes
+
+From: Ma Ke <make24@iscas.ac.cn>
+
+commit cb520c3f366c77e8d69e4e2e2781a8ce48d98e79 upstream.
+
+In cdv_intel_lvds_get_modes(), the return value of drm_mode_duplicate()
+is assigned to mode, which will lead to a NULL pointer dereference on
+failure of drm_mode_duplicate(). Add a check to avoid npd.
+
+Cc: stable@vger.kernel.org
+Fixes: 6a227d5fd6c4 ("gma500: Add support for Cedarview")
+Signed-off-by: Ma Ke <make24@iscas.ac.cn>
+Signed-off-by: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240709113311.37168-1-make24@iscas.ac.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/gma500/cdv_intel_lvds.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
++++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+@@ -310,6 +310,9 @@ static int cdv_intel_lvds_get_modes(stru
+       if (mode_dev->panel_fixed_mode != NULL) {
+               struct drm_display_mode *mode =
+                   drm_mode_duplicate(dev, mode_dev->panel_fixed_mode);
++              if (!mode)
++                      return 0;
++
+               drm_mode_probed_add(connector, mode);
+               return 1;
+       }
diff --git a/queue-5.15/drm-gma500-fix-null-pointer-dereference-in-psb_intel_lvds_get_modes.patch b/queue-5.15/drm-gma500-fix-null-pointer-dereference-in-psb_intel_lvds_get_modes.patch
new file mode 100644 (file)
index 0000000..c99a2bc
--- /dev/null
@@ -0,0 +1,35 @@
+From 2df7aac81070987b0f052985856aa325a38debf6 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make24@iscas.ac.cn>
+Date: Tue, 9 Jul 2024 17:20:11 +0800
+Subject: drm/gma500: fix null pointer dereference in psb_intel_lvds_get_modes
+
+From: Ma Ke <make24@iscas.ac.cn>
+
+commit 2df7aac81070987b0f052985856aa325a38debf6 upstream.
+
+In psb_intel_lvds_get_modes(), the return value of drm_mode_duplicate() is
+assigned to mode, which will lead to a possible NULL pointer dereference
+on failure of drm_mode_duplicate(). Add a check to avoid npd.
+
+Cc: stable@vger.kernel.org
+Fixes: 89c78134cc54 ("gma500: Add Poulsbo support")
+Signed-off-by: Ma Ke <make24@iscas.ac.cn>
+Signed-off-by: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240709092011.3204970-1-make24@iscas.ac.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/gma500/psb_intel_lvds.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
++++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
+@@ -508,6 +508,9 @@ static int psb_intel_lvds_get_modes(stru
+       if (mode_dev->panel_fixed_mode != NULL) {
+               struct drm_display_mode *mode =
+                   drm_mode_duplicate(dev, mode_dev->panel_fixed_mode);
++              if (!mode)
++                      return 0;
++
+               drm_mode_probed_add(connector, mode);
+               return 1;
+       }
diff --git a/queue-5.15/dt-bindings-thermal-correct-thermal-zone-node-name-limit.patch b/queue-5.15/dt-bindings-thermal-correct-thermal-zone-node-name-limit.patch
new file mode 100644 (file)
index 0000000..3981106
--- /dev/null
@@ -0,0 +1,41 @@
+From 97e32381d0fc6c2602a767b0c46e15eb2b75971d Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Date: Tue, 2 Jul 2024 16:52:48 +0200
+Subject: dt-bindings: thermal: correct thermal zone node name limit
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+commit 97e32381d0fc6c2602a767b0c46e15eb2b75971d upstream.
+
+Linux kernel uses thermal zone node name during registering thermal
+zones and has a hard-coded limit of 20 characters, including terminating
+NUL byte.  The bindings expect node names to finish with '-thermal'
+which is eight bytes long, thus we have only 11 characters for the reset
+of the node name (thus 10 for the pattern after leading fixed character).
+
+Reported-by: Rob Herring <robh@kernel.org>
+Closes: https://lore.kernel.org/all/CAL_JsqKogbT_4DPd1n94xqeHaU_J8ve5K09WOyVsRX3jxxUW3w@mail.gmail.com/
+Fixes: 1202a442a31f ("dt-bindings: thermal: Add yaml bindings for thermal zones")
+Cc: stable@vger.kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Link: https://lore.kernel.org/r/20240702145248.47184-1-krzysztof.kozlowski@linaro.org
+Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/devicetree/bindings/thermal/thermal-zones.yaml |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
++++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
+@@ -49,7 +49,10 @@ properties:
+       to take when the temperature crosses those thresholds.
+ patternProperties:
+-  "^[a-zA-Z][a-zA-Z0-9\\-]{1,12}-thermal$":
++  # Node name is limited in size due to Linux kernel requirements - 19
++  # characters in total (see THERMAL_NAME_LENGTH, including terminating NUL
++  # byte):
++  "^[a-zA-Z][a-zA-Z0-9\\-]{1,10}-thermal$":
+     type: object
+     description:
+       Each thermal zone node contains information about how frequently it
diff --git a/queue-5.15/ext2-verify-bitmap-and-itable-block-numbers-before-using-them.patch b/queue-5.15/ext2-verify-bitmap-and-itable-block-numbers-before-using-them.patch
new file mode 100644 (file)
index 0000000..9a4ae37
--- /dev/null
@@ -0,0 +1,57 @@
+From 322a6aff03937aa1ece33b4e46c298eafaf9ac41 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 24 Jun 2024 17:12:56 +0200
+Subject: ext2: Verify bitmap and itable block numbers before using them
+
+From: Jan Kara <jack@suse.cz>
+
+commit 322a6aff03937aa1ece33b4e46c298eafaf9ac41 upstream.
+
+Verify bitmap block numbers and inode table blocks are sane before using
+them for checking bits in the block bitmap.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext2/balloc.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/fs/ext2/balloc.c
++++ b/fs/ext2/balloc.c
+@@ -79,26 +79,33 @@ static int ext2_valid_block_bitmap(struc
+       ext2_grpblk_t next_zero_bit;
+       ext2_fsblk_t bitmap_blk;
+       ext2_fsblk_t group_first_block;
++      ext2_grpblk_t max_bit;
+       group_first_block = ext2_group_first_block_no(sb, block_group);
++      max_bit = ext2_group_last_block_no(sb, block_group) - group_first_block;
+       /* check whether block bitmap block number is set */
+       bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
+       offset = bitmap_blk - group_first_block;
+-      if (!ext2_test_bit(offset, bh->b_data))
++      if (offset < 0 || offset > max_bit ||
++          !ext2_test_bit(offset, bh->b_data))
+               /* bad block bitmap */
+               goto err_out;
+       /* check whether the inode bitmap block number is set */
+       bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap);
+       offset = bitmap_blk - group_first_block;
+-      if (!ext2_test_bit(offset, bh->b_data))
++      if (offset < 0 || offset > max_bit ||
++          !ext2_test_bit(offset, bh->b_data))
+               /* bad block bitmap */
+               goto err_out;
+       /* check whether the inode table block number is set */
+       bitmap_blk = le32_to_cpu(desc->bg_inode_table);
+       offset = bitmap_blk - group_first_block;
++      if (offset < 0 || offset > max_bit ||
++          offset + EXT2_SB(sb)->s_itb_per_group - 1 > max_bit)
++              goto err_out;
+       next_zero_bit = ext2_find_next_zero_bit(bh->b_data,
+                               offset + EXT2_SB(sb)->s_itb_per_group,
+                               offset);
diff --git a/queue-5.15/fuse-verify-g-u-id-mount-options-correctly.patch b/queue-5.15/fuse-verify-g-u-id-mount-options-correctly.patch
new file mode 100644 (file)
index 0000000..f83aec8
--- /dev/null
@@ -0,0 +1,81 @@
+From 525bd65aa759ec320af1dc06e114ed69733e9e23 Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Tue, 2 Jul 2024 17:22:41 -0500
+Subject: fuse: verify {g,u}id mount options correctly
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 525bd65aa759ec320af1dc06e114ed69733e9e23 upstream.
+
+As was done in
+0200679fc795 ("tmpfs: verify {g,u}id mount options correctly")
+we need to validate that the requested uid and/or gid is representable in
+the filesystem's idmapping.
+
+Cribbing from the above commit log,
+
+The contract for {g,u}id mount options and {g,u}id values in general set
+from userspace has always been that they are translated according to the
+caller's idmapping. In so far, fuse has been doing the correct thing.
+But since fuse is mountable in unprivileged contexts it is also
+necessary to verify that the resulting {k,g}uid is representable in the
+namespace of the superblock.
+
+Fixes: c30da2e981a7 ("fuse: convert to use the new mount API")
+Cc: stable@vger.kernel.org # 5.4+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Link: https://lore.kernel.org/r/8f07d45d-c806-484d-a2e3-7a2199df1cd2@redhat.com
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/inode.c |   24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -686,6 +686,8 @@ static int fuse_parse_param(struct fs_co
+       struct fs_parse_result result;
+       struct fuse_fs_context *ctx = fsc->fs_private;
+       int opt;
++      kuid_t kuid;
++      kgid_t kgid;
+       if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+               /*
+@@ -730,16 +732,30 @@ static int fuse_parse_param(struct fs_co
+               break;
+       case OPT_USER_ID:
+-              ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
+-              if (!uid_valid(ctx->user_id))
++              kuid =  make_kuid(fsc->user_ns, result.uint_32);
++              if (!uid_valid(kuid))
+                       return invalfc(fsc, "Invalid user_id");
++              /*
++               * The requested uid must be representable in the
++               * filesystem's idmapping.
++               */
++              if (!kuid_has_mapping(fsc->user_ns, kuid))
++                      return invalfc(fsc, "Invalid user_id");
++              ctx->user_id = kuid;
+               ctx->user_id_present = true;
+               break;
+       case OPT_GROUP_ID:
+-              ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
+-              if (!gid_valid(ctx->group_id))
++              kgid = make_kgid(fsc->user_ns, result.uint_32);;
++              if (!gid_valid(kgid))
++                      return invalfc(fsc, "Invalid group_id");
++              /*
++               * The requested gid must be representable in the
++               * filesystem's idmapping.
++               */
++              if (!kgid_has_mapping(fsc->user_ns, kgid))
+                       return invalfc(fsc, "Invalid group_id");
++              ctx->group_id = kgid;
+               ctx->group_id_present = true;
+               break;
diff --git a/queue-5.15/hfs-fix-to-initialize-fields-of-hfs_inode_info-after-hfs_alloc_inode.patch b/queue-5.15/hfs-fix-to-initialize-fields-of-hfs_inode_info-after-hfs_alloc_inode.patch
new file mode 100644 (file)
index 0000000..11bc3d6
--- /dev/null
@@ -0,0 +1,122 @@
+From 26a2ed107929a855155429b11e1293b83e6b2a8b Mon Sep 17 00:00:00 2001
+From: Chao Yu <chao@kernel.org>
+Date: Sun, 16 Jun 2024 09:38:41 +0800
+Subject: hfs: fix to initialize fields of hfs_inode_info after hfs_alloc_inode()
+
+From: Chao Yu <chao@kernel.org>
+
+commit 26a2ed107929a855155429b11e1293b83e6b2a8b upstream.
+
+Syzbot reports uninitialized value access issue as below:
+
+loop0: detected capacity change from 0 to 64
+=====================================================
+BUG: KMSAN: uninit-value in hfs_revalidate_dentry+0x307/0x3f0 fs/hfs/sysdep.c:30
+ hfs_revalidate_dentry+0x307/0x3f0 fs/hfs/sysdep.c:30
+ d_revalidate fs/namei.c:862 [inline]
+ lookup_fast+0x89e/0x8e0 fs/namei.c:1649
+ walk_component fs/namei.c:2001 [inline]
+ link_path_walk+0x817/0x1480 fs/namei.c:2332
+ path_lookupat+0xd9/0x6f0 fs/namei.c:2485
+ filename_lookup+0x22e/0x740 fs/namei.c:2515
+ user_path_at_empty+0x8b/0x390 fs/namei.c:2924
+ user_path_at include/linux/namei.h:57 [inline]
+ do_mount fs/namespace.c:3689 [inline]
+ __do_sys_mount fs/namespace.c:3898 [inline]
+ __se_sys_mount+0x66b/0x810 fs/namespace.c:3875
+ __x64_sys_mount+0xe4/0x140 fs/namespace.c:3875
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+
+BUG: KMSAN: uninit-value in hfs_ext_read_extent fs/hfs/extent.c:196 [inline]
+BUG: KMSAN: uninit-value in hfs_get_block+0x92d/0x1620 fs/hfs/extent.c:366
+ hfs_ext_read_extent fs/hfs/extent.c:196 [inline]
+ hfs_get_block+0x92d/0x1620 fs/hfs/extent.c:366
+ block_read_full_folio+0x4ff/0x11b0 fs/buffer.c:2271
+ hfs_read_folio+0x55/0x60 fs/hfs/inode.c:39
+ filemap_read_folio+0x148/0x4f0 mm/filemap.c:2426
+ do_read_cache_folio+0x7c8/0xd90 mm/filemap.c:3553
+ do_read_cache_page mm/filemap.c:3595 [inline]
+ read_cache_page+0xfb/0x2f0 mm/filemap.c:3604
+ read_mapping_page include/linux/pagemap.h:755 [inline]
+ hfs_btree_open+0x928/0x1ae0 fs/hfs/btree.c:78
+ hfs_mdb_get+0x260c/0x3000 fs/hfs/mdb.c:204
+ hfs_fill_super+0x1fb1/0x2790 fs/hfs/super.c:406
+ mount_bdev+0x628/0x920 fs/super.c:1359
+ hfs_mount+0xcd/0xe0 fs/hfs/super.c:456
+ legacy_get_tree+0x167/0x2e0 fs/fs_context.c:610
+ vfs_get_tree+0xdc/0x5d0 fs/super.c:1489
+ do_new_mount+0x7a9/0x16f0 fs/namespace.c:3145
+ path_mount+0xf98/0x26a0 fs/namespace.c:3475
+ do_mount fs/namespace.c:3488 [inline]
+ __do_sys_mount fs/namespace.c:3697 [inline]
+ __se_sys_mount+0x919/0x9e0 fs/namespace.c:3674
+ __ia32_sys_mount+0x15b/0x1b0 fs/namespace.c:3674
+ do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline]
+ __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178
+ do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203
+ do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246
+ entry_SYSENTER_compat_after_hwframe+0x70/0x82
+
+Uninit was created at:
+ __alloc_pages+0x9a6/0xe00 mm/page_alloc.c:4590
+ __alloc_pages_node include/linux/gfp.h:238 [inline]
+ alloc_pages_node include/linux/gfp.h:261 [inline]
+ alloc_slab_page mm/slub.c:2190 [inline]
+ allocate_slab mm/slub.c:2354 [inline]
+ new_slab+0x2d7/0x1400 mm/slub.c:2407
+ ___slab_alloc+0x16b5/0x3970 mm/slub.c:3540
+ __slab_alloc mm/slub.c:3625 [inline]
+ __slab_alloc_node mm/slub.c:3678 [inline]
+ slab_alloc_node mm/slub.c:3850 [inline]
+ kmem_cache_alloc_lru+0x64d/0xb30 mm/slub.c:3879
+ alloc_inode_sb include/linux/fs.h:3018 [inline]
+ hfs_alloc_inode+0x5a/0xc0 fs/hfs/super.c:165
+ alloc_inode+0x83/0x440 fs/inode.c:260
+ new_inode_pseudo fs/inode.c:1005 [inline]
+ new_inode+0x38/0x4f0 fs/inode.c:1031
+ hfs_new_inode+0x61/0x1010 fs/hfs/inode.c:186
+ hfs_mkdir+0x54/0x250 fs/hfs/dir.c:228
+ vfs_mkdir+0x49a/0x700 fs/namei.c:4126
+ do_mkdirat+0x529/0x810 fs/namei.c:4149
+ __do_sys_mkdirat fs/namei.c:4164 [inline]
+ __se_sys_mkdirat fs/namei.c:4162 [inline]
+ __x64_sys_mkdirat+0xc8/0x120 fs/namei.c:4162
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+
+It missed to initialize .tz_secondswest, .cached_start and .cached_blocks
+fields in struct hfs_inode_info after hfs_alloc_inode(), fix it.
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+3ae6be33a50b5aae4dab@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/linux-fsdevel/0000000000005ad04005ee48897f@google.com
+Signed-off-by: Chao Yu <chao@kernel.org>
+Link: https://lore.kernel.org/r/20240616013841.2217-1-chao@kernel.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hfs/inode.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/hfs/inode.c
++++ b/fs/hfs/inode.c
+@@ -202,6 +202,7 @@ struct inode *hfs_new_inode(struct inode
+       HFS_I(inode)->flags = 0;
+       HFS_I(inode)->rsrc_inode = NULL;
+       HFS_I(inode)->fs_blocks = 0;
++      HFS_I(inode)->tz_secondswest = sys_tz.tz_minuteswest * 60;
+       if (S_ISDIR(mode)) {
+               inode->i_size = 2;
+               HFS_SB(sb)->folder_count++;
+@@ -277,6 +278,8 @@ void hfs_inode_read_fork(struct inode *i
+       for (count = 0, i = 0; i < 3; i++)
+               count += be16_to_cpu(ext[i].count);
+       HFS_I(inode)->first_blocks = count;
++      HFS_I(inode)->cached_start = 0;
++      HFS_I(inode)->cached_blocks = 0;
+       inode->i_size = HFS_I(inode)->phys_size = log_size;
+       HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
diff --git a/queue-5.15/ipv6-take-care-of-scope-when-choosing-the-src-addr.patch b/queue-5.15/ipv6-take-care-of-scope-when-choosing-the-src-addr.patch
new file mode 100644 (file)
index 0000000..a0632f3
--- /dev/null
@@ -0,0 +1,36 @@
+From abb9a68d2c64dd9b128ae1f2e635e4d805e7ce64 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Wed, 10 Jul 2024 10:14:29 +0200
+Subject: ipv6: take care of scope when choosing the src addr
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit abb9a68d2c64dd9b128ae1f2e635e4d805e7ce64 upstream.
+
+When the source address is selected, the scope must be checked. For
+example, if a loopback address is assigned to the vrf device, it must not
+be chosen for packets sent outside.
+
+CC: stable@vger.kernel.org
+Fixes: afbac6010aec ("net: ipv6: Address selection needs to consider L3 domains")
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20240710081521.3809742-4-nicolas.dichtel@6wind.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -1831,7 +1831,8 @@ int ipv6_dev_get_saddr(struct net *net,
+                                                           master, &dst,
+                                                           scores, hiscore_idx);
+-                      if (scores[hiscore_idx].ifa)
++                      if (scores[hiscore_idx].ifa &&
++                          scores[hiscore_idx].scopedist >= 0)
+                               goto out;
+               }
diff --git a/queue-5.15/landlock-don-t-lose-track-of-restrictions-on-cred_transfer.patch b/queue-5.15/landlock-don-t-lose-track-of-restrictions-on-cred_transfer.patch
new file mode 100644 (file)
index 0000000..e6c5fcb
--- /dev/null
@@ -0,0 +1,72 @@
+From 39705a6c29f8a2b93cf5b99528a55366c50014d1 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Wed, 24 Jul 2024 14:49:01 +0200
+Subject: landlock: Don't lose track of restrictions on cred_transfer
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jann Horn <jannh@google.com>
+
+commit 39705a6c29f8a2b93cf5b99528a55366c50014d1 upstream.
+
+When a process' cred struct is replaced, this _almost_ always invokes
+the cred_prepare LSM hook; but in one special case (when
+KEYCTL_SESSION_TO_PARENT updates the parent's credentials), the
+cred_transfer LSM hook is used instead.  Landlock only implements the
+cred_prepare hook, not cred_transfer, so KEYCTL_SESSION_TO_PARENT causes
+all information on Landlock restrictions to be lost.
+
+This basically means that a process with the ability to use the fork()
+and keyctl() syscalls can get rid of all Landlock restrictions on
+itself.
+
+Fix it by adding a cred_transfer hook that does the same thing as the
+existing cred_prepare hook. (Implemented by having hook_cred_prepare()
+call hook_cred_transfer() so that the two functions are less likely to
+accidentally diverge in the future.)
+
+Cc: stable@kernel.org
+Fixes: 385975dca53e ("landlock: Set up the security framework and manage credentials")
+Signed-off-by: Jann Horn <jannh@google.com>
+Link: https://lore.kernel.org/r/20240724-landlock-houdini-fix-v1-1-df89a4560ca3@google.com
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/landlock/cred.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/security/landlock/cred.c
++++ b/security/landlock/cred.c
+@@ -14,8 +14,8 @@
+ #include "ruleset.h"
+ #include "setup.h"
+-static int hook_cred_prepare(struct cred *const new,
+-                           const struct cred *const old, const gfp_t gfp)
++static void hook_cred_transfer(struct cred *const new,
++                             const struct cred *const old)
+ {
+       struct landlock_ruleset *const old_dom = landlock_cred(old)->domain;
+@@ -23,6 +23,12 @@ static int hook_cred_prepare(struct cred
+               landlock_get_ruleset(old_dom);
+               landlock_cred(new)->domain = old_dom;
+       }
++}
++
++static int hook_cred_prepare(struct cred *const new,
++                           const struct cred *const old, const gfp_t gfp)
++{
++      hook_cred_transfer(new, old);
+       return 0;
+ }
+@@ -36,6 +42,7 @@ static void hook_cred_free(struct cred *
+ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+       LSM_HOOK_INIT(cred_prepare, hook_cred_prepare),
++      LSM_HOOK_INIT(cred_transfer, hook_cred_transfer),
+       LSM_HOOK_INIT(cred_free, hook_cred_free),
+ };
diff --git a/queue-5.15/media-venus-fix-use-after-free-in-vdec_close.patch b/queue-5.15/media-venus-fix-use-after-free-in-vdec_close.patch
new file mode 100644 (file)
index 0000000..d43389b
--- /dev/null
@@ -0,0 +1,38 @@
+From a0157b5aa34eb43ec4c5510f9c260bbb03be937e Mon Sep 17 00:00:00 2001
+From: Dikshita Agarwal <quic_dikshita@quicinc.com>
+Date: Thu, 9 May 2024 10:44:29 +0530
+Subject: media: venus: fix use after free in vdec_close
+
+From: Dikshita Agarwal <quic_dikshita@quicinc.com>
+
+commit a0157b5aa34eb43ec4c5510f9c260bbb03be937e upstream.
+
+There appears to be a possible use after free with vdec_close().
+The firmware will add buffer release work to the work queue through
+HFI callbacks as a normal part of decoding. Randomly closing the
+decoder device from userspace during normal decoding can incur
+a read after free for inst.
+
+Fix it by cancelling the work in vdec_close.
+
+Cc: stable@vger.kernel.org
+Fixes: af2c3834c8ca ("[media] media: venus: adding core part and helper functions")
+Signed-off-by: Dikshita Agarwal <quic_dikshita@quicinc.com>
+Acked-by: Vikash Garodia <quic_vgarodia@quicinc.com>
+Signed-off-by: Stanimir Varbanov <stanimir.k.varbanov@gmail.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/media/platform/qcom/venus/vdec.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/media/platform/qcom/venus/vdec.c
++++ b/drivers/media/platform/qcom/venus/vdec.c
+@@ -1632,6 +1632,7 @@ static int vdec_close(struct file *file)
+       vdec_pm_get(inst);
++      cancel_work_sync(&inst->delayed_process_work);
+       v4l2_m2m_ctx_release(inst->m2m_ctx);
+       v4l2_m2m_release(inst->m2m_dev);
+       vdec_ctrl_deinit(inst);
diff --git a/queue-5.15/mm-hugetlb-fix-possible-recursive-locking-detected-warning.patch b/queue-5.15/mm-hugetlb-fix-possible-recursive-locking-detected-warning.patch
new file mode 100644 (file)
index 0000000..bb69134
--- /dev/null
@@ -0,0 +1,100 @@
+From 667574e873b5f77a220b2a93329689f36fb56d5d Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Fri, 12 Jul 2024 11:13:14 +0800
+Subject: mm/hugetlb: fix possible recursive locking detected warning
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit 667574e873b5f77a220b2a93329689f36fb56d5d upstream.
+
+When tries to demote 1G hugetlb folios, a lockdep warning is observed:
+
+============================================
+WARNING: possible recursive locking detected
+6.10.0-rc6-00452-ga4d0275fa660-dirty #79 Not tainted
+--------------------------------------------
+bash/710 is trying to acquire lock:
+ffffffff8f0a7850 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0x244/0x460
+
+but task is already holding lock:
+ffffffff8f0a6f48 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0xae/0x460
+
+other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+       CPU0
+       ----
+  lock(&h->resize_lock);
+  lock(&h->resize_lock);
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+4 locks held by bash/710:
+ #0: ffff8f118439c3f0 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x64/0xe0
+ #1: ffff8f11893b9e88 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf8/0x1d0
+ #2: ffff8f1183dc4428 (kn->active#98){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1d0
+ #3: ffffffff8f0a6f48 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0xae/0x460
+
+stack backtrace:
+CPU: 3 PID: 710 Comm: bash Not tainted 6.10.0-rc6-00452-ga4d0275fa660-dirty #79
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x68/0xa0
+ __lock_acquire+0x10f2/0x1ca0
+ lock_acquire+0xbe/0x2d0
+ __mutex_lock+0x6d/0x400
+ demote_store+0x244/0x460
+ kernfs_fop_write_iter+0x12c/0x1d0
+ vfs_write+0x380/0x540
+ ksys_write+0x64/0xe0
+ do_syscall_64+0xb9/0x1d0
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+RIP: 0033:0x7fa61db14887
+RSP: 002b:00007ffc56c48358 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa61db14887
+RDX: 0000000000000002 RSI: 000055a030050220 RDI: 0000000000000001
+RBP: 000055a030050220 R08: 00007fa61dbd1460 R09: 000000007fffffff
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002
+R13: 00007fa61dc1b780 R14: 00007fa61dc17600 R15: 00007fa61dc16a00
+ </TASK>
+
+Lockdep considers this an AA deadlock because the different resize_lock
+mutexes reside in the same lockdep class, but this is a false positive.
+Place them in distinct classes to avoid these warnings.
+
+Link: https://lkml.kernel.org/r/20240712031314.2570452-1-linmiaohe@huawei.com
+Fixes: 8531fc6f52f5 ("hugetlb: add hugetlb demote page support")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h |    1 +
+ mm/hugetlb.c            |    2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -600,6 +600,7 @@ HPAGEFLAG(VmemmapOptimized, vmemmap_opti
+ /* Defines one hugetlb page size */
+ struct hstate {
+       struct mutex resize_lock;
++      struct lock_class_key resize_key;
+       int next_nid_to_alloc;
+       int next_nid_to_free;
+       unsigned int order;
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3717,7 +3717,7 @@ void __init hugetlb_add_hstate(unsigned
+       BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
+       BUG_ON(order == 0);
+       h = &hstates[hugetlb_max_hstate++];
+-      mutex_init(&h->resize_lock);
++      __mutex_init(&h->resize_lock, "resize mutex", &h->resize_key);
+       h->order = order;
+       h->mask = ~(huge_page_size(h) - 1);
+       for (i = 0; i < MAX_NUMNODES; ++i)
diff --git a/queue-5.15/mm-mmap_lock-replace-get_memcg_path_buf-with-on-stack-buffer.patch b/queue-5.15/mm-mmap_lock-replace-get_memcg_path_buf-with-on-stack-buffer.patch
new file mode 100644 (file)
index 0000000..8b9335a
--- /dev/null
@@ -0,0 +1,262 @@
+From 7d6be67cfdd4a53cea7147313ca13c531e3a470f Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Fri, 21 Jun 2024 10:08:41 +0900
+Subject: mm: mmap_lock: replace get_memcg_path_buf() with on-stack buffer
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit 7d6be67cfdd4a53cea7147313ca13c531e3a470f upstream.
+
+Commit 2b5067a8143e ("mm: mmap_lock: add tracepoints around lock
+acquisition") introduced TRACE_MMAP_LOCK_EVENT() macro using
+preempt_disable() in order to let get_mm_memcg_path() return a percpu
+buffer exclusively used by normal, softirq, irq and NMI contexts
+respectively.
+
+Commit 832b50725373 ("mm: mmap_lock: use local locks instead of disabling
+preemption") replaced preempt_disable() with local_lock(&memcg_paths.lock)
+based on an argument that preempt_disable() has to be avoided because
+get_mm_memcg_path() might sleep if PREEMPT_RT=y.
+
+But syzbot started reporting
+
+  inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
+
+and
+
+  inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
+
+messages, for local_lock() does not disable IRQ.
+
+We could replace local_lock() with local_lock_irqsave() in order to
+suppress these messages.  But this patch instead replaces percpu buffers
+with on-stack buffer, for the size of each buffer returned by
+get_memcg_path_buf() is only 256 bytes which is tolerable for allocating
+from current thread's kernel stack memory.
+
+Link: https://lkml.kernel.org/r/ef22d289-eadb-4ed9-863b-fbc922b33d8d@I-love.SAKURA.ne.jp
+Reported-by: syzbot <syzbot+40905bca570ae6784745@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=40905bca570ae6784745
+Fixes: 832b50725373 ("mm: mmap_lock: use local locks instead of disabling preemption")
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmap_lock.c |  175 ++++++---------------------------------------------------
+ 1 file changed, 20 insertions(+), 155 deletions(-)
+
+--- a/mm/mmap_lock.c
++++ b/mm/mmap_lock.c
+@@ -19,14 +19,7 @@ EXPORT_TRACEPOINT_SYMBOL(mmap_lock_relea
+ #ifdef CONFIG_MEMCG
+-/*
+- * Our various events all share the same buffer (because we don't want or need
+- * to allocate a set of buffers *per event type*), so we need to protect against
+- * concurrent _reg() and _unreg() calls, and count how many _reg() calls have
+- * been made.
+- */
+-static DEFINE_MUTEX(reg_lock);
+-static int reg_refcount; /* Protected by reg_lock. */
++static atomic_t reg_refcount;
+ /*
+  * Size of the buffer for memcg path names. Ignoring stack trace support,
+@@ -34,136 +27,22 @@ static int reg_refcount; /* Protected by
+  */
+ #define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL
+-/*
+- * How many contexts our trace events might be called in: normal, softirq, irq,
+- * and NMI.
+- */
+-#define CONTEXT_COUNT 4
+-
+-struct memcg_path {
+-      local_lock_t lock;
+-      char __rcu *buf;
+-      local_t buf_idx;
+-};
+-static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
+-      .lock = INIT_LOCAL_LOCK(lock),
+-      .buf_idx = LOCAL_INIT(0),
+-};
+-
+-static char **tmp_bufs;
+-
+-/* Called with reg_lock held. */
+-static void free_memcg_path_bufs(void)
+-{
+-      struct memcg_path *memcg_path;
+-      int cpu;
+-      char **old = tmp_bufs;
+-
+-      for_each_possible_cpu(cpu) {
+-              memcg_path = per_cpu_ptr(&memcg_paths, cpu);
+-              *(old++) = rcu_dereference_protected(memcg_path->buf,
+-                      lockdep_is_held(&reg_lock));
+-              rcu_assign_pointer(memcg_path->buf, NULL);
+-      }
+-
+-      /* Wait for inflight memcg_path_buf users to finish. */
+-      synchronize_rcu();
+-
+-      old = tmp_bufs;
+-      for_each_possible_cpu(cpu) {
+-              kfree(*(old++));
+-      }
+-
+-      kfree(tmp_bufs);
+-      tmp_bufs = NULL;
+-}
+-
+ int trace_mmap_lock_reg(void)
+ {
+-      int cpu;
+-      char *new;
+-
+-      mutex_lock(&reg_lock);
+-
+-      /* If the refcount is going 0->1, proceed with allocating buffers. */
+-      if (reg_refcount++)
+-              goto out;
+-
+-      tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
+-                               GFP_KERNEL);
+-      if (tmp_bufs == NULL)
+-              goto out_fail;
+-
+-      for_each_possible_cpu(cpu) {
+-              new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
+-              if (new == NULL)
+-                      goto out_fail_free;
+-              rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
+-              /* Don't need to wait for inflights, they'd have gotten NULL. */
+-      }
+-
+-out:
+-      mutex_unlock(&reg_lock);
++      atomic_inc(&reg_refcount);
+       return 0;
+-
+-out_fail_free:
+-      free_memcg_path_bufs();
+-out_fail:
+-      /* Since we failed, undo the earlier ref increment. */
+-      --reg_refcount;
+-
+-      mutex_unlock(&reg_lock);
+-      return -ENOMEM;
+ }
+ void trace_mmap_lock_unreg(void)
+ {
+-      mutex_lock(&reg_lock);
+-
+-      /* If the refcount is going 1->0, proceed with freeing buffers. */
+-      if (--reg_refcount)
+-              goto out;
+-
+-      free_memcg_path_bufs();
+-
+-out:
+-      mutex_unlock(&reg_lock);
+-}
+-
+-static inline char *get_memcg_path_buf(void)
+-{
+-      struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
+-      char *buf;
+-      int idx;
+-
+-      rcu_read_lock();
+-      buf = rcu_dereference(memcg_path->buf);
+-      if (buf == NULL) {
+-              rcu_read_unlock();
+-              return NULL;
+-      }
+-      idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
+-            MEMCG_PATH_BUF_SIZE;
+-      return &buf[idx];
++      atomic_dec(&reg_refcount);
+ }
+-static inline void put_memcg_path_buf(void)
+-{
+-      local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
+-      rcu_read_unlock();
+-}
+-
+-#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
+-      do {                                                                   \
+-              const char *memcg_path;                                        \
+-              local_lock(&memcg_paths.lock);                                 \
+-              memcg_path = get_mm_memcg_path(mm);                            \
+-              trace_mmap_lock_##type(mm,                                     \
+-                                     memcg_path != NULL ? memcg_path : "",   \
+-                                     ##__VA_ARGS__);                         \
+-              if (likely(memcg_path != NULL))                                \
+-                      put_memcg_path_buf();                                  \
+-              local_unlock(&memcg_paths.lock);                               \
++#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                    \
++      do {                                                    \
++              char buf[MEMCG_PATH_BUF_SIZE];                  \
++              get_mm_memcg_path(mm, buf, sizeof(buf));        \
++              trace_mmap_lock_##type(mm, buf, ##__VA_ARGS__); \
+       } while (0)
+ #else /* !CONFIG_MEMCG */
+@@ -185,37 +64,23 @@ void trace_mmap_lock_unreg(void)
+ #ifdef CONFIG_TRACING
+ #ifdef CONFIG_MEMCG
+ /*
+- * Write the given mm_struct's memcg path to a percpu buffer, and return a
+- * pointer to it. If the path cannot be determined, or no buffer was available
+- * (because the trace event is being unregistered), NULL is returned.
+- *
+- * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
+- * disabled by the caller before calling us, and re-enabled only after the
+- * caller is done with the pointer.
+- *
+- * The caller must call put_memcg_path_buf() once the buffer is no longer
+- * needed. This must be done while preemption is still disabled.
++ * Write the given mm_struct's memcg path to a buffer. If the path cannot be
++ * determined or the trace event is being unregistered, empty string is written.
+  */
+-static const char *get_mm_memcg_path(struct mm_struct *mm)
++static void get_mm_memcg_path(struct mm_struct *mm, char *buf, size_t buflen)
+ {
+-      char *buf = NULL;
+-      struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
++      struct mem_cgroup *memcg;
++      buf[0] = '\0';
++      /* No need to get path if no trace event is registered. */
++      if (!atomic_read(&reg_refcount))
++              return;
++      memcg = get_mem_cgroup_from_mm(mm);
+       if (memcg == NULL)
+-              goto out;
+-      if (unlikely(memcg->css.cgroup == NULL))
+-              goto out_put;
+-
+-      buf = get_memcg_path_buf();
+-      if (buf == NULL)
+-              goto out_put;
+-
+-      cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);
+-
+-out_put:
++              return;
++      if (memcg->css.cgroup)
++              cgroup_path(memcg->css.cgroup, buf, buflen);
+       css_put(&memcg->css);
+-out:
+-      return buf;
+ }
+ #endif /* CONFIG_MEMCG */
diff --git a/queue-5.15/net-netconsole-disable-target-before-netpoll-cleanup.patch b/queue-5.15/net-netconsole-disable-target-before-netpoll-cleanup.patch
new file mode 100644 (file)
index 0000000..0bdb89e
--- /dev/null
@@ -0,0 +1,52 @@
+From 97d9fba9a812cada5484667a46e14a4c976ca330 Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Fri, 12 Jul 2024 07:34:15 -0700
+Subject: net: netconsole: Disable target before netpoll cleanup
+
+From: Breno Leitao <leitao@debian.org>
+
+commit 97d9fba9a812cada5484667a46e14a4c976ca330 upstream.
+
+Currently, netconsole cleans up the netpoll structure before disabling
+the target. This approach can lead to race conditions, as message
+senders (write_ext_msg() and write_msg()) check if the target is
+enabled before using netpoll. The sender can validate that the target is
+enabled, but, the netpoll might be de-allocated already, causing
+undesired behaviours.
+
+This patch reverses the order of operations:
+1. Disable the target
+2. Clean up the netpoll structure
+
+This change eliminates the potential race condition, ensuring that
+no messages are sent through a partially cleaned-up netpoll structure.
+
+Fixes: 2382b15bcc39 ("netconsole: take care of NETDEV_UNREGISTER event")
+Cc: stable@vger.kernel.org
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20240712143415.1141039-1-leitao@debian.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/netconsole.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/netconsole.c
++++ b/drivers/net/netconsole.c
+@@ -716,6 +716,7 @@ restart:
+                               /* rtnl_lock already held
+                                * we might sleep in __netpoll_cleanup()
+                                */
++                              nt->enabled = false;
+                               spin_unlock_irqrestore(&target_list_lock, flags);
+                               __netpoll_cleanup(&nt->np);
+@@ -723,7 +724,6 @@ restart:
+                               spin_lock_irqsave(&target_list_lock, flags);
+                               dev_put(nt->np.dev);
+                               nt->np.dev = NULL;
+-                              nt->enabled = false;
+                               stopped = true;
+                               netconsole_target_put(nt);
+                               goto restart;
diff --git a/queue-5.15/sched-fair-set_load_weight-must-also-call-reweight_task-for-sched_idle-tasks.patch b/queue-5.15/sched-fair-set_load_weight-must-also-call-reweight_task-for-sched_idle-tasks.patch
new file mode 100644 (file)
index 0000000..d86ff95
--- /dev/null
@@ -0,0 +1,107 @@
+From d329605287020c3d1c3b0dadc63d8208e7251382 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 25 Jun 2024 15:29:58 -1000
+Subject: sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks
+
+From: Tejun Heo <tj@kernel.org>
+
+commit d329605287020c3d1c3b0dadc63d8208e7251382 upstream.
+
+When a task's weight is being changed, set_load_weight() is called with
+@update_load set. As weight changes aren't trivial for the fair class,
+set_load_weight() calls fair.c::reweight_task() for fair class tasks.
+
+However, set_load_weight() first tests task_has_idle_policy() on entry and
+skips calling reweight_task() for SCHED_IDLE tasks. This is buggy as
+SCHED_IDLE tasks are just fair tasks with a very low weight and they would
+incorrectly skip load, vlag and position updates.
+
+Fix it by updating reweight_task() to take struct load_weight as idle weight
+can't be expressed with prio and making set_load_weight() call
+reweight_task() for SCHED_IDLE tasks too when @update_load is set.
+
+Fixes: 9059393e4ec1 ("sched/fair: Use reweight_entity() for set_user_nice()")
+Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org # v4.15+
+Link: http://lkml.kernel.org/r/20240624102331.GI31592@noisy.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/core.c  |   23 ++++++++++-------------
+ kernel/sched/fair.c  |    7 +++----
+ kernel/sched/sched.h |    2 +-
+ 3 files changed, 14 insertions(+), 18 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1203,27 +1203,24 @@ int tg_nop(struct task_group *tg, void *
+ static void set_load_weight(struct task_struct *p, bool update_load)
+ {
+       int prio = p->static_prio - MAX_RT_PRIO;
+-      struct load_weight *load = &p->se.load;
++      struct load_weight lw;
+-      /*
+-       * SCHED_IDLE tasks get minimal weight:
+-       */
+       if (task_has_idle_policy(p)) {
+-              load->weight = scale_load(WEIGHT_IDLEPRIO);
+-              load->inv_weight = WMULT_IDLEPRIO;
+-              return;
++              lw.weight = scale_load(WEIGHT_IDLEPRIO);
++              lw.inv_weight = WMULT_IDLEPRIO;
++      } else {
++              lw.weight = scale_load(sched_prio_to_weight[prio]);
++              lw.inv_weight = sched_prio_to_wmult[prio];
+       }
+       /*
+        * SCHED_OTHER tasks have to update their load when changing their
+        * weight
+        */
+-      if (update_load && p->sched_class == &fair_sched_class) {
+-              reweight_task(p, prio);
+-      } else {
+-              load->weight = scale_load(sched_prio_to_weight[prio]);
+-              load->inv_weight = sched_prio_to_wmult[prio];
+-      }
++      if (update_load && p->sched_class == &fair_sched_class)
++              reweight_task(p, &lw);
++      else
++              p->se.load = lw;
+ }
+ #ifdef CONFIG_UCLAMP_TASK
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3121,15 +3121,14 @@ static void reweight_entity(struct cfs_r
+ }
+-void reweight_task(struct task_struct *p, int prio)
++void reweight_task(struct task_struct *p, const struct load_weight *lw)
+ {
+       struct sched_entity *se = &p->se;
+       struct cfs_rq *cfs_rq = cfs_rq_of(se);
+       struct load_weight *load = &se->load;
+-      unsigned long weight = scale_load(sched_prio_to_weight[prio]);
+-      reweight_entity(cfs_rq, se, weight);
+-      load->inv_weight = sched_prio_to_wmult[prio];
++      reweight_entity(cfs_rq, se, lw->weight);
++      load->inv_weight = lw->inv_weight;
+ }
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2322,7 +2322,7 @@ extern void init_sched_dl_class(void);
+ extern void init_sched_rt_class(void);
+ extern void init_sched_fair_class(void);
+-extern void reweight_task(struct task_struct *p, int prio);
++extern void reweight_task(struct task_struct *p, const struct load_weight *lw);
+ extern void resched_curr(struct rq *rq);
+ extern void resched_cpu(int cpu);
index 123d40ff70401269932e9af3f58d1735444c9236..764fe24db0ca27b17e85e58b9c0197fba413e3f9 100644 (file)
@@ -163,3 +163,20 @@ nilfs2-avoid-undefined-behavior-in-nilfs_cnt32_ge-ma.patch
 rtc-interface-add-rtc-offset-to-alarm-after-fix-up.patch
 fs-ntfs3-missed-error-return.patch
 s390-dasd-fix-error-checks-in-dasd_copy_pair_store.patch
+landlock-don-t-lose-track-of-restrictions-on-cred_transfer.patch
+mm-hugetlb-fix-possible-recursive-locking-detected-warning.patch
+mm-mmap_lock-replace-get_memcg_path_buf-with-on-stack-buffer.patch
+dt-bindings-thermal-correct-thermal-zone-node-name-limit.patch
+tick-broadcast-make-takeover-of-broadcast-hrtimer-reliable.patch
+net-netconsole-disable-target-before-netpoll-cleanup.patch
+af_packet-handle-outgoing-vlan-packets-without-hardware-offloading.patch
+ipv6-take-care-of-scope-when-choosing-the-src-addr.patch
+sched-fair-set_load_weight-must-also-call-reweight_task-for-sched_idle-tasks.patch
+fuse-verify-g-u-id-mount-options-correctly.patch
+char-tpm-fix-possible-memory-leak-in-tpm_bios_measurements_open.patch
+media-venus-fix-use-after-free-in-vdec_close.patch
+ata-libata-scsi-honor-the-d_sense-bit-for-ck_cond-1-and-no-error.patch
+hfs-fix-to-initialize-fields-of-hfs_inode_info-after-hfs_alloc_inode.patch
+ext2-verify-bitmap-and-itable-block-numbers-before-using-them.patch
+drm-gma500-fix-null-pointer-dereference-in-cdv_intel_lvds_get_modes.patch
+drm-gma500-fix-null-pointer-dereference-in-psb_intel_lvds_get_modes.patch
diff --git a/queue-5.15/tick-broadcast-make-takeover-of-broadcast-hrtimer-reliable.patch b/queue-5.15/tick-broadcast-make-takeover-of-broadcast-hrtimer-reliable.patch
new file mode 100644 (file)
index 0000000..28f7af2
--- /dev/null
@@ -0,0 +1,109 @@
+From f7d43dd206e7e18c182f200e67a8db8c209907fa Mon Sep 17 00:00:00 2001
+From: Yu Liao <liaoyu15@huawei.com>
+Date: Thu, 11 Jul 2024 20:48:43 +0800
+Subject: tick/broadcast: Make takeover of broadcast hrtimer reliable
+
+From: Yu Liao <liaoyu15@huawei.com>
+
+commit f7d43dd206e7e18c182f200e67a8db8c209907fa upstream.
+
+Running the LTP hotplug stress test on an aarch64 machine results in
+rcu_sched stall warnings when the broadcast hrtimer was owned by the
+un-plugged CPU. The issue is the following:
+
+CPU1 (owns the broadcast hrtimer)      CPU2
+
+                               tick_broadcast_enter()
+                                 // shutdown local timer device
+                                 broadcast_shutdown_local()
+                               ...
+                               tick_broadcast_exit()
+                                 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT)
+                                 // timer device is not programmed
+                                 cpumask_set_cpu(cpu, tick_broadcast_force_mask)
+
+                               initiates offlining of CPU1
+take_cpu_down()
+/*
+ * CPU1 shuts down and does not
+ * send broadcast IPI anymore
+ */
+                               takedown_cpu()
+                                 hotplug_cpu__broadcast_tick_pull()
+                                   // move broadcast hrtimer to this CPU
+                                   clockevents_program_event()
+                                     bc_set_next()
+                                       hrtimer_start()
+                                       /*
+                                        * timer device is not programmed
+                                        * because only the first expiring
+                                        * timer will trigger clockevent
+                                        * device reprogramming
+                                        */
+
+What happens is that CPU2 exits broadcast mode with force bit set, then the
+local timer device is not reprogrammed and CPU2 expects to receive the
+expired event by the broadcast IPI. But this does not happen because CPU1
+is offlined by CPU2. CPU switches the clockevent device to ONESHOT state,
+but does not reprogram the device.
+
+The subsequent reprogramming of the hrtimer broadcast device does not
+program the clockevent device of CPU2 either because the pending expiry
+time is already in the past and the CPU expects the event to be delivered.
+As a consequence all CPUs which wait for a broadcast event to be delivered
+are stuck forever.
+
+Fix this issue by reprogramming the local timer device if the broadcast
+force bit of the CPU is set so that the broadcast hrtimer is delivered.
+
+[ tglx: Massage comment and change log. Add Fixes tag ]
+
+Fixes: 989dcb645ca7 ("tick: Handle broadcast wakeup of multiple cpus")
+Signed-off-by: Yu Liao <liaoyu15@huawei.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240711124843.64167-1-liaoyu15@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/tick-broadcast.c |   23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/kernel/time/tick-broadcast.c
++++ b/kernel/time/tick-broadcast.c
+@@ -1137,6 +1137,7 @@ void tick_broadcast_switch_to_oneshot(vo
+ #ifdef CONFIG_HOTPLUG_CPU
+ void hotplug_cpu__broadcast_tick_pull(int deadcpu)
+ {
++      struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+       struct clock_event_device *bc;
+       unsigned long flags;
+@@ -1144,6 +1145,28 @@ void hotplug_cpu__broadcast_tick_pull(in
+       bc = tick_broadcast_device.evtdev;
+       if (bc && broadcast_needs_cpu(bc, deadcpu)) {
++              /*
++               * If the broadcast force bit of the current CPU is set,
++               * then the current CPU has not yet reprogrammed the local
++               * timer device to avoid a ping-pong race. See
++               * ___tick_broadcast_oneshot_control().
++               *
++               * If the broadcast device is hrtimer based then
++               * programming the broadcast event below does not have any
++               * effect because the local clockevent device is not
++               * running and not programmed because the broadcast event
++               * is not earlier than the pending event of the local clock
++               * event device. As a consequence all CPUs waiting for a
++               * broadcast event are stuck forever.
++               *
++               * Detect this condition and reprogram the cpu local timer
++               * device to avoid the starvation.
++               */
++              if (tick_check_broadcast_expired()) {
++                      cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask);
++                      tick_program_event(td->evtdev->next_event, 1);
++              }
++
+               /* This moves the broadcast assignment to this CPU: */
+               clockevents_program_event(bc, bc->next_event, 1);
+       }