--- /dev/null
+From 79eecf631c14e7f4057186570ac20e2cfac3802e Mon Sep 17 00:00:00 2001
+From: Chengen Du <chengen.du@canonical.com>
+Date: Sat, 13 Jul 2024 19:47:35 +0800
+Subject: af_packet: Handle outgoing VLAN packets without hardware offloading
+
+From: Chengen Du <chengen.du@canonical.com>
+
+commit 79eecf631c14e7f4057186570ac20e2cfac3802e upstream.
+
+The issue initially stems from libpcap. The ethertype will be overwritten
+as the VLAN TPID if the network interface lacks hardware VLAN offloading.
+In the outbound packet path, if hardware VLAN offloading is unavailable,
+the VLAN tag is inserted into the payload but then cleared from the sk_buff
+struct. Consequently, this can lead to a false negative when checking for
+the presence of a VLAN tag, causing the packet sniffing outcome to lack
+VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing
+tool may be unable to parse packets as expected.
+
+The TCI-TPID is missing because the prb_fill_vlan_info() function does not
+modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the
+payload and not in the sk_buff struct. The skb_vlan_tag_present() function
+only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header
+is stripped, preventing the packet capturing tool from determining the
+correct TCI-TPID value. Additionally, the protocol in SLL is incorrect,
+which means the packet capturing tool cannot parse the L3 header correctly.
+
+Link: https://github.com/the-tcpdump-group/libpcap/issues/1105
+Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@canonical.com/T/#u
+Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Chengen Du <chengen.du@canonical.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240713114735.62360-1-chengen.du@canonical.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 84 insertions(+), 2 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -538,6 +538,61 @@ static void *packet_current_frame(struct
+ return packet_lookup_frame(po, rb, rb->head, status);
+ }
+
++static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev)
++{
++ u8 *skb_orig_data = skb->data;
++ int skb_orig_len = skb->len;
++ struct vlan_hdr vhdr, *vh;
++ unsigned int header_len;
++
++ if (!dev)
++ return 0;
++
++ /* In the SOCK_DGRAM scenario, skb data starts at the network
++ * protocol, which is after the VLAN headers. The outer VLAN
++ * header is at the hard_header_len offset in non-variable
++ * length link layer headers. If it's a VLAN device, the
++ * min_header_len should be used to exclude the VLAN header
++ * size.
++ */
++ if (dev->min_header_len == dev->hard_header_len)
++ header_len = dev->hard_header_len;
++ else if (is_vlan_dev(dev))
++ header_len = dev->min_header_len;
++ else
++ return 0;
++
++ skb_push(skb, skb->data - skb_mac_header(skb));
++ vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr);
++ if (skb_orig_data != skb->data) {
++ skb->data = skb_orig_data;
++ skb->len = skb_orig_len;
++ }
++ if (unlikely(!vh))
++ return 0;
++
++ return ntohs(vh->h_vlan_TCI);
++}
++
++static __be16 vlan_get_protocol_dgram(struct sk_buff *skb)
++{
++ __be16 proto = skb->protocol;
++
++ if (unlikely(eth_type_vlan(proto))) {
++ u8 *skb_orig_data = skb->data;
++ int skb_orig_len = skb->len;
++
++ skb_push(skb, skb->data - skb_mac_header(skb));
++ proto = __vlan_get_protocol(skb, proto, NULL);
++ if (skb_orig_data != skb->data) {
++ skb->data = skb_orig_data;
++ skb->len = skb_orig_len;
++ }
++ }
++
++ return proto;
++}
++
+ static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
+ {
+ del_timer_sync(&pkc->retire_blk_timer);
+@@ -1007,10 +1062,16 @@ static void prb_clear_rxhash(struct tpac
+ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
+ struct tpacket3_hdr *ppd)
+ {
++ struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc);
++
+ if (skb_vlan_tag_present(pkc->skb)) {
+ ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
+ ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
+ ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++ } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) {
++ ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev);
++ ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol);
++ ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else {
+ ppd->hv1.tp_vlan_tci = 0;
+ ppd->hv1.tp_vlan_tpid = 0;
+@@ -2431,6 +2492,10 @@ static int tpacket_rcv(struct sk_buff *s
+ h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
+ h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
+ status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++ } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
++ h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev);
++ h.h2->tp_vlan_tpid = ntohs(skb->protocol);
++ status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else {
+ h.h2->tp_vlan_tci = 0;
+ h.h2->tp_vlan_tpid = 0;
+@@ -2460,7 +2525,8 @@ static int tpacket_rcv(struct sk_buff *s
+ sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
+ sll->sll_family = AF_PACKET;
+ sll->sll_hatype = dev->type;
+- sll->sll_protocol = skb->protocol;
++ sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
++ vlan_get_protocol_dgram(skb) : skb->protocol;
+ sll->sll_pkttype = skb->pkt_type;
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
+ sll->sll_ifindex = orig_dev->ifindex;
+@@ -3488,7 +3554,8 @@ static int packet_recvmsg(struct socket
+ /* Original length was stored in sockaddr_ll fields */
+ origlen = PACKET_SKB_CB(skb)->sa.origlen;
+ sll->sll_family = AF_PACKET;
+- sll->sll_protocol = skb->protocol;
++ sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
++ vlan_get_protocol_dgram(skb) : skb->protocol;
+ }
+
+ sock_recv_cmsgs(msg, sk, skb);
+@@ -3545,6 +3612,21 @@ static int packet_recvmsg(struct socket
+ aux.tp_vlan_tci = skb_vlan_tag_get(skb);
+ aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
+ aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++ } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
++ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
++ struct net_device *dev;
++
++ rcu_read_lock();
++ dev = dev_get_by_index_rcu(sock_net(sk), sll->sll_ifindex);
++ if (dev) {
++ aux.tp_vlan_tci = vlan_get_tci(skb, dev);
++ aux.tp_vlan_tpid = ntohs(skb->protocol);
++ aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
++ } else {
++ aux.tp_vlan_tci = 0;
++ aux.tp_vlan_tpid = 0;
++ }
++ rcu_read_unlock();
+ } else {
+ aux.tp_vlan_tci = 0;
+ aux.tp_vlan_tpid = 0;
--- /dev/null
+From 97981926224afe17ba3e22e0c2b7dd8b516ee574 Mon Sep 17 00:00:00 2001
+From: Igor Pylypiv <ipylypiv@google.com>
+Date: Tue, 2 Jul 2024 02:47:30 +0000
+Subject: ata: libata-scsi: Do not overwrite valid sense data when CK_COND=1
+
+From: Igor Pylypiv <ipylypiv@google.com>
+
+commit 97981926224afe17ba3e22e0c2b7dd8b516ee574 upstream.
+
+Current ata_gen_passthru_sense() code performs two actions:
+1. Generates sense data based on the ATA 'status' and ATA 'error' fields.
+2. Populates "ATA Status Return sense data descriptor" / "Fixed format
+ sense data" with ATA taskfile fields.
+
+The problem is that #1 generates sense data even when valid sense data
+is already present (ATA_QCFLAG_SENSE_VALID is set). Factoring out #2 into
+a separate function allows us to generate sense data only when there is
+no valid sense data (ATA_QCFLAG_SENSE_VALID is not set).
+
+As a bonus, we can now delete a FIXME comment in atapi_qc_complete()
+which states that we don't want to translate taskfile registers into
+sense descriptors for ATAPI.
+
+Additionally, always set SAM_STAT_CHECK_CONDITION when CK_COND=1 because
+SAT specification mandates that SATL shall return CHECK CONDITION if
+the CK_COND bit is set.
+
+The ATA PASS-THROUGH handling logic in ata_scsi_qc_complete() is hard
+to read/understand. Improve the readability of the code by moving checks
+into self-explanatory boolean variables.
+
+Cc: stable@vger.kernel.org # 4.19+
+Co-developed-by: Niklas Cassel <cassel@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Igor Pylypiv <ipylypiv@google.com>
+Link: https://lore.kernel.org/r/20240702024735.1152293-3-ipylypiv@google.com
+Signed-off-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-scsi.c | 169 +++++++++++++++++++++++++---------------------
+ 1 file changed, 92 insertions(+), 77 deletions(-)
+
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -230,6 +230,80 @@ void ata_scsi_set_sense_information(stru
+ SCSI_SENSE_BUFFERSIZE, information);
+ }
+
++/**
++ * ata_scsi_set_passthru_sense_fields - Set ATA fields in sense buffer
++ * @qc: ATA PASS-THROUGH command.
++ *
++ * Populates "ATA Status Return sense data descriptor" / "Fixed format
++ * sense data" with ATA taskfile fields.
++ *
++ * LOCKING:
++ * None.
++ */
++static void ata_scsi_set_passthru_sense_fields(struct ata_queued_cmd *qc)
++{
++ struct scsi_cmnd *cmd = qc->scsicmd;
++ struct ata_taskfile *tf = &qc->result_tf;
++ unsigned char *sb = cmd->sense_buffer;
++
++ if ((sb[0] & 0x7f) >= 0x72) {
++ unsigned char *desc;
++ u8 len;
++
++ /* descriptor format */
++ len = sb[7];
++ desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
++ if (!desc) {
++ if (SCSI_SENSE_BUFFERSIZE < len + 14)
++ return;
++ sb[7] = len + 14;
++ desc = sb + 8 + len;
++ }
++ desc[0] = 9;
++ desc[1] = 12;
++ /*
++ * Copy registers into sense buffer.
++ */
++ desc[2] = 0x00;
++ desc[3] = tf->error;
++ desc[5] = tf->nsect;
++ desc[7] = tf->lbal;
++ desc[9] = tf->lbam;
++ desc[11] = tf->lbah;
++ desc[12] = tf->device;
++ desc[13] = tf->status;
++
++ /*
++ * Fill in Extend bit, and the high order bytes
++ * if applicable.
++ */
++ if (tf->flags & ATA_TFLAG_LBA48) {
++ desc[2] |= 0x01;
++ desc[4] = tf->hob_nsect;
++ desc[6] = tf->hob_lbal;
++ desc[8] = tf->hob_lbam;
++ desc[10] = tf->hob_lbah;
++ }
++ } else {
++ /* Fixed sense format */
++ sb[0] |= 0x80;
++ sb[3] = tf->error;
++ sb[4] = tf->status;
++ sb[5] = tf->device;
++ sb[6] = tf->nsect;
++ if (tf->flags & ATA_TFLAG_LBA48) {
++ sb[8] |= 0x80;
++ if (tf->hob_nsect)
++ sb[8] |= 0x40;
++ if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
++ sb[8] |= 0x20;
++ }
++ sb[9] = tf->lbal;
++ sb[10] = tf->lbam;
++ sb[11] = tf->lbah;
++ }
++}
++
+ static void ata_scsi_set_invalid_field(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u16 field, u8 bit)
+ {
+@@ -837,10 +911,8 @@ static void ata_to_sense_error(unsigned
+ * ata_gen_passthru_sense - Generate check condition sense block.
+ * @qc: Command that completed.
+ *
+- * This function is specific to the ATA descriptor format sense
+- * block specified for the ATA pass through commands. Regardless
+- * of whether the command errored or not, return a sense
+- * block. Copy all controller registers into the sense
++ * This function is specific to the ATA pass through commands.
++ * Regardless of whether the command errored or not, return a sense
+ * block. If there was no error, we get the request from an ATA
+ * passthrough command, so we use the following sense data:
+ * sk = RECOVERED ERROR
+@@ -875,63 +947,6 @@ static void ata_gen_passthru_sense(struc
+ */
+ scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
+ }
+-
+- if ((sb[0] & 0x7f) >= 0x72) {
+- unsigned char *desc;
+- u8 len;
+-
+- /* descriptor format */
+- len = sb[7];
+- desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
+- if (!desc) {
+- if (SCSI_SENSE_BUFFERSIZE < len + 14)
+- return;
+- sb[7] = len + 14;
+- desc = sb + 8 + len;
+- }
+- desc[0] = 9;
+- desc[1] = 12;
+- /*
+- * Copy registers into sense buffer.
+- */
+- desc[2] = 0x00;
+- desc[3] = tf->error;
+- desc[5] = tf->nsect;
+- desc[7] = tf->lbal;
+- desc[9] = tf->lbam;
+- desc[11] = tf->lbah;
+- desc[12] = tf->device;
+- desc[13] = tf->status;
+-
+- /*
+- * Fill in Extend bit, and the high order bytes
+- * if applicable.
+- */
+- if (tf->flags & ATA_TFLAG_LBA48) {
+- desc[2] |= 0x01;
+- desc[4] = tf->hob_nsect;
+- desc[6] = tf->hob_lbal;
+- desc[8] = tf->hob_lbam;
+- desc[10] = tf->hob_lbah;
+- }
+- } else {
+- /* Fixed sense format */
+- sb[0] |= 0x80;
+- sb[3] = tf->error;
+- sb[4] = tf->status;
+- sb[5] = tf->device;
+- sb[6] = tf->nsect;
+- if (tf->flags & ATA_TFLAG_LBA48) {
+- sb[8] |= 0x80;
+- if (tf->hob_nsect)
+- sb[8] |= 0x40;
+- if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
+- sb[8] |= 0x20;
+- }
+- sb[9] = tf->lbal;
+- sb[10] = tf->lbam;
+- sb[11] = tf->lbah;
+- }
+ }
+
+ /**
+@@ -1664,26 +1679,32 @@ static void ata_scsi_qc_complete(struct
+ {
+ struct scsi_cmnd *cmd = qc->scsicmd;
+ u8 *cdb = cmd->cmnd;
+- int need_sense = (qc->err_mask != 0) &&
+- !(qc->flags & ATA_QCFLAG_SENSE_VALID);
++ bool have_sense = qc->flags & ATA_QCFLAG_SENSE_VALID;
++ bool is_ata_passthru = cdb[0] == ATA_16 || cdb[0] == ATA_12;
++ bool is_ck_cond_request = cdb[2] & 0x20;
++ bool is_error = qc->err_mask != 0;
+
+ /* For ATA pass thru (SAT) commands, generate a sense block if
+ * user mandated it or if there's an error. Note that if we
+- * generate because the user forced us to [CK_COND =1], a check
++ * generate because the user forced us to [CK_COND=1], a check
+ * condition is generated and the ATA register values are returned
+ * whether the command completed successfully or not. If there
+- * was no error, we use the following sense data:
++ * was no error, and CK_COND=1, we use the following sense data:
+ * sk = RECOVERED ERROR
+ * asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE
+ */
+- if (((cdb[0] == ATA_16) || (cdb[0] == ATA_12)) &&
+- ((cdb[2] & 0x20) || need_sense))
+- ata_gen_passthru_sense(qc);
+- else if (need_sense)
++ if (is_ata_passthru && (is_ck_cond_request || is_error || have_sense)) {
++ if (!have_sense)
++ ata_gen_passthru_sense(qc);
++ ata_scsi_set_passthru_sense_fields(qc);
++ if (is_ck_cond_request)
++ set_status_byte(qc->scsicmd, SAM_STAT_CHECK_CONDITION);
++ } else if (is_error && !have_sense) {
+ ata_gen_ata_sense(qc);
+- else
++ } else {
+ /* Keep the SCSI ML and status byte, clear host byte. */
+ cmd->result &= 0x0000ffff;
++ }
+
+ ata_qc_done(qc);
+ }
+@@ -2622,14 +2643,8 @@ static void atapi_qc_complete(struct ata
+ /* handle completion from EH */
+ if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) {
+
+- if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
+- /* FIXME: not quite right; we don't want the
+- * translation of taskfile registers into a
+- * sense descriptors, since that's only
+- * correct for ATA, not ATAPI
+- */
++ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID))
+ ata_gen_passthru_sense(qc);
+- }
+
+ /* SCSI EH automatically locks door if sdev->locked is
+ * set. Sometimes door lock request continues to
--- /dev/null
+From 38dab832c3f4154968f95b267a3bb789e87554b0 Mon Sep 17 00:00:00 2001
+From: Igor Pylypiv <ipylypiv@google.com>
+Date: Tue, 2 Jul 2024 02:47:29 +0000
+Subject: ata: libata-scsi: Fix offsets for the fixed format sense data
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Igor Pylypiv <ipylypiv@google.com>
+
+commit 38dab832c3f4154968f95b267a3bb789e87554b0 upstream.
+
+Correct the ATA PASS-THROUGH fixed format sense data offsets to conform
+to SPC-6 and SAT-5 specifications. Additionally, set the VALID bit to
+indicate that the INFORMATION field contains valid information.
+
+INFORMATION
+===========
+
+SAT-5 Table 212 — "Fixed format sense data INFORMATION field for the ATA
+PASS-THROUGH commands" defines the following format:
+
++------+------------+
+| Byte | Field |
++------+------------+
+| 0 | ERROR |
+| 1 | STATUS |
+| 2 | DEVICE |
+| 3 | COUNT(7:0) |
++------+------------+
+
+SPC-6 Table 48 - "Fixed format sense data" specifies that the INFORMATION
+field starts at byte 3 in sense buffer resulting in the following offsets
+for the ATA PASS-THROUGH commands:
+
++------------+-------------------------+
+| Field | Offset in sense buffer |
++------------+-------------------------+
+| ERROR | 3 |
+| STATUS | 4 |
+| DEVICE | 5 |
+| COUNT(7:0) | 6 |
++------------+-------------------------+
+
+COMMAND-SPECIFIC INFORMATION
+============================
+
+SAT-5 Table 213 - "Fixed format sense data COMMAND-SPECIFIC INFORMATION
+field for ATA PASS-THROUGH" defines the following format:
+
++------+-------------------+
+| Byte | Field |
++------+-------------------+
+| 0 | FLAGS | LOG INDEX |
+| 1 | LBA (7:0) |
+| 2 | LBA (15:8) |
+| 3 | LBA (23:16) |
++------+-------------------+
+
+SPC-6 Table 48 - "Fixed format sense data" specifies that
+the COMMAND-SPECIFIC INFORMATION field starts at byte 8
+in sense buffer resulting in the following offsets for
+the ATA PASS-THROUGH commands:
+
++-------------------+-------------------------+
+| Field | Offset in sense buffer |
++-------------------+-------------------------+
+| FLAGS | LOG INDEX | 8 |
+| LBA (7:0) | 9 |
+| LBA (15:8) | 10 |
+| LBA (23:16) | 11 |
++-------------------+-------------------------+
+
+Reported-by: Akshat Jain <akshatzen@google.com>
+Fixes: 11093cb1ef56 ("libata-scsi: generate correct ATA pass-through sense")
+Cc: stable@vger.kernel.org
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Igor Pylypiv <ipylypiv@google.com>
+Link: https://lore.kernel.org/r/20240702024735.1152293-2-ipylypiv@google.com
+Signed-off-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-scsi.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -855,7 +855,6 @@ static void ata_gen_passthru_sense(struc
+ struct scsi_cmnd *cmd = qc->scsicmd;
+ struct ata_taskfile *tf = &qc->result_tf;
+ unsigned char *sb = cmd->sense_buffer;
+- unsigned char *desc = sb + 8;
+ u8 sense_key, asc, ascq;
+
+ memset(sb, 0, SCSI_SENSE_BUFFERSIZE);
+@@ -877,7 +876,8 @@ static void ata_gen_passthru_sense(struc
+ scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
+ }
+
+- if ((cmd->sense_buffer[0] & 0x7f) >= 0x72) {
++ if ((sb[0] & 0x7f) >= 0x72) {
++ unsigned char *desc;
+ u8 len;
+
+ /* descriptor format */
+@@ -916,21 +916,21 @@ static void ata_gen_passthru_sense(struc
+ }
+ } else {
+ /* Fixed sense format */
+- desc[0] = tf->error;
+- desc[1] = tf->status;
+- desc[2] = tf->device;
+- desc[3] = tf->nsect;
+- desc[7] = 0;
++ sb[0] |= 0x80;
++ sb[3] = tf->error;
++ sb[4] = tf->status;
++ sb[5] = tf->device;
++ sb[6] = tf->nsect;
+ if (tf->flags & ATA_TFLAG_LBA48) {
+- desc[8] |= 0x80;
++ sb[8] |= 0x80;
+ if (tf->hob_nsect)
+- desc[8] |= 0x40;
++ sb[8] |= 0x40;
+ if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
+- desc[8] |= 0x20;
++ sb[8] |= 0x20;
+ }
+- desc[9] = tf->lbal;
+- desc[10] = tf->lbam;
+- desc[11] = tf->lbah;
++ sb[9] = tf->lbal;
++ sb[10] = tf->lbam;
++ sb[11] = tf->lbah;
+ }
+ }
+
--- /dev/null
+From 28ab9769117ca944cb6eb537af5599aa436287a4 Mon Sep 17 00:00:00 2001
+From: Igor Pylypiv <ipylypiv@google.com>
+Date: Tue, 2 Jul 2024 02:47:31 +0000
+Subject: ata: libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error
+
+From: Igor Pylypiv <ipylypiv@google.com>
+
+commit 28ab9769117ca944cb6eb537af5599aa436287a4 upstream.
+
+SAT-5 revision 8 specification removed the text about the ANSI INCITS
+431-2007 compliance which was requiring SCSI/ATA Translation (SAT) to
+return descriptor format sense data for the ATA PASS-THROUGH commands
+regardless of the setting of the D_SENSE bit.
+
+Let's honor the D_SENSE bit for ATA PASS-THROUGH commands while
+generating the "ATA PASS-THROUGH INFORMATION AVAILABLE" sense data.
+
+SAT-5 revision 7
+================
+
+12.2.2.8 Fixed format sense data
+
+Table 212 shows the fields returned in the fixed format sense data
+(see SPC-5) for ATA PASS-THROUGH commands. SATLs compliant with ANSI
+INCITS 431-2007, SCSI/ATA Translation (SAT) return descriptor format
+sense data for the ATA PASS-THROUGH commands regardless of the setting
+of the D_SENSE bit.
+
+SAT-5 revision 8
+================
+
+12.2.2.8 Fixed format sense data
+
+Table 211 shows the fields returned in the fixed format sense data
+(see SPC-5) for ATA PASS-THROUGH commands.
+
+Cc: stable@vger.kernel.org # 4.19+
+Reported-by: Niklas Cassel <cassel@kernel.org>
+Closes: https://lore.kernel.org/linux-ide/Zn1WUhmLglM4iais@ryzen.lan
+Reviewed-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Igor Pylypiv <ipylypiv@google.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Link: https://lore.kernel.org/r/20240702024735.1152293-4-ipylypiv@google.com
+Signed-off-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-scsi.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -941,11 +941,8 @@ static void ata_gen_passthru_sense(struc
+ &sense_key, &asc, &ascq);
+ ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
+ } else {
+- /*
+- * ATA PASS-THROUGH INFORMATION AVAILABLE
+- * Always in descriptor format sense.
+- */
+- scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
++ /* ATA PASS-THROUGH INFORMATION AVAILABLE */
++ ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D);
+ }
+ }
+
--- /dev/null
+From 8e7860543a94784d744c7ce34b78a2e11beefa5c Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 4 Jul 2024 16:11:20 +0100
+Subject: btrfs: fix extent map use-after-free when adding pages to compressed bio
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8e7860543a94784d744c7ce34b78a2e11beefa5c upstream.
+
+At add_ra_bio_pages() we are accessing the extent map to calculate
+'add_size' after we dropped our reference on the extent map, resulting
+in a use-after-free. Fix this by computing 'add_size' before dropping our
+extent map reference.
+
+Reported-by: syzbot+853d80cba98ce1157ae6@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/000000000000038144061c6d18f2@google.com/
+Fixes: 6a4049102055 ("btrfs: subpage: make add_ra_bio_pages() compatible")
+CC: stable@vger.kernel.org # 6.1+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/compression.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/compression.c
++++ b/fs/btrfs/compression.c
+@@ -420,6 +420,7 @@ static noinline int add_ra_bio_pages(str
+ put_page(page);
+ break;
+ }
++ add_size = min(em->start + em->len, page_end + 1) - cur;
+ free_extent_map(em);
+
+ if (page->index == end_index) {
+@@ -432,7 +433,6 @@ static noinline int add_ra_bio_pages(str
+ }
+ }
+
+- add_size = min(em->start + em->len, page_end + 1) - cur;
+ ret = bio_add_page(orig_bio, page, add_size, offset_in_page(cur));
+ if (ret != add_size) {
+ unlock_extent(tree, cur, page_end, NULL);
--- /dev/null
+From 5d8e2971e817bb64225fc0b6327a78752f58a9aa Mon Sep 17 00:00:00 2001
+From: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+Date: Thu, 27 Jun 2024 15:31:09 +0900
+Subject: char: tpm: Fix possible memory leak in tpm_bios_measurements_open()
+
+From: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+
+commit 5d8e2971e817bb64225fc0b6327a78752f58a9aa upstream.
+
+In tpm_bios_measurements_open(), get_device() is called on the device
+embedded in struct tpm_chip. In the error path, however, put_device() is
+not called. This results in a reference count leak, which prevents the
+device from being properly released. This commit makes sure to call
+put_device() when the seq_open() call fails.
+
+Cc: stable@vger.kernel.org # +v4.18
+Fixes: 9b01b5356629 ("tpm: Move shared eventlog functions to common.c")
+Signed-off-by: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/eventlog/common.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/char/tpm/eventlog/common.c
++++ b/drivers/char/tpm/eventlog/common.c
+@@ -47,6 +47,8 @@ static int tpm_bios_measurements_open(st
+ if (!err) {
+ seq = file->private_data;
+ seq->private = chip;
++ } else {
++ put_device(&chip->dev);
+ }
+
+ return err;
--- /dev/null
+From cb520c3f366c77e8d69e4e2e2781a8ce48d98e79 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make24@iscas.ac.cn>
+Date: Tue, 9 Jul 2024 19:33:11 +0800
+Subject: drm/gma500: fix null pointer dereference in cdv_intel_lvds_get_modes
+
+From: Ma Ke <make24@iscas.ac.cn>
+
+commit cb520c3f366c77e8d69e4e2e2781a8ce48d98e79 upstream.
+
+In cdv_intel_lvds_get_modes(), the return value of drm_mode_duplicate()
+is assigned to mode, which will lead to a NULL pointer dereference on
+failure of drm_mode_duplicate(). Add a check to avoid the NULL pointer
+dereference.
+
+Cc: stable@vger.kernel.org
+Fixes: 6a227d5fd6c4 ("gma500: Add support for Cedarview")
+Signed-off-by: Ma Ke <make24@iscas.ac.cn>
+Signed-off-by: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240709113311.37168-1-make24@iscas.ac.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/gma500/cdv_intel_lvds.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
++++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+@@ -311,6 +311,9 @@ static int cdv_intel_lvds_get_modes(stru
+ if (mode_dev->panel_fixed_mode != NULL) {
+ struct drm_display_mode *mode =
+ drm_mode_duplicate(dev, mode_dev->panel_fixed_mode);
++ if (!mode)
++ return 0;
++
+ drm_mode_probed_add(connector, mode);
+ return 1;
+ }
--- /dev/null
+From 2df7aac81070987b0f052985856aa325a38debf6 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make24@iscas.ac.cn>
+Date: Tue, 9 Jul 2024 17:20:11 +0800
+Subject: drm/gma500: fix null pointer dereference in psb_intel_lvds_get_modes
+
+From: Ma Ke <make24@iscas.ac.cn>
+
+commit 2df7aac81070987b0f052985856aa325a38debf6 upstream.
+
+In psb_intel_lvds_get_modes(), the return value of drm_mode_duplicate() is
+assigned to mode, which will lead to a possible NULL pointer dereference
+on failure of drm_mode_duplicate(). Add a check to avoid the NULL pointer
+dereference.
+
+Cc: stable@vger.kernel.org
+Fixes: 89c78134cc54 ("gma500: Add Poulsbo support")
+Signed-off-by: Ma Ke <make24@iscas.ac.cn>
+Signed-off-by: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240709092011.3204970-1-make24@iscas.ac.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/gma500/psb_intel_lvds.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
++++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
+@@ -504,6 +504,9 @@ static int psb_intel_lvds_get_modes(stru
+ if (mode_dev->panel_fixed_mode != NULL) {
+ struct drm_display_mode *mode =
+ drm_mode_duplicate(dev, mode_dev->panel_fixed_mode);
++ if (!mode)
++ return 0;
++
+ drm_mode_probed_add(connector, mode);
+ return 1;
+ }
--- /dev/null
+From 97e32381d0fc6c2602a767b0c46e15eb2b75971d Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Date: Tue, 2 Jul 2024 16:52:48 +0200
+Subject: dt-bindings: thermal: correct thermal zone node name limit
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+commit 97e32381d0fc6c2602a767b0c46e15eb2b75971d upstream.
+
+The Linux kernel uses the thermal zone node name when registering thermal
+zones and has a hard-coded limit of 20 characters, including the terminating
+NUL byte. The bindings expect node names to finish with '-thermal'
+which is eight bytes long, thus we have only 11 characters for the rest
+of the node name (thus 10 for the pattern after leading fixed character).
+
+Reported-by: Rob Herring <robh@kernel.org>
+Closes: https://lore.kernel.org/all/CAL_JsqKogbT_4DPd1n94xqeHaU_J8ve5K09WOyVsRX3jxxUW3w@mail.gmail.com/
+Fixes: 1202a442a31f ("dt-bindings: thermal: Add yaml bindings for thermal zones")
+Cc: stable@vger.kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Link: https://lore.kernel.org/r/20240702145248.47184-1-krzysztof.kozlowski@linaro.org
+Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/devicetree/bindings/thermal/thermal-zones.yaml | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
++++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
+@@ -49,7 +49,10 @@ properties:
+ to take when the temperature crosses those thresholds.
+
+ patternProperties:
+- "^[a-zA-Z][a-zA-Z0-9\\-]{1,12}-thermal$":
++ # Node name is limited in size due to Linux kernel requirements - 19
++ # characters in total (see THERMAL_NAME_LENGTH, including terminating NUL
++ # byte):
++ "^[a-zA-Z][a-zA-Z0-9\\-]{1,10}-thermal$":
+ type: object
+ description:
+ Each thermal zone node contains information about how frequently it
--- /dev/null
+From 89fc548767a2155231128cb98726d6d2ea1256c9 Mon Sep 17 00:00:00 2001
+From: Sungjong Seo <sj1557.seo@samsung.com>
+Date: Fri, 31 May 2024 19:14:44 +0900
+Subject: exfat: fix potential deadlock on __exfat_get_dentry_set
+
+From: Sungjong Seo <sj1557.seo@samsung.com>
+
+commit 89fc548767a2155231128cb98726d6d2ea1256c9 upstream.
+
+When accessing a file with more entries than ES_MAX_ENTRY_NUM, the bh-array
+is allocated in __exfat_get_dentry_set. The problem is that the bh-array is
+allocated with GFP_KERNEL, which allows the allocation to enter filesystem
+reclaim while sbi->s_lock is held. In the following case, a deadlock on
+sbi->s_lock between the two processes may occur.
+
+ CPU0 CPU1
+ ---- ----
+ kswapd
+ balance_pgdat
+ lock(fs_reclaim)
+ exfat_iterate
+ lock(&sbi->s_lock)
+ exfat_readdir
+ exfat_get_uniname_from_ext_entry
+ exfat_get_dentry_set
+ __exfat_get_dentry_set
+ kmalloc_array
+ ...
+ lock(fs_reclaim)
+ ...
+ evict
+ exfat_evict_inode
+ lock(&sbi->s_lock)
+
+To fix this, let's allocate bh-array with GFP_NOFS.
+
+Fixes: a3ff29a95fde ("exfat: support dynamic allocate bh for exfat_entry_set_cache")
+Cc: stable@vger.kernel.org # v6.2+
+Reported-by: syzbot+412a392a2cd4a65e71db@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/lkml/000000000000fef47e0618c0327f@google.com
+Signed-off-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exfat/dir.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/exfat/dir.c
++++ b/fs/exfat/dir.c
+@@ -890,7 +890,7 @@ int exfat_get_dentry_set(struct exfat_en
+
+ num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb);
+ if (num_bh > ARRAY_SIZE(es->__bh)) {
+- es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_KERNEL);
++ es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_NOFS);
+ if (!es->bh) {
+ brelse(bh);
+ return -ENOMEM;
--- /dev/null
+From 322a6aff03937aa1ece33b4e46c298eafaf9ac41 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 24 Jun 2024 17:12:56 +0200
+Subject: ext2: Verify bitmap and itable block numbers before using them
+
+From: Jan Kara <jack@suse.cz>
+
+commit 322a6aff03937aa1ece33b4e46c298eafaf9ac41 upstream.
+
+Verify bitmap block numbers and inode table blocks are sane before using
+them for checking bits in the block bitmap.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext2/balloc.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/fs/ext2/balloc.c
++++ b/fs/ext2/balloc.c
+@@ -77,26 +77,33 @@ static int ext2_valid_block_bitmap(struc
+ ext2_grpblk_t next_zero_bit;
+ ext2_fsblk_t bitmap_blk;
+ ext2_fsblk_t group_first_block;
++ ext2_grpblk_t max_bit;
+
+ group_first_block = ext2_group_first_block_no(sb, block_group);
++ max_bit = ext2_group_last_block_no(sb, block_group) - group_first_block;
+
+ /* check whether block bitmap block number is set */
+ bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
+ offset = bitmap_blk - group_first_block;
+- if (!ext2_test_bit(offset, bh->b_data))
++ if (offset < 0 || offset > max_bit ||
++ !ext2_test_bit(offset, bh->b_data))
+ /* bad block bitmap */
+ goto err_out;
+
+ /* check whether the inode bitmap block number is set */
+ bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap);
+ offset = bitmap_blk - group_first_block;
+- if (!ext2_test_bit(offset, bh->b_data))
++ if (offset < 0 || offset > max_bit ||
++ !ext2_test_bit(offset, bh->b_data))
+ /* bad block bitmap */
+ goto err_out;
+
+ /* check whether the inode table block number is set */
+ bitmap_blk = le32_to_cpu(desc->bg_inode_table);
+ offset = bitmap_blk - group_first_block;
++ if (offset < 0 || offset > max_bit ||
++ offset + EXT2_SB(sb)->s_itb_per_group - 1 > max_bit)
++ goto err_out;
+ next_zero_bit = ext2_find_next_zero_bit(bh->b_data,
+ offset + EXT2_SB(sb)->s_itb_per_group,
+ offset);
--- /dev/null
+From 525bd65aa759ec320af1dc06e114ed69733e9e23 Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Tue, 2 Jul 2024 17:22:41 -0500
+Subject: fuse: verify {g,u}id mount options correctly
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 525bd65aa759ec320af1dc06e114ed69733e9e23 upstream.
+
+As was done in
+0200679fc795 ("tmpfs: verify {g,u}id mount options correctly")
+we need to validate that the requested uid and/or gid is representable in
+the filesystem's idmapping.
+
+Cribbing from the above commit log,
+
+The contract for {g,u}id mount options and {g,u}id values in general set
+from userspace has always been that they are translated according to the
+caller's idmapping. In so far, fuse has been doing the correct thing.
+But since fuse is mountable in unprivileged contexts it is also
+necessary to verify that the resulting {k,g}uid is representable in the
+namespace of the superblock.
+
+Fixes: c30da2e981a7 ("fuse: convert to use the new mount API")
+Cc: stable@vger.kernel.org # 5.4+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Link: https://lore.kernel.org/r/8f07d45d-c806-484d-a2e3-7a2199df1cd2@redhat.com
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/inode.c | 24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -751,6 +751,8 @@ static int fuse_parse_param(struct fs_co
+ struct fs_parse_result result;
+ struct fuse_fs_context *ctx = fsc->fs_private;
+ int opt;
++ kuid_t kuid;
++ kgid_t kgid;
+
+ if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ /*
+@@ -795,16 +797,30 @@ static int fuse_parse_param(struct fs_co
+ break;
+
+ case OPT_USER_ID:
+- ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
+- if (!uid_valid(ctx->user_id))
++ kuid = make_kuid(fsc->user_ns, result.uint_32);
++ if (!uid_valid(kuid))
+ return invalfc(fsc, "Invalid user_id");
++ /*
++ * The requested uid must be representable in the
++ * filesystem's idmapping.
++ */
++ if (!kuid_has_mapping(fsc->user_ns, kuid))
++ return invalfc(fsc, "Invalid user_id");
++ ctx->user_id = kuid;
+ ctx->user_id_present = true;
+ break;
+
+ case OPT_GROUP_ID:
+- ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
+- if (!gid_valid(ctx->group_id))
++ kgid = make_kgid(fsc->user_ns, result.uint_32);;
++ if (!gid_valid(kgid))
++ return invalfc(fsc, "Invalid group_id");
++ /*
++ * The requested gid must be representable in the
++ * filesystem's idmapping.
++ */
++ if (!kgid_has_mapping(fsc->user_ns, kgid))
+ return invalfc(fsc, "Invalid group_id");
++ ctx->group_id = kgid;
+ ctx->group_id_present = true;
+ break;
+
--- /dev/null
+From 26a2ed107929a855155429b11e1293b83e6b2a8b Mon Sep 17 00:00:00 2001
+From: Chao Yu <chao@kernel.org>
+Date: Sun, 16 Jun 2024 09:38:41 +0800
+Subject: hfs: fix to initialize fields of hfs_inode_info after hfs_alloc_inode()
+
+From: Chao Yu <chao@kernel.org>
+
+commit 26a2ed107929a855155429b11e1293b83e6b2a8b upstream.
+
+Syzbot reports uninitialized value access issue as below:
+
+loop0: detected capacity change from 0 to 64
+=====================================================
+BUG: KMSAN: uninit-value in hfs_revalidate_dentry+0x307/0x3f0 fs/hfs/sysdep.c:30
+ hfs_revalidate_dentry+0x307/0x3f0 fs/hfs/sysdep.c:30
+ d_revalidate fs/namei.c:862 [inline]
+ lookup_fast+0x89e/0x8e0 fs/namei.c:1649
+ walk_component fs/namei.c:2001 [inline]
+ link_path_walk+0x817/0x1480 fs/namei.c:2332
+ path_lookupat+0xd9/0x6f0 fs/namei.c:2485
+ filename_lookup+0x22e/0x740 fs/namei.c:2515
+ user_path_at_empty+0x8b/0x390 fs/namei.c:2924
+ user_path_at include/linux/namei.h:57 [inline]
+ do_mount fs/namespace.c:3689 [inline]
+ __do_sys_mount fs/namespace.c:3898 [inline]
+ __se_sys_mount+0x66b/0x810 fs/namespace.c:3875
+ __x64_sys_mount+0xe4/0x140 fs/namespace.c:3875
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+
+BUG: KMSAN: uninit-value in hfs_ext_read_extent fs/hfs/extent.c:196 [inline]
+BUG: KMSAN: uninit-value in hfs_get_block+0x92d/0x1620 fs/hfs/extent.c:366
+ hfs_ext_read_extent fs/hfs/extent.c:196 [inline]
+ hfs_get_block+0x92d/0x1620 fs/hfs/extent.c:366
+ block_read_full_folio+0x4ff/0x11b0 fs/buffer.c:2271
+ hfs_read_folio+0x55/0x60 fs/hfs/inode.c:39
+ filemap_read_folio+0x148/0x4f0 mm/filemap.c:2426
+ do_read_cache_folio+0x7c8/0xd90 mm/filemap.c:3553
+ do_read_cache_page mm/filemap.c:3595 [inline]
+ read_cache_page+0xfb/0x2f0 mm/filemap.c:3604
+ read_mapping_page include/linux/pagemap.h:755 [inline]
+ hfs_btree_open+0x928/0x1ae0 fs/hfs/btree.c:78
+ hfs_mdb_get+0x260c/0x3000 fs/hfs/mdb.c:204
+ hfs_fill_super+0x1fb1/0x2790 fs/hfs/super.c:406
+ mount_bdev+0x628/0x920 fs/super.c:1359
+ hfs_mount+0xcd/0xe0 fs/hfs/super.c:456
+ legacy_get_tree+0x167/0x2e0 fs/fs_context.c:610
+ vfs_get_tree+0xdc/0x5d0 fs/super.c:1489
+ do_new_mount+0x7a9/0x16f0 fs/namespace.c:3145
+ path_mount+0xf98/0x26a0 fs/namespace.c:3475
+ do_mount fs/namespace.c:3488 [inline]
+ __do_sys_mount fs/namespace.c:3697 [inline]
+ __se_sys_mount+0x919/0x9e0 fs/namespace.c:3674
+ __ia32_sys_mount+0x15b/0x1b0 fs/namespace.c:3674
+ do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline]
+ __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178
+ do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203
+ do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246
+ entry_SYSENTER_compat_after_hwframe+0x70/0x82
+
+Uninit was created at:
+ __alloc_pages+0x9a6/0xe00 mm/page_alloc.c:4590
+ __alloc_pages_node include/linux/gfp.h:238 [inline]
+ alloc_pages_node include/linux/gfp.h:261 [inline]
+ alloc_slab_page mm/slub.c:2190 [inline]
+ allocate_slab mm/slub.c:2354 [inline]
+ new_slab+0x2d7/0x1400 mm/slub.c:2407
+ ___slab_alloc+0x16b5/0x3970 mm/slub.c:3540
+ __slab_alloc mm/slub.c:3625 [inline]
+ __slab_alloc_node mm/slub.c:3678 [inline]
+ slab_alloc_node mm/slub.c:3850 [inline]
+ kmem_cache_alloc_lru+0x64d/0xb30 mm/slub.c:3879
+ alloc_inode_sb include/linux/fs.h:3018 [inline]
+ hfs_alloc_inode+0x5a/0xc0 fs/hfs/super.c:165
+ alloc_inode+0x83/0x440 fs/inode.c:260
+ new_inode_pseudo fs/inode.c:1005 [inline]
+ new_inode+0x38/0x4f0 fs/inode.c:1031
+ hfs_new_inode+0x61/0x1010 fs/hfs/inode.c:186
+ hfs_mkdir+0x54/0x250 fs/hfs/dir.c:228
+ vfs_mkdir+0x49a/0x700 fs/namei.c:4126
+ do_mkdirat+0x529/0x810 fs/namei.c:4149
+ __do_sys_mkdirat fs/namei.c:4164 [inline]
+ __se_sys_mkdirat fs/namei.c:4162 [inline]
+ __x64_sys_mkdirat+0xc8/0x120 fs/namei.c:4162
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+
+The .tz_secondswest, .cached_start and .cached_blocks fields of struct
+hfs_inode_info are not initialized after hfs_alloc_inode(); fix it.
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+3ae6be33a50b5aae4dab@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/linux-fsdevel/0000000000005ad04005ee48897f@google.com
+Signed-off-by: Chao Yu <chao@kernel.org>
+Link: https://lore.kernel.org/r/20240616013841.2217-1-chao@kernel.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hfs/inode.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/hfs/inode.c
++++ b/fs/hfs/inode.c
+@@ -204,6 +204,7 @@ struct inode *hfs_new_inode(struct inode
+ HFS_I(inode)->flags = 0;
+ HFS_I(inode)->rsrc_inode = NULL;
+ HFS_I(inode)->fs_blocks = 0;
++ HFS_I(inode)->tz_secondswest = sys_tz.tz_minuteswest * 60;
+ if (S_ISDIR(mode)) {
+ inode->i_size = 2;
+ HFS_SB(sb)->folder_count++;
+@@ -279,6 +280,8 @@ void hfs_inode_read_fork(struct inode *i
+ for (count = 0, i = 0; i < 3; i++)
+ count += be16_to_cpu(ext[i].count);
+ HFS_I(inode)->first_blocks = count;
++ HFS_I(inode)->cached_start = 0;
++ HFS_I(inode)->cached_blocks = 0;
+
+ inode->i_size = HFS_I(inode)->phys_size = log_size;
+ HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
--- /dev/null
+From 0453aad676ff99787124b9b3af4a5f59fbe808e2 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 10 Jul 2024 18:58:17 +0100
+Subject: io_uring/io-wq: limit retrying worker initialisation
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 0453aad676ff99787124b9b3af4a5f59fbe808e2 upstream.
+
+If io-wq worker creation fails, we retry it by queueing up a task_work.
+task_work is needed because it should be done from the user process
+context. The problem is that retries are not limited, and if queueing a
+task_work is the reason for the failure, we might get into an infinite
+loop.
+
+It doesn't seem to happen now but it would with the following patch
+executing task_work in the freezer's loop. For now, arbitrarily limit the
+number of attempts to create a worker.
+
+Cc: stable@vger.kernel.org
+Fixes: 3146cba99aa28 ("io-wq: make worker creation resilient against signals")
+Reported-by: Julian Orth <ju.orth@gmail.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/8280436925db88448c7c85c6656edee1a43029ea.1720634146.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io-wq.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/io_uring/io-wq.c
++++ b/io_uring/io-wq.c
+@@ -23,6 +23,7 @@
+ #include "io_uring.h"
+
+ #define WORKER_IDLE_TIMEOUT (5 * HZ)
++#define WORKER_INIT_LIMIT 3
+
+ enum {
+ IO_WORKER_F_UP = 0, /* up and active */
+@@ -59,6 +60,7 @@ struct io_worker {
+
+ unsigned long create_state;
+ struct callback_head create_work;
++ int init_retries;
+
+ union {
+ struct rcu_head rcu;
+@@ -746,7 +748,7 @@ static bool io_wq_work_match_all(struct
+ return true;
+ }
+
+-static inline bool io_should_retry_thread(long err)
++static inline bool io_should_retry_thread(struct io_worker *worker, long err)
+ {
+ /*
+ * Prevent perpetual task_work retry, if the task (or its group) is
+@@ -754,6 +756,8 @@ static inline bool io_should_retry_threa
+ */
+ if (fatal_signal_pending(current))
+ return false;
++ if (worker->init_retries++ >= WORKER_INIT_LIMIT)
++ return false;
+
+ switch (err) {
+ case -EAGAIN:
+@@ -780,7 +784,7 @@ static void create_worker_cont(struct ca
+ io_init_new_worker(wq, worker, tsk);
+ io_worker_release(worker);
+ return;
+- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
++ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
+
+ atomic_dec(&acct->nr_running);
+@@ -847,7 +851,7 @@ fail:
+ tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
+ if (!IS_ERR(tsk)) {
+ io_init_new_worker(wq, worker, tsk);
+- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
++ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
+ kfree(worker);
+ goto fail;
+ } else {
--- /dev/null
+From 6807352353561187a718e87204458999dbcbba1b Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Wed, 10 Jul 2024 10:14:27 +0200
+Subject: ipv4: fix source address selection with route leak
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit 6807352353561187a718e87204458999dbcbba1b upstream.
+
+By default, an address assigned to the output interface is selected when
+the source address is not specified. This is problematic when a route,
+configured in a vrf, uses an interface from another vrf (aka route leak).
+The original vrf does not own the selected source address.
+
+Let's add a check against the output interface and call the appropriate
+function to select the source address.
+
+CC: stable@vger.kernel.org
+Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF")
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -2270,6 +2270,15 @@ void fib_select_path(struct net *net, st
+ fib_select_default(fl4, res);
+
+ check_saddr:
+- if (!fl4->saddr)
+- fl4->saddr = fib_result_prefsrc(net, res);
++ if (!fl4->saddr) {
++ struct net_device *l3mdev;
++
++ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
++
++ if (!l3mdev ||
++ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
++ fl4->saddr = fib_result_prefsrc(net, res);
++ else
++ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
++ }
+ }
--- /dev/null
+From abb9a68d2c64dd9b128ae1f2e635e4d805e7ce64 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Wed, 10 Jul 2024 10:14:29 +0200
+Subject: ipv6: take care of scope when choosing the src addr
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit abb9a68d2c64dd9b128ae1f2e635e4d805e7ce64 upstream.
+
+When the source address is selected, the scope must be checked. For
+example, if a loopback address is assigned to the vrf device, it must not
+be chosen for packets sent outside.
+
+CC: stable@vger.kernel.org
+Fixes: afbac6010aec ("net: ipv6: Address selection needs to consider L3 domains")
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20240710081521.3809742-4-nicolas.dichtel@6wind.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -1839,7 +1839,8 @@ int ipv6_dev_get_saddr(struct net *net,
+ master, &dst,
+ scores, hiscore_idx);
+
+- if (scores[hiscore_idx].ifa)
++ if (scores[hiscore_idx].ifa &&
++ scores[hiscore_idx].scopedist >= 0)
+ goto out;
+ }
+
--- /dev/null
+From 943ad0b62e3c21f324c4884caa6cb4a871bca05c Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 10 Jul 2024 18:58:18 +0100
+Subject: kernel: rerun task_work while freezing in get_signal()
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 943ad0b62e3c21f324c4884caa6cb4a871bca05c upstream.
+
+io_uring can asynchronously add a task_work while the task is getting
+frozen. TIF_NOTIFY_SIGNAL will prevent the task from sleeping in
+do_freezer_trap(), and since the get_signal()'s relock loop doesn't
+retry task_work, the task will spin there not being able to sleep
+until the freezing is cancelled / the task is killed / etc.
+
+Run task_works in the freezer path. Keep the patch small and simple
+so it can be easily backported, but we might need to do some cleaning
+after and look if there are other places with similar problems.
+
+Cc: stable@vger.kernel.org
+Link: https://github.com/systemd/systemd/issues/33626
+Fixes: 12db8b690010c ("entry: Add support for TIF_NOTIFY_SIGNAL")
+Reported-by: Julian Orth <ju.orth@gmail.com>
+Acked-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/89ed3a52933370deaaf61a0a620a6ac91f1e754d.1720634146.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/signal.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -2587,6 +2587,14 @@ static void do_freezer_trap(void)
+ spin_unlock_irq(¤t->sighand->siglock);
+ cgroup_enter_frozen();
+ schedule();
++
++ /*
++ * We could've been woken by task_work, run it to clear
++ * TIF_NOTIFY_SIGNAL. The caller will retry if necessary.
++ */
++ clear_notify_signal();
++ if (unlikely(task_work_pending(current)))
++ task_work_run();
+ }
+
+ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
--- /dev/null
+From a0157b5aa34eb43ec4c5510f9c260bbb03be937e Mon Sep 17 00:00:00 2001
+From: Dikshita Agarwal <quic_dikshita@quicinc.com>
+Date: Thu, 9 May 2024 10:44:29 +0530
+Subject: media: venus: fix use after free in vdec_close
+
+From: Dikshita Agarwal <quic_dikshita@quicinc.com>
+
+commit a0157b5aa34eb43ec4c5510f9c260bbb03be937e upstream.
+
+There appears to be a possible use after free with vdec_close().
+The firmware will add buffer release work to the work queue through
+HFI callbacks as a normal part of decoding. Randomly closing the
+decoder device from userspace during normal decoding can incur
+a read after free for inst.
+
+Fix it by cancelling the work in vdec_close.
+
+Cc: stable@vger.kernel.org
+Fixes: af2c3834c8ca ("[media] media: venus: adding core part and helper functions")
+Signed-off-by: Dikshita Agarwal <quic_dikshita@quicinc.com>
+Acked-by: Vikash Garodia <quic_vgarodia@quicinc.com>
+Signed-off-by: Stanimir Varbanov <stanimir.k.varbanov@gmail.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/media/platform/qcom/venus/vdec.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/media/platform/qcom/venus/vdec.c
++++ b/drivers/media/platform/qcom/venus/vdec.c
+@@ -1747,6 +1747,7 @@ static int vdec_close(struct file *file)
+
+ vdec_pm_get(inst);
+
++ cancel_work_sync(&inst->delayed_process_work);
+ v4l2_m2m_ctx_release(inst->m2m_ctx);
+ v4l2_m2m_release(inst->m2m_dev);
+ vdec_ctrl_deinit(inst);
--- /dev/null
+From 97d9fba9a812cada5484667a46e14a4c976ca330 Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Fri, 12 Jul 2024 07:34:15 -0700
+Subject: net: netconsole: Disable target before netpoll cleanup
+
+From: Breno Leitao <leitao@debian.org>
+
+commit 97d9fba9a812cada5484667a46e14a4c976ca330 upstream.
+
+Currently, netconsole cleans up the netpoll structure before disabling
+the target. This approach can lead to race conditions, as message
+senders (write_ext_msg() and write_msg()) check if the target is
+enabled before using netpoll. The sender can validate that the target is
+enabled, but, the netpoll might be de-allocated already, causing
+undesired behaviours.
+
+This patch reverses the order of operations:
+1. Disable the target
+2. Clean up the netpoll structure
+
+This change eliminates the potential race condition, ensuring that
+no messages are sent through a partially cleaned-up netpoll structure.
+
+Fixes: 2382b15bcc39 ("netconsole: take care of NETDEV_UNREGISTER event")
+Cc: stable@vger.kernel.org
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20240712143415.1141039-1-leitao@debian.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/netconsole.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/netconsole.c
++++ b/drivers/net/netconsole.c
+@@ -770,6 +770,7 @@ restart:
+ /* rtnl_lock already held
+ * we might sleep in __netpoll_cleanup()
+ */
++ nt->enabled = false;
+ spin_unlock_irqrestore(&target_list_lock, flags);
+
+ __netpoll_cleanup(&nt->np);
+@@ -777,7 +778,6 @@ restart:
+ spin_lock_irqsave(&target_list_lock, flags);
+ netdev_put(nt->np.dev, &nt->np.dev_tracker);
+ nt->np.dev = NULL;
+- nt->enabled = false;
+ stopped = true;
+ netconsole_target_put(nt);
+ goto restart;
--- /dev/null
+From abc02e5602f7bf9bbae1e8999570a2ad5114578c Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Tue, 11 Jun 2024 15:36:46 -0400
+Subject: NFSD: Support write delegations in LAYOUTGET
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit abc02e5602f7bf9bbae1e8999570a2ad5114578c upstream.
+
+I noticed LAYOUTGET(LAYOUTIOMODE4_RW) returning NFS4ERR_ACCESS
+unexpectedly. The NFS client had created a file with mode 0444, and
+the server had returned a write delegation on the OPEN(CREATE). The
+client was requesting a RW layout using the write delegation stateid
+so that it could flush file modifications.
+
+Creating a read-only file does not seem to be problematic for
+NFSv4.1 without pNFS, so I began looking at NFSD's implementation of
+LAYOUTGET.
+
+The failure was because fh_verify() was doing a permission check as
+part of verifying the FH presented during the LAYOUTGET. It uses the
+loga_iomode value to specify the @accmode argument to fh_verify().
+fh_verify(MAY_WRITE) on a file whose mode is 0444 fails with -EACCES.
+
+To permit LAYOUT* operations in this case, add OWNER_OVERRIDE when
+checking the access permission of the incoming file handle for
+LAYOUTGET and LAYOUTCOMMIT.
+
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: stable@vger.kernel.org # v6.6+
+Message-Id: 4E9C0D74-A06D-4DC3-A48A-73034DC40395@oracle.com
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4proc.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2218,7 +2218,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
+ const struct nfsd4_layout_ops *ops;
+ struct nfs4_layout_stateid *ls;
+ __be32 nfserr;
+- int accmode = NFSD_MAY_READ_IF_EXEC;
++ int accmode = NFSD_MAY_READ_IF_EXEC | NFSD_MAY_OWNER_OVERRIDE;
+
+ switch (lgp->lg_seg.iomode) {
+ case IOMODE_READ:
+@@ -2308,7 +2308,8 @@ nfsd4_layoutcommit(struct svc_rqst *rqst
+ struct nfs4_layout_stateid *ls;
+ __be32 nfserr;
+
+- nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
++ nfserr = fh_verify(rqstp, current_fh, 0,
++ NFSD_MAY_WRITE | NFSD_MAY_OWNER_OVERRIDE);
+ if (nfserr)
+ goto out;
+
--- /dev/null
+From d329605287020c3d1c3b0dadc63d8208e7251382 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 25 Jun 2024 15:29:58 -1000
+Subject: sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks
+
+From: Tejun Heo <tj@kernel.org>
+
+commit d329605287020c3d1c3b0dadc63d8208e7251382 upstream.
+
+When a task's weight is being changed, set_load_weight() is called with
+@update_load set. As weight changes aren't trivial for the fair class,
+set_load_weight() calls fair.c::reweight_task() for fair class tasks.
+
+However, set_load_weight() first tests task_has_idle_policy() on entry and
+skips calling reweight_task() for SCHED_IDLE tasks. This is buggy as
+SCHED_IDLE tasks are just fair tasks with a very low weight and they would
+incorrectly skip load, vlag and position updates.
+
+Fix it by updating reweight_task() to take struct load_weight as idle weight
+can't be expressed with prio and making set_load_weight() call
+reweight_task() for SCHED_IDLE tasks too when @update_load is set.
+
+Fixes: 9059393e4ec1 ("sched/fair: Use reweight_entity() for set_user_nice()")
+Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org # v4.15+
+Link: http://lkml.kernel.org/r/20240624102331.GI31592@noisy.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/core.c | 23 ++++++++++-------------
+ kernel/sched/fair.c | 7 +++----
+ kernel/sched/sched.h | 2 +-
+ 3 files changed, 14 insertions(+), 18 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1304,27 +1304,24 @@ int tg_nop(struct task_group *tg, void *
+ static void set_load_weight(struct task_struct *p, bool update_load)
+ {
+ int prio = p->static_prio - MAX_RT_PRIO;
+- struct load_weight *load = &p->se.load;
++ struct load_weight lw;
+
+- /*
+- * SCHED_IDLE tasks get minimal weight:
+- */
+ if (task_has_idle_policy(p)) {
+- load->weight = scale_load(WEIGHT_IDLEPRIO);
+- load->inv_weight = WMULT_IDLEPRIO;
+- return;
++ lw.weight = scale_load(WEIGHT_IDLEPRIO);
++ lw.inv_weight = WMULT_IDLEPRIO;
++ } else {
++ lw.weight = scale_load(sched_prio_to_weight[prio]);
++ lw.inv_weight = sched_prio_to_wmult[prio];
+ }
+
+ /*
+ * SCHED_OTHER tasks have to update their load when changing their
+ * weight
+ */
+- if (update_load && p->sched_class == &fair_sched_class) {
+- reweight_task(p, prio);
+- } else {
+- load->weight = scale_load(sched_prio_to_weight[prio]);
+- load->inv_weight = sched_prio_to_wmult[prio];
+- }
++ if (update_load && p->sched_class == &fair_sched_class)
++ reweight_task(p, &lw);
++ else
++ p->se.load = lw;
+ }
+
+ #ifdef CONFIG_UCLAMP_TASK
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3791,15 +3791,14 @@ static void reweight_entity(struct cfs_r
+ }
+ }
+
+-void reweight_task(struct task_struct *p, int prio)
++void reweight_task(struct task_struct *p, const struct load_weight *lw)
+ {
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ struct load_weight *load = &se->load;
+- unsigned long weight = scale_load(sched_prio_to_weight[prio]);
+
+- reweight_entity(cfs_rq, se, weight);
+- load->inv_weight = sched_prio_to_wmult[prio];
++ reweight_entity(cfs_rq, se, lw->weight);
++ load->inv_weight = lw->inv_weight;
+ }
+
+ static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2435,7 +2435,7 @@ extern void init_sched_dl_class(void);
+ extern void init_sched_rt_class(void);
+ extern void init_sched_fair_class(void);
+
+-extern void reweight_task(struct task_struct *p, int prio);
++extern void reweight_task(struct task_struct *p, const struct load_weight *lw);
+
+ extern void resched_curr(struct rq *rq);
+ extern void resched_cpu(int cpu);
mm-mglru-fix-overshooting-shrinker-memory.patch
x86-efistub-avoid-returning-efi_success-on-error.patch
x86-efistub-revert-to-heap-allocated-boot_params-for-pe-entrypoint.patch
+exfat-fix-potential-deadlock-on-__exfat_get_dentry_set.patch
+dt-bindings-thermal-correct-thermal-zone-node-name-limit.patch
+tick-broadcast-make-takeover-of-broadcast-hrtimer-reliable.patch
+net-netconsole-disable-target-before-netpoll-cleanup.patch
+af_packet-handle-outgoing-vlan-packets-without-hardware-offloading.patch
+btrfs-fix-extent-map-use-after-free-when-adding-pages-to-compressed-bio.patch
+kernel-rerun-task_work-while-freezing-in-get_signal.patch
+ipv4-fix-source-address-selection-with-route-leak.patch
+ipv6-take-care-of-scope-when-choosing-the-src-addr.patch
+nfsd-support-write-delegations-in-layoutget.patch
+sched-fair-set_load_weight-must-also-call-reweight_task-for-sched_idle-tasks.patch
+fuse-verify-g-u-id-mount-options-correctly.patch
+ata-libata-scsi-fix-offsets-for-the-fixed-format-sense-data.patch
+char-tpm-fix-possible-memory-leak-in-tpm_bios_measurements_open.patch
+media-venus-fix-use-after-free-in-vdec_close.patch
+ata-libata-scsi-do-not-overwrite-valid-sense-data-when-ck_cond-1.patch
+ata-libata-scsi-honor-the-d_sense-bit-for-ck_cond-1-and-no-error.patch
+hfs-fix-to-initialize-fields-of-hfs_inode_info-after-hfs_alloc_inode.patch
+ext2-verify-bitmap-and-itable-block-numbers-before-using-them.patch
+io_uring-io-wq-limit-retrying-worker-initialisation.patch
+drm-gma500-fix-null-pointer-dereference-in-cdv_intel_lvds_get_modes.patch
+drm-gma500-fix-null-pointer-dereference-in-psb_intel_lvds_get_modes.patch
--- /dev/null
+From f7d43dd206e7e18c182f200e67a8db8c209907fa Mon Sep 17 00:00:00 2001
+From: Yu Liao <liaoyu15@huawei.com>
+Date: Thu, 11 Jul 2024 20:48:43 +0800
+Subject: tick/broadcast: Make takeover of broadcast hrtimer reliable
+
+From: Yu Liao <liaoyu15@huawei.com>
+
+commit f7d43dd206e7e18c182f200e67a8db8c209907fa upstream.
+
+Running the LTP hotplug stress test on an aarch64 machine results in
+rcu_sched stall warnings when the broadcast hrtimer was owned by the
+un-plugged CPU. The issue is the following:
+
+CPU1 (owns the broadcast hrtimer) CPU2
+
+ tick_broadcast_enter()
+ // shutdown local timer device
+ broadcast_shutdown_local()
+ ...
+ tick_broadcast_exit()
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT)
+ // timer device is not programmed
+ cpumask_set_cpu(cpu, tick_broadcast_force_mask)
+
+ initiates offlining of CPU1
+take_cpu_down()
+/*
+ * CPU1 shuts down and does not
+ * send broadcast IPI anymore
+ */
+ takedown_cpu()
+ hotplug_cpu__broadcast_tick_pull()
+ // move broadcast hrtimer to this CPU
+ clockevents_program_event()
+ bc_set_next()
+ hrtimer_start()
+ /*
+ * timer device is not programmed
+ * because only the first expiring
+ * timer will trigger clockevent
+ * device reprogramming
+ */
+
+What happens is that CPU2 exits broadcast mode with force bit set, then the
+local timer device is not reprogrammed and CPU2 expects to receive the
+expired event by the broadcast IPI. But this does not happen because CPU1
+is offlined by CPU2. CPU2 switches the clockevent device to ONESHOT state,
+but does not reprogram the device.
+
+The subsequent reprogramming of the hrtimer broadcast device does not
+program the clockevent device of CPU2 either because the pending expiry
+time is already in the past and the CPU expects the event to be delivered.
+As a consequence all CPUs which wait for a broadcast event to be delivered
+are stuck forever.
+
+Fix this issue by reprogramming the local timer device if the broadcast
+force bit of the CPU is set so that the broadcast hrtimer is delivered.
+
+[ tglx: Massage comment and change log. Add Fixes tag ]
+
+Fixes: 989dcb645ca7 ("tick: Handle broadcast wakeup of multiple cpus")
+Signed-off-by: Yu Liao <liaoyu15@huawei.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240711124843.64167-1-liaoyu15@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/tick-broadcast.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/kernel/time/tick-broadcast.c
++++ b/kernel/time/tick-broadcast.c
+@@ -1141,6 +1141,7 @@ void tick_broadcast_switch_to_oneshot(vo
+ #ifdef CONFIG_HOTPLUG_CPU
+ void hotplug_cpu__broadcast_tick_pull(int deadcpu)
+ {
++ struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+ struct clock_event_device *bc;
+ unsigned long flags;
+
+@@ -1148,6 +1149,28 @@ void hotplug_cpu__broadcast_tick_pull(in
+ bc = tick_broadcast_device.evtdev;
+
+ if (bc && broadcast_needs_cpu(bc, deadcpu)) {
++ /*
++ * If the broadcast force bit of the current CPU is set,
++ * then the current CPU has not yet reprogrammed the local
++ * timer device to avoid a ping-pong race. See
++ * ___tick_broadcast_oneshot_control().
++ *
++ * If the broadcast device is hrtimer based then
++ * programming the broadcast event below does not have any
++ * effect because the local clockevent device is not
++ * running and not programmed because the broadcast event
++ * is not earlier than the pending event of the local clock
++ * event device. As a consequence all CPUs waiting for a
++ * broadcast event are stuck forever.
++ *
++ * Detect this condition and reprogram the cpu local timer
++ * device to avoid the starvation.
++ */
++ if (tick_check_broadcast_expired()) {
++ cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask);
++ tick_program_event(td->evtdev->next_event, 1);
++ }
++
+ /* This moves the broadcast assignment to this CPU: */
+ clockevents_program_event(bc, bc->next_event, 1);
+ }