From: Sasha Levin Date: Sun, 25 Aug 2024 11:50:50 +0000 (-0400) Subject: Fixes for 6.10 X-Git-Tag: v6.1.107~73 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a8688ed44acfbff0e30280db660144569d9afe1e;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.10 Signed-off-by: Sasha Levin --- diff --git a/queue-6.10/bluetooth-hci-invert-le-state-quirk-to-be-opt-out-ra.patch b/queue-6.10/bluetooth-hci-invert-le-state-quirk-to-be-opt-out-ra.patch new file mode 100644 index 00000000000..8e5c257a8a9 --- /dev/null +++ b/queue-6.10/bluetooth-hci-invert-le-state-quirk-to-be-opt-out-ra.patch @@ -0,0 +1,216 @@ +From c281a47bd697868a364cb1578ee4b0502d8ab299 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Aug 2024 10:43:48 -0400 +Subject: Bluetooth: HCI: Invert LE State quirk to be opt-out rather then + opt-in + +From: Luiz Augusto von Dentz + +[ Upstream commit aae6b81260fd9a7224f7eb4fc440d625852245bb ] + +This inverts the LE State quirk so by default we assume the controllers +would report valid states rather than invalid which is how quirks +normally behave, also this would result in HCI command failing if the LE +States are really broken thus exposing the controllers that are really +broken in this respect. 
+ +Link: https://github.com/bluez/bluez/issues/584 +Fixes: 220915857e29 ("Bluetooth: Adding driver and quirk defs for multi-role LE") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + drivers/bluetooth/btintel.c | 10 ---------- + drivers/bluetooth/btintel_pcie.c | 3 --- + drivers/bluetooth/btmtksdio.c | 3 --- + drivers/bluetooth/btrtl.c | 1 - + drivers/bluetooth/btusb.c | 4 ++-- + drivers/bluetooth/hci_qca.c | 4 ++-- + drivers/bluetooth/hci_vhci.c | 2 -- + include/net/bluetooth/hci.h | 17 ++++++++++------- + include/net/bluetooth/hci_core.h | 2 +- + net/bluetooth/hci_event.c | 2 +- + 10 files changed, 16 insertions(+), 32 deletions(-) + +diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c +index 93900c37349c1..c084dc88d3d91 100644 +--- a/drivers/bluetooth/btintel.c ++++ b/drivers/bluetooth/btintel.c +@@ -2876,9 +2876,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) + INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, + &hdev->quirks); +- if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) +- set_bit(HCI_QUIRK_VALID_LE_STATES, +- &hdev->quirks); + + err = btintel_legacy_rom_setup(hdev, &ver); + break; +@@ -2887,7 +2884,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) + case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* CcP */ +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + fallthrough; + case 0x0c: /* WsP */ + /* Apply the device specific HCI quirks +@@ -2979,9 +2975,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) + /* These variants don't seem to support LE Coded PHY */ + set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + +- /* Set Valid LE States quirk */ +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); +- + /* Setup MSFT Extension support */ + btintel_set_msft_opcode(hdev, ver.hw_variant); + +@@ -3003,9 +2996,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) + */ + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, 
&hdev->quirks); + +- /* Apply LE States quirk from solar onwards */ +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); +- + /* Setup MSFT Extension support */ + btintel_set_msft_opcode(hdev, + INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); +diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c +index b8120b98a2395..1fd3b7073ab90 100644 +--- a/drivers/bluetooth/btintel_pcie.c ++++ b/drivers/bluetooth/btintel_pcie.c +@@ -1182,9 +1182,6 @@ static int btintel_pcie_setup(struct hci_dev *hdev) + */ + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + +- /* Apply LE States quirk from solar onwards */ +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); +- + /* Setup MSFT Extension support */ + btintel_set_msft_opcode(hdev, + INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); +diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c +index 8ded9ef8089a2..bc4700ed3b782 100644 +--- a/drivers/bluetooth/btmtksdio.c ++++ b/drivers/bluetooth/btmtksdio.c +@@ -1144,9 +1144,6 @@ static int btmtksdio_setup(struct hci_dev *hdev) + } + } + +- /* Valid LE States quirk for MediaTek 7921 */ +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); +- + break; + case 0x7663: + case 0x7668: +diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c +index 4f1e37b4f7802..bfcb41a57655f 100644 +--- a/drivers/bluetooth/btrtl.c ++++ b/drivers/bluetooth/btrtl.c +@@ -1287,7 +1287,6 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) + case CHIP_ID_8852C: + case CHIP_ID_8851B: + case CHIP_ID_8852BT: +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + + /* RTL8852C needs to transmit mSBC data continuously without +diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c +index 789c492df6fa2..0927f51867c26 100644 +--- a/drivers/bluetooth/btusb.c ++++ b/drivers/bluetooth/btusb.c +@@ -4545,8 +4545,8 @@ static int btusb_probe(struct usb_interface *intf, + 
if (id->driver_info & BTUSB_WIDEBAND_SPEECH) + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + +- if (id->driver_info & BTUSB_VALID_LE_STATES) +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); ++ if (!(id->driver_info & BTUSB_VALID_LE_STATES)) ++ set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); + + if (id->driver_info & BTUSB_DIGIANSWER) { + data->cmdreq_type = USB_TYPE_VENDOR; +diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c +index 9a0bc86f9aace..34c36f0f781ea 100644 +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -2408,8 +2408,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, + &hdev->quirks); + +- if (data->capabilities & QCA_CAP_VALID_LE_STATES) +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); ++ if (!(data->capabilities & QCA_CAP_VALID_LE_STATES)) ++ set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); + } + + return 0; +diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c +index 28750a40f0ed5..b652d68f0ee14 100644 +--- a/drivers/bluetooth/hci_vhci.c ++++ b/drivers/bluetooth/hci_vhci.c +@@ -425,8 +425,6 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) + if (opcode & 0x80) + set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + +- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); +- + if (hci_register_dev(hdev) < 0) { + BT_ERR("Can't register HCI device"); + hci_free_dev(hdev); +diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h +index e372a88e8c3f6..d1d073089f384 100644 +--- a/include/net/bluetooth/hci.h ++++ b/include/net/bluetooth/hci.h +@@ -206,14 +206,17 @@ enum { + */ + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, + +- /* When this quirk is set, the controller has validated that +- * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are +- * valid. 
This mechanism is necessary as many controllers have +- * been seen has having trouble initiating a connectable +- * advertisement despite the state combination being reported as +- * supported. ++ /* When this quirk is set, the LE states reported through the ++ * HCI_LE_READ_SUPPORTED_STATES are invalid/broken. ++ * ++ * This mechanism is necessary as many controllers have been seen has ++ * having trouble initiating a connectable advertisement despite the ++ * state combination being reported as supported. ++ * ++ * This quirk can be set before hci_register_dev is called or ++ * during the hdev->setup vendor callback. + */ +- HCI_QUIRK_VALID_LE_STATES, ++ HCI_QUIRK_BROKEN_LE_STATES, + + /* When this quirk is set, then erroneous data reporting + * is ignored. This is mainly due to the fact that the HCI +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index b15f51ae3bfd9..c97ff64c9189f 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -826,7 +826,7 @@ extern struct mutex hci_cb_list_lock; + } while (0) + + #define hci_dev_le_state_simultaneous(hdev) \ +- (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ ++ (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ + (hdev->le_states[4] & 0x08) && /* Central */ \ + (hdev->le_states[4] & 0x40) && /* Peripheral */ \ + (hdev->le_states[3] & 0x10)) /* Simultaneous */ +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index a78f6d706cd43..59d9086db75fe 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -5921,7 +5921,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, + * while we have an existing one in peripheral role. 
+ */ + if (hdev->conn_hash.le_num_peripheral > 0 && +- (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) || ++ (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) || + !(hdev->le_states[3] & 0x10))) + return NULL; + +-- +2.43.0 + diff --git a/queue-6.10/bluetooth-hci_core-fix-le-quote-calculation.patch b/queue-6.10/bluetooth-hci_core-fix-le-quote-calculation.patch new file mode 100644 index 00000000000..7935556d912 --- /dev/null +++ b/queue-6.10/bluetooth-hci_core-fix-le-quote-calculation.patch @@ -0,0 +1,76 @@ +From 9f527be1d77a2aa27d37e75f8a45d7c3d2768010 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Aug 2024 11:22:08 -0400 +Subject: Bluetooth: hci_core: Fix LE quote calculation + +From: Luiz Augusto von Dentz + +[ Upstream commit 932021a11805b9da4bd6abf66fe233cccd59fe0e ] + +Function hci_sched_le needs to update the respective counter variable +in place, otherwise the likes of hci_quote_sent would attempt to use the +possibly outdated value of conn->{le_cnt,acl_cnt}. + +Link: https://github.com/bluez/bluez/issues/915 +Fixes: 73d80deb7bdf ("Bluetooth: prioritizing data over HCI") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_core.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 6ecb110bf46bc..b488d0742c966 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -3674,19 +3674,19 @@ static void hci_sched_le(struct hci_dev *hdev) + { + struct hci_chan *chan; + struct sk_buff *skb; +- int quote, cnt, tmp; ++ int quote, *cnt, tmp; + + BT_DBG("%s", hdev->name); + + if (!hci_conn_num(hdev, LE_LINK)) + return; + +- cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; ++ cnt = hdev->le_pkts ? 
&hdev->le_cnt : &hdev->acl_cnt; + +- __check_timeout(hdev, cnt, LE_LINK); ++ __check_timeout(hdev, *cnt, LE_LINK); + +- tmp = cnt; +- while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) { ++ tmp = *cnt; ++ while (*cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) { + u32 priority = (skb_peek(&chan->data_q))->priority; + while (quote-- && (skb = skb_peek(&chan->data_q))) { + BT_DBG("chan %p skb %p len %d priority %u", chan, skb, +@@ -3701,7 +3701,7 @@ static void hci_sched_le(struct hci_dev *hdev) + hci_send_frame(hdev, skb); + hdev->le_last_tx = jiffies; + +- cnt--; ++ (*cnt)--; + chan->sent++; + chan->conn->sent++; + +@@ -3711,12 +3711,7 @@ static void hci_sched_le(struct hci_dev *hdev) + } + } + +- if (hdev->le_pkts) +- hdev->le_cnt = cnt; +- else +- hdev->acl_cnt = cnt; +- +- if (cnt != tmp) ++ if (*cnt != tmp) + hci_prio_recalculate(hdev, LE_LINK); + } + +-- +2.43.0 + diff --git a/queue-6.10/bluetooth-smp-fix-assumption-of-central-always-being.patch b/queue-6.10/bluetooth-smp-fix-assumption-of-central-always-being.patch new file mode 100644 index 00000000000..1046d5b380c --- /dev/null +++ b/queue-6.10/bluetooth-smp-fix-assumption-of-central-always-being.patch @@ -0,0 +1,447 @@ +From 1bdbb0effbb15fc185d424e70f8fcaf2d861c64a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Aug 2023 15:08:06 -0700 +Subject: Bluetooth: SMP: Fix assumption of Central always being Initiator + +From: Luiz Augusto von Dentz + +[ Upstream commit 28cd47f75185c4818b0fb1b46f2f02faaba96376 ] + +SMP initiator role shall be considered the one that initiates the +pairing procedure with SMP_CMD_PAIRING_REQ: + +BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part H +page 1557: + +Figure 2.1: LE pairing phases + +Note that by sending SMP_CMD_SECURITY_REQ it doesn't change the role to +be Initiator. 
+ +Link: https://github.com/bluez/bluez/issues/567 +Fixes: b28b4943660f ("Bluetooth: Add strict checks for allowed SMP PDUs") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/smp.c | 144 ++++++++++++++++++++++---------------------- + 1 file changed, 72 insertions(+), 72 deletions(-) + +diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c +index 1e7ea3a4b7ef3..4f9fdf400584e 100644 +--- a/net/bluetooth/smp.c ++++ b/net/bluetooth/smp.c +@@ -914,7 +914,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, + * Confirms and the responder Enters the passkey. + */ + if (smp->method == OVERLAP) { +- if (hcon->role == HCI_ROLE_MASTER) ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + smp->method = CFM_PASSKEY; + else + smp->method = REQ_PASSKEY; +@@ -964,7 +964,7 @@ static u8 smp_confirm(struct smp_chan *smp) + + smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); + +- if (conn->hcon->out) ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); + else + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); +@@ -980,7 +980,8 @@ static u8 smp_random(struct smp_chan *smp) + int ret; + + bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn, +- conn->hcon->out ? "initiator" : "responder"); ++ test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? 
"initiator" : ++ "responder"); + + ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp, + hcon->init_addr_type, &hcon->init_addr, +@@ -994,7 +995,7 @@ static u8 smp_random(struct smp_chan *smp) + return SMP_CONFIRM_FAILED; + } + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + u8 stk[16]; + __le64 rand = 0; + __le16 ediv = 0; +@@ -1256,14 +1257,15 @@ static void smp_distribute_keys(struct smp_chan *smp) + rsp = (void *) &smp->prsp[1]; + + /* The responder sends its keys first */ +- if (hcon->out && (smp->remote_key_dist & KEY_DIST_MASK)) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags) && ++ (smp->remote_key_dist & KEY_DIST_MASK)) { + smp_allow_key_dist(smp); + return; + } + + req = (void *) &smp->preq[1]; + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + keydist = &rsp->init_key_dist; + *keydist &= req->init_key_dist; + } else { +@@ -1432,7 +1434,7 @@ static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) + struct hci_conn *hcon = smp->conn->hcon; + u8 *na, *nb, a[7], b[7]; + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + na = smp->prnd; + nb = smp->rrnd; + } else { +@@ -1460,7 +1462,7 @@ static void sc_dhkey_check(struct smp_chan *smp) + a[6] = hcon->init_addr_type; + b[6] = hcon->resp_addr_type; + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + local_addr = a; + remote_addr = b; + memcpy(io_cap, &smp->preq[1], 3); +@@ -1539,7 +1541,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) + /* The round is only complete when the initiator + * receives pairing random. 
+ */ +- if (!hcon->out) { ++ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, + sizeof(smp->prnd), smp->prnd); + if (smp->passkey_round == 20) +@@ -1567,7 +1569,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) + + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, + sizeof(smp->prnd), smp->prnd); + return 0; +@@ -1578,7 +1580,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) + case SMP_CMD_PUBLIC_KEY: + default: + /* Initiating device starts the round */ +- if (!hcon->out) ++ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + return 0; + + bt_dev_dbg(hdev, "Starting passkey round %u", +@@ -1623,7 +1625,7 @@ static int sc_user_reply(struct smp_chan *smp, u16 mgmt_op, __le32 passkey) + } + + /* Initiator sends DHKey check first */ +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + sc_dhkey_check(smp); + SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); + } else if (test_and_clear_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags)) { +@@ -1746,7 +1748,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) + struct smp_cmd_pairing rsp, *req = (void *) skb->data; + struct l2cap_chan *chan = conn->smp; + struct hci_dev *hdev = conn->hcon->hdev; +- struct smp_chan *smp; ++ struct smp_chan *smp = chan->data; + u8 key_size, auth, sec_level; + int ret; + +@@ -1755,16 +1757,14 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) + if (skb->len < sizeof(*req)) + return SMP_INVALID_PARAMS; + +- if (conn->hcon->role != HCI_ROLE_SLAVE) ++ if (smp && test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + return SMP_CMD_NOTSUPP; + +- if (!chan->data) ++ if (!smp) { + smp = smp_chan_create(conn); +- else +- smp = chan->data; +- +- if (!smp) +- return SMP_UNSPECIFIED; ++ if (!smp) ++ return SMP_UNSPECIFIED; ++ } + + /* We didn't start the pairing, so 
match remote */ + auth = req->auth_req & AUTH_REQ_MASK(hdev); +@@ -1946,7 +1946,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) + if (skb->len < sizeof(*rsp)) + return SMP_INVALID_PARAMS; + +- if (conn->hcon->role != HCI_ROLE_MASTER) ++ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + return SMP_CMD_NOTSUPP; + + skb_pull(skb, sizeof(*rsp)); +@@ -2041,7 +2041,7 @@ static u8 sc_check_confirm(struct smp_chan *smp) + if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) + return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); + +- if (conn->hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), + smp->prnd); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); +@@ -2063,7 +2063,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp) + u8 auth; + + /* The issue is only observed when we're in responder role */ +- if (hcon->out) ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + return SMP_UNSPECIFIED; + + if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { +@@ -2099,7 +2099,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) + struct hci_dev *hdev = hcon->hdev; + + bt_dev_dbg(hdev, "conn %p %s", conn, +- hcon->out ? "initiator" : "responder"); ++ test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? 
"initiator" : ++ "responder"); + + if (skb->len < sizeof(smp->pcnf)) + return SMP_INVALID_PARAMS; +@@ -2121,7 +2122,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) + return ret; + } + +- if (conn->hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), + smp->prnd); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); +@@ -2156,7 +2157,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) + if (!test_bit(SMP_FLAG_SC, &smp->flags)) + return smp_random(smp); + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + pkax = smp->local_pk; + pkbx = smp->remote_pk; + na = smp->prnd; +@@ -2169,7 +2170,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) + } + + if (smp->method == REQ_OOB) { +- if (!hcon->out) ++ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, + sizeof(smp->prnd), smp->prnd); + SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); +@@ -2180,7 +2181,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) + if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) + return sc_passkey_round(smp, SMP_CMD_PAIRING_RANDOM); + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + u8 cfm[16]; + + err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->local_pk, +@@ -2221,7 +2222,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) + return SMP_UNSPECIFIED; + + if (smp->method == REQ_OOB) { +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + sc_dhkey_check(smp); + SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); + } +@@ -2295,10 +2296,27 @@ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, + return false; + } + ++static void smp_send_pairing_req(struct smp_chan *smp, __u8 auth) ++{ ++ struct smp_cmd_pairing cp; ++ ++ if (smp->conn->hcon->type == 
ACL_LINK) ++ build_bredr_pairing_cmd(smp, &cp, NULL); ++ else ++ build_pairing_cmd(smp->conn, &cp, NULL, auth); ++ ++ smp->preq[0] = SMP_CMD_PAIRING_REQ; ++ memcpy(&smp->preq[1], &cp, sizeof(cp)); ++ ++ smp_send_cmd(smp->conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); ++ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); ++ ++ set_bit(SMP_FLAG_INITIATOR, &smp->flags); ++} ++ + static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) + { + struct smp_cmd_security_req *rp = (void *) skb->data; +- struct smp_cmd_pairing cp; + struct hci_conn *hcon = conn->hcon; + struct hci_dev *hdev = hcon->hdev; + struct smp_chan *smp; +@@ -2347,16 +2365,20 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) + + skb_pull(skb, sizeof(*rp)); + +- memset(&cp, 0, sizeof(cp)); +- build_pairing_cmd(conn, &cp, NULL, auth); ++ smp_send_pairing_req(smp, auth); + +- smp->preq[0] = SMP_CMD_PAIRING_REQ; +- memcpy(&smp->preq[1], &cp, sizeof(cp)); ++ return 0; ++} + +- smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); +- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); ++static void smp_send_security_req(struct smp_chan *smp, __u8 auth) ++{ ++ struct smp_cmd_security_req cp; + +- return 0; ++ cp.auth_req = auth; ++ smp_send_cmd(smp->conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); ++ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); ++ ++ clear_bit(SMP_FLAG_INITIATOR, &smp->flags); + } + + int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) +@@ -2427,23 +2449,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) + authreq |= SMP_AUTH_MITM; + } + +- if (hcon->role == HCI_ROLE_MASTER) { +- struct smp_cmd_pairing cp; +- +- build_pairing_cmd(conn, &cp, NULL, authreq); +- smp->preq[0] = SMP_CMD_PAIRING_REQ; +- memcpy(&smp->preq[1], &cp, sizeof(cp)); +- +- smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); +- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); +- } else { +- struct smp_cmd_security_req cp; +- cp.auth_req = authreq; +- smp_send_cmd(conn, 
SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); +- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); +- } ++ if (hcon->role == HCI_ROLE_MASTER) ++ smp_send_pairing_req(smp, authreq); ++ else ++ smp_send_security_req(smp, authreq); + +- set_bit(SMP_FLAG_INITIATOR, &smp->flags); + ret = 0; + + unlock: +@@ -2694,8 +2704,6 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) + + static u8 sc_select_method(struct smp_chan *smp) + { +- struct l2cap_conn *conn = smp->conn; +- struct hci_conn *hcon = conn->hcon; + struct smp_cmd_pairing *local, *remote; + u8 local_mitm, remote_mitm, local_io, remote_io, method; + +@@ -2708,7 +2716,7 @@ static u8 sc_select_method(struct smp_chan *smp) + * the "struct smp_cmd_pairing" from them we need to skip the + * first byte which contains the opcode. + */ +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + local = (void *) &smp->preq[1]; + remote = (void *) &smp->prsp[1]; + } else { +@@ -2777,7 +2785,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) + /* Non-initiating device sends its public key after receiving + * the key from the initiating device. 
+ */ +- if (!hcon->out) { ++ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + err = sc_send_public_key(smp); + if (err) + return err; +@@ -2839,7 +2847,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) + } + + if (smp->method == REQ_OOB) { +- if (hcon->out) ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, + sizeof(smp->prnd), smp->prnd); + +@@ -2848,7 +2856,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) + return 0; + } + +- if (hcon->out) ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); + + if (smp->method == REQ_PASSKEY) { +@@ -2863,7 +2871,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) + /* The Initiating device waits for the non-initiating device to + * send the confirm value. + */ +- if (conn->hcon->out) ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) + return 0; + + err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->remote_pk, smp->prnd, +@@ -2897,7 +2905,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) + a[6] = hcon->init_addr_type; + b[6] = hcon->resp_addr_type; + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + local_addr = a; + remote_addr = b; + memcpy(io_cap, &smp->prsp[1], 3); +@@ -2922,7 +2930,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) + if (crypto_memneq(check->e, e, 16)) + return SMP_DHKEY_CHECK_FAILED; + +- if (!hcon->out) { ++ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + if (test_bit(SMP_FLAG_WAIT_USER, &smp->flags)) { + set_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags); + return 0; +@@ -2934,7 +2942,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) + + sc_add_ltk(smp); + +- if (hcon->out) { ++ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { + hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size); + hcon->enc_key_size = 
smp->enc_key_size; + } +@@ -3083,7 +3091,6 @@ static void bredr_pairing(struct l2cap_chan *chan) + struct l2cap_conn *conn = chan->conn; + struct hci_conn *hcon = conn->hcon; + struct hci_dev *hdev = hcon->hdev; +- struct smp_cmd_pairing req; + struct smp_chan *smp; + + bt_dev_dbg(hdev, "chan %p", chan); +@@ -3135,14 +3142,7 @@ static void bredr_pairing(struct l2cap_chan *chan) + + bt_dev_dbg(hdev, "starting SMP over BR/EDR"); + +- /* Prepare and send the BR/EDR SMP Pairing Request */ +- build_bredr_pairing_cmd(smp, &req, NULL); +- +- smp->preq[0] = SMP_CMD_PAIRING_REQ; +- memcpy(&smp->preq[1], &req, sizeof(req)); +- +- smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(req), &req); +- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); ++ smp_send_pairing_req(smp, 0x00); + } + + static void smp_resume_cb(struct l2cap_chan *chan) +-- +2.43.0 + diff --git a/queue-6.10/bnxt_en-fix-double-dma-unmapping-for-xdp_redirect.patch b/queue-6.10/bnxt_en-fix-double-dma-unmapping-for-xdp_redirect.patch new file mode 100644 index 00000000000..924814a299b --- /dev/null +++ b/queue-6.10/bnxt_en-fix-double-dma-unmapping-for-xdp_redirect.patch @@ -0,0 +1,76 @@ +From 4805ced0781d9aae13b104073e7d4ba25077c997 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2024 13:34:15 -0700 +Subject: bnxt_en: Fix double DMA unmapping for XDP_REDIRECT + +From: Somnath Kotur + +[ Upstream commit 8baeef7616d5194045c5a6b97fd1246b87c55b13 ] + +Remove the dma_unmap_page_attrs() call in the driver's XDP_REDIRECT +code path. This should have been removed when we let the page pool +handle the DMA mapping. This bug causes the warning: + +WARNING: CPU: 7 PID: 59 at drivers/iommu/dma-iommu.c:1198 iommu_dma_unmap_page+0xd5/0x100 +CPU: 7 PID: 59 Comm: ksoftirqd/7 Tainted: G W 6.8.0-1010-gcp #11-Ubuntu +Hardware name: Dell Inc. 
PowerEdge R7525/0PYVT1, BIOS 2.15.2 04/02/2024 +RIP: 0010:iommu_dma_unmap_page+0xd5/0x100 +Code: 89 ee 48 89 df e8 cb f2 69 ff 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 c9 31 f6 31 ff 45 31 c0 e9 ab 17 71 00 <0f> 0b 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 c9 +RSP: 0018:ffffab1fc0597a48 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: ffff99ff838280c8 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 +RBP: ffffab1fc0597a78 R08: 0000000000000002 R09: ffffab1fc0597c1c +R10: ffffab1fc0597cd3 R11: ffff99ffe375acd8 R12: 00000000e65b9000 +R13: 0000000000000050 R14: 0000000000001000 R15: 0000000000000002 +FS: 0000000000000000(0000) GS:ffff9a06efb80000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000565c34c37210 CR3: 00000005c7e3e000 CR4: 0000000000350ef0 +? show_regs+0x6d/0x80 +? __warn+0x89/0x150 +? iommu_dma_unmap_page+0xd5/0x100 +? report_bug+0x16a/0x190 +? handle_bug+0x51/0xa0 +? exc_invalid_op+0x18/0x80 +? iommu_dma_unmap_page+0xd5/0x100 +? iommu_dma_unmap_page+0x35/0x100 +dma_unmap_page_attrs+0x55/0x220 +? 
bpf_prog_4d7e87c0d30db711_xdp_dispatcher+0x64/0x9f +bnxt_rx_xdp+0x237/0x520 [bnxt_en] +bnxt_rx_pkt+0x640/0xdd0 [bnxt_en] +__bnxt_poll_work+0x1a1/0x3d0 [bnxt_en] +bnxt_poll+0xaa/0x1e0 [bnxt_en] +__napi_poll+0x33/0x1e0 +net_rx_action+0x18a/0x2f0 + +Fixes: 578fcfd26e2a ("bnxt_en: Let the page pool manage the DMA mapping") +Reviewed-by: Andy Gospodarek +Reviewed-by: Kalesh AP +Signed-off-by: Somnath Kotur +Signed-off-by: Michael Chan +Reviewed-by: Jacob Keller +Link: https://patch.msgid.link/20240820203415.168178-1-michael.chan@broadcom.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +index 345681d5007e3..f88b641533fcc 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +@@ -297,11 +297,6 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, + * redirect is coming from a frame received by the + * bnxt_en driver. 
+ */ +- rx_buf = &rxr->rx_buf_ring[cons]; +- mapping = rx_buf->mapping - bp->rx_dma_offset; +- dma_unmap_page_attrs(&pdev->dev, mapping, +- BNXT_RX_PAGE_SIZE, bp->rx_dir, +- DMA_ATTR_WEAK_ORDERING); + + /* if we are unable to allocate a new buffer, abort and reuse */ + if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { +-- +2.43.0 + diff --git a/queue-6.10/bonding-fix-bond_ipsec_offload_ok-return-type.patch b/queue-6.10/bonding-fix-bond_ipsec_offload_ok-return-type.patch new file mode 100644 index 00000000000..ec7966759f1 --- /dev/null +++ b/queue-6.10/bonding-fix-bond_ipsec_offload_ok-return-type.patch @@ -0,0 +1,68 @@ +From 4401fb20cb40e9d4f2022978d55b0784271f32ac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Aug 2024 14:48:10 +0300 +Subject: bonding: fix bond_ipsec_offload_ok return type + +From: Nikolay Aleksandrov + +[ Upstream commit fc59b9a5f7201b9f7272944596113a82cc7773d5 ] + +Fix the return type which should be bool. + +Fixes: 955b785ec6b3 ("bonding: fix suspicious RCU usage in bond_ipsec_offload_ok()") +Signed-off-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_main.c | 18 ++++++------------ + 1 file changed, 6 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index 2ed0da0684906..2370da4632149 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -599,34 +599,28 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) + struct net_device *real_dev; + struct slave *curr_active; + struct bonding *bond; +- int err; ++ bool ok = false; + + bond = netdev_priv(bond_dev); + rcu_read_lock(); + curr_active = rcu_dereference(bond->curr_active_slave); + real_dev = curr_active->dev; + +- if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { +- err = false; ++ if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) + goto out; +- } + +- if 
(!xs->xso.real_dev) { +- err = false; ++ if (!xs->xso.real_dev) + goto out; +- } + + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_offload_ok || +- netif_is_bond_master(real_dev)) { +- err = false; ++ netif_is_bond_master(real_dev)) + goto out; +- } + +- err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); ++ ok = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + out: + rcu_read_unlock(); +- return err; ++ return ok; + } + + static const struct xfrmdev_ops bond_xfrmdev_ops = { +-- +2.43.0 + diff --git a/queue-6.10/bonding-fix-null-pointer-deref-in-bond_ipsec_offload.patch b/queue-6.10/bonding-fix-null-pointer-deref-in-bond_ipsec_offload.patch new file mode 100644 index 00000000000..7fe979ea300 --- /dev/null +++ b/queue-6.10/bonding-fix-null-pointer-deref-in-bond_ipsec_offload.patch @@ -0,0 +1,37 @@ +From 479ae8c19f2767c574c65cc3a7b01cffd8bcbc3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Aug 2024 14:48:11 +0300 +Subject: bonding: fix null pointer deref in bond_ipsec_offload_ok + +From: Nikolay Aleksandrov + +[ Upstream commit 95c90e4ad89d493a7a14fa200082e466e2548f9d ] + +We must check if there is an active slave before dereferencing the pointer. 
+ +Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") +Signed-off-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Reviewed-by: Eric Dumazet +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index 2370da4632149..55841a0e05a47 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -604,6 +604,8 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) + bond = netdev_priv(bond_dev); + rcu_read_lock(); + curr_active = rcu_dereference(bond->curr_active_slave); ++ if (!curr_active) ++ goto out; + real_dev = curr_active->dev; + + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) +-- +2.43.0 + diff --git a/queue-6.10/bonding-fix-xfrm-real_dev-null-pointer-dereference.patch b/queue-6.10/bonding-fix-xfrm-real_dev-null-pointer-dereference.patch new file mode 100644 index 00000000000..92d7e801cd4 --- /dev/null +++ b/queue-6.10/bonding-fix-xfrm-real_dev-null-pointer-dereference.patch @@ -0,0 +1,81 @@ +From 5313cbd7c550cc39c0baa92b84b65c6e799dc7e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Aug 2024 14:48:12 +0300 +Subject: bonding: fix xfrm real_dev null pointer dereference + +From: Nikolay Aleksandrov + +[ Upstream commit f8cde9805981c50d0c029063dc7d82821806fc44 ] + +We shouldn't set real_dev to NULL because packets can be in transit and +xfrm might call xdo_dev_offload_ok() in parallel. All callbacks assume +real_dev is set. 
+ + Example trace: + kernel: BUG: unable to handle page fault for address: 0000000000001030 + kernel: bond0: (slave eni0np1): making interface the new active one + kernel: #PF: supervisor write access in kernel mode + kernel: #PF: error_code(0x0002) - not-present page + kernel: PGD 0 P4D 0 + kernel: Oops: 0002 [#1] PREEMPT SMP + kernel: CPU: 4 PID: 2237 Comm: ping Not tainted 6.7.7+ #12 + kernel: Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 + kernel: RIP: 0010:nsim_ipsec_offload_ok+0xc/0x20 [netdevsim] + kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA + kernel: Code: e0 0f 0b 48 83 7f 38 00 74 de 0f 0b 48 8b 47 08 48 8b 37 48 8b 78 40 e9 b2 e5 9a d7 66 90 0f 1f 44 00 00 48 8b 86 80 02 00 00 <83> 80 30 10 00 00 01 b8 01 00 00 00 c3 0f 1f 80 00 00 00 00 0f 1f + kernel: bond0: (slave eni0np1): making interface the new active one + kernel: RSP: 0018:ffffabde81553b98 EFLAGS: 00010246 + kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA + kernel: + kernel: RAX: 0000000000000000 RBX: ffff9eb404e74900 RCX: ffff9eb403d97c60 + kernel: RDX: ffffffffc090de10 RSI: ffff9eb404e74900 RDI: ffff9eb3c5de9e00 + kernel: RBP: ffff9eb3c0a42000 R08: 0000000000000010 R09: 0000000000000014 + kernel: R10: 7974203030303030 R11: 3030303030303030 R12: 0000000000000000 + kernel: R13: ffff9eb3c5de9e00 R14: ffffabde81553cc8 R15: ffff9eb404c53000 + kernel: FS: 00007f2a77a3ad00(0000) GS:ffff9eb43bd00000(0000) knlGS:0000000000000000 + kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + kernel: CR2: 0000000000001030 CR3: 00000001122ab000 CR4: 0000000000350ef0 + kernel: bond0: (slave eni0np1): making interface the new active one + kernel: Call Trace: + kernel: + kernel: ? __die+0x1f/0x60 + kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA + kernel: ? page_fault_oops+0x142/0x4c0 + kernel: ? do_user_addr_fault+0x65/0x670 + kernel: ? 
kvm_read_and_reset_apf_flags+0x3b/0x50 + kernel: bond0: (slave eni0np1): making interface the new active one + kernel: ? exc_page_fault+0x7b/0x180 + kernel: ? asm_exc_page_fault+0x22/0x30 + kernel: ? nsim_bpf_uninit+0x50/0x50 [netdevsim] + kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA + kernel: ? nsim_ipsec_offload_ok+0xc/0x20 [netdevsim] + kernel: bond0: (slave eni0np1): making interface the new active one + kernel: bond_ipsec_offload_ok+0x7b/0x90 [bonding] + kernel: xfrm_output+0x61/0x3b0 + kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA + kernel: ip_push_pending_frames+0x56/0x80 + +Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") +Signed-off-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_main.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index 55841a0e05a47..b257504a85347 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -582,7 +582,6 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) + } else { + slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); + } +- ipsec->xs->xso.real_dev = NULL; + } + spin_unlock_bh(&bond->ipsec_lock); + rcu_read_unlock(); +-- +2.43.0 + diff --git a/queue-6.10/bonding-fix-xfrm-state-handling-when-clearing-active.patch b/queue-6.10/bonding-fix-xfrm-state-handling-when-clearing-active.patch new file mode 100644 index 00000000000..bd2ae72d010 --- /dev/null +++ b/queue-6.10/bonding-fix-xfrm-state-handling-when-clearing-active.patch @@ -0,0 +1,43 @@ +From 87d1dc17d774430eeb14d20d59f42e3e2fde9cae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Aug 2024 14:48:13 +0300 +Subject: bonding: fix xfrm state handling when clearing active slave + +From: Nikolay Aleksandrov + +[ Upstream commit c4c5c5d2ef40a9f67a9241dc5422eac9ffe19547 ] + +If the 
active slave is cleared manually the xfrm state is not flushed. +This leads to xfrm add/del imbalance and adding the same state multiple +times. For example when the device cannot handle any more states we get: + [ 1169.884811] bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA +because it's filled with the same state after multiple active slave +clearings. This change also has a few nice side effects: user-space +gets a notification for the change, the old device gets its mac address +and promisc/mcast adjusted properly. + +Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") +Signed-off-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_options.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c +index bc80fb6397dcd..95d59a18c0223 100644 +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -936,7 +936,7 @@ static int bond_option_active_slave_set(struct bonding *bond, + /* check to see if we are clearing active */ + if (!slave_dev) { + netdev_dbg(bond->dev, "Clearing current active slave\n"); +- RCU_INIT_POINTER(bond->curr_active_slave, NULL); ++ bond_change_active_slave(bond, NULL); + bond_select_active_slave(bond); + } else { + struct slave *old_active = rtnl_dereference(bond->curr_active_slave); +-- +2.43.0 + diff --git a/queue-6.10/dpaa2-switch-fix-error-checking-in-dpaa2_switch_seed.patch b/queue-6.10/dpaa2-switch-fix-error-checking-in-dpaa2_switch_seed.patch new file mode 100644 index 00000000000..294b70d5690 --- /dev/null +++ b/queue-6.10/dpaa2-switch-fix-error-checking-in-dpaa2_switch_seed.patch @@ -0,0 +1,56 @@ +From 82a745183ce0a37eefc6e573b89912465cbbab21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 17 Aug 2024 09:52:46 +0300 +Subject: dpaa2-switch: Fix error checking in dpaa2_switch_seed_bp()
+ +From: Dan Carpenter + +[ Upstream commit c50e7475961c36ec4d21d60af055b32f9436b431 ] + +The dpaa2_switch_add_bufs() function returns the number of bufs that it +was able to add. It returns BUFS_PER_CMD (7) for complete success or a +smaller number if there are not enough pages available. However, the +error checking is looking at the total number of bufs instead of the +number which were added on this iteration. Thus the error checking +only works correctly for the first iteration through the loop and +subsequent iterations are always counted as a success. + +Fix this by checking only the bufs added in the current iteration. + +Fixes: 0b1b71370458 ("staging: dpaa2-switch: handle Rx path on control interface") +Signed-off-by: Dan Carpenter +Reviewed-by: Simon Horman +Reviewed-by: Ioana Ciornei +Tested-by: Ioana Ciornei +Link: https://patch.msgid.link/eec27f30-b43f-42b6-b8ee-04a6f83423b6@stanley.mountain +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +index a71f848adc054..a293b08f36d46 100644 +--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c ++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +@@ -2638,13 +2638,14 @@ static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw) + + static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw) + { +- int *count, i; ++ int *count, ret, i; + + for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) { ++ ret = dpaa2_switch_add_bufs(ethsw, ethsw->bpid); + count = &ethsw->buf_count; +- *count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid); ++ *count += ret; + +- if (unlikely(*count < BUFS_PER_CMD)) ++ if (unlikely(ret < BUFS_PER_CMD)) + return -ENOMEM; + } + +-- +2.43.0 + diff --git a/queue-6.10/ice-fix-ice_last_offset-formula.patch
b/queue-6.10/ice-fix-ice_last_offset-formula.patch new file mode 100644 index 00000000000..ad0661123eb --- /dev/null +++ b/queue-6.10/ice-fix-ice_last_offset-formula.patch @@ -0,0 +1,39 @@ +From 8b7c750b70953443f77dfd6397a78fcd04fe6076 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Aug 2024 12:53:25 +0200 +Subject: ice: fix ICE_LAST_OFFSET formula + +From: Maciej Fijalkowski + +[ Upstream commit b966ad832942b5a11e002f9b5ef102b08425b84a ] + +For bigger PAGE_SIZE archs, ice driver works on 3k Rx buffers. +Therefore, ICE_LAST_OFFSET should take into account ICE_RXBUF_3072, not +ICE_RXBUF_2048. + +Fixes: 7237f5b0dba4 ("ice: introduce legacy Rx flag") +Suggested-by: Luiz Capitulino +Signed-off-by: Maciej Fijalkowski +Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c +index 50211188c1a7a..4b690952bb403 100644 +--- a/drivers/net/ethernet/intel/ice/ice_txrx.c ++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c +@@ -842,7 +842,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) + return false; + #if (PAGE_SIZE >= 8192) + #define ICE_LAST_OFFSET \ +- (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) ++ (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) + if (rx_buf->page_offset > ICE_LAST_OFFSET) + return false; + #endif /* PAGE_SIZE >= 8192) */ +-- +2.43.0 + diff --git a/queue-6.10/ice-fix-page-reuse-when-page_size-is-over-8k.patch b/queue-6.10/ice-fix-page-reuse-when-page_size-is-over-8k.patch new file mode 100644 index 00000000000..5aaa3882df2 --- /dev/null +++ b/queue-6.10/ice-fix-page-reuse-when-page_size-is-over-8k.patch @@ -0,0 +1,67 @@ +From 94ba4374aa43bc774ffb15deb2a1e7cc8ecf3307 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Aug 2024 12:53:24 +0200 +Subject: ice: fix page reuse 
when PAGE_SIZE is over 8k + +From: Maciej Fijalkowski + +[ Upstream commit 50b2143356e888777fc5bca023c39f34f404613a ] + +Architectures that have PAGE_SIZE >= 8192 such as arm64 should act the +same as x86 currently, meaning reuse of a page should only take place +when no one else is busy with it. + +Do two things independently of underlying PAGE_SIZE: +- store the page count under ice_rx_buf::pgcnt +- then act upon its value vs ice_rx_buf::pagecnt_bias when making the + decision regarding page reuse + +Fixes: 2b245cb29421 ("ice: Implement transmit and NAPI support") +Signed-off-by: Maciej Fijalkowski +Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_txrx.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c +index 8d25b69812698..50211188c1a7a 100644 +--- a/drivers/net/ethernet/intel/ice/ice_txrx.c ++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c +@@ -837,16 +837,15 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) + if (!dev_page_is_reusable(page)) + return false; + +-#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) + return false; +-#else ++#if (PAGE_SIZE >= 8192) + #define ICE_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + if (rx_buf->page_offset > ICE_LAST_OFFSET) + return false; +-#endif /* PAGE_SIZE < 8192) */ ++#endif /* PAGE_SIZE >= 8192) */ + + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the +@@ -949,12 +948,7 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, + struct ice_rx_buf *rx_buf; + + rx_buf = &rx_ring->rx_buf[ntc]; +- rx_buf->pgcnt = +-#if (PAGE_SIZE < 8192) +- page_count(rx_buf->page); +-#else +- 0; +-#endif ++ rx_buf->pgcnt = 
page_count(rx_buf->page); + prefetchw(rx_buf->page); + + if (!size) +-- +2.43.0 + diff --git a/queue-6.10/ice-fix-truesize-operations-for-page_size-8192.patch b/queue-6.10/ice-fix-truesize-operations-for-page_size-8192.patch new file mode 100644 index 00000000000..b8d242a7d67 --- /dev/null +++ b/queue-6.10/ice-fix-truesize-operations-for-page_size-8192.patch @@ -0,0 +1,148 @@ +From 19b37d22ee90822aa0bfd3ba23f3e1d8640377a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Aug 2024 12:53:26 +0200 +Subject: ice: fix truesize operations for PAGE_SIZE >= 8192 + +From: Maciej Fijalkowski + +[ Upstream commit d53d4dcce69be5773e2d0878c9899ebfbf58c393 ] + +When working on multi-buffer packet on arch that has PAGE_SIZE >= 8192, +truesize is calculated and stored in xdp_buff::frame_sz per each +processed Rx buffer. This means that frame_sz will contain the truesize +based on last received buffer, but commit 1dc1a7e7f410 ("ice: +Centrallize Rx buffer recycling") assumed this value will be constant +for each buffer, which breaks the page recycling scheme and messes up the +way we update the page::page_offset. + +To fix this, let us work on constant truesize when PAGE_SIZE >= 8192 +instead of basing this on size of a packet read from Rx descriptor. This +way we can simplify the code and avoid calculating truesize per each +received frame and on top of that when using +xdp_update_skb_shared_info(), current formula for truesize update will +be valid. + +This means ice_rx_frame_truesize() can be removed altogether. +Furthermore, first call to it within ice_clean_rx_irq() for 4k PAGE_SIZE +was redundant as xdp_buff::frame_sz is initialized via xdp_init_buff() +in ice_vsi_cfg_rxq(). This should have been removed at the point where +xdp_buff struct started to be a member of ice_rx_ring and it was no +longer a stack based variable.
+ +There are two fixes tags as my understanding is that the first one +exposed us to broken truesize and page_offset handling and then second +introduced broken skb_shared_info update in ice_{construct,build}_skb(). + +Reported-and-tested-by: Luiz Capitulino +Closes: https://lore.kernel.org/netdev/8f9e2a5c-fd30-4206-9311-946a06d031bb@redhat.com/ +Fixes: 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling") +Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") +Signed-off-by: Maciej Fijalkowski +Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_base.c | 21 ++++++++++++++- + drivers/net/ethernet/intel/ice/ice_txrx.c | 33 ----------------------- + 2 files changed, 20 insertions(+), 34 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c +index 1facf179a96fd..f448d3a845642 100644 +--- a/drivers/net/ethernet/intel/ice/ice_base.c ++++ b/drivers/net/ethernet/intel/ice/ice_base.c +@@ -512,6 +512,25 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) + xsk_pool_fill_cb(ring->xsk_pool, &desc); + } + ++/** ++ * ice_get_frame_sz - calculate xdp_buff::frame_sz ++ * @rx_ring: the ring being configured ++ * ++ * Return frame size based on underlying PAGE_SIZE ++ */ ++static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) ++{ ++ unsigned int frame_sz; ++ ++#if (PAGE_SIZE >= 8192) ++ frame_sz = rx_ring->rx_buf_len; ++#else ++ frame_sz = ice_rx_pg_size(rx_ring) / 2; ++#endif ++ ++ return frame_sz; ++} ++ + /** + * ice_vsi_cfg_rxq - Configure an Rx queue + * @ring: the ring being configured +@@ -576,7 +595,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) + } + } + +- xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); ++ xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); + ring->xdp.data = NULL; + ring->xdp_ext.pkt_ctx = 
&ring->pkt_ctx; + err = ice_setup_rx_ctx(ring); +diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c +index 4b690952bb403..c9bc3f1add5d3 100644 +--- a/drivers/net/ethernet/intel/ice/ice_txrx.c ++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c +@@ -521,30 +521,6 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) + return -ENOMEM; + } + +-/** +- * ice_rx_frame_truesize +- * @rx_ring: ptr to Rx ring +- * @size: size +- * +- * calculate the truesize with taking into the account PAGE_SIZE of +- * underlying arch +- */ +-static unsigned int +-ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) +-{ +- unsigned int truesize; +- +-#if (PAGE_SIZE < 8192) +- truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ +-#else +- truesize = rx_ring->rx_offset ? +- SKB_DATA_ALIGN(rx_ring->rx_offset + size) + +- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : +- SKB_DATA_ALIGN(size); +-#endif +- return truesize; +-} +- + /** + * ice_run_xdp - Executes an XDP program on initialized xdp_buff + * @rx_ring: Rx ring +@@ -1154,11 +1130,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) + bool failure; + u32 first; + +- /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ +-#if (PAGE_SIZE < 8192) +- xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); +-#endif +- + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + if (xdp_prog) { + xdp_ring = rx_ring->xdp_ring; +@@ -1217,10 +1188,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) + hard_start = page_address(rx_buf->page) + rx_buf->page_offset - + offset; + xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); +-#if (PAGE_SIZE > 4096) +- /* At larger PAGE_SIZE, frame_sz depend on len size */ +- xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); +-#endif + xdp_buff_clear_frags_flag(xdp); + } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { + break; +-- +2.43.0 + diff --git 
a/queue-6.10/ice-use-internal-pf-id-instead-of-function-number.patch b/queue-6.10/ice-use-internal-pf-id-instead-of-function-number.patch new file mode 100644 index 00000000000..eb8e5b8747e --- /dev/null +++ b/queue-6.10/ice-use-internal-pf-id-instead-of-function-number.patch @@ -0,0 +1,47 @@ +From 24a9fcb622a058a15eb86f2474980f39162f2f61 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2024 09:17:42 +0200 +Subject: ice: use internal pf id instead of function number + +From: Michal Swiatkowski + +[ Upstream commit 503ab6ee40fc103ea55cc9e50bb879e571d65aac ] + +Always use the same PF id in the devlink port number. When the PF is +passed through to a VM, the bus info func number can be any value. + +Fixes: 2ae0aa4758b0 ("ice: Move devlink port to PF/VF struct") +Reviewed-by: Wojciech Drewek +Suggested-by: Jiri Pirko +Signed-off-by: Michal Swiatkowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/devlink/devlink_port.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +index 13e6790d3cae7..afcf64dab48a1 100644 +--- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c ++++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +@@ -337,7 +337,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) + return -EIO; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; +- attrs.phys.port_number = pf->hw.bus.func; ++ attrs.phys.port_number = pf->hw.pf_id; + + /* As FW supports only port split options for whole device, + * set port split options only for first PF.
+@@ -399,7 +399,7 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) + return -EINVAL; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; +- attrs.pci_vf.pf = pf->hw.bus.func; ++ attrs.pci_vf.pf = pf->hw.pf_id; + attrs.pci_vf.vf = vf->vf_id; + + ice_devlink_set_switch_id(pf, &attrs.switch_id); +-- +2.43.0 + diff --git a/queue-6.10/igb-cope-with-large-max_skb_frags.patch b/queue-6.10/igb-cope-with-large-max_skb_frags.patch new file mode 100644 index 00000000000..1b50058f036 --- /dev/null +++ b/queue-6.10/igb-cope-with-large-max_skb_frags.patch @@ -0,0 +1,55 @@ +From 56511d3763ed54c73d1b3071758fa92fdcecaa49 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Aug 2024 17:20:34 +0200 +Subject: igb: cope with large MAX_SKB_FRAGS + +From: Paolo Abeni + +[ Upstream commit 8aba27c4a5020abdf60149239198297f88338a8d ] + +Sabrina reports that the igb driver does not cope well with large +MAX_SKB_FRAG values: setting MAX_SKB_FRAG to 45 causes payload +corruption on TX. + +An easy reproducer is to run ssh to connect to the machine. With +MAX_SKB_FRAGS=17 it works, with MAX_SKB_FRAGS=45 it fails. This has +been reported originally in +https://bugzilla.redhat.com/show_bug.cgi?id=2265320 + +The root cause of the issue is that the driver does not take into +account properly the (possibly large) shared info size when selecting +the ring layout, and will try to fit two packets inside the same 4K +page even when the 1st fraglist will trump over the 2nd head. + +Address the issue by checking if 2K buffers are insufficient. 
+ +Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") +Reported-by: Jan Tluka +Reported-by: Jirka Hladky +Reported-by: Sabrina Dubroca +Tested-by: Sabrina Dubroca +Tested-by: Corinna Vinschen +Signed-off-by: Paolo Abeni +Signed-off-by: Corinna Vinschen +Link: https://patch.msgid.link/20240816152034.1453285-1-vinschen@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igb/igb_main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c +index fce2930ae6af7..b6aa449aa56af 100644 +--- a/drivers/net/ethernet/intel/igb/igb_main.c ++++ b/drivers/net/ethernet/intel/igb/igb_main.c +@@ -4809,6 +4809,7 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, + + #if (PAGE_SIZE < 8192) + if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || ++ IGB_2K_TOO_SMALL_WITH_PADDING || + rd32(E1000_RCTL) & E1000_RCTL_SBP) + set_ring_uses_large_buffer(rx_ring); + #endif +-- +2.43.0 + diff --git a/queue-6.10/ip6_tunnel-fix-broken-gro.patch b/queue-6.10/ip6_tunnel-fix-broken-gro.patch new file mode 100644 index 00000000000..7ff135df6d0 --- /dev/null +++ b/queue-6.10/ip6_tunnel-fix-broken-gro.patch @@ -0,0 +1,78 @@ +From 0d35f7a9c39f366ff172759cf4ba354dd6c7d219 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 17:14:16 +0200 +Subject: ip6_tunnel: Fix broken GRO + +From: Thomas Bogendoerfer + +[ Upstream commit 4b3e33fcc38f7750604b065c55a43e94c5bc3145 ] + +GRO code checks for matching layer 2 headers to see, if packet belongs +to the same flow and because ip6 tunnel set dev->hard_header_len +this check fails in cases, where it shouldn't. To fix this don't +set hard_header_len, but use needed_headroom like ipv4/ip_tunnel.c +does. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Thomas Bogendoerfer +Link: https://patch.msgid.link/20240815151419.109864-1-tbogendoerfer@suse.de +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_tunnel.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index 9dee0c1279554..87dfb565a9f81 100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1507,7 +1507,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) + tdev = __dev_get_by_index(t->net, p->link); + + if (tdev) { +- dev->hard_header_len = tdev->hard_header_len + t_hlen; ++ dev->needed_headroom = tdev->hard_header_len + ++ tdev->needed_headroom + t_hlen; + mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); + + mtu = mtu - t_hlen; +@@ -1731,7 +1732,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) + { + struct ip6_tnl *tnl = netdev_priv(dev); ++ int t_hlen; + ++ t_hlen = tnl->hlen + sizeof(struct ipv6hdr); + if (tnl->parms.proto == IPPROTO_IPV6) { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; +@@ -1740,10 +1743,10 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) + return -EINVAL; + } + if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { +- if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) ++ if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen) + return -EINVAL; + } else { +- if (new_mtu > IP_MAX_MTU - dev->hard_header_len) ++ if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen) + return -EINVAL; + } + WRITE_ONCE(dev->mtu, new_mtu); +@@ -1887,12 +1890,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) + t_hlen = t->hlen + sizeof(struct ipv6hdr); + + dev->type = ARPHRD_TUNNEL6; +- dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + dev->min_mtu = 
ETH_MIN_MTU; +- dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; ++ dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen; + + netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); +-- +2.43.0 + diff --git a/queue-6.10/ipv6-fix-possible-uaf-in-ip6_finish_output2.patch b/queue-6.10/ipv6-fix-possible-uaf-in-ip6_finish_output2.patch new file mode 100644 index 00000000000..4045955d87b --- /dev/null +++ b/queue-6.10/ipv6-fix-possible-uaf-in-ip6_finish_output2.patch @@ -0,0 +1,49 @@ +From 295a8962a274b6976931e900d2cd396af38dfb82 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2024 16:08:58 +0000 +Subject: ipv6: fix possible UAF in ip6_finish_output2() + +From: Eric Dumazet + +[ Upstream commit da273b377ae0d9bd255281ed3c2adb228321687b ] + +If skb_expand_head() returns NULL, skb has been freed +and associated dst/idev could also have been freed. + +We need to hold rcu_read_lock() to make sure the dst and +associated idev are alive. + +Fixes: 5796015fa968 ("ipv6: allocate enough headroom in ip6_finish_output2()") +Signed-off-by: Eric Dumazet +Cc: Vasily Averin +Reviewed-by: David Ahern +Link: https://patch.msgid.link/20240820160859.3786976-3-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_output.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index d44ddce4c9f4d..8778431acffda 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -70,11 +70,15 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * + + /* Be paranoid, rather than too clever. 
*/ + if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { ++ /* Make sure idev stays alive */ ++ rcu_read_lock(); + skb = skb_expand_head(skb, hh_len); + if (!skb) { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); ++ rcu_read_unlock(); + return -ENOMEM; + } ++ rcu_read_unlock(); + } + + hdr = ipv6_hdr(skb); +-- +2.43.0 + diff --git a/queue-6.10/ipv6-prevent-possible-uaf-in-ip6_xmit.patch b/queue-6.10/ipv6-prevent-possible-uaf-in-ip6_xmit.patch new file mode 100644 index 00000000000..35f36fccba1 --- /dev/null +++ b/queue-6.10/ipv6-prevent-possible-uaf-in-ip6_xmit.patch @@ -0,0 +1,48 @@ +From ad3870bc1793d5e5b1a2b6e3935aab2d764e9153 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2024 16:08:59 +0000 +Subject: ipv6: prevent possible UAF in ip6_xmit() + +From: Eric Dumazet + +[ Upstream commit 2d5ff7e339d04622d8282661df36151906d0e1c7 ] + +If skb_expand_head() returns NULL, skb has been freed +and the associated dst/idev could also have been freed. + +We must use rcu_read_lock() to prevent a possible UAF. 
+ +Fixes: 0c9f227bee11 ("ipv6: use skb_expand_head in ip6_xmit") +Signed-off-by: Eric Dumazet +Cc: Vasily Averin +Reviewed-by: David Ahern +Link: https://patch.msgid.link/20240820160859.3786976-4-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_output.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 8778431acffda..c49344d8311ab 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -287,11 +287,15 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, + head_room += opt->opt_nflen + opt->opt_flen; + + if (unlikely(head_room > skb_headroom(skb))) { ++ /* Make sure idev stays alive */ ++ rcu_read_lock(); + skb = skb_expand_head(skb, head_room); + if (!skb) { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); ++ rcu_read_unlock(); + return -ENOBUFS; + } ++ rcu_read_unlock(); + } + + if (opt) { +-- +2.43.0 + diff --git a/queue-6.10/ipv6-prevent-uaf-in-ip6_send_skb.patch b/queue-6.10/ipv6-prevent-uaf-in-ip6_send_skb.patch new file mode 100644 index 00000000000..0ae0b89f081 --- /dev/null +++ b/queue-6.10/ipv6-prevent-uaf-in-ip6_send_skb.patch @@ -0,0 +1,158 @@ +From 3e66009a64d62ea6af5afbbc917c01f8313e4297 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2024 16:08:57 +0000 +Subject: ipv6: prevent UAF in ip6_send_skb() + +From: Eric Dumazet + +[ Upstream commit faa389b2fbaaec7fd27a390b4896139f9da662e3 ] + +syzbot reported an UAF in ip6_send_skb() [1] + +After ip6_local_out() has returned, we no longer can safely +dereference rt, unless we hold rcu_read_lock(). + +A similar issue has been fixed in commit +a688caa34beb ("ipv6: take rcu lock in rawv6_send_hdrinc()") + +Another potential issue in ip6_finish_output2() is handled in a +separate patch. 
+ +[1] + BUG: KASAN: slab-use-after-free in ip6_send_skb+0x18d/0x230 net/ipv6/ip6_output.c:1964 +Read of size 8 at addr ffff88806dde4858 by task syz.1.380/6530 + +CPU: 1 UID: 0 PID: 6530 Comm: syz.1.380 Not tainted 6.11.0-rc3-syzkaller-00306-gdf6cbc62cc9b #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 +Call Trace: + + __dump_stack lib/dump_stack.c:93 [inline] + dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 + print_address_description mm/kasan/report.c:377 [inline] + print_report+0x169/0x550 mm/kasan/report.c:488 + kasan_report+0x143/0x180 mm/kasan/report.c:601 + ip6_send_skb+0x18d/0x230 net/ipv6/ip6_output.c:1964 + rawv6_push_pending_frames+0x75c/0x9e0 net/ipv6/raw.c:588 + rawv6_sendmsg+0x19c7/0x23c0 net/ipv6/raw.c:926 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg+0x1a6/0x270 net/socket.c:745 + sock_write_iter+0x2dd/0x400 net/socket.c:1160 + do_iter_readv_writev+0x60a/0x890 + vfs_writev+0x37c/0xbb0 fs/read_write.c:971 + do_writev+0x1b1/0x350 fs/read_write.c:1018 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f +RIP: 0033:0x7f936bf79e79 +Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f936cd7f038 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 +RAX: ffffffffffffffda RBX: 00007f936c115f80 RCX: 00007f936bf79e79 +RDX: 0000000000000001 RSI: 0000000020000040 RDI: 0000000000000004 +RBP: 00007f936bfe7916 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 0000000000000000 R14: 00007f936c115f80 R15: 00007fff2860a7a8 + + +Allocated by task 6530: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 + unpoison_slab_object 
mm/kasan/common.c:312 [inline] + __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:338 + kasan_slab_alloc include/linux/kasan.h:201 [inline] + slab_post_alloc_hook mm/slub.c:3988 [inline] + slab_alloc_node mm/slub.c:4037 [inline] + kmem_cache_alloc_noprof+0x135/0x2a0 mm/slub.c:4044 + dst_alloc+0x12b/0x190 net/core/dst.c:89 + ip6_blackhole_route+0x59/0x340 net/ipv6/route.c:2670 + make_blackhole net/xfrm/xfrm_policy.c:3120 [inline] + xfrm_lookup_route+0xd1/0x1c0 net/xfrm/xfrm_policy.c:3313 + ip6_dst_lookup_flow+0x13e/0x180 net/ipv6/ip6_output.c:1257 + rawv6_sendmsg+0x1283/0x23c0 net/ipv6/raw.c:898 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg+0x1a6/0x270 net/socket.c:745 + ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 + ___sys_sendmsg net/socket.c:2651 [inline] + __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2680 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +Freed by task 45: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 + kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:579 + poison_slab_object+0xe0/0x150 mm/kasan/common.c:240 + __kasan_slab_free+0x37/0x60 mm/kasan/common.c:256 + kasan_slab_free include/linux/kasan.h:184 [inline] + slab_free_hook mm/slub.c:2252 [inline] + slab_free mm/slub.c:4473 [inline] + kmem_cache_free+0x145/0x350 mm/slub.c:4548 + dst_destroy+0x2ac/0x460 net/core/dst.c:124 + rcu_do_batch kernel/rcu/tree.c:2569 [inline] + rcu_core+0xafd/0x1830 kernel/rcu/tree.c:2843 + handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 + __do_softirq kernel/softirq.c:588 [inline] + invoke_softirq kernel/softirq.c:428 [inline] + __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:649 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043 + 
asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 + +Last potentially related work creation: + kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 + __kasan_record_aux_stack+0xac/0xc0 mm/kasan/generic.c:541 + __call_rcu_common kernel/rcu/tree.c:3106 [inline] + call_rcu+0x167/0xa70 kernel/rcu/tree.c:3210 + refdst_drop include/net/dst.h:263 [inline] + skb_dst_drop include/net/dst.h:275 [inline] + nf_ct_frag6_queue net/ipv6/netfilter/nf_conntrack_reasm.c:306 [inline] + nf_ct_frag6_gather+0xb9a/0x2080 net/ipv6/netfilter/nf_conntrack_reasm.c:485 + ipv6_defrag+0x2c8/0x3c0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:67 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 + nf_hook include/linux/netfilter.h:269 [inline] + __ip6_local_out+0x6fa/0x800 net/ipv6/output_core.c:143 + ip6_local_out+0x26/0x70 net/ipv6/output_core.c:153 + ip6_send_skb+0x112/0x230 net/ipv6/ip6_output.c:1959 + rawv6_push_pending_frames+0x75c/0x9e0 net/ipv6/raw.c:588 + rawv6_sendmsg+0x19c7/0x23c0 net/ipv6/raw.c:926 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg+0x1a6/0x270 net/socket.c:745 + sock_write_iter+0x2dd/0x400 net/socket.c:1160 + do_iter_readv_writev+0x60a/0x890 + +Fixes: 0625491493d9 ("ipv6: ip6_push_pending_frames() should increment IPSTATS_MIB_OUTDISCARDS") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Reviewed-by: David Ahern +Link: https://patch.msgid.link/20240820160859.3786976-2-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_output.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 784424ac41477..d44ddce4c9f4d 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1953,6 +1953,7 @@ int ip6_send_skb(struct sk_buff *skb) + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); + int err; + ++ rcu_read_lock(); + err = ip6_local_out(net, skb->sk, skb); + if (err) { + 
if (err > 0) +@@ -1962,6 +1963,7 @@ int ip6_send_skb(struct sk_buff *skb) + IPSTATS_MIB_OUTDISCARDS); + } + ++ rcu_read_unlock(); + return err; + } + +-- +2.43.0 + diff --git a/queue-6.10/kbuild-avoid-scripts-kallsyms-parsing-dev-null.patch b/queue-6.10/kbuild-avoid-scripts-kallsyms-parsing-dev-null.patch new file mode 100644 index 00000000000..90e15e9c3b7 --- /dev/null +++ b/queue-6.10/kbuild-avoid-scripts-kallsyms-parsing-dev-null.patch @@ -0,0 +1,43 @@ +From 9e3b43301f8476032de326616a1be2a46a7d9a07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Aug 2024 03:03:00 +0900 +Subject: kbuild: avoid scripts/kallsyms parsing /dev/null + +From: Masahiro Yamada + +[ Upstream commit 1472464c6248575bf2d01c7f076b94704bb32c95 ] + +On macOS, as reported by Daniel Gomez, getline() sets errno to ENOTTY +if it is requested to read from /dev/null. + +If this is worth fixing, I would rather pass an empty file to +scripts/kallsyms instead of adding the ugly #ifdef __APPLE__. + +Fixes: c442db3f49f2 ("kbuild: remove PROVIDE() for kallsyms symbols") +Reported-by: Daniel Gomez +Closes: https://lore.kernel.org/all/20240807-macos-build-support-v1-12-4cd1ded85694@samsung.com/ +Signed-off-by: Masahiro Yamada +Reviewed-by: Nicolas Schier +Reviewed-by: Daniel Gomez +Signed-off-by: Sasha Levin +--- + scripts/link-vmlinux.sh | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh +index 22d0bc8439863..070a319140e89 100755 +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -215,7 +215,8 @@ kallsymso= + strip_debug= + + if is_enabled CONFIG_KALLSYMS; then +- kallsyms /dev/null .tmp_vmlinux0.kallsyms ++ truncate -s0 .tmp_vmlinux.kallsyms0.syms ++ kallsyms .tmp_vmlinux.kallsyms0.syms .tmp_vmlinux0.kallsyms + fi + + if is_enabled CONFIG_KALLSYMS || is_enabled CONFIG_DEBUG_INFO_BTF; then +-- +2.43.0 + diff --git a/queue-6.10/kbuild-merge-temporary-vmlinux-for-btf-and-kallsyms.patch 
b/queue-6.10/kbuild-merge-temporary-vmlinux-for-btf-and-kallsyms.patch new file mode 100644 index 00000000000..394442f43c7 --- /dev/null +++ b/queue-6.10/kbuild-merge-temporary-vmlinux-for-btf-and-kallsyms.patch @@ -0,0 +1,166 @@ +From e4aefbe760ecd3041f053b0ab3341d2a72346940 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jun 2024 20:25:18 +0900 +Subject: kbuild: merge temporary vmlinux for BTF and kallsyms + +From: Masahiro Yamada + +[ Upstream commit b1a9a5e04767e2a78783e19c9e55c25812ceccc3 ] + +CONFIG_DEBUG_INFO_BTF=y requires one additional link step. +(.tmp_vmlinux.btf) + +CONFIG_KALLSYMS=y requires two additional link steps. +(.tmp_vmlinux.kallsyms1 and .tmp_vmlinux.kallsyms2) + +Enabling both requires three additional link steps. + +When CONFIG_DEBUG_INFO_BTF=y and CONFIG_KALLSYMS=y, the current build +process is as follows: + + KSYMS .tmp_vmlinux.kallsyms0.S + AS .tmp_vmlinux.kallsyms0.o + LD .tmp_vmlinux.btf # temporary vmlinux for BTF + BTF .btf.vmlinux.bin.o + LD .tmp_vmlinux.kallsyms1 # temporary vmlinux for kallsyms step 1 + NM .tmp_vmlinux.kallsyms1.syms + KSYMS .tmp_vmlinux.kallsyms1.S + AS .tmp_vmlinux.kallsyms1.o + LD .tmp_vmlinux.kallsyms2 # temporary vmlinux for kallsyms step 2 + NM .tmp_vmlinux.kallsyms2.syms + KSYMS .tmp_vmlinux.kallsyms2.S + AS .tmp_vmlinux.kallsyms2.o + LD vmlinux # final vmlinux + +This is redundant because the BTF generation and the kallsyms step 1 can +be performed against the same temporary vmlinux. + +When both CONFIG_DEBUG_INFO_BTF and CONFIG_KALLSYMS are enabled, we can +reduce the number of link steps by one. 
+ +This commit changes the build process as follows: + + KSYMS .tmp_vmlinux0.kallsyms.S + AS .tmp_vmlinux0.kallsyms.o + LD .tmp_vmlinux1 # temporary vmlinux for BTF and kallsyms step 1 + BTF .tmp_vmlinux1.btf.o + NM .tmp_vmlinux1.syms + KSYMS .tmp_vmlinux1.kallsyms.S + AS .tmp_vmlinux1.kallsyms.o + LD .tmp_vmlinux2 # temporary vmlinux for kallsyms step 2 + NM .tmp_vmlinux2.syms + KSYMS .tmp_vmlinux2.kallsyms.S + AS .tmp_vmlinux2.kallsyms.o + LD vmlinux # final vmlinux + +Signed-off-by: Masahiro Yamada +Acked-by: Andrii Nakryiko +Stable-dep-of: 1472464c6248 ("kbuild: avoid scripts/kallsyms parsing /dev/null") +Signed-off-by: Sasha Levin +--- + scripts/link-vmlinux.sh | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh +index 1e41b330550e6..22d0bc8439863 100755 +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -105,11 +105,10 @@ vmlinux_link() + + # generate .BTF typeinfo from DWARF debuginfo + # ${1} - vmlinux image +-# ${2} - file to dump raw BTF data into + gen_btf() + { + local pahole_ver +- local btf_data=${2} ++ local btf_data=${1}.btf.o + + if ! [ -x "$(command -v ${PAHOLE})" ]; then + echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" +@@ -122,8 +121,6 @@ gen_btf() + return 1 + fi + +- vmlinux_link ${1} +- + info BTF "${btf_data}" + LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1} + +@@ -169,15 +166,13 @@ kallsyms() + kallsymso=${2}.o + } + +-# Perform one step in kallsyms generation, including temporary linking of +-# vmlinux. +-kallsyms_step() ++# Perform kallsyms for the given temporary vmlinux. 
++sysmap_and_kallsyms() + { +- kallsyms_vmlinux=.tmp_vmlinux.kallsyms${1} ++ mksysmap "${1}" "${1}.syms" ++ kallsyms "${1}.syms" "${1}.kallsyms" + +- vmlinux_link "${kallsyms_vmlinux}" +- mksysmap "${kallsyms_vmlinux}" "${kallsyms_vmlinux}.syms" +- kallsyms "${kallsyms_vmlinux}.syms" "${kallsyms_vmlinux}" ++ kallsyms_sysmap=${1}.syms + } + + # Create map file with all symbols from ${1} +@@ -220,11 +215,21 @@ kallsymso= + strip_debug= + + if is_enabled CONFIG_KALLSYMS; then +- kallsyms /dev/null .tmp_vmlinux.kallsyms0 ++ kallsyms /dev/null .tmp_vmlinux0.kallsyms ++fi ++ ++if is_enabled CONFIG_KALLSYMS || is_enabled CONFIG_DEBUG_INFO_BTF; then ++ ++ # The kallsyms linking does not need debug symbols, but the BTF does. ++ if ! is_enabled CONFIG_DEBUG_INFO_BTF; then ++ strip_debug=1 ++ fi ++ ++ vmlinux_link .tmp_vmlinux1 + fi + + if is_enabled CONFIG_DEBUG_INFO_BTF; then +- if ! gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then ++ if ! gen_btf .tmp_vmlinux1; then + echo >&2 "Failed to generate BTF for vmlinux" + echo >&2 "Try to disable CONFIG_DEBUG_INFO_BTF" + exit 1 +@@ -260,14 +265,16 @@ if is_enabled CONFIG_KALLSYMS; then + # The kallsyms linking does not need debug symbols included. + strip_debug=1 + +- kallsyms_step 1 ++ sysmap_and_kallsyms .tmp_vmlinux1 + size1=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso}) + +- kallsyms_step 2 ++ vmlinux_link .tmp_vmlinux2 ++ sysmap_and_kallsyms .tmp_vmlinux2 + size2=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso}) + + if [ $size1 -ne $size2 ] || [ -n "${KALLSYMS_EXTRA_PASS}" ]; then +- kallsyms_step 3 ++ vmlinux_link .tmp_vmlinux3 ++ sysmap_and_kallsyms .tmp_vmlinux3 + fi + fi + +@@ -293,7 +300,7 @@ fi + + # step a (see comment above) + if is_enabled CONFIG_KALLSYMS; then +- if ! cmp -s System.map ${kallsyms_vmlinux}.syms; then ++ if ! 
cmp -s System.map "${kallsyms_sysmap}"; then + echo >&2 Inconsistent kallsyms data + echo >&2 'Try "make KALLSYMS_EXTRA_PASS=1" as a workaround' + exit 1 +-- +2.43.0 + diff --git a/queue-6.10/kcm-serialise-kcm_sendmsg-for-the-same-socket.patch b/queue-6.10/kcm-serialise-kcm_sendmsg-for-the-same-socket.patch new file mode 100644 index 00000000000..185edba65b8 --- /dev/null +++ b/queue-6.10/kcm-serialise-kcm_sendmsg-for-the-same-socket.patch @@ -0,0 +1,223 @@ +From 5a74a5df2363ccb3208481a0779c374f91901428 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 15:04:37 -0700 +Subject: kcm: Serialise kcm_sendmsg() for the same socket. + +From: Kuniyuki Iwashima + +[ Upstream commit 807067bf014d4a3ae2cc55bd3de16f22a01eb580 ] + +syzkaller reported UAF in kcm_release(). [0] + +The scenario is + + 1. Thread A builds a skb with MSG_MORE and sets kcm->seq_skb. + + 2. Thread A resumes building skb from kcm->seq_skb but is blocked + by sk_stream_wait_memory() + + 3. Thread B calls sendmsg() concurrently, finishes building kcm->seq_skb + and puts the skb to the write queue + + 4. Thread A faces an error and finally frees skb that is already in the + write queue + + 5. kcm_release() double-frees the skb in the write queue + +When a thread is building a MSG_MORE skb, another thread must not touch it. + +Let's add a per-sk mutex and serialise kcm_sendmsg(). 
+ +[0]: +BUG: KASAN: slab-use-after-free in __skb_unlink include/linux/skbuff.h:2366 [inline] +BUG: KASAN: slab-use-after-free in __skb_dequeue include/linux/skbuff.h:2385 [inline] +BUG: KASAN: slab-use-after-free in __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] +BUG: KASAN: slab-use-after-free in __skb_queue_purge include/linux/skbuff.h:3181 [inline] +BUG: KASAN: slab-use-after-free in kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 +Read of size 8 at addr ffff0000ced0fc80 by task syz-executor329/6167 + +CPU: 1 PID: 6167 Comm: syz-executor329 Tainted: G B 6.8.0-rc5-syzkaller-g9abbc24128bc #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 +Call trace: + dump_backtrace+0x1b8/0x1e4 arch/arm64/kernel/stacktrace.c:291 + show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:298 + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xd0/0x124 lib/dump_stack.c:106 + print_address_description mm/kasan/report.c:377 [inline] + print_report+0x178/0x518 mm/kasan/report.c:488 + kasan_report+0xd8/0x138 mm/kasan/report.c:601 + __asan_report_load8_noabort+0x20/0x2c mm/kasan/report_generic.c:381 + __skb_unlink include/linux/skbuff.h:2366 [inline] + __skb_dequeue include/linux/skbuff.h:2385 [inline] + __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] + __skb_queue_purge include/linux/skbuff.h:3181 [inline] + kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 + __sock_release net/socket.c:659 [inline] + sock_close+0xa4/0x1e8 net/socket.c:1421 + __fput+0x30c/0x738 fs/file_table.c:376 + ____fput+0x20/0x30 fs/file_table.c:404 + task_work_run+0x230/0x2e0 kernel/task_work.c:180 + exit_task_work include/linux/task_work.h:38 [inline] + do_exit+0x618/0x1f64 kernel/exit.c:871 + do_group_exit+0x194/0x22c kernel/exit.c:1020 + get_signal+0x1500/0x15ec kernel/signal.c:2893 + do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 + do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 + exit_to_user_mode_prepare 
arch/arm64/kernel/entry-common.c:169 [inline] + exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] + el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 + el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 + el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 + +Allocated by task 6166: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x40/0x78 mm/kasan/common.c:68 + kasan_save_alloc_info+0x70/0x84 mm/kasan/generic.c:626 + unpoison_slab_object mm/kasan/common.c:314 [inline] + __kasan_slab_alloc+0x74/0x8c mm/kasan/common.c:340 + kasan_slab_alloc include/linux/kasan.h:201 [inline] + slab_post_alloc_hook mm/slub.c:3813 [inline] + slab_alloc_node mm/slub.c:3860 [inline] + kmem_cache_alloc_node+0x204/0x4c0 mm/slub.c:3903 + __alloc_skb+0x19c/0x3d8 net/core/skbuff.c:641 + alloc_skb include/linux/skbuff.h:1296 [inline] + kcm_sendmsg+0x1d3c/0x2124 net/kcm/kcmsock.c:783 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg net/socket.c:745 [inline] + sock_sendmsg+0x220/0x2c0 net/socket.c:768 + splice_to_socket+0x7cc/0xd58 fs/splice.c:889 + do_splice_from fs/splice.c:941 [inline] + direct_splice_actor+0xec/0x1d8 fs/splice.c:1164 + splice_direct_to_actor+0x438/0xa0c fs/splice.c:1108 + do_splice_direct_actor fs/splice.c:1207 [inline] + do_splice_direct+0x1e4/0x304 fs/splice.c:1233 + do_sendfile+0x460/0xb3c fs/read_write.c:1295 + __do_sys_sendfile64 fs/read_write.c:1362 [inline] + __se_sys_sendfile64 fs/read_write.c:1348 [inline] + __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1348 + __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] + invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 + el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 + do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 + el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 + el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 + el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 + +Freed by task 
6167: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x40/0x78 mm/kasan/common.c:68 + kasan_save_free_info+0x5c/0x74 mm/kasan/generic.c:640 + poison_slab_object+0x124/0x18c mm/kasan/common.c:241 + __kasan_slab_free+0x3c/0x78 mm/kasan/common.c:257 + kasan_slab_free include/linux/kasan.h:184 [inline] + slab_free_hook mm/slub.c:2121 [inline] + slab_free mm/slub.c:4299 [inline] + kmem_cache_free+0x15c/0x3d4 mm/slub.c:4363 + kfree_skbmem+0x10c/0x19c + __kfree_skb net/core/skbuff.c:1109 [inline] + kfree_skb_reason+0x240/0x6f4 net/core/skbuff.c:1144 + kfree_skb include/linux/skbuff.h:1244 [inline] + kcm_release+0x104/0x4c8 net/kcm/kcmsock.c:1685 + __sock_release net/socket.c:659 [inline] + sock_close+0xa4/0x1e8 net/socket.c:1421 + __fput+0x30c/0x738 fs/file_table.c:376 + ____fput+0x20/0x30 fs/file_table.c:404 + task_work_run+0x230/0x2e0 kernel/task_work.c:180 + exit_task_work include/linux/task_work.h:38 [inline] + do_exit+0x618/0x1f64 kernel/exit.c:871 + do_group_exit+0x194/0x22c kernel/exit.c:1020 + get_signal+0x1500/0x15ec kernel/signal.c:2893 + do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 + do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 + exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] + exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] + el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 + el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 + el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 + +The buggy address belongs to the object at ffff0000ced0fc80 + which belongs to the cache skbuff_head_cache of size 240 +The buggy address is located 0 bytes inside of + freed 240-byte region [ffff0000ced0fc80, ffff0000ced0fd70) + +The buggy address belongs to the physical page: +page:00000000d35f4ae4 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10ed0f +flags: 0x5ffc00000000800(slab|node=0|zone=2|lastcpupid=0x7ff) +page_type: 0xffffffff() +raw: 
05ffc00000000800 ffff0000c1cbf640 fffffdffc3423100 dead000000000004 +raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff0000ced0fb80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff0000ced0fc00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc +>ffff0000ced0fc80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff0000ced0fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc + ffff0000ced0fd80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb + +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Reported-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=b72d86aa5df17ce74c60 +Tested-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20240815220437.69511-1-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/kcm.h | 1 + + net/kcm/kcmsock.c | 4 ++++ + 2 files changed, 5 insertions(+) + +diff --git a/include/net/kcm.h b/include/net/kcm.h +index 90279e5e09a5c..441e993be634c 100644 +--- a/include/net/kcm.h ++++ b/include/net/kcm.h +@@ -70,6 +70,7 @@ struct kcm_sock { + struct work_struct tx_work; + struct list_head wait_psock_list; + struct sk_buff *seq_skb; ++ struct mutex tx_mutex; + u32 tx_stopped : 1; + + /* Don't use bit fields here, these are set under different locks */ +diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c +index 2f191e50d4fc9..d4118c796290e 100644 +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -755,6 +755,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); + int err = -EPIPE; + ++ mutex_lock(&kcm->tx_mutex); + lock_sock(sk); + + /* Per tcp_sendmsg this should be in poll */ +@@ -926,6 +927,7 @@ static int 
kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + KCM_STATS_ADD(kcm->stats.tx_bytes, copied); + + release_sock(sk); ++ mutex_unlock(&kcm->tx_mutex); + return copied; + + out_error: +@@ -951,6 +953,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + sk->sk_write_space(sk); + + release_sock(sk); ++ mutex_unlock(&kcm->tx_mutex); + return err; + } + +@@ -1204,6 +1207,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) + spin_unlock_bh(&mux->lock); + + INIT_WORK(&kcm->tx_work, kcm_tx_work); ++ mutex_init(&kcm->tx_mutex); + + spin_lock_bh(&mux->rx_lock); + kcm_rcv_ready(kcm); +-- +2.43.0 + diff --git a/queue-6.10/makefile-add-srctree-to-dependency-of-compile_comman.patch b/queue-6.10/makefile-add-srctree-to-dependency-of-compile_comman.patch new file mode 100644 index 00000000000..ab247292c4b --- /dev/null +++ b/queue-6.10/makefile-add-srctree-to-dependency-of-compile_comman.patch @@ -0,0 +1,49 @@ +From a4e5db2e003a6b34d6a65561179d99432616cada Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 4 Aug 2024 14:50:57 +0900 +Subject: Makefile: add $(srctree) to dependency of compile_commands.json + target + +From: Alexandre Courbot + +[ Upstream commit 6fc9aacad49e3fbecd270c266850d50c453d52ef ] + +When trying to build compile_commands.json for an external module against +the kernel built in a separate output directory, the following error is +displayed: + + make[1]: *** No rule to make target 'scripts/clang-tools/gen_compile_commands.py', + needed by 'compile_commands.json'. Stop. + +This is because gen_compile_commands.py was previously looked up using a +relative path to $(srctree), but commit b1992c3772e6 ("kbuild: use +$(src) instead of $(srctree)/$(src) for source directory") stopped +defining VPATH for external module builds. + +Prefixing gen_compile_commands.py with $(srctree) fixes the problem. 
+ +Fixes: b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory") +Signed-off-by: Alexandre Courbot +Reviewed-by: Nicolas Schier +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Makefile b/Makefile +index 361a70264e1fb..194841a4efde9 100644 +--- a/Makefile ++++ b/Makefile +@@ -1986,7 +1986,7 @@ nsdeps: modules + quiet_cmd_gen_compile_commands = GEN $@ + cmd_gen_compile_commands = $(PYTHON3) $< -a $(AR) -o $@ $(filter-out $<, $(real-prereqs)) + +-$(extmod_prefix)compile_commands.json: scripts/clang-tools/gen_compile_commands.py \ ++$(extmod_prefix)compile_commands.json: $(srctree)/scripts/clang-tools/gen_compile_commands.py \ + $(if $(KBUILD_EXTMOD),, vmlinux.a $(KBUILD_VMLINUX_LIBS)) \ + $(if $(CONFIG_MODULES), $(MODORDER)) FORCE + $(call if_changed,gen_compile_commands) +-- +2.43.0 + diff --git a/queue-6.10/net-dsa-mv88e6xxx-fix-out-of-bound-access.patch b/queue-6.10/net-dsa-mv88e6xxx-fix-out-of-bound-access.patch new file mode 100644 index 00000000000..177e78dfbcd --- /dev/null +++ b/queue-6.10/net-dsa-mv88e6xxx-fix-out-of-bound-access.patch @@ -0,0 +1,39 @@ +From e824326b868c630ae7789b334f71992a005bf4c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2024 19:52:50 -0400 +Subject: net: dsa: mv88e6xxx: Fix out-of-bound access + +From: Joseph Huang + +[ Upstream commit 528876d867a23b5198022baf2e388052ca67c952 ] + +If an ATU violation was caused by a CPU Load operation, the SPID could +be larger than DSA_MAX_PORTS (the size of mv88e6xxx_chip.ports[] array). 
+ +Fixes: 75c05a74e745 ("net: dsa: mv88e6xxx: Fix counting of ATU violations") +Signed-off-by: Joseph Huang +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20240819235251.1331763-1-Joseph.Huang@garmin.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/global1_atu.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c +index ce3b3690c3c05..c47f068f56b32 100644 +--- a/drivers/net/dsa/mv88e6xxx/global1_atu.c ++++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c +@@ -457,7 +457,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) + trace_mv88e6xxx_atu_full_violation(chip->dev, spid, + entry.portvec, entry.mac, + fid); +- chip->ports[spid].atu_full_violation++; ++ if (spid < ARRAY_SIZE(chip->ports)) ++ chip->ports[spid].atu_full_violation++; + } + + return IRQ_HANDLED; +-- +2.43.0 + diff --git a/queue-6.10/net-mctp-test-use-correct-skb-for-route-input-check.patch b/queue-6.10/net-mctp-test-use-correct-skb-for-route-input-check.patch new file mode 100644 index 00000000000..f4a512d5242 --- /dev/null +++ b/queue-6.10/net-mctp-test-use-correct-skb-for-route-input-check.patch @@ -0,0 +1,44 @@ +From b778edf298c0814e2a69c380c697a11bad708bd3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Aug 2024 18:29:17 +0800 +Subject: net: mctp: test: Use correct skb for route input check + +From: Jeremy Kerr + +[ Upstream commit ce335db0621648472f9bb4b7191eb2e13a5793cf ] + +In the MCTP route input test, we're routing one skb, then (when delivery +is expected) checking the resulting routed skb. + +However, we're currently checking the original skb length, rather than +the routed skb. Check the routed skb instead; the original will have +been freed at this point. 
+ +Fixes: 8892c0490779 ("mctp: Add route input to socket tests") +Reported-by: Dan Carpenter +Closes: https://lore.kernel.org/kernel-janitors/4ad204f0-94cf-46c5-bdab-49592addf315@kili.mountain/ +Signed-off-by: Jeremy Kerr +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20240816-mctp-kunit-skb-fix-v1-1-3c367ac89c27@codeconstruct.com.au +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/mctp/test/route-test.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c +index 77e5dd4222580..8551dab1d1e69 100644 +--- a/net/mctp/test/route-test.c ++++ b/net/mctp/test/route-test.c +@@ -366,7 +366,7 @@ static void mctp_test_route_input_sk(struct kunit *test) + + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); +- KUNIT_EXPECT_EQ(test, skb->len, 1); ++ KUNIT_EXPECT_EQ(test, skb2->len, 1); + + skb_free_datagram(sock->sk, skb2); + +-- +2.43.0 + diff --git a/queue-6.10/net-mlx5-fix-ipsec-roce-mpv-trace-call.patch b/queue-6.10/net-mlx5-fix-ipsec-roce-mpv-trace-call.patch new file mode 100644 index 00000000000..3f80495e513 --- /dev/null +++ b/queue-6.10/net-mlx5-fix-ipsec-roce-mpv-trace-call.patch @@ -0,0 +1,97 @@ +From bfe6b85bdd5c6e3ba5e84f5cd0cafe3387abe1ac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 10:16:11 +0300 +Subject: net/mlx5: Fix IPsec RoCE MPV trace call + +From: Patrisious Haddad + +[ Upstream commit 607e1df7bd47fe91cab85a97f57870a26d066137 ] + +Prevent the call trace below from happening, by not allowing IPsec +creation over a slave, if master device doesn't support IPsec. 
+ +WARNING: CPU: 44 PID: 16136 at kernel/locking/rwsem.c:240 down_read+0x75/0x94 +Modules linked in: esp4_offload esp4 act_mirred act_vlan cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa mst_pciconf(OE) nfsv3 nfs_acl nfs lockd grace fscache netfs xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill cuse fuse rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_ipoib iw_cm ib_cm ipmi_ssif intel_rapl_msr intel_rapl_common amd64_edac edac_mce_amd kvm_amd kvm irqbypass crct10dif_pclmul crc32_pclmul mlx5_ib ghash_clmulni_intel sha1_ssse3 dell_smbios ib_uverbs aesni_intel crypto_simd dcdbas wmi_bmof dell_wmi_descriptor cryptd pcspkr ib_core acpi_ipmi sp5100_tco ccp i2c_piix4 ipmi_si ptdma k10temp ipmi_devintf ipmi_msghandler acpi_power_meter acpi_cpufreq ext4 mbcache jbd2 sd_mod t10_pi sg mgag200 drm_kms_helper syscopyarea sysfillrect mlx5_core sysimgblt fb_sys_fops cec + ahci libahci mlxfw drm pci_hyperv_intf libata tg3 sha256_ssse3 tls megaraid_sas i2c_algo_bit psample wmi dm_mirror dm_region_hash dm_log dm_mod [last unloaded: mst_pci] +CPU: 44 PID: 16136 Comm: kworker/44:3 Kdump: loaded Tainted: GOE 5.15.0-20240509.el8uek.uek7_u3_update_v6.6_ipsec_bf.x86_64 #2 +Hardware name: Dell Inc. 
PowerEdge R7525/074H08, BIOS 2.0.3 01/15/2021 +Workqueue: events xfrm_state_gc_task +RIP: 0010:down_read+0x75/0x94 +Code: 00 48 8b 45 08 65 48 8b 14 25 80 fc 01 00 83 e0 02 48 09 d0 48 83 c8 01 48 89 45 08 5d 31 c0 89 c2 89 c6 89 c7 e9 cb 88 3b 00 <0f> 0b 48 8b 45 08 a8 01 74 b2 a8 02 75 ae 48 89 c2 48 83 ca 02 f0 +RSP: 0018:ffffb26387773da8 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: ffffa08b658af900 RCX: 0000000000000001 +RDX: 0000000000000000 RSI: ff886bc5e1366f2f RDI: 0000000000000000 +RBP: ffffa08b658af940 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: ffffa0a9bfb31540 +R13: ffffa0a9bfb37900 R14: 0000000000000000 R15: ffffa0a9bfb37905 +FS: 0000000000000000(0000) GS:ffffa0a9bfb00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000055a45ed814e8 CR3: 000000109038a000 CR4: 0000000000350ee0 +Call Trace: + + ? show_trace_log_lvl+0x1d6/0x2f9 + ? show_trace_log_lvl+0x1d6/0x2f9 + ? mlx5_devcom_for_each_peer_begin+0x29/0x60 [mlx5_core] + ? down_read+0x75/0x94 + ? __warn+0x80/0x113 + ? down_read+0x75/0x94 + ? report_bug+0xa4/0x11d + ? handle_bug+0x35/0x8b + ? exc_invalid_op+0x14/0x75 + ? asm_exc_invalid_op+0x16/0x1b + ? down_read+0x75/0x94 + ? down_read+0xe/0x94 + mlx5_devcom_for_each_peer_begin+0x29/0x60 [mlx5_core] + mlx5_ipsec_fs_roce_tx_destroy+0xb1/0x130 [mlx5_core] + tx_destroy+0x1b/0xc0 [mlx5_core] + tx_ft_put+0x53/0xc0 [mlx5_core] + mlx5e_xfrm_free_state+0x45/0x90 [mlx5_core] + ___xfrm_state_destroy+0x10f/0x1a2 + xfrm_state_gc_task+0x81/0xa9 + process_one_work+0x1f1/0x3c6 + worker_thread+0x53/0x3e4 + ? process_one_work.cold+0x46/0x3c + kthread+0x127/0x144 + ? 
set_kthread_struct+0x60/0x52 + ret_from_fork+0x22/0x2d + +---[ end trace 5ef7896144d398e1 ]--- + +Fixes: dfbd229abeee ("net/mlx5: Configure IPsec steering for egress RoCEv2 MPV traffic") +Reviewed-by: Leon Romanovsky +Signed-off-by: Patrisious Haddad +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20240815071611.2211873-5-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +index 234cd00f71a1c..b7d4b1a2baf2e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +@@ -386,7 +386,8 @@ static int ipsec_fs_roce_tx_mpv_create(struct mlx5_core_dev *mdev, + return -EOPNOTSUPP; + + peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); +- if (!peer_priv) { ++ if (!peer_priv || !peer_priv->ipsec) { ++ mlx5_core_err(mdev, "IPsec not supported on master device\n"); + err = -EOPNOTSUPP; + goto release_peer; + } +@@ -455,7 +456,8 @@ static int ipsec_fs_roce_rx_mpv_create(struct mlx5_core_dev *mdev, + return -EOPNOTSUPP; + + peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); +- if (!peer_priv) { ++ if (!peer_priv || !peer_priv->ipsec) { ++ mlx5_core_err(mdev, "IPsec not supported on master device\n"); + err = -EOPNOTSUPP; + goto release_peer; + } +-- +2.43.0 + diff --git a/queue-6.10/net-mlx5e-xps-fix-oversight-of-multi-pf-netdev-chang.patch b/queue-6.10/net-mlx5e-xps-fix-oversight-of-multi-pf-netdev-chang.patch new file mode 100644 index 00000000000..3ee8da31c45 --- /dev/null +++ b/queue-6.10/net-mlx5e-xps-fix-oversight-of-multi-pf-netdev-chang.patch @@ -0,0 +1,55 @@ +From 719bf7a59f333580ab4d1907a975900474f66bda Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Thu, 15 Aug 2024 10:16:10 +0300 +Subject: net/mlx5e: XPS, Fix oversight of Multi-PF Netdev changes + +From: Carolina Jubran + +[ Upstream commit a07e953dafe5ebd88942dc861dfb06eaf055fb07 ] + +The offending commit overlooked the Multi-PF Netdev changes. + +Revert mlx5e_set_default_xps_cpumasks to incorporate Multi-PF Netdev +changes. + +Fixes: bcee093751f8 ("net/mlx5e: Modifying channels number and updating TX queues") +Signed-off-by: Carolina Jubran +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20240815071611.2211873-4-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index eedbcba226894..409f525f1703c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3005,15 +3005,18 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) + static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, + struct mlx5e_params *params) + { +- struct mlx5_core_dev *mdev = priv->mdev; +- int num_comp_vectors, ix, irq; +- +- num_comp_vectors = mlx5_comp_vectors_max(mdev); ++ int ix; + + for (ix = 0; ix < params->num_channels; ix++) { ++ int num_comp_vectors, irq, vec_ix; ++ struct mlx5_core_dev *mdev; ++ ++ mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); ++ num_comp_vectors = mlx5_comp_vectors_max(mdev); + cpumask_clear(priv->scratchpad.cpumask); ++ vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); + +- for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { ++ for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = mlx5_comp_vector_get_cpu(mdev, irq); + + cpumask_set_cpu(cpu, priv->scratchpad.cpumask); +-- +2.43.0 + diff --git 
a/queue-6.10/net-mscc-ocelot-fix-qos-class-for-injected-packets-w.patch b/queue-6.10/net-mscc-ocelot-fix-qos-class-for-injected-packets-w.patch new file mode 100644 index 00000000000..7d09283ea7b --- /dev/null +++ b/queue-6.10/net-mscc-ocelot-fix-qos-class-for-injected-packets-w.patch @@ -0,0 +1,106 @@ +From ad01d9077e049d77e52e61f98ad750c52b779804 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 03:07:03 +0300 +Subject: net: mscc: ocelot: fix QoS class for injected packets with + "ocelot-8021q" + +From: Vladimir Oltean + +[ Upstream commit e1b9e80236c540fa85d76e2d510d1b38e1968c5d ] + +There are 2 distinct code paths (listed below) in the source code which +set up an injection header for Ocelot(-like) switches. Code path (2) +lacks the QoS class and source port being set correctly. Especially the +improper QoS classification is a problem for the "ocelot-8021q" +alternative DSA tagging protocol, because we support tc-taprio and each +packet needs to be scheduled precisely through its time slot. This +includes PTP, which is normally assigned to a traffic class other than +0, but would be sent through TC 0 nonetheless. + +The code paths are: + +(1) ocelot_xmit_common() from net/dsa/tag_ocelot.c - called only by the + standard "ocelot" DSA tagging protocol which uses NPI-based + injection - sets up bit fields in the tag manually to account for + a small difference (destination port offset) between Ocelot and + Seville. Namely, ocelot_ifh_set_dest() is omitted out of + ocelot_xmit_common(), because there's also seville_ifh_set_dest(). + +(2) ocelot_ifh_set_basic(), called by: + - ocelot_fdma_prepare_skb() for FDMA transmission of the ocelot + switchdev driver + - ocelot_port_xmit() -> ocelot_port_inject_frame() for + register-based transmission of the ocelot switchdev driver + - felix_port_deferred_xmit() -> ocelot_port_inject_frame() for the + DSA tagger ocelot-8021q when it must transmit PTP frames (also + through register-based injection). 
+ sets the bit fields according to its own logic. + +The problem is that (2) doesn't call ocelot_ifh_set_qos_class(). +Copying that logic from ocelot_xmit_common() fixes that. + +Unfortunately, although desirable, it is not easily possible to +de-duplicate code paths (1) and (2), and make net/dsa/tag_ocelot.c +directly call ocelot_ifh_set_basic(), because of the ocelot/seville +difference. This is the "minimal" fix with some logic duplicated (but +at least more consolidated). + +Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") +Signed-off-by: Vladimir Oltean +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot.c | 10 +++++++++- + drivers/net/ethernet/mscc/ocelot_fdma.c | 1 - + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c +index 69a4e5a90475b..9301716e21d58 100644 +--- a/drivers/net/ethernet/mscc/ocelot.c ++++ b/drivers/net/ethernet/mscc/ocelot.c +@@ -1208,13 +1208,21 @@ void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ struct net_device *dev = skb->dev; + u64 vlan_tci, tag_type; ++ int qos_class; + + ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, + &tag_type); + ++ qos_class = netdev_get_num_tc(dev) ? 
++ netdev_get_prio_tc_map(dev, skb->priority) : skb->priority; ++ ++ memset(ifh, 0, OCELOT_TAG_LEN); + ocelot_ifh_set_bypass(ifh, 1); ++ ocelot_ifh_set_src(ifh, BIT_ULL(ocelot->num_phys_ports)); + ocelot_ifh_set_dest(ifh, BIT_ULL(port)); ++ ocelot_ifh_set_qos_class(ifh, qos_class); + ocelot_ifh_set_tag_type(ifh, tag_type); + ocelot_ifh_set_vlan_tci(ifh, vlan_tci); + if (rew_op) +@@ -1225,7 +1233,7 @@ EXPORT_SYMBOL(ocelot_ifh_set_basic); + void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, + u32 rew_op, struct sk_buff *skb) + { +- u32 ifh[OCELOT_TAG_LEN / 4] = {0}; ++ u32 ifh[OCELOT_TAG_LEN / 4]; + unsigned int i, count, last; + + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | +diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c +index 87b59cc5e4416..00326ae8c708b 100644 +--- a/drivers/net/ethernet/mscc/ocelot_fdma.c ++++ b/drivers/net/ethernet/mscc/ocelot_fdma.c +@@ -665,7 +665,6 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, + + ifh = skb_push(skb, OCELOT_TAG_LEN); + skb_put(skb, ETH_FCS_LEN); +- memset(ifh, 0, OCELOT_TAG_LEN); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); + + return 0; +-- +2.43.0 + diff --git a/queue-6.10/net-mscc-ocelot-serialize-access-to-the-injection-ex.patch b/queue-6.10/net-mscc-ocelot-serialize-access-to-the-injection-ex.patch new file mode 100644 index 00000000000..5a7427efec7 --- /dev/null +++ b/queue-6.10/net-mscc-ocelot-serialize-access-to-the-injection-ex.patch @@ -0,0 +1,245 @@ +From 1dc1be717800969117335feb2edbb9668f57f804 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 03:07:04 +0300 +Subject: net: mscc: ocelot: serialize access to the injection/extraction + groups + +From: Vladimir Oltean + +[ Upstream commit c5e12ac3beb0dd3a718296b2d8af5528e9ab728e ] + +As explained by Horatiu Vultur in commit 603ead96582d ("net: sparx5: Add +spinlock for frame transmission from CPU") which is for a similar 
+hardware design, multiple CPUs can simultaneously perform injection +or extraction. There are only 2 register groups for injection and 2 +for extraction, and the driver only uses one of each. So we'd better +serialize access using spin locks, otherwise frame corruption is +possible. + +Note that unlike in sparx5, FDMA in ocelot does not have this issue +because struct ocelot_fdma_tx_ring already contains an xmit_lock. + +I guess this is mostly a problem for NXP LS1028A, as that is dual core. +I don't think VSC7514 is. So I'm blaming the commit where LS1028A (aka +the felix DSA driver) started using register-based packet injection and +extraction. + +Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") +Signed-off-by: Vladimir Oltean +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/ocelot/felix.c | 11 +++++ + drivers/net/ethernet/mscc/ocelot.c | 52 ++++++++++++++++++++++ + drivers/net/ethernet/mscc/ocelot_vsc7514.c | 4 ++ + include/soc/mscc/ocelot.h | 9 ++++ + 4 files changed, 76 insertions(+) + +diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c +index 61e95487732dc..f5d26e724ae65 100644 +--- a/drivers/net/dsa/ocelot/felix.c ++++ b/drivers/net/dsa/ocelot/felix.c +@@ -528,7 +528,9 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds) + * so we need to be careful that there are no extra frames to be + * dequeued over MMIO, since we would never know to discard them. 
+ */ ++ ocelot_lock_xtr_grp_bh(ocelot, 0); + ocelot_drain_cpu_queue(ocelot, 0); ++ ocelot_unlock_xtr_grp_bh(ocelot, 0); + + return 0; + } +@@ -1504,6 +1506,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) + int port = xmit_work->dp->index; + int retries = 10; + ++ ocelot_lock_inj_grp(ocelot, 0); ++ + do { + if (ocelot_can_inject(ocelot, 0)) + break; +@@ -1512,6 +1516,7 @@ static void felix_port_deferred_xmit(struct kthread_work *work) + } while (--retries); + + if (!retries) { ++ ocelot_unlock_inj_grp(ocelot, 0); + dev_err(ocelot->dev, "port %d failed to inject skb\n", + port); + ocelot_port_purge_txtstamp_skb(ocelot, port, skb); +@@ -1521,6 +1526,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) + + ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); + ++ ocelot_unlock_inj_grp(ocelot, 0); ++ + consume_skb(skb); + kfree(xmit_work); + } +@@ -1671,6 +1678,8 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot) + if (!felix->info->quirk_no_xtr_irq) + return false; + ++ ocelot_lock_xtr_grp(ocelot, grp); ++ + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { + struct sk_buff *skb; + unsigned int type; +@@ -1707,6 +1716,8 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot) + ocelot_drain_cpu_queue(ocelot, 0); + } + ++ ocelot_unlock_xtr_grp(ocelot, grp); ++ + return true; + } + +diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c +index 9301716e21d58..f4e027a6fe955 100644 +--- a/drivers/net/ethernet/mscc/ocelot.c ++++ b/drivers/net/ethernet/mscc/ocelot.c +@@ -1099,6 +1099,48 @@ void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, + } + EXPORT_SYMBOL(ocelot_ptp_rx_timestamp); + ++void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp) ++ __acquires(&ocelot->inj_lock) ++{ ++ spin_lock(&ocelot->inj_lock); ++} ++EXPORT_SYMBOL_GPL(ocelot_lock_inj_grp); ++ ++void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp) ++ __releases(&ocelot->inj_lock) ++{ ++ 
spin_unlock(&ocelot->inj_lock); ++} ++EXPORT_SYMBOL_GPL(ocelot_unlock_inj_grp); ++ ++void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp) ++ __acquires(&ocelot->inj_lock) ++{ ++ spin_lock(&ocelot->inj_lock); ++} ++EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp); ++ ++void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp) ++ __releases(&ocelot->inj_lock) ++{ ++ spin_unlock(&ocelot->inj_lock); ++} ++EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp); ++ ++void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp) ++ __acquires(&ocelot->xtr_lock) ++{ ++ spin_lock_bh(&ocelot->xtr_lock); ++} ++EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp_bh); ++ ++void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp) ++ __releases(&ocelot->xtr_lock) ++{ ++ spin_unlock_bh(&ocelot->xtr_lock); ++} ++EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp_bh); ++ + int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) + { + u64 timestamp, src_port, len; +@@ -1109,6 +1151,8 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) + u32 val, *buf; + int err; + ++ lockdep_assert_held(&ocelot->xtr_lock); ++ + err = ocelot_xtr_poll_xfh(ocelot, grp, xfh); + if (err) + return err; +@@ -1184,6 +1228,8 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) + { + u32 val = ocelot_read(ocelot, QS_INJ_STATUS); + ++ lockdep_assert_held(&ocelot->inj_lock); ++ + if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp)))) + return false; + if (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp))) +@@ -1236,6 +1282,8 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, + u32 ifh[OCELOT_TAG_LEN / 4]; + unsigned int i, count, last; + ++ lockdep_assert_held(&ocelot->inj_lock); ++ + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | + QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); + +@@ -1272,6 +1320,8 @@ EXPORT_SYMBOL(ocelot_port_inject_frame); + + void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) + { ++ lockdep_assert_held(&ocelot->xtr_lock); ++ + while (ocelot_read(ocelot, 
QS_XTR_DATA_PRESENT) & BIT(grp)) + ocelot_read_rix(ocelot, QS_XTR_RD, grp); + } +@@ -2954,6 +3004,8 @@ int ocelot_init(struct ocelot *ocelot) + mutex_init(&ocelot->fwd_domain_lock); + spin_lock_init(&ocelot->ptp_clock_lock); + spin_lock_init(&ocelot->ts_id_lock); ++ spin_lock_init(&ocelot->inj_lock); ++ spin_lock_init(&ocelot->xtr_lock); + + ocelot->owq = alloc_ordered_workqueue("ocelot-owq", 0); + if (!ocelot->owq) +diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c +index 993212c3a7da6..c09dd2e3343cb 100644 +--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c ++++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c +@@ -51,6 +51,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) + struct ocelot *ocelot = arg; + int grp = 0, err; + ++ ocelot_lock_xtr_grp(ocelot, grp); ++ + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { + struct sk_buff *skb; + +@@ -69,6 +71,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) + if (err < 0) + ocelot_drain_cpu_queue(ocelot, 0); + ++ ocelot_unlock_xtr_grp(ocelot, grp); ++ + return IRQ_HANDLED; + } + +diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h +index 0297bc2277927..846132ca5503d 100644 +--- a/include/soc/mscc/ocelot.h ++++ b/include/soc/mscc/ocelot.h +@@ -813,6 +813,9 @@ struct ocelot { + const u32 *const *map; + struct list_head stats_regions; + ++ spinlock_t inj_lock; ++ spinlock_t xtr_lock; ++ + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; + int packet_buffer_size; + int num_frame_refs; +@@ -966,6 +969,12 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, + u32 val, u32 reg, u32 offset); + + /* Packet I/O */ ++void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp); ++void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp); ++void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp); ++void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp); ++void ocelot_lock_xtr_grp_bh(struct 
ocelot *ocelot, int grp); ++void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp); + bool ocelot_can_inject(struct ocelot *ocelot, int grp); + void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, + u32 rew_op, struct sk_buff *skb); +-- +2.43.0 + diff --git a/queue-6.10/net-mscc-ocelot-use-ocelot_xmit_get_vlan_info-also-f.patch b/queue-6.10/net-mscc-ocelot-use-ocelot_xmit_get_vlan_info-also-f.patch new file mode 100644 index 00000000000..e38c3df73d7 --- /dev/null +++ b/queue-6.10/net-mscc-ocelot-use-ocelot_xmit_get_vlan_info-also-f.patch @@ -0,0 +1,347 @@ +From 232e4900f7ba50db6b627f9fd74dede2a2ddd58c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 03:07:02 +0300 +Subject: net: mscc: ocelot: use ocelot_xmit_get_vlan_info() also for FDMA and + register injection + +From: Vladimir Oltean + +[ Upstream commit 67c3ca2c5cfe6a50772514e3349b5e7b3b0fac03 ] + +Problem description +------------------- + +On an NXP LS1028A (felix DSA driver) with the following configuration: + +- ocelot-8021q tagging protocol +- VLAN-aware bridge (with STP) spanning at least swp0 and swp1 +- 8021q VLAN upper interfaces on swp0 and swp1: swp0.700, swp1.700 +- ptp4l on swp0.700 and swp1.700 + +we see that the ptp4l instances do not see each other's traffic, +and they all go to the grand master state due to the +ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES condition. + +Jumping to the conclusion for the impatient +------------------------------------------- + +There is a zero-day bug in the ocelot switchdev driver in the way it +handles VLAN-tagged packet injection. The correct logic already exists in +the source code, in function ocelot_xmit_get_vlan_info() added by commit +5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). +But it is used only for normal NPI-based injection with the DSA "ocelot" +tagging protocol. The other injection code paths (register-based and +FDMA-based) roll their own wrong logic. 
This affects and was noticed on +the DSA "ocelot-8021q" protocol because it uses register-based injection. + +By moving ocelot_xmit_get_vlan_info() to a place that's common for both +the DSA tagger and the ocelot switch library, it can also be called from +ocelot_port_inject_frame() in ocelot.c. + +We need to touch the lines with ocelot_ifh_port_set()'s prototype +anyway, so let's rename it to something clearer regarding what it does, +and add a kernel-doc. ocelot_ifh_set_basic() should do. + +Investigation notes +------------------- + +Debugging reveals that PTP event (aka those carrying timestamps, like +Sync) frames injected into swp0.700 (but also swp1.700) hit the wire +with two VLAN tags: + +00000000: 01 1b 19 00 00 00 00 01 02 03 04 05 81 00 02 bc + ~~~~~~~~~~~ +00000010: 81 00 02 bc 88 f7 00 12 00 2c 00 00 02 00 00 00 + ~~~~~~~~~~~ +00000020: 00 00 00 00 00 00 00 00 00 00 00 01 02 ff fe 03 +00000030: 04 05 00 01 00 04 00 00 00 00 00 00 00 00 00 00 +00000040: 00 00 + +The second (unexpected) VLAN tag makes felix_check_xtr_pkt() -> +ptp_classify_raw() fail to see these as PTP packets at the link +partner's receiving end, and return PTP_CLASS_NONE (because the BPF +classifier is not written to expect 2 VLAN tags). + +The reason why packets have 2 VLAN tags is because the transmission +code treats VLAN incorrectly. + +Neither ocelot switchdev, nor felix DSA, declare the NETIF_F_HW_VLAN_CTAG_TX +feature. Therefore, at xmit time, all VLANs should be in the skb head, +and none should be in the hwaccel area. 
This is done by: + +static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, + netdev_features_t features) +{ + if (skb_vlan_tag_present(skb) && + !vlan_hw_offload_capable(features, skb->vlan_proto)) + skb = __vlan_hwaccel_push_inside(skb); + return skb; +} + +But ocelot_port_inject_frame() handles things incorrectly: + + ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + +void ocelot_ifh_port_set(struct sk_buff *skb, void *ifh, int port, u32 rew_op) +{ + (...) + if (vlan_tag) + ocelot_ifh_set_vlan_tci(ifh, vlan_tag); + (...) +} + +The way __vlan_hwaccel_push_inside() pushes the tag inside the skb head +is by calling: + +static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) +{ + skb->vlan_present = 0; +} + +which does _not_ zero out skb->vlan_tci as seen by skb_vlan_tag_get(). +This means that ocelot, when it calls skb_vlan_tag_get(), sees +(and uses) a residual skb->vlan_tci, while the same VLAN tag is +_already_ in the skb head. + +The trivial fix for double VLAN headers is to replace the content of +ocelot_ifh_port_set() with: + + if (skb_vlan_tag_present(skb)) + ocelot_ifh_set_vlan_tci(ifh, skb_vlan_tag_get(skb)); + +but this would not be correct either, because, as mentioned, +vlan_hw_offload_capable() is false for us, so we'd be inserting dead +code and we'd always transmit packets with VID=0 in the injection frame +header. + +I can't actually test the ocelot switchdev driver and rely exclusively +on code inspection, but I don't think traffic from 8021q uppers has ever +been injected properly, and not double-tagged. Thus I'm blaming the +introduction of VLAN fields in the injection header - early driver code. + +As hinted at in the early conclusion, what we _want_ to happen for +VLAN transmission was already described once in commit 5ca721c54d86 +("net: dsa: tag_ocelot: set the classified VLAN during xmit"). 
+ +ocelot_xmit_get_vlan_info() intends to ensure that if the port through +which we're transmitting is under a VLAN-aware bridge, the outer VLAN +tag from the skb head is stripped from there and inserted into the +injection frame header (so that the packet is processed in hardware +through that actual VLAN). And in all other cases, the packet is sent +with VID=0 in the injection frame header, since the port is VLAN-unaware +and has logic to strip this VID on egress (making it invisible to the +wire). + +Fixes: 08d02364b12f ("net: mscc: fix the injection header") +Signed-off-by: Vladimir Oltean +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot.c | 29 +++++++++++---- + drivers/net/ethernet/mscc/ocelot_fdma.c | 2 +- + include/linux/dsa/ocelot.h | 47 +++++++++++++++++++++++++ + include/soc/mscc/ocelot.h | 3 +- + net/dsa/tag_ocelot.c | 37 ++----------------- + 5 files changed, 75 insertions(+), 43 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c +index ed2fb44500b0c..69a4e5a90475b 100644 +--- a/drivers/net/ethernet/mscc/ocelot.c ++++ b/drivers/net/ethernet/mscc/ocelot.c +@@ -1193,17 +1193,34 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) + } + EXPORT_SYMBOL(ocelot_can_inject); + +-void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag) ++/** ++ * ocelot_ifh_set_basic - Set basic information in Injection Frame Header ++ * @ifh: Pointer to Injection Frame Header memory ++ * @ocelot: Switch private data structure ++ * @port: Egress port number ++ * @rew_op: Egress rewriter operation for PTP ++ * @skb: Pointer to socket buffer (packet) ++ * ++ * Populate the Injection Frame Header with basic information for this skb: the ++ * analyzer bypass bit, destination port, VLAN info, egress rewriter info. 
++ */ ++void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, ++ u32 rew_op, struct sk_buff *skb) + { ++ struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ u64 vlan_tci, tag_type; ++ ++ ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, ++ &tag_type); ++ + ocelot_ifh_set_bypass(ifh, 1); + ocelot_ifh_set_dest(ifh, BIT_ULL(port)); +- ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C); +- if (vlan_tag) +- ocelot_ifh_set_vlan_tci(ifh, vlan_tag); ++ ocelot_ifh_set_tag_type(ifh, tag_type); ++ ocelot_ifh_set_vlan_tci(ifh, vlan_tci); + if (rew_op) + ocelot_ifh_set_rew_op(ifh, rew_op); + } +-EXPORT_SYMBOL(ocelot_ifh_port_set); ++EXPORT_SYMBOL(ocelot_ifh_set_basic); + + void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, + u32 rew_op, struct sk_buff *skb) +@@ -1214,7 +1231,7 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | + QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); + +- ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); ++ ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); + + for (i = 0; i < OCELOT_TAG_LEN / 4; i++) + ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp); +diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c +index 312a468321544..87b59cc5e4416 100644 +--- a/drivers/net/ethernet/mscc/ocelot_fdma.c ++++ b/drivers/net/ethernet/mscc/ocelot_fdma.c +@@ -666,7 +666,7 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, + ifh = skb_push(skb, OCELOT_TAG_LEN); + skb_put(skb, ETH_FCS_LEN); + memset(ifh, 0, OCELOT_TAG_LEN); +- ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); ++ ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); + + return 0; + } +diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h +index dca2969015d80..6fbfbde68a37c 100644 +--- a/include/linux/dsa/ocelot.h ++++ b/include/linux/dsa/ocelot.h +@@ -5,6 +5,8 @@ + #ifndef 
_NET_DSA_TAG_OCELOT_H + #define _NET_DSA_TAG_OCELOT_H + ++#include <linux/if_bridge.h> ++#include <linux/if_vlan.h> + #include <linux/kthread.h> + #include <linux/packing.h> + #include <linux/skbuff.h> +@@ -273,4 +275,49 @@ static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) + return rew_op; + } + ++/** ++ * ocelot_xmit_get_vlan_info: Determine VLAN_TCI and TAG_TYPE for injected frame ++ * @skb: Pointer to socket buffer ++ * @br: Pointer to bridge device that the port is under, if any ++ * @vlan_tci: ++ * @tag_type: ++ * ++ * If the port is under a VLAN-aware bridge, remove the VLAN header from the ++ * payload and move it into the DSA tag, which will make the switch classify ++ * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, ++ * which is the pvid of standalone ports (OCELOT_STANDALONE_PVID), although not ++ * of VLAN-unaware bridge ports (that would be ocelot_vlan_unaware_pvid()). ++ * Anyway, VID 0 is fine because it is stripped on egress for these port modes, ++ * and source address learning is not performed for packets injected from the ++ * CPU anyway, so it doesn't matter that the VID is "wrong". ++ */ ++static inline void ocelot_xmit_get_vlan_info(struct sk_buff *skb, ++ struct net_device *br, ++ u64 *vlan_tci, u64 *tag_type) ++{ ++ struct vlan_ethhdr *hdr; ++ u16 proto, tci; ++ ++ if (!br || !br_vlan_enabled(br)) { ++ *vlan_tci = 0; ++ *tag_type = IFH_TAG_TYPE_C; ++ return; ++ } ++ ++ hdr = (struct vlan_ethhdr *)skb_mac_header(skb); ++ br_vlan_get_proto(br, &proto); ++ ++ if (ntohs(hdr->h_vlan_proto) == proto) { ++ vlan_remove_tag(skb, &tci); ++ *vlan_tci = tci; ++ } else { ++ rcu_read_lock(); ++ br_vlan_get_pvid_rcu(br, &tci); ++ rcu_read_unlock(); ++ *vlan_tci = tci; ++ } ++ ++ *tag_type = (proto != ETH_P_8021Q) ? 
IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; ++} ++ + #endif +diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h +index 1e1b40f4e664e..0297bc2277927 100644 +--- a/include/soc/mscc/ocelot.h ++++ b/include/soc/mscc/ocelot.h +@@ -969,7 +969,8 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, + bool ocelot_can_inject(struct ocelot *ocelot, int grp); + void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, + u32 rew_op, struct sk_buff *skb); +-void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); ++void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, ++ u32 rew_op, struct sk_buff *skb); + int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); + void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); + void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, +diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c +index e0e4300bfbd3f..bf6608fc6be70 100644 +--- a/net/dsa/tag_ocelot.c ++++ b/net/dsa/tag_ocelot.c +@@ -8,40 +8,6 @@ + #define OCELOT_NAME "ocelot" + #define SEVILLE_NAME "seville" + +-/* If the port is under a VLAN-aware bridge, remove the VLAN header from the +- * payload and move it into the DSA tag, which will make the switch classify +- * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, +- * which is the pvid of standalone and VLAN-unaware bridge ports. 
+- */ +-static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, +- u64 *vlan_tci, u64 *tag_type) +-{ +- struct net_device *br = dsa_port_bridge_dev_get(dp); +- struct vlan_ethhdr *hdr; +- u16 proto, tci; +- +- if (!br || !br_vlan_enabled(br)) { +- *vlan_tci = 0; +- *tag_type = IFH_TAG_TYPE_C; +- return; +- } +- +- hdr = skb_vlan_eth_hdr(skb); +- br_vlan_get_proto(br, &proto); +- +- if (ntohs(hdr->h_vlan_proto) == proto) { +- vlan_remove_tag(skb, &tci); +- *vlan_tci = tci; +- } else { +- rcu_read_lock(); +- br_vlan_get_pvid_rcu(br, &tci); +- rcu_read_unlock(); +- *vlan_tci = tci; +- } +- +- *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +-} +- + static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, + __be32 ifh_prefix, void **ifh) + { +@@ -53,7 +19,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, + u32 rew_op = 0; + u64 qos_class; + +- ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); ++ ocelot_xmit_get_vlan_info(skb, dsa_port_bridge_dev_get(dp), &vlan_tci, ++ &tag_type); + + qos_class = netdev_get_num_tc(netdev) ? + netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; +-- +2.43.0 + diff --git a/queue-6.10/net-ovs-fix-ovs_drop_reasons-error.patch b/queue-6.10/net-ovs-fix-ovs_drop_reasons-error.patch new file mode 100644 index 00000000000..985c2664fc3 --- /dev/null +++ b/queue-6.10/net-ovs-fix-ovs_drop_reasons-error.patch @@ -0,0 +1,65 @@ +From a5b60cf3a5ad1de04ea545cc4c6d007054c2cb5e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Aug 2024 20:32:52 +0800 +Subject: net: ovs: fix ovs_drop_reasons error + +From: Menglong Dong + +[ Upstream commit 57fb67783c4011581882f32e656d738da1f82042 ] + +There is something wrong with ovs_drop_reasons. ovs_drop_reasons[0] is +"OVS_DROP_LAST_ACTION", but OVS_DROP_LAST_ACTION == __OVS_DROP_REASON + 1, +which means that ovs_drop_reasons[1] should be "OVS_DROP_LAST_ACTION". 
+ +And as Adrian tested, without the patch, adding flow to drop packets +results in: + +drop at: do_execute_actions+0x197/0xb20 [openvsw (0xffffffffc0db6f97) +origin: software +input port ifindex: 8 +timestamp: Tue Aug 20 10:19:17 2024 859853461 nsec +protocol: 0x800 +length: 98 +original length: 98 +drop reason: OVS_DROP_ACTION_ERROR + +With the patch, the same results in: + +drop at: do_execute_actions+0x197/0xb20 [openvsw (0xffffffffc0db6f97) +origin: software +input port ifindex: 8 +timestamp: Tue Aug 20 10:16:13 2024 475856608 nsec +protocol: 0x800 +length: 98 +original length: 98 +drop reason: OVS_DROP_LAST_ACTION + +Fix this by initializing ovs_drop_reasons with index. + +Fixes: 9d802da40b7c ("net: openvswitch: add last-action drop reason") +Signed-off-by: Menglong Dong +Tested-by: Adrian Moreno +Reviewed-by: Adrian Moreno +Link: https://patch.msgid.link/20240821123252.186305-1-dongml2@chinatelecom.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/openvswitch/datapath.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c +index 99d72543abd3a..78d9961fcd446 100644 +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -2706,7 +2706,7 @@ static struct pernet_operations ovs_net_ops = { + }; + + static const char * const ovs_drop_reasons[] = { +-#define S(x) (#x), ++#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x), + OVS_DROP_REASONS(S) + #undef S + }; +-- +2.43.0 + diff --git a/queue-6.10/net-xilinx-axienet-always-disable-promiscuous-mode.patch b/queue-6.10/net-xilinx-axienet-always-disable-promiscuous-mode.patch new file mode 100644 index 00000000000..778d16cb124 --- /dev/null +++ b/queue-6.10/net-xilinx-axienet-always-disable-promiscuous-mode.patch @@ -0,0 +1,42 @@ +From d480c09a8698f6ae2c1bf9c71f1fddff0dbe587a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Aug 2024 11:40:55 -0400 +Subject: net: xilinx: axienet: Always disable 
promiscuous mode + +From: Sean Anderson + +[ Upstream commit 4ae738dfef2c0323752ab81786e2d298c9939321 ] + +If promiscuous mode is disabled when there are fewer than four multicast +addresses, then it will not be reflected in the hardware. Fix this by +always clearing the promiscuous mode flag even when we program multicast +addresses. + +Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") +Signed-off-by: Sean Anderson +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20240822154059.1066595-2-sean.anderson@linux.dev +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +index fa510f4e26008..b2e4d0b11a7d7 100644 +--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +@@ -450,6 +450,10 @@ static void axienet_set_multicast_list(struct net_device *ndev) + } else if (!netdev_mc_empty(ndev)) { + struct netdev_hw_addr *ha; + ++ reg = axienet_ior(lp, XAE_FMI_OFFSET); ++ reg &= ~XAE_FMI_PM_MASK; ++ axienet_iow(lp, XAE_FMI_OFFSET, reg); ++ + i = 0; + netdev_for_each_mc_addr(ha, ndev) { + if (i >= XAE_MULTICAST_CAM_TABLE_NUM) +-- +2.43.0 + diff --git a/queue-6.10/net-xilinx-axienet-fix-dangling-multicast-addresses.patch b/queue-6.10/net-xilinx-axienet-fix-dangling-multicast-addresses.patch new file mode 100644 index 00000000000..05dc5aa2942 --- /dev/null +++ b/queue-6.10/net-xilinx-axienet-fix-dangling-multicast-addresses.patch @@ -0,0 +1,94 @@ +From 7e80bdd7a6b251d69d53e81524c22cdae9936687 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Aug 2024 11:40:56 -0400 +Subject: net: xilinx: axienet: Fix dangling multicast addresses + +From: Sean Anderson + +[ Upstream commit 797a68c9de0f5a5447baf4bd3bb9c10a3993435b ] + +If a multicast address is removed 
but there are still some multicast +addresses, that address would remain programmed into the frame filter. +Fix this by explicitly setting the enable bit for each filter. + +Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") +Signed-off-by: Sean Anderson +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20240822154059.1066595-3-sean.anderson@linux.dev +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/xilinx/xilinx_axienet.h | 1 + + .../net/ethernet/xilinx/xilinx_axienet_main.c | 21 ++++++++----------- + 2 files changed, 10 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h +index c7d9221fafdcb..09c9f9787180b 100644 +--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h +@@ -170,6 +170,7 @@ + #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ + #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ + #define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ ++#define XAE_FFE_OFFSET 0x0000070C /* Frame Filter Enable */ + #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ + #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ + +diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +index b2e4d0b11a7d7..559c0d60d9483 100644 +--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +@@ -432,7 +432,7 @@ static int netdev_set_mac_address(struct net_device *ndev, void *p) + */ + static void axienet_set_multicast_list(struct net_device *ndev) + { +- int i; ++ int i = 0; + u32 reg, af0reg, af1reg; + struct axienet_local *lp = netdev_priv(ndev); + +@@ -454,7 +454,6 @@ static void axienet_set_multicast_list(struct net_device *ndev) + reg &= ~XAE_FMI_PM_MASK; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + +- i = 0; + 
netdev_for_each_mc_addr(ha, ndev) { + if (i >= XAE_MULTICAST_CAM_TABLE_NUM) + break; +@@ -473,6 +472,7 @@ static void axienet_set_multicast_list(struct net_device *ndev) + axienet_iow(lp, XAE_FMI_OFFSET, reg); + axienet_iow(lp, XAE_AF0_OFFSET, af0reg); + axienet_iow(lp, XAE_AF1_OFFSET, af1reg); ++ axienet_iow(lp, XAE_FFE_OFFSET, 1); + i++; + } + } else { +@@ -480,18 +480,15 @@ static void axienet_set_multicast_list(struct net_device *ndev) + reg &= ~XAE_FMI_PM_MASK; + + axienet_iow(lp, XAE_FMI_OFFSET, reg); +- +- for (i = 0; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { +- reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; +- reg |= i; +- +- axienet_iow(lp, XAE_FMI_OFFSET, reg); +- axienet_iow(lp, XAE_AF0_OFFSET, 0); +- axienet_iow(lp, XAE_AF1_OFFSET, 0); +- } +- + dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); + } ++ ++ for (; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { ++ reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; ++ reg |= i; ++ axienet_iow(lp, XAE_FMI_OFFSET, reg); ++ axienet_iow(lp, XAE_FFE_OFFSET, 0); ++ } + } + + /** +-- +2.43.0 + diff --git a/queue-6.10/netem-fix-return-value-if-duplicate-enqueue-fails.patch b/queue-6.10/netem-fix-return-value-if-duplicate-enqueue-fails.patch new file mode 100644 index 00000000000..a7dd2a582fd --- /dev/null +++ b/queue-6.10/netem-fix-return-value-if-duplicate-enqueue-fails.patch @@ -0,0 +1,138 @@ +From e7f7d0ab9d1ad1ddcdc77307df8a5eade691c212 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2024 10:56:45 -0700 +Subject: netem: fix return value if duplicate enqueue fails + +From: Stephen Hemminger + +[ Upstream commit c07ff8592d57ed258afee5a5e04991a48dbaf382 ] + +There is a bug in netem_enqueue() introduced by +commit 5845f706388a ("net: netem: fix skb length BUG_ON in __skb_to_sgvec") +that can lead to a use-after-free. + +This commit made netem_enqueue() always return NET_XMIT_SUCCESS +when a packet is duplicated, which can cause the parent qdisc's q.qlen +to be mistakenly incremented. 
When this happens qlen_notify() may be +skipped on the parent during destruction, leaving a dangling pointer +for some classful qdiscs like DRR. + +There are two ways for the bug to happen: + +- If the duplicated packet is dropped by rootq->enqueue() and then + the original packet is also dropped. +- If rootq->enqueue() sends the duplicated packet to a different qdisc + and the original packet is dropped. + +In both cases NET_XMIT_SUCCESS is returned even though no packets +are enqueued at the netem qdisc. + +The fix is to defer the enqueue of the duplicate packet until after +the original packet has been guaranteed to return NET_XMIT_SUCCESS. + +Fixes: 5845f706388a ("net: netem: fix skb length BUG_ON in __skb_to_sgvec") +Reported-by: Budimir Markovic +Signed-off-by: Stephen Hemminger +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20240819175753.5151-1-stephen@networkplumber.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_netem.c | 47 ++++++++++++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 18 deletions(-) + +diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c +index edc72962ae63a..0f8d581438c39 100644 +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -446,12 +446,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct netem_sched_data *q = qdisc_priv(sch); + /* We don't fill cb now as skb_unshare() may invalidate it */ + struct netem_skb_cb *cb; +- struct sk_buff *skb2; ++ struct sk_buff *skb2 = NULL; + struct sk_buff *segs = NULL; + unsigned int prev_len = qdisc_pkt_len(skb); + int count = 1; +- int rc = NET_XMIT_SUCCESS; +- int rc_drop = NET_XMIT_DROP; + + /* Do not fool qdisc_drop_all() */ + skb->prev = NULL; +@@ -480,19 +478,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + skb_orphan_partial(skb); + + /* +- * If we need to duplicate packet, then re-insert at top of the +- * qdisc tree, since parent queuer expects that only one
+- * skb will be queued. ++ * If we need to duplicate packet, then clone it before ++ * original is modified. + */ +- if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { +- struct Qdisc *rootq = qdisc_root_bh(sch); +- u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ +- +- q->duplicate = 0; +- rootq->enqueue(skb2, rootq, to_free); +- q->duplicate = dupsave; +- rc_drop = NET_XMIT_SUCCESS; +- } ++ if (count > 1) ++ skb2 = skb_clone(skb, GFP_ATOMIC); + + /* + * Randomized packet corruption. +@@ -504,7 +494,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + if (skb_is_gso(skb)) { + skb = netem_segment(skb, sch, to_free); + if (!skb) +- return rc_drop; ++ goto finish_segs; ++ + segs = skb->next; + skb_mark_not_on_list(skb); + qdisc_skb_cb(skb)->pkt_len = skb->len; +@@ -530,7 +521,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + /* re-link segs, so that qdisc_drop_all() frees them all */ + skb->next = segs; + qdisc_drop_all(skb, sch, to_free); +- return rc_drop; ++ if (skb2) ++ __qdisc_drop(skb2, to_free); ++ return NET_XMIT_DROP; ++ } ++ ++ /* ++ * If doing duplication then re-insert at top of the ++ * qdisc tree, since parent queuer expects that only one ++ * skb will be queued. ++ */ ++ if (skb2) { ++ struct Qdisc *rootq = qdisc_root_bh(sch); ++ u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ ++ ++ q->duplicate = 0; ++ rootq->enqueue(skb2, rootq, to_free); ++ q->duplicate = dupsave; ++ skb2 = NULL; + } + + qdisc_qstats_backlog_inc(sch, skb); +@@ -601,9 +609,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + } + + finish_segs: ++ if (skb2) ++ __qdisc_drop(skb2, to_free); ++ + if (segs) { + unsigned int len, last_len; +- int nb; ++ int rc, nb; + + len = skb ? skb->len : 0; + nb = skb ? 
1 : 0; +-- +2.43.0 + diff --git a/queue-6.10/netfilter-flowtable-validate-vlan-header.patch b/queue-6.10/netfilter-flowtable-validate-vlan-header.patch new file mode 100644 index 00000000000..313e5fa2a65 --- /dev/null +++ b/queue-6.10/netfilter-flowtable-validate-vlan-header.patch @@ -0,0 +1,60 @@ +From 65ae68542a51a7afcf43c158a3032d551c406dc0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Aug 2024 12:39:46 +0200 +Subject: netfilter: flowtable: validate vlan header + +From: Pablo Neira Ayuso + +[ Upstream commit 6ea14ccb60c8ab829349979b22b58a941ec4a3ee ] + +Ensure there is sufficient room to access the protocol field of the +VLAN header, validate it once before the flowtable lookup. + +===================================================== +BUG: KMSAN: uninit-value in nf_flow_offload_inet_hook+0x45a/0x5f0 net/netfilter/nf_flow_table_inet.c:32 + nf_flow_offload_inet_hook+0x45a/0x5f0 net/netfilter/nf_flow_table_inet.c:32 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 + nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline] + nf_ingress net/core/dev.c:5440 [inline] + +Fixes: 4cd91f7c290f ("netfilter: flowtable: add vlan support") +Reported-by: syzbot+8407d9bb88cd4c6bf61a@syzkaller.appspotmail.com +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_flow_table_inet.c | 3 +++ + net/netfilter/nf_flow_table_ip.c | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c +index 6eef15648b7b0..b0f1991719324 100644 +--- a/net/netfilter/nf_flow_table_inet.c ++++ b/net/netfilter/nf_flow_table_inet.c +@@ -17,6 +17,9 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, + + switch (skb->protocol) { + case htons(ETH_P_8021Q): ++ if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) ++ return NF_ACCEPT; ++ + veth = (struct vlan_ethhdr *)skb_mac_header(skb); + proto = 
veth->h_vlan_encapsulated_proto; + break; +diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c +index c2c005234dcd3..98edcaa37b38d 100644 +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -281,6 +281,9 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, + + switch (skb->protocol) { + case htons(ETH_P_8021Q): ++ if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) ++ return false; ++ + veth = (struct vlan_ethhdr *)skb_mac_header(skb); + if (veth->h_vlan_encapsulated_proto == proto) { + *offset += VLAN_HLEN; +-- +2.43.0 + diff --git a/queue-6.10/netfilter-nft_counter-disable-bh-in-nft_counter_offl.patch b/queue-6.10/netfilter-nft_counter-disable-bh-in-nft_counter_offl.patch new file mode 100644 index 00000000000..021806705ae --- /dev/null +++ b/queue-6.10/netfilter-nft_counter-disable-bh-in-nft_counter_offl.patch @@ -0,0 +1,55 @@ +From 6dcacb7d0302ddf19d191b698e8f5a36d72ce9d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2024 09:54:30 +0200 +Subject: netfilter: nft_counter: Disable BH in nft_counter_offload_stats(). + +From: Sebastian Andrzej Siewior + +[ Upstream commit 1eacdd71b3436b54d5fc8218c4bb0187d92a6892 ] + +The sequence counter nft_counter_seq is a per-CPU counter. There is no +lock associated with it. nft_counter_do_eval() is using the same counter +and disables BH which suggests that it can be invoked from a softirq. +This in turn means that nft_counter_offload_stats(), which disables only +preemption, can be interrupted by nft_counter_do_eval() leading to two +writers for one seqcount_t. +This can lead to losing stats or reading statistics while they are +updated. + +Disable BH during stats update in nft_counter_offload_stats() to ensure +one writer at a time.
+ +Fixes: b72920f6e4a9d ("netfilter: nftables: counter hardware offload support") +Signed-off-by: Sebastian Andrzej Siewior +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_counter.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c +index 291ed2026367e..16f40b503d379 100644 +--- a/net/netfilter/nft_counter.c ++++ b/net/netfilter/nft_counter.c +@@ -265,7 +265,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, + struct nft_counter *this_cpu; + seqcount_t *myseq; + +- preempt_disable(); ++ local_bh_disable(); + this_cpu = this_cpu_ptr(priv->counter); + myseq = this_cpu_ptr(&nft_counter_seq); + +@@ -273,7 +273,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, + this_cpu->packets += stats->pkts; + this_cpu->bytes += stats->bytes; + write_seqcount_end(myseq); +- preempt_enable(); ++ local_bh_enable(); + } + + void nft_counter_init_seqcount(void) +-- +2.43.0 + diff --git a/queue-6.10/netfilter-nft_counter-synchronize-nft_counter_reset-.patch b/queue-6.10/netfilter-nft_counter-synchronize-nft_counter_reset-.patch new file mode 100644 index 00000000000..b5afe85a0cd --- /dev/null +++ b/queue-6.10/netfilter-nft_counter-synchronize-nft_counter_reset-.patch @@ -0,0 +1,50 @@ +From 502f5b25dd123cd3b7e4bab6f1b6cf84c05c0d87 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2024 09:54:31 +0200 +Subject: netfilter: nft_counter: Synchronize nft_counter_reset() against + reader. + +From: Sebastian Andrzej Siewior + +[ Upstream commit a0b39e2dc7017ac667b70bdeee5293e410fab2fb ] + +nft_counter_reset() resets the counter by subtracting the previously +retrieved value from the counter. This is a write operation on the +counter and as such it requires to be performed with a write sequence of +nft_counter_seq to serialize against its possible reader. 
+ +Update the packets/ bytes within write-sequence of nft_counter_seq. + +Fixes: d84701ecbcd6a ("netfilter: nft_counter: rework atomic dump and reset") +Signed-off-by: Sebastian Andrzej Siewior +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_counter.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c +index 16f40b503d379..eab0dc66bee6b 100644 +--- a/net/netfilter/nft_counter.c ++++ b/net/netfilter/nft_counter.c +@@ -107,11 +107,16 @@ static void nft_counter_reset(struct nft_counter_percpu_priv *priv, + struct nft_counter *total) + { + struct nft_counter *this_cpu; ++ seqcount_t *myseq; + + local_bh_disable(); + this_cpu = this_cpu_ptr(priv->counter); ++ myseq = this_cpu_ptr(&nft_counter_seq); ++ ++ write_seqcount_begin(myseq); + this_cpu->packets -= total->packets; + this_cpu->bytes -= total->bytes; ++ write_seqcount_end(myseq); + local_bh_enable(); + } + +-- +2.43.0 + diff --git a/queue-6.10/octeontx2-af-fix-cpt-af-register-offset-calculation.patch b/queue-6.10/octeontx2-af-fix-cpt-af-register-offset-calculation.patch new file mode 100644 index 00000000000..49808535065 --- /dev/null +++ b/queue-6.10/octeontx2-af-fix-cpt-af-register-offset-calculation.patch @@ -0,0 +1,88 @@ +From f5c51c75acbf2bb8582dd9b3256ed9547c619713 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Aug 2024 12:35:58 +0530 +Subject: octeontx2-af: Fix CPT AF register offset calculation + +From: Bharat Bhushan + +[ Upstream commit af688a99eb1fc7ef69774665d61e6be51cea627a ] + +Some CPT AF registers are per LF and others are global. Translation +of PF/VF local LF slot number to actual LF slot number is required +only for accessing perf LF registers. CPT AF global registers access +do not require any LF slot number. Also, there is no reason CPT +PF/VF to know actual lf's register offset. 
+ +Without this fix microcode loading will fail, VFs cannot be created +and hardware is not usable. + +Fixes: bc35e28af789 ("octeontx2-af: replace cpt slot with lf id on reg write") +Signed-off-by: Bharat Bhushan +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20240821070558.1020101-1-bbhushan2@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 23 +++++++++---------- + 1 file changed, 11 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +index 3e09d22858147..daf4b951e9059 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +@@ -632,7 +632,9 @@ int rvu_mbox_handler_cpt_inline_ipsec_cfg(struct rvu *rvu, + return ret; + } + +-static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) ++static bool validate_and_update_reg_offset(struct rvu *rvu, ++ struct cpt_rd_wr_reg_msg *req, ++ u64 *reg_offset) + { + u64 offset = req->reg_offset; + int blkaddr, num_lfs, lf; +@@ -663,6 +665,11 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) + if (lf < 0) + return false; + ++ /* Translate local LF's offset to global CPT LF's offset to ++ * access LFX register. 
++ */ ++ *reg_offset = (req->reg_offset & 0xFF000) + (lf << 3); ++ + return true; + } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { + /* Registers that can be accessed from PF */ +@@ -697,7 +704,7 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, + struct cpt_rd_wr_reg_msg *rsp) + { + u64 offset = req->reg_offset; +- int blkaddr, lf; ++ int blkaddr; + + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) +@@ -708,18 +715,10 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, + !is_cpt_vf(rvu, req->hdr.pcifunc)) + return CPT_AF_ERR_ACCESS_DENIED; + +- if (!is_valid_offset(rvu, req)) ++ if (!validate_and_update_reg_offset(rvu, req, &offset)) + return CPT_AF_ERR_ACCESS_DENIED; + +- /* Translate local LF used by VFs to global CPT LF */ +- lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, +- (offset & 0xFFF) >> 3); +- +- /* Translate local LF's offset to global CPT LF's offset */ +- offset &= 0xFF000; +- offset += lf << 3; +- +- rsp->reg_offset = offset; ++ rsp->reg_offset = req->reg_offset; + rsp->ret_val = req->ret_val; + rsp->is_write = req->is_write; + +-- +2.43.0 + diff --git a/queue-6.10/s390-iucv-fix-vargs-handling-in-iucv_alloc_device.patch b/queue-6.10/s390-iucv-fix-vargs-handling-in-iucv_alloc_device.patch new file mode 100644 index 00000000000..70893622eb0 --- /dev/null +++ b/queue-6.10/s390-iucv-fix-vargs-handling-in-iucv_alloc_device.patch @@ -0,0 +1,54 @@ +From 2fddac94d8d189dd3b73536556261d3f5af81968 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Aug 2024 11:13:37 +0200 +Subject: s390/iucv: Fix vargs handling in iucv_alloc_device() + +From: Alexandra Winter + +[ Upstream commit 0124fb0ebf3b0ef89892d42147c9387be3105318 ] + +iucv_alloc_device() gets a format string and a varying number of +arguments. This is incorrectly forwarded by calling dev_set_name() with +the format string and a va_list, while dev_set_name() expects also a +varying number of arguments. 
+ +Symptoms: +Corrupted iucv device names, which can result in log messages like: +sysfs: cannot create duplicate filename '/devices/iucv/hvc_iucv1827699952' + +Fixes: 4452e8ef8c36 ("s390/iucv: Provide iucv_alloc_device() / iucv_release_device()") +Link: https://bugzilla.suse.com/show_bug.cgi?id=1228425 +Signed-off-by: Alexandra Winter +Reviewed-by: Thorsten Winkler +Reviewed-by: Przemek Kitszel +Link: https://patch.msgid.link/20240821091337.3627068-1-wintera@linux.ibm.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/iucv/iucv.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c +index b7bf34a5eb37a..1235307020075 100644 +--- a/net/iucv/iucv.c ++++ b/net/iucv/iucv.c +@@ -86,13 +86,15 @@ struct device *iucv_alloc_device(const struct attribute_group **attrs, + { + struct device *dev; + va_list vargs; ++ char buf[20]; + int rc; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + goto out_error; + va_start(vargs, fmt); +- rc = dev_set_name(dev, fmt, vargs); ++ vsnprintf(buf, sizeof(buf), fmt, vargs); ++ rc = dev_set_name(dev, "%s", buf); + va_end(vargs); + if (rc) + goto out_error; +-- +2.43.0 + diff --git a/queue-6.10/selftests-mlxsw-ethtool_lanes-source-ethtool-lib-fro.patch b/queue-6.10/selftests-mlxsw-ethtool_lanes-source-ethtool-lib-fro.patch new file mode 100644 index 00000000000..bd580e69b1e --- /dev/null +++ b/queue-6.10/selftests-mlxsw-ethtool_lanes-source-ethtool-lib-fro.patch @@ -0,0 +1,49 @@ +From ff1f338f1729343fea294a70aeff4c8c1d0522f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2024 12:53:47 +0200 +Subject: selftests: mlxsw: ethtool_lanes: Source ethtool lib from correct path + +From: Ido Schimmel + +[ Upstream commit f8669d7b5f5d2d88959456ae9123d8bb6fdc1ebe ] + +Source the ethtool library from the correct path and avoid the following +error: + +./ethtool_lanes.sh: line 14: ./../../../net/forwarding/ethtool_lib.sh: No such file or directory + 
+Fixes: 40d269c000bd ("selftests: forwarding: Move several selftests") +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/2112faff02e536e1ac14beb4c2be09c9574b90ae.1724150067.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +index 877cd6df94a10..fe905a7f34b3c 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +@@ -2,6 +2,7 @@ + # SPDX-License-Identifier: GPL-2.0 + + lib_dir=$(dirname $0)/../../../net/forwarding ++ethtool_lib_dir=$(dirname $0)/../hw + + ALL_TESTS=" + autoneg +@@ -11,7 +12,7 @@ ALL_TESTS=" + NUM_NETIFS=2 + : ${TIMEOUT:=30000} # ms + source $lib_dir/lib.sh +-source $lib_dir/ethtool_lib.sh ++source $ethtool_lib_dir/ethtool_lib.sh + + setup_prepare() + { +-- +2.43.0 + diff --git a/queue-6.10/selftests-udpgro-no-need-to-load-xdp-for-gro.patch b/queue-6.10/selftests-udpgro-no-need-to-load-xdp-for-gro.patch new file mode 100644 index 00000000000..809d5af5766 --- /dev/null +++ b/queue-6.10/selftests-udpgro-no-need-to-load-xdp-for-gro.patch @@ -0,0 +1,91 @@ +From 33efe9cbe41850fe89a226ed16c07525c810c2ad Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 15:59:51 +0800 +Subject: selftests: udpgro: no need to load xdp for gro +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hangbin Liu + +[ Upstream commit d7818402b1d80347c764001583f6d63fa68c2e1a ] + +After commit d7db7775ea2e ("net: veth: do not manipulate GRO when using +XDP"), there is no need to load XDP program to enable GRO. 
On the other +hand, the current test is failed due to loading the XDP program. e.g. + + # selftests: net: udpgro.sh + # ipv4 + # no GRO ok + # no GRO chk cmsg ok + # GRO ./udpgso_bench_rx: recv: bad packet len, got 1472, expected 14720 + # + # failed + + [...] + + # bad GRO lookup ok + # multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 + # + # ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 + # + # failed + ok 1 selftests: net: udpgro.sh + +After fix, all the test passed. + + # ./udpgro.sh + ipv4 + no GRO ok + [...] + multiple GRO socks ok + +Fixes: d7db7775ea2e ("net: veth: do not manipulate GRO when using XDP") +Reported-by: Yi Chen +Closes: https://issues.redhat.com/browse/RHEL-53858 +Reviewed-by: Toke Høiland-Jørgensen +Acked-by: Paolo Abeni +Signed-off-by: Hangbin Liu +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/udpgro.sh | 9 +-------- + 1 file changed, 1 insertion(+), 8 deletions(-) + +diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh +index 4659cf01e4384..d5ffd8c9172e1 100755 +--- a/tools/testing/selftests/net/udpgro.sh ++++ b/tools/testing/selftests/net/udpgro.sh +@@ -7,8 +7,6 @@ source net_helper.sh + + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +-BPF_FILE="xdp_dummy.bpf.o" +- + # set global exit status, but never reset nonzero one. + check_err() + { +@@ -38,7 +36,7 @@ cfg_veth() { + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up +- ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp ++ ip netns exec "${PEER_NS}" ethtool -K veth1 gro on + } + + run_one() { +@@ -206,11 +204,6 @@ run_all() { + return $ret + } + +-if [ ! -f ${BPF_FILE} ]; then +- echo "Missing ${BPF_FILE}. 
Run 'make' first" +- exit -1 +-fi +- + if [[ $# -eq 0 ]]; then + run_all + elif [[ $1 == "__subprocess" ]]; then +-- +2.43.0 + diff --git a/queue-6.10/selftests-udpgro-report-error-when-receive-failed.patch b/queue-6.10/selftests-udpgro-report-error-when-receive-failed.patch new file mode 100644 index 00000000000..10bc9bbeab2 --- /dev/null +++ b/queue-6.10/selftests-udpgro-report-error-when-receive-failed.patch @@ -0,0 +1,139 @@ +From 5aebb126154a4e2300680d11cd7409d2e6723e63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 15:59:50 +0800 +Subject: selftests: udpgro: report error when receive failed + +From: Hangbin Liu + +[ Upstream commit 7167395a4be7930ecac6a33b4e54d7e3dd9ee209 ] + +Currently, we only check the latest sender's exit code. If the receiver +reports a failure, it is not recorded. Fix it by checking the exit code +of all the involved processes. + +Before: + bad GRO lookup ok + multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 + + ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 + + failed + $ echo $? + 0 + +After: + bad GRO lookup ok + multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 + + ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 + + failed + $ echo $? + 1 + +Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO") +Suggested-by: Paolo Abeni +Signed-off-by: Hangbin Liu +Signed-off-by: David S.
Miller +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/udpgro.sh | 44 ++++++++++++++++----------- + 1 file changed, 27 insertions(+), 17 deletions(-) + +diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh +index 11a1ebda564fd..4659cf01e4384 100755 +--- a/tools/testing/selftests/net/udpgro.sh ++++ b/tools/testing/selftests/net/udpgro.sh +@@ -46,17 +46,19 @@ run_one() { + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} ++ local ret=0 + + cfg_veth + +- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ +- echo "ok" || \ +- echo "failed" & ++ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & ++ local PID1=$! + + wait_local_port_listen ${PEER_NS} 8000 udp + ./udpgso_bench_tx ${tx_args} +- ret=$? +- wait $(jobs -p) ++ check_err $? ++ wait ${PID1} ++ check_err $? ++ [ "$ret" -eq 0 ] && echo "ok" || echo "failed" + return $ret + } + +@@ -73,6 +75,7 @@ run_one_nat() { + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} ++ local ret=0 + + if [[ ${tx_args} = *-4* ]]; then + ipt_cmd=iptables +@@ -93,16 +96,17 @@ run_one_nat() { + # ... so that GRO will match the UDP_GRO enabled socket, but packets + # will land on the 'plain' one + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & +- pid=$! +- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ +- echo "ok" || \ +- echo "failed"& ++ local PID1=$! ++ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & ++ local PID2=$! + + wait_local_port_listen "${PEER_NS}" 8000 udp + ./udpgso_bench_tx ${tx_args} +- ret=$? +- kill -INT $pid +- wait $(jobs -p) ++ check_err $? ++ kill -INT ${PID1} ++ wait ${PID2} ++ check_err $? 
++ [ "$ret" -eq 0 ] && echo "ok" || echo "failed" + return $ret + } + +@@ -111,20 +115,26 @@ run_one_2sock() { + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} ++ local ret=0 + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & +- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ +- echo "ok" || \ +- echo "failed" & ++ local PID1=$! ++ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & ++ local PID2=$! + + wait_local_port_listen "${PEER_NS}" 12345 udp + ./udpgso_bench_tx ${tx_args} -p 12345 ++ check_err $? + wait_local_port_listen "${PEER_NS}" 8000 udp + ./udpgso_bench_tx ${tx_args} +- ret=$? +- wait $(jobs -p) ++ check_err $? ++ wait ${PID1} ++ check_err $? ++ wait ${PID2} ++ check_err $? ++ [ "$ret" -eq 0 ] && echo "ok" || echo "failed" + return $ret + } + +-- +2.43.0 + diff --git a/queue-6.10/series b/queue-6.10/series index 828c6bfaaed..f0f2600db59 100644 --- a/queue-6.10/series +++ b/queue-6.10/series @@ -145,3 +145,47 @@ dm-suspend-return-erestartsys-instead-of-eintr.patch wifi-ath12k-use-128-bytes-aligned-iova-in-transmit-p.patch platform-surface-aggregator-fix-warning-when-control.patch alsa-hda-tas2781-use-correct-endian-conversion.patch +makefile-add-srctree-to-dependency-of-compile_comman.patch +kbuild-merge-temporary-vmlinux-for-btf-and-kallsyms.patch +kbuild-avoid-scripts-kallsyms-parsing-dev-null.patch +bluetooth-hci-invert-le-state-quirk-to-be-opt-out-ra.patch +bluetooth-hci_core-fix-le-quote-calculation.patch +bluetooth-smp-fix-assumption-of-central-always-being.patch +net-mscc-ocelot-use-ocelot_xmit_get_vlan_info-also-f.patch +net-mscc-ocelot-fix-qos-class-for-injected-packets-w.patch +net-mscc-ocelot-serialize-access-to-the-injection-ex.patch +net-mlx5e-xps-fix-oversight-of-multi-pf-netdev-chang.patch +net-mlx5-fix-ipsec-roce-mpv-trace-call.patch +tc-testing-don-t-access-non-existent-variable-on-exc.patch 
+selftests-udpgro-report-error-when-receive-failed.patch +selftests-udpgro-no-need-to-load-xdp-for-gro.patch +tcp-prevent-concurrent-execution-of-tcp_sk_exit_batc.patch +net-mctp-test-use-correct-skb-for-route-input-check.patch +kcm-serialise-kcm_sendmsg-for-the-same-socket.patch +netfilter-nft_counter-disable-bh-in-nft_counter_offl.patch +netfilter-nft_counter-synchronize-nft_counter_reset-.patch +ip6_tunnel-fix-broken-gro.patch +bonding-fix-bond_ipsec_offload_ok-return-type.patch +bonding-fix-null-pointer-deref-in-bond_ipsec_offload.patch +bonding-fix-xfrm-real_dev-null-pointer-dereference.patch +bonding-fix-xfrm-state-handling-when-clearing-active.patch +ice-fix-page-reuse-when-page_size-is-over-8k.patch +ice-fix-ice_last_offset-formula.patch +ice-fix-truesize-operations-for-page_size-8192.patch +ice-use-internal-pf-id-instead-of-function-number.patch +dpaa2-switch-fix-error-checking-in-dpaa2_switch_seed.patch +igb-cope-with-large-max_skb_frags.patch +net-dsa-mv88e6xxx-fix-out-of-bound-access.patch +netem-fix-return-value-if-duplicate-enqueue-fails.patch +udp-fix-receiving-fraglist-gso-packets.patch +selftests-mlxsw-ethtool_lanes-source-ethtool-lib-fro.patch +ipv6-prevent-uaf-in-ip6_send_skb.patch +ipv6-fix-possible-uaf-in-ip6_finish_output2.patch +ipv6-prevent-possible-uaf-in-ip6_xmit.patch +bnxt_en-fix-double-dma-unmapping-for-xdp_redirect.patch +netfilter-flowtable-validate-vlan-header.patch +octeontx2-af-fix-cpt-af-register-offset-calculation.patch +net-xilinx-axienet-always-disable-promiscuous-mode.patch +net-xilinx-axienet-fix-dangling-multicast-addresses.patch +net-ovs-fix-ovs_drop_reasons-error.patch +s390-iucv-fix-vargs-handling-in-iucv_alloc_device.patch diff --git a/queue-6.10/tc-testing-don-t-access-non-existent-variable-on-exc.patch b/queue-6.10/tc-testing-don-t-access-non-existent-variable-on-exc.patch new file mode 100644 index 00000000000..a021068a293 --- /dev/null +++ b/queue-6.10/tc-testing-don-t-access-non-existent-variable-on-exc.patch @@ 
-0,0 +1,60 @@ +From 926e02ac826d2f93c0895913c4b76b82228ecd27 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Aug 2024 16:37:13 +0100 +Subject: tc-testing: don't access non-existent variable on exception + +From: Simon Horman + +[ Upstream commit a0c9fe5eecc97680323ee83780ea3eaf440ba1b7 ] + +Since commit 255c1c7279ab ("tc-testing: Allow test cases to be skipped") +the variable test_ordinal doesn't exist in call_pre_case(). +So it should not be accessed when an exception occurs. + +This resolves the following splat: + + ... + During handling of the above exception, another exception occurred: + + Traceback (most recent call last): + File ".../tdc.py", line 1028, in + main() + File ".../tdc.py", line 1022, in main + set_operation_mode(pm, parser, args, remaining) + File ".../tdc.py", line 966, in set_operation_mode + catresults = test_runner_serial(pm, args, alltests) + File ".../tdc.py", line 642, in test_runner_serial + (index, tsr) = test_runner(pm, args, alltests) + File ".../tdc.py", line 536, in test_runner + res = run_one_test(pm, args, index, tidx) + File ".../tdc.py", line 419, in run_one_test + pm.call_pre_case(tidx) + File ".../tdc.py", line 146, in call_pre_case + print('test_ordinal is {}'.format(test_ordinal)) + NameError: name 'test_ordinal' is not defined + +Fixes: 255c1c7279ab ("tc-testing: Allow test cases to be skipped") +Signed-off-by: Simon Horman +Acked-by: Jamal Hadi Salim +Link: https://patch.msgid.link/20240815-tdc-test-ordinal-v1-1-0255c122a427@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/tc-testing/tdc.py | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py +index ee349187636fc..4f255cec0c22e 100755 +--- a/tools/testing/selftests/tc-testing/tdc.py ++++ b/tools/testing/selftests/tc-testing/tdc.py +@@ -143,7 +143,6 @@ class PluginMgr: + except Exception as ee: + print('exception {} in call to 
pre_case for {} plugin'. + format(ee, pgn_inst.__class__)) +- print('test_ordinal is {}'.format(test_ordinal)) + print('testid is {}'.format(caseinfo['id'])) + raise + +-- +2.43.0 + diff --git a/queue-6.10/tcp-prevent-concurrent-execution-of-tcp_sk_exit_batc.patch b/queue-6.10/tcp-prevent-concurrent-execution-of-tcp_sk_exit_batc.patch new file mode 100644 index 00000000000..62f5be3ce65 --- /dev/null +++ b/queue-6.10/tcp-prevent-concurrent-execution-of-tcp_sk_exit_batc.patch @@ -0,0 +1,109 @@ +From eb6ee959c67ab0e02e6d4d5cca23715bcb918c8f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Aug 2024 00:28:25 +0200 +Subject: tcp: prevent concurrent execution of tcp_sk_exit_batch + +From: Florian Westphal + +[ Upstream commit 565d121b69980637f040eb4d84289869cdaabedf ] + +Its possible that two threads call tcp_sk_exit_batch() concurrently, +once from the cleanup_net workqueue, once from a task that failed to clone +a new netns. In the latter case, error unwinding calls the exit handlers +in reverse order for the 'failed' netns. + +tcp_sk_exit_batch() calls tcp_twsk_purge(). +Problem is that since commit b099ce2602d8 ("net: Batch inet_twsk_purge"), +this function picks up twsk in any dying netns, not just the one passed +in via exit_batch list. + +This means that the error unwind of setup_net() can "steal" and destroy +timewait sockets belonging to the exiting netns. + +This allows the netns exit worker to proceed to call + +WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); + +without the expected 1 -> 0 transition, which then splats. + +At same time, error unwind path that is also running inet_twsk_purge() +will splat as well: + +WARNING: .. at lib/refcount.c:31 refcount_warn_saturate+0x1ed/0x210 +... 
+ refcount_dec include/linux/refcount.h:351 [inline] + inet_twsk_kill+0x758/0x9c0 net/ipv4/inet_timewait_sock.c:70 + inet_twsk_deschedule_put net/ipv4/inet_timewait_sock.c:221 + inet_twsk_purge+0x725/0x890 net/ipv4/inet_timewait_sock.c:304 + tcp_sk_exit_batch+0x1c/0x170 net/ipv4/tcp_ipv4.c:3522 + ops_exit_list+0x128/0x180 net/core/net_namespace.c:178 + setup_net+0x714/0xb40 net/core/net_namespace.c:375 + copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508 + create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110 + +... because refcount_dec() of tw_refcount unexpectedly dropped to 0. + +This doesn't seem like an actual bug (no tw sockets got lost and I don't +see a use-after-free) but as erroneous trigger of debug check. + +Add a mutex to force strict ordering: the task that calls tcp_twsk_purge() +blocks other task from doing final _dec_and_test before mutex-owner has +removed all tw sockets of dying netns. + +Fixes: e9bd0cca09d1 ("tcp: Don't allocate tcp_death_row outside of struct netns_ipv4.") +Reported-by: syzbot+8ea26396ff85d23a8929@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/0000000000003a5292061f5e4e19@google.com/ +Link: https://lore.kernel.org/netdev/20240812140104.GA21559@breakpoint.cc/ +Signed-off-by: Florian Westphal +Reviewed-by: Kuniyuki Iwashima +Reviewed-by: Jason Xing +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20240812222857.29837-1-fw@strlen.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_ipv4.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index a541659b6562b..8f8f93716ff85 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -95,6 +95,8 @@ EXPORT_SYMBOL(tcp_hashinfo); + + static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); + ++static DEFINE_MUTEX(tcp_exit_batch_mutex); ++ + static u32 tcp_v4_init_seq(const struct sk_buff *skb) + { + return secure_tcp_seq(ip_hdr(skb)->daddr, +@@ -3509,6 +3511,16 @@ 
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) + { + struct net *net; + ++ /* make sure concurrent calls to tcp_sk_exit_batch from net_cleanup_work ++ * and failed setup_net error unwinding path are serialized. ++ * ++ * tcp_twsk_purge() handles twsk in any dead netns, not just those in ++ * net_exit_list, the thread that dismantles a particular twsk must ++ * do so without other thread progressing to refcount_dec_and_test() of ++ * tcp_death_row.tw_refcount. ++ */ ++ mutex_lock(&tcp_exit_batch_mutex); ++ + tcp_twsk_purge(net_exit_list); + + list_for_each_entry(net, net_exit_list, exit_list) { +@@ -3516,6 +3528,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) + WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); + tcp_fastopen_ctx_destroy(net); + } ++ ++ mutex_unlock(&tcp_exit_batch_mutex); + } + + static struct pernet_operations __net_initdata tcp_sk_ops = { +-- +2.43.0 + diff --git a/queue-6.10/udp-fix-receiving-fraglist-gso-packets.patch b/queue-6.10/udp-fix-receiving-fraglist-gso-packets.patch new file mode 100644 index 00000000000..44ee3d4b900 --- /dev/null +++ b/queue-6.10/udp-fix-receiving-fraglist-gso-packets.patch @@ -0,0 +1,39 @@ +From 439f79c5bfece2c5a901518acabe9170b6d6e991 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2024 17:06:21 +0200 +Subject: udp: fix receiving fraglist GSO packets + +From: Felix Fietkau + +[ Upstream commit b128ed5ab27330deeeaf51ea8bb69f1442a96f7f ] + +When assembling fraglist GSO packets, udp4_gro_complete does not set +skb->csum_start, which makes the extra validation in __udp_gso_segment fail. 
+ +Fixes: 89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr") +Signed-off-by: Felix Fietkau +Reviewed-by: Willem de Bruijn +Link: https://patch.msgid.link/20240819150621.59833-1-nbd@nbd.name +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/udp_offload.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c +index ee9af921556a7..5b54f4f32b1cd 100644 +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -279,7 +279,8 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, + return ERR_PTR(-EINVAL); + + if (unlikely(skb_checksum_start(gso_skb) != +- skb_transport_header(gso_skb))) ++ skb_transport_header(gso_skb) && ++ !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST))) + return ERR_PTR(-EINVAL); + + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { +-- +2.43.0 +