From: Sasha Levin Date: Tue, 10 Jan 2023 01:55:34 +0000 (-0500) Subject: Fixes for 6.1 X-Git-Tag: v5.15.87~46 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=41ee5668f7925a520616a89aba797fc69af649c9;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/9p-client-fix-data-race-on-req-status.patch b/queue-6.1/9p-client-fix-data-race-on-req-status.patch new file mode 100644 index 00000000000..c757a6adde6 --- /dev/null +++ b/queue-6.1/9p-client-fix-data-race-on-req-status.patch @@ -0,0 +1,248 @@ +From 345f153b8f65510a61e6dcf77d9431f288c7b235 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Dec 2022 21:39:01 +0900 +Subject: 9p/client: fix data race on req->status + +From: Dominique Martinet + +[ Upstream commit 1a4f69ef15ec29b213e2b086b2502644e8ef76ee ] + +KCSAN reported a race between writing req->status in p9_client_cb and +accessing it in p9_client_rpc's wait_event. + +Accesses to req itself is protected by the data barrier (writing req +fields, write barrier, writing status // reading status, read barrier, +reading other req fields), but status accesses themselves apparently +also must be annotated properly with WRITE_ONCE/READ_ONCE when we +access it without locks. + +Follows: + - error paths writing status in various threads all can notify +p9_client_rpc, so these all also need WRITE_ONCE + - there's a similar read loop in trans_virtio for zc case that also +needs READ_ONCE + - other reads in trans_fd should be protected by the trans_fd lock and +lists state machine, as corresponding writers all are within trans_fd +and should be under the same lock. If KCSAN complains on them we likely +will have something else to fix as well, so it's better to leave them +unmarked and look again if required. 
+ +Link: https://lkml.kernel.org/r/20221205124756.426350-1-asmadeus@codewreck.org +Reported-by: Naresh Kamboju +Suggested-by: Marco Elver +Acked-by: Marco Elver +Reviewed-by: Christian Schoenebeck +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + net/9p/client.c | 15 ++++++++------- + net/9p/trans_fd.c | 12 ++++++------ + net/9p/trans_rdma.c | 4 ++-- + net/9p/trans_virtio.c | 9 +++++---- + net/9p/trans_xen.c | 4 ++-- + 5 files changed, 23 insertions(+), 21 deletions(-) + +diff --git a/net/9p/client.c b/net/9p/client.c +index b554f8357f96..b5aa25f82b78 100644 +--- a/net/9p/client.c ++++ b/net/9p/client.c +@@ -443,7 +443,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) + * the status change is visible to another thread + */ + smp_wmb(); +- req->status = status; ++ WRITE_ONCE(req->status, status); + + wake_up(&req->wq); + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); +@@ -605,7 +605,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) + /* if we haven't received a response for oldreq, + * remove it from the list + */ +- if (oldreq->status == REQ_STATUS_SENT) { ++ if (READ_ONCE(oldreq->status) == REQ_STATUS_SENT) { + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, oldreq); + } +@@ -702,7 +702,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) + } + again: + /* Wait for the response */ +- err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD); ++ err = wait_event_killable(req->wq, ++ READ_ONCE(req->status) >= REQ_STATUS_RCVD); + + /* Make sure our req is coherent with regard to updates in other + * threads - echoes to wmb() in the callback +@@ -716,7 +717,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
+ goto again; + } + +- if (req->status == REQ_STATUS_ERROR) { ++ if (READ_ONCE(req->status) == REQ_STATUS_ERROR) { + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } +@@ -729,7 +730,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ +- if (req->status == REQ_STATUS_RCVD) ++ if (READ_ONCE(req->status) == REQ_STATUS_RCVD) + err = 0; + } + recalc_sigpending: +@@ -798,7 +799,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + if (err != -ERESTARTSYS) + goto recalc_sigpending; + } +- if (req->status == REQ_STATUS_ERROR) { ++ if (READ_ONCE(req->status) == REQ_STATUS_ERROR) { + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } +@@ -811,7 +812,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ +- if (req->status == REQ_STATUS_RCVD) ++ if (READ_ONCE(req->status) == REQ_STATUS_RCVD) + err = 0; + } + recalc_sigpending: +diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c +index 07db2f436d44..5a1aecf7fe48 100644 +--- a/net/9p/trans_fd.c ++++ b/net/9p/trans_fd.c +@@ -202,11 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err) + + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + list_move(&req->req_list, &cancel_list); +- req->status = REQ_STATUS_ERROR; ++ WRITE_ONCE(req->status, REQ_STATUS_ERROR); + } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + list_move(&req->req_list, &cancel_list); +- req->status = REQ_STATUS_ERROR; ++ WRITE_ONCE(req->status, REQ_STATUS_ERROR); + } + + spin_unlock(&m->req_lock); +@@ -467,7 +467,7 @@ static void p9_write_work(struct work_struct *work) + + req = list_entry(m->unsent_req_list.next, struct p9_req_t, + req_list); +- req->status = REQ_STATUS_SENT; ++ 
WRITE_ONCE(req->status, REQ_STATUS_SENT); + p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); + list_move_tail(&req->req_list, &m->req_list); + +@@ -676,7 +676,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) + return m->err; + + spin_lock(&m->req_lock); +- req->status = REQ_STATUS_UNSENT; ++ WRITE_ONCE(req->status, REQ_STATUS_UNSENT); + list_add_tail(&req->req_list, &m->unsent_req_list); + spin_unlock(&m->req_lock); + +@@ -703,7 +703,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) + + if (req->status == REQ_STATUS_UNSENT) { + list_del(&req->req_list); +- req->status = REQ_STATUS_FLSHD; ++ WRITE_ONCE(req->status, REQ_STATUS_FLSHD); + p9_req_put(client, req); + ret = 0; + } +@@ -732,7 +732,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) + * remove it from the list. + */ + list_del(&req->req_list); +- req->status = REQ_STATUS_FLSHD; ++ WRITE_ONCE(req->status, REQ_STATUS_FLSHD); + spin_unlock(&m->req_lock); + + p9_req_put(client, req); +diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c +index 6ff706760676..e9a830c69058 100644 +--- a/net/9p/trans_rdma.c ++++ b/net/9p/trans_rdma.c +@@ -507,7 +507,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) + * because doing if after could erase the REQ_STATUS_RCVD + * status in case of a very fast reply. 
+ */ +- req->status = REQ_STATUS_SENT; ++ WRITE_ONCE(req->status, REQ_STATUS_SENT); + err = ib_post_send(rdma->qp, &wr, NULL); + if (err) + goto send_error; +@@ -517,7 +517,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) + + /* Handle errors that happened during or while preparing the send: */ + send_error: +- req->status = REQ_STATUS_ERROR; ++ WRITE_ONCE(req->status, REQ_STATUS_ERROR); + kfree(c); + p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + +diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c +index e757f0601304..3f3eb03cda7d 100644 +--- a/net/9p/trans_virtio.c ++++ b/net/9p/trans_virtio.c +@@ -263,7 +263,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) + + p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + +- req->status = REQ_STATUS_SENT; ++ WRITE_ONCE(req->status, REQ_STATUS_SENT); + req_retry: + spin_lock_irqsave(&chan->lock, flags); + +@@ -469,7 +469,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + inlen = n; + } + } +- req->status = REQ_STATUS_SENT; ++ WRITE_ONCE(req->status, REQ_STATUS_SENT); + req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); + +@@ -532,9 +532,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + spin_unlock_irqrestore(&chan->lock, flags); + kicked = 1; + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); +- err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD); ++ err = wait_event_killable(req->wq, ++ READ_ONCE(req->status) >= REQ_STATUS_RCVD); + // RERROR needs reply (== error string) in static data +- if (req->status == REQ_STATUS_RCVD && ++ if (READ_ONCE(req->status) == REQ_STATUS_RCVD && + unlikely(req->rc.sdata[4] == P9_RERROR)) + handle_rerror(req, in_hdr_len, offs, in_pages); + +diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c +index aaa5fd364691..cf1b89ba522b 100644 +--- a/net/9p/trans_xen.c ++++ b/net/9p/trans_xen.c +@@ -157,7 +157,7 @@ static int 
p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) + &masked_prod, masked_cons, + XEN_9PFS_RING_SIZE(ring)); + +- p9_req->status = REQ_STATUS_SENT; ++ WRITE_ONCE(p9_req->status, REQ_STATUS_SENT); + virt_wmb(); /* write ring before updating pointer */ + prod += size; + ring->intf->out_prod = prod; +@@ -212,7 +212,7 @@ static void p9_xen_response(struct work_struct *work) + dev_warn(&priv->dev->dev, + "requested packet size too big: %d for tag %d with capacity %zd\n", + h.size, h.tag, req->rc.capacity); +- req->status = REQ_STATUS_ERROR; ++ WRITE_ONCE(req->status, REQ_STATUS_ERROR); + goto recv_error; + } + +-- +2.35.1 + diff --git a/queue-6.1/acpi-video-allow-gpu-drivers-to-report-no-panels.patch b/queue-6.1/acpi-video-allow-gpu-drivers-to-report-no-panels.patch new file mode 100644 index 00000000000..af8acfb2c97 --- /dev/null +++ b/queue-6.1/acpi-video-allow-gpu-drivers-to-report-no-panels.patch @@ -0,0 +1,74 @@ +From 7339ac100c3f74369c99b51c04cc0b7f63f80da5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Dec 2022 10:42:05 -0600 +Subject: ACPI: video: Allow GPU drivers to report no panels + +From: Mario Limonciello + +[ Upstream commit 00a734104af7d878f1252d49eff9298785c6cbdc ] + +The current logic for the ACPI backlight detection will create +a backlight device if no native or vendor drivers have created +8 seconds after the system has booted if the ACPI tables +included backlight control methods. + +If the GPU drivers have loaded, they may be able to report whether +any LCD panels were found. Allow using this information to factor +in whether to enable the fallback logic for making an acpi_video0 +backlight device. + +Suggested-by: Hans de Goede +Signed-off-by: Mario Limonciello +Reviewed-by: Hans de Goede +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/acpi_video.c | 11 +++++++++++ + include/acpi/video.h | 2 ++ + 2 files changed, 13 insertions(+) + +diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c +index 32953646caeb..f64fdb029090 100644 +--- a/drivers/acpi/acpi_video.c ++++ b/drivers/acpi/acpi_video.c +@@ -2178,6 +2178,17 @@ static bool should_check_lcd_flag(void) + return false; + } + ++/* ++ * At least one graphics driver has reported that no LCD is connected ++ * via the native interface. cancel the registration for fallback acpi_video0. ++ * If another driver still deems this necessary, it can explicitly register it. ++ */ ++void acpi_video_report_nolcd(void) ++{ ++ cancel_delayed_work(&video_bus_register_backlight_work); ++} ++EXPORT_SYMBOL(acpi_video_report_nolcd); ++ + int acpi_video_register(void) + { + int ret = 0; +diff --git a/include/acpi/video.h b/include/acpi/video.h +index a275c35e5249..8ed9bec03e53 100644 +--- a/include/acpi/video.h ++++ b/include/acpi/video.h +@@ -53,6 +53,7 @@ enum acpi_backlight_type { + }; + + #if IS_ENABLED(CONFIG_ACPI_VIDEO) ++extern void acpi_video_report_nolcd(void); + extern int acpi_video_register(void); + extern void acpi_video_unregister(void); + extern void acpi_video_register_backlight(void); +@@ -69,6 +70,7 @@ extern int acpi_video_get_levels(struct acpi_device *device, + struct acpi_video_device_brightness **dev_br, + int *pmax_level); + #else ++static inline void acpi_video_report_nolcd(void) { return; }; + static inline int acpi_video_register(void) { return -ENODEV; } + static inline void acpi_video_unregister(void) { return; } + static inline void acpi_video_register_backlight(void) { return; } +-- +2.35.1 + diff --git a/queue-6.1/acpi-video-don-t-enable-fallback-path-for-creating-a.patch b/queue-6.1/acpi-video-don-t-enable-fallback-path-for-creating-a.patch new file mode 100644 index 00000000000..d8e0f8725b3 --- /dev/null +++ 
b/queue-6.1/acpi-video-don-t-enable-fallback-path-for-creating-a.patch @@ -0,0 +1,49 @@ +From a85e954e58d67f49abb6fdb79191888ff211d0d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Dec 2022 10:42:07 -0600 +Subject: ACPI: video: Don't enable fallback path for creating ACPI backlight + by default + +From: Mario Limonciello + +[ Upstream commit 5aa9d943e9b6bf6e6023645cbe7ce7d5ed84baf4 ] + +The ACPI video detection code has a module parameter +`register_backlight_delay` which is currently configured to 8 seconds. +This means that if after 8 seconds of booting no native driver has created +a backlight device then the code will attempt to make an ACPI video +backlight device. + +This was intended as a safety mechanism with the backlight overhaul that +occurred in kernel 6.1, but as it doesn't appear necessary, set it to be +disabled by default. + +Suggested-by: Hans de Goede +Signed-off-by: Mario Limonciello +Reviewed-by: Hans de Goede +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/acpi_video.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c +index f64fdb029090..0c79f463fbfd 100644 +--- a/drivers/acpi/acpi_video.c ++++ b/drivers/acpi/acpi_video.c +@@ -70,11 +70,7 @@ module_param(device_id_scheme, bool, 0444); + static int only_lcd = -1; + module_param(only_lcd, int, 0444); + +-/* +- * Display probing is known to take up to 5 seconds, so delay the fallback +- * backlight registration by 5 seconds + 3 seconds for some extra margin. 
+- */ +-static int register_backlight_delay = 8; ++static int register_backlight_delay; + module_param(register_backlight_delay, int, 0444); + MODULE_PARM_DESC(register_backlight_delay, + "Delay in seconds before doing fallback (non GPU driver triggered) " +-- +2.35.1 + diff --git a/queue-6.1/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch b/queue-6.1/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch new file mode 100644 index 00000000000..7c87ca38519 --- /dev/null +++ b/queue-6.1/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch @@ -0,0 +1,59 @@ +From 059867da14e70f8e20d5baa2926cb27482bd8821 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Dec 2022 13:32:46 +0100 +Subject: ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071 + tablet + +From: Hans de Goede + +[ Upstream commit a1dec9d70b6ad97087b60b81d2492134a84208c6 ] + +The Advantech MICA-071 tablet deviates from the defaults for +a non CR Bay Trail based tablet in several ways: + +1. It uses an analog MIC on IN3 rather than using DMIC1 +2. It only has 1 speaker +3. It needs the OVCD current threshold to be set to 1500uA instead of + the default 2000uA to reliably differentiate between headphones vs + headsets + +Add a quirk with these settings for this tablet. 
+ +Signed-off-by: Hans de Goede +Acked-by: Pierre-Louis Bossart +Link: https://lore.kernel.org/r/20221213123246.11226-1-hdegoede@redhat.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/intel/boards/bytcr_rt5640.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c +index fb9d9e271845..ddd2625bed90 100644 +--- a/sound/soc/intel/boards/bytcr_rt5640.c ++++ b/sound/soc/intel/boards/bytcr_rt5640.c +@@ -570,6 +570,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, ++ { ++ /* Advantech MICA-071 */ ++ .matches = { ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"), ++ }, ++ /* OVCD Th = 1500uA to reliable detect head-phones vs -set */ ++ .driver_data = (void *)(BYT_RT5640_IN3_MAP | ++ BYT_RT5640_JD_SRC_JD2_IN4N | ++ BYT_RT5640_OVCD_TH_1500UA | ++ BYT_RT5640_OVCD_SF_0P75 | ++ BYT_RT5640_MONO_SPEAKER | ++ BYT_RT5640_DIFF_MIC | ++ BYT_RT5640_MCLK_EN), ++ }, + { + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), +-- +2.35.1 + diff --git a/queue-6.1/asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch b/queue-6.1/asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch new file mode 100644 index 00000000000..fc61360430c --- /dev/null +++ b/queue-6.1/asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch @@ -0,0 +1,147 @@ +From 2fe0e5e94e473b9a24a6a2519902aea8a224cd26 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Dec 2022 13:45:28 +0200 +Subject: ASoC: SOF: Intel: pci-tgl: unblock S5 entry if DMA stop has failed" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kai Vehmanen + +[ Upstream commit 2aa2a5ead0ee0a358bf80a2984a641d1bf2adc2a ] + +If system shutdown has not been completed cleanly, it is possible the +DMA stream shutdown has not been done, or was 
not clean. + +If this is the case, Intel TGL/ADL HDA platforms may fail to shutdown +cleanly due to pending HDA DMA transactions. To avoid this, detect this +scenario in the shutdown callback, and perform an additional controller +reset. This has been tested to unblock S5 entry if this condition is +hit. + +Co-developed-by: Archana Patni +Signed-off-by: Archana Patni +Signed-off-by: Kai Vehmanen +Reviewed-by: Pierre-Louis Bossart +Reviewed-by: Péter Ujfalusi +Reviewed-by: Ranjani Sridharan +Link: https://lore.kernel.org/r/20221209114529.3909192-2-kai.vehmanen@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sof/intel/hda-dsp.c | 72 +++++++++++++++++++++++++++++++++++ + sound/soc/sof/intel/hda.h | 1 + + sound/soc/sof/intel/tgl.c | 2 +- + 3 files changed, 74 insertions(+), 1 deletion(-) + +diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c +index 3c76f843454b..428aee8fd93b 100644 +--- a/sound/soc/sof/intel/hda-dsp.c ++++ b/sound/soc/sof/intel/hda-dsp.c +@@ -903,6 +903,78 @@ int hda_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state) + return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); + } + ++static unsigned int hda_dsp_check_for_dma_streams(struct snd_sof_dev *sdev) ++{ ++ struct hdac_bus *bus = sof_to_bus(sdev); ++ struct hdac_stream *s; ++ unsigned int active_streams = 0; ++ int sd_offset; ++ u32 val; ++ ++ list_for_each_entry(s, &bus->stream_list, list) { ++ sd_offset = SOF_STREAM_SD_OFFSET(s); ++ val = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, ++ sd_offset); ++ if (val & SOF_HDA_SD_CTL_DMA_START) ++ active_streams |= BIT(s->index); ++ } ++ ++ return active_streams; ++} ++ ++static int hda_dsp_s5_quirk(struct snd_sof_dev *sdev) ++{ ++ int ret; ++ ++ /* ++ * Do not assume a certain timing between the prior ++ * suspend flow, and running of this quirk function. ++ * This is needed if the controller was just put ++ * to reset before calling this function. 
++ */ ++ usleep_range(500, 1000); ++ ++ /* ++ * Take controller out of reset to flush DMA ++ * transactions. ++ */ ++ ret = hda_dsp_ctrl_link_reset(sdev, false); ++ if (ret < 0) ++ return ret; ++ ++ usleep_range(500, 1000); ++ ++ /* Restore state for shutdown, back to reset */ ++ ret = hda_dsp_ctrl_link_reset(sdev, true); ++ if (ret < 0) ++ return ret; ++ ++ return ret; ++} ++ ++int hda_dsp_shutdown_dma_flush(struct snd_sof_dev *sdev) ++{ ++ unsigned int active_streams; ++ int ret, ret2; ++ ++ /* check if DMA cleanup has been successful */ ++ active_streams = hda_dsp_check_for_dma_streams(sdev); ++ ++ sdev->system_suspend_target = SOF_SUSPEND_S3; ++ ret = snd_sof_suspend(sdev->dev); ++ ++ if (active_streams) { ++ dev_warn(sdev->dev, ++ "There were active DSP streams (%#x) at shutdown, trying to recover\n", ++ active_streams); ++ ret2 = hda_dsp_s5_quirk(sdev); ++ if (ret2 < 0) ++ dev_err(sdev->dev, "shutdown recovery failed (%d)\n", ret2); ++ } ++ ++ return ret; ++} ++ + int hda_dsp_shutdown(struct snd_sof_dev *sdev) + { + sdev->system_suspend_target = SOF_SUSPEND_S3; +diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h +index 2ab3c3840b92..9acd21901e68 100644 +--- a/sound/soc/sof/intel/hda.h ++++ b/sound/soc/sof/intel/hda.h +@@ -581,6 +581,7 @@ int hda_dsp_resume(struct snd_sof_dev *sdev); + int hda_dsp_runtime_suspend(struct snd_sof_dev *sdev); + int hda_dsp_runtime_resume(struct snd_sof_dev *sdev); + int hda_dsp_runtime_idle(struct snd_sof_dev *sdev); ++int hda_dsp_shutdown_dma_flush(struct snd_sof_dev *sdev); + int hda_dsp_shutdown(struct snd_sof_dev *sdev); + int hda_dsp_set_hw_params_upon_resume(struct snd_sof_dev *sdev); + void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags); +diff --git a/sound/soc/sof/intel/tgl.c b/sound/soc/sof/intel/tgl.c +index 9ae2890e9dac..8637fe102c87 100644 +--- a/sound/soc/sof/intel/tgl.c ++++ b/sound/soc/sof/intel/tgl.c +@@ -60,7 +60,7 @@ int sof_tgl_ops_init(struct snd_sof_dev *sdev) + memcpy(&sof_tgl_ops, 
&sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops)); + + /* probe/remove/shutdown */ +- sof_tgl_ops.shutdown = hda_dsp_shutdown; ++ sof_tgl_ops.shutdown = hda_dsp_shutdown_dma_flush; + + if (sdev->pdata->ipc_type == SOF_IPC) { + /* doorbell */ +-- +2.35.1 + diff --git a/queue-6.1/asoc-sof-mediatek-initialize-panic_info-to-zero.patch b/queue-6.1/asoc-sof-mediatek-initialize-panic_info-to-zero.patch new file mode 100644 index 00000000000..3d9b2b77c94 --- /dev/null +++ b/queue-6.1/asoc-sof-mediatek-initialize-panic_info-to-zero.patch @@ -0,0 +1,39 @@ +From 3b5a044d9f22339a0d62ec18c15a0401ca7a1128 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Dec 2022 19:56:17 +0800 +Subject: ASoC: SOF: mediatek: initialize panic_info to zero + +From: YC Hung + +[ Upstream commit 7bd220f2ba9014b78f0304178103393554b8c4fe ] + +Coverity spotted that panic_info is not initialized to zero in +mtk_adsp_dump. Using uninitialized value panic_info.linenum when +calling snd_sof_get_status. Fix this coverity by initializing +panic_info struct as zero. + +Signed-off-by: YC Hung +Reviewed-by: Curtis Malainey +Link: https://lore.kernel.org/r/20221213115617.25086-1-yc.hung@mediatek.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sof/mediatek/mtk-adsp-common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/soc/sof/mediatek/mtk-adsp-common.c b/sound/soc/sof/mediatek/mtk-adsp-common.c +index 1e0769c668a7..de8dbe27cd0d 100644 +--- a/sound/soc/sof/mediatek/mtk-adsp-common.c ++++ b/sound/soc/sof/mediatek/mtk-adsp-common.c +@@ -60,7 +60,7 @@ void mtk_adsp_dump(struct snd_sof_dev *sdev, u32 flags) + { + char *level = (flags & SOF_DBG_DUMP_OPTIONAL) ? 
KERN_DEBUG : KERN_ERR; + struct sof_ipc_dsp_oops_xtensa xoops; +- struct sof_ipc_panic_info panic_info; ++ struct sof_ipc_panic_info panic_info = {}; + u32 stack[MTK_ADSP_STACK_DUMP_SIZE]; + u32 status; + +-- +2.35.1 + diff --git a/queue-6.1/asoc-sof-revert-core-unregister-clients-and-machine-.patch b/queue-6.1/asoc-sof-revert-core-unregister-clients-and-machine-.patch new file mode 100644 index 00000000000..a96c635238a --- /dev/null +++ b/queue-6.1/asoc-sof-revert-core-unregister-clients-and-machine-.patch @@ -0,0 +1,71 @@ +From 1a40a40424a121e579eb5d5b2eb08f4c2fa92694 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Dec 2022 13:45:29 +0200 +Subject: ASoC: SOF: Revert: "core: unregister clients and machine drivers in + .shutdown" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kai Vehmanen + +[ Upstream commit 44fda61d2bcfb74a942df93959e083a4e8eff75f ] + +The unregister machine drivers call is not safe to do when +kexec is used. Kexec-lite gets blocked with following backtrace: + +[ 84.943749] Freezing user space processes ... (elapsed 0.111 seconds) done. +[ 246.784446] INFO: task kexec-lite:5123 blocked for more than 122 seconds. +[ 246.819035] Call Trace: +[ 246.821782] +[ 246.824186] __schedule+0x5f9/0x1263 +[ 246.828231] schedule+0x87/0xc5 +[ 246.831779] snd_card_disconnect_sync+0xb5/0x127 +... +[ 246.889249] snd_sof_device_shutdown+0xb4/0x150 +[ 246.899317] pci_device_shutdown+0x37/0x61 +[ 246.903990] device_shutdown+0x14c/0x1d6 +[ 246.908391] kernel_kexec+0x45/0xb9 + +This reverts commit 83bfc7e793b555291785136c3ae86abcdc046887. 
+ +Reported-by: Ricardo Ribalda +Cc: Ricardo Ribalda +Signed-off-by: Kai Vehmanen +Reviewed-by: Pierre-Louis Bossart +Reviewed-by: Péter Ujfalusi +Reviewed-by: Ranjani Sridharan +Link: https://lore.kernel.org/r/20221209114529.3909192-3-kai.vehmanen@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sof/core.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c +index 3e6141d03770..625977a29d8a 100644 +--- a/sound/soc/sof/core.c ++++ b/sound/soc/sof/core.c +@@ -475,19 +475,10 @@ EXPORT_SYMBOL(snd_sof_device_remove); + int snd_sof_device_shutdown(struct device *dev) + { + struct snd_sof_dev *sdev = dev_get_drvdata(dev); +- struct snd_sof_pdata *pdata = sdev->pdata; + + if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) + cancel_work_sync(&sdev->probe_work); + +- /* +- * make sure clients and machine driver(s) are unregistered to force +- * all userspace devices to be closed prior to the DSP shutdown sequence +- */ +- sof_unregister_clients(sdev); +- +- snd_sof_machine_unregister(sdev, pdata); +- + if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) + return snd_sof_shutdown(sdev); + +-- +2.35.1 + diff --git a/queue-6.1/bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch b/queue-6.1/bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch new file mode 100644 index 00000000000..11209acbf28 --- /dev/null +++ b/queue-6.1/bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch @@ -0,0 +1,129 @@ +From 71ca0a3d33cd01caf733322b6bed1d40ce8efa1a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Dec 2022 22:19:39 -0500 +Subject: bnxt_en: Fix first buffer size calculations for XDP multi-buffer + +From: Michael Chan + +[ Upstream commit 1abeacc1979fa4a756695f5030791d8f0fa934b9 ] + +The size of the first buffer is always page size, and the useable +space is the page size minus the offset and the skb_shared_info size. 
+Make sure SKB and XDP buf sizes match so that the skb_shared_info +is at the same offset seen from the SKB and XDP_BUF. + +build_skb() should be passed PAGE_SIZE. xdp_init_buff() should +be passed PAGE_SIZE as well. xdp_get_shared_info_from_buff() will +automatically deduct the skb_shared_info size if the XDP buffer +has frags. There is no need to keep bp->xdp_has_frags. + +Change BNXT_PAGE_MODE_BUF_SIZE to BNXT_MAX_PAGE_MODE_MTU_SBUF +since this constant is really the MTU with ethernet header size +subtracted. + +Also fix the BNXT_MAX_PAGE_MODE_MTU macro with proper parentheses. + +Fixes: 32861236190b ("bnxt: change receive ring space parameters") +Reviewed-by: Somnath Kotur +Reviewed-by: Andy Gospodarek +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 15 +++++++++++---- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 +------ + 3 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index a83d534a096a..b0c9c9813d23 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -988,8 +988,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, + dma_addr -= bp->rx_dma_offset; + dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); +- skb = build_skb(page_address(page), BNXT_PAGE_MODE_BUF_SIZE + +- bp->rx_dma_offset); ++ skb = build_skb(page_address(page), PAGE_SIZE); + if (!skb) { + __free_page(page); + return NULL; +@@ -3966,8 +3965,10 @@ void bnxt_set_ring_params(struct bnxt *bp) + bp->rx_agg_ring_mask = (bp->rx_agg_nr_pages * RX_DESC_CNT) - 1; + + if (BNXT_RX_PAGE_MODE(bp)) { +- rx_space = BNXT_PAGE_MODE_BUF_SIZE; +- rx_size = BNXT_MAX_PAGE_MODE_MTU; ++ rx_space = PAGE_SIZE; ++ rx_size = PAGE_SIZE - ++ 
ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) - ++ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } else { + rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN); + rx_space = rx_size + NET_SKB_PAD + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +index d5fa43cfe524..02741d499bf4 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -591,12 +591,20 @@ struct nqe_cn { + #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) + + #define BNXT_MAX_MTU 9500 +-#define BNXT_PAGE_MODE_BUF_SIZE \ ++ ++/* First RX buffer page in XDP multi-buf mode ++ * ++ * +-------------------------------------------------------------------------+ ++ * | XDP_PACKET_HEADROOM | bp->rx_buf_use_size | skb_shared_info| ++ * | (bp->rx_dma_offset) | | | ++ * +-------------------------------------------------------------------------+ ++ */ ++#define BNXT_MAX_PAGE_MODE_MTU_SBUF \ + ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ + XDP_PACKET_HEADROOM) + #define BNXT_MAX_PAGE_MODE_MTU \ +- BNXT_PAGE_MODE_BUF_SIZE - \ +- SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)) ++ (BNXT_MAX_PAGE_MODE_MTU_SBUF - \ ++ SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) + + #define BNXT_MIN_PKT_SIZE 52 + +@@ -2131,7 +2139,6 @@ struct bnxt { + #define BNXT_DUMP_CRASH 1 + + struct bpf_prog *xdp_prog; +- u8 xdp_has_frags; + + struct bnxt_ptp_cfg *ptp_cfg; + u8 ptp_all_rx_tstamp; +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +index 2ceeaa818c1c..36d5202c0aee 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +@@ -193,9 +193,6 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + mapping = rx_buf->mapping - bp->rx_dma_offset; + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir); + +- if 
(bp->xdp_has_frags) +- buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; +- + xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); + xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false); + } +@@ -404,10 +401,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) + netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); + return -EOPNOTSUPP; + } +- if (prog) { ++ if (prog) + tx_xdp = bp->rx_nr_rings; +- bp->xdp_has_frags = prog->aux->xdp_has_frags; +- } + + tc = netdev_get_num_tc(dev); + if (!tc) +-- +2.35.1 + diff --git a/queue-6.1/bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch b/queue-6.1/bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch new file mode 100644 index 00000000000..fd7785333d1 --- /dev/null +++ b/queue-6.1/bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch @@ -0,0 +1,55 @@ +From 2aa01b6054f71301f725e110f060597c5172508f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Dec 2022 22:19:40 -0500 +Subject: bnxt_en: Fix HDS and jumbo thresholds for RX packets + +From: Michael Chan + +[ Upstream commit a056ebcc30e2f78451d66f615d2f6bdada3e6438 ] + +The recent XDP multi-buffer feature has introduced regressions in the +setting of HDS and jumbo thresholds. HDS was accidentally disabled in +the normal mode without XDP. This patch restores jumbo HDS placement +when not in XDP mode. In XDP multi-buffer mode, HDS should be disabled +and the jumbo threshold should be set to the usable page size in the +first page buffer. + +Fixes: 32861236190b ("bnxt: change receive ring space parameters") +Reviewed-by: Mohammad Shuab Siddique +Reviewed-by: Ajit Khaparde +Reviewed-by: Andy Gospodarek +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index b0c9c9813d23..f5a8bae8d79a 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -5371,15 +5371,16 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) + req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT); + req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID); + +- if (BNXT_RX_PAGE_MODE(bp) && !BNXT_RX_JUMBO_MODE(bp)) { ++ if (BNXT_RX_PAGE_MODE(bp)) { ++ req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size); ++ } else { + req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | + VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); + req->enables |= + cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); ++ req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); ++ req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); + } +- /* thresholds not implemented in firmware yet */ +- req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); +- req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); + req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); + return hwrm_req_send(bp, req); + } +-- +2.35.1 + diff --git a/queue-6.1/bnxt_en-fix-xdp-rx-path.patch b/queue-6.1/bnxt_en-fix-xdp-rx-path.patch new file mode 100644 index 00000000000..799fe613cb8 --- /dev/null +++ b/queue-6.1/bnxt_en-fix-xdp-rx-path.patch @@ -0,0 +1,84 @@ +From f22272b9cdf422090f6eeb66de24cc2f260d849f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Dec 2022 22:19:38 -0500 +Subject: bnxt_en: Fix XDP RX path + +From: Michael Chan + +[ Upstream commit 9b3e607871ea5ee90f10f5be3965fc07f2aa3ef7 ] + +The XDP program can change the starting address of the RX data buffer and +this information needs to be passed back from bnxt_rx_xdp() to +bnxt_rx_pkt() 
for the XDP_PASS case so that the SKB can point correctly +to the modified buffer address. Add back the data_ptr parameter to +bnxt_rx_xdp() to make this work. + +Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff") +Reviewed-by: Andy Gospodarek +Reviewed-by: Pavan Chebbi +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 +++++-- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 4 ++-- + 3 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 0166c99cb7c6..a83d534a096a 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1937,7 +1937,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + } + + if (xdp_active) { +- if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &len, event)) { ++ if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &data_ptr, &len, event)) { + rc = 1; + goto next_rx; + } +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +index 1847f191577d..2ceeaa818c1c 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +@@ -222,7 +222,8 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, + * false - packet should be passed to the stack. 
+ */ + bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, +- struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event) ++ struct xdp_buff xdp, struct page *page, u8 **data_ptr, ++ unsigned int *len, u8 *event) + { + struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog); + struct bnxt_tx_ring_info *txr; +@@ -255,8 +256,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, + *event &= ~BNXT_RX_EVENT; + + *len = xdp.data_end - xdp.data; +- if (orig_data != xdp.data) ++ if (orig_data != xdp.data) { + offset = xdp.data - xdp.data_hard_start; ++ *data_ptr = xdp.data_hard_start + offset; ++ } + + switch (act) { + case XDP_PASS: +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +index 2bbdb8e7c506..ea430d6961df 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +@@ -18,8 +18,8 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, + struct xdp_buff *xdp); + void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); + bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, +- struct xdp_buff xdp, struct page *page, unsigned int *len, +- u8 *event); ++ struct xdp_buff xdp, struct page *page, u8 **data_ptr, ++ unsigned int *len, u8 *event); + int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); + int bnxt_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags); +-- +2.35.1 + diff --git a/queue-6.1/bnxt_en-simplify-bnxt_xdp_buff_init.patch b/queue-6.1/bnxt_en-simplify-bnxt_xdp_buff_init.patch new file mode 100644 index 00000000000..4d7ed8190ed --- /dev/null +++ b/queue-6.1/bnxt_en-simplify-bnxt_xdp_buff_init.patch @@ -0,0 +1,83 @@ +From 3990198d829e40551cbe41132d3346caddeb5433 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Dec 2022 22:19:37 -0500 +Subject: bnxt_en: Simplify bnxt_xdp_buff_init() + +From: 
Michael Chan + +[ Upstream commit bbfc17e50ba2ed18dfef46b1c433d50a58566bf1 ] + +bnxt_xdp_buff_init() does not modify the data_ptr or the len parameters, +so no need to pass in the addresses of these parameters. + +Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff") +Reviewed-by: Andy Gospodarek +Reviewed-by: Somnath Kotur +Reviewed-by: Pavan Chebbi +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 6 +++--- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 9f8a6ce4b356..0166c99cb7c6 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1922,7 +1922,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + dma_addr = rx_buf->mapping; + + if (bnxt_xdp_attached(bp, rxr)) { +- bnxt_xdp_buff_init(bp, rxr, cons, &data_ptr, &len, &xdp); ++ bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp); + if (agg_bufs) { + u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, + cp_cons, agg_bufs, +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +index c3065ec0a479..1847f191577d 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +@@ -177,7 +177,7 @@ bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) + } + + void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, +- u16 cons, u8 **data_ptr, unsigned int *len, ++ u16 cons, u8 *data_ptr, unsigned int len, + struct xdp_buff *xdp) + { + struct bnxt_sw_rx_bd *rx_buf; +@@ -191,13 +191,13 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + 
offset = bp->rx_offset; + + mapping = rx_buf->mapping - bp->rx_dma_offset; +- dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); ++ dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir); + + if (bp->xdp_has_frags) + buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; + + xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); +- xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false); ++ xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false); + } + + void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +index 505911ae095d..2bbdb8e7c506 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +@@ -27,7 +27,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, + bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr); + + void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, +- u16 cons, u8 **data_ptr, unsigned int *len, ++ u16 cons, u8 *data_ptr, unsigned int len, + struct xdp_buff *xdp); + void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, + struct xdp_buff *xdp); +-- +2.35.1 + diff --git a/queue-6.1/bonding-fix-lockdep-splat-in-bond_miimon_commit.patch b/queue-6.1/bonding-fix-lockdep-splat-in-bond_miimon_commit.patch new file mode 100644 index 00000000000..8a3568fe182 --- /dev/null +++ b/queue-6.1/bonding-fix-lockdep-splat-in-bond_miimon_commit.patch @@ -0,0 +1,62 @@ +From d56cd43655fe0a8dad3de52f8509eb2ca7f1648f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 13:08:31 +0000 +Subject: bonding: fix lockdep splat in bond_miimon_commit() + +From: Eric Dumazet + +[ Upstream commit 42c7ded0eeacd2ba5db599205c71c279dc715de7 ] + +bond_miimon_commit() is run while RTNL is held, not RCU. 
+ +WARNING: suspicious RCU usage +6.1.0-syzkaller-09671-g89529367293c #0 Not tainted +----------------------------- +drivers/net/bonding/bond_main.c:2704 suspicious rcu_dereference_check() usage! + +Fixes: e95cc44763a4 ("bonding: do failover when high prio link up") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Hangbin Liu +Cc: Jay Vosburgh +Cc: Veaceslav Falico +Cc: Andy Gospodarek +Link: https://lore.kernel.org/r/20221220130831.1480888-1-edumazet@google.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_main.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index b108f2f4adc2..fce9301c8ebb 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -2653,10 +2653,12 @@ static void bond_miimon_link_change(struct bonding *bond, + + static void bond_miimon_commit(struct bonding *bond) + { +- struct slave *slave, *primary; ++ struct slave *slave, *primary, *active; + bool do_failover = false; + struct list_head *iter; + ++ ASSERT_RTNL(); ++ + bond_for_each_slave(bond, slave, iter) { + switch (slave->link_new_state) { + case BOND_LINK_NOCHANGE: +@@ -2699,8 +2701,8 @@ static void bond_miimon_commit(struct bonding *bond) + + bond_miimon_link_change(bond, slave, BOND_LINK_UP); + +- if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary || +- slave->prio > rcu_dereference(bond->curr_active_slave)->prio) ++ active = rtnl_dereference(bond->curr_active_slave); ++ if (!active || slave == primary || slave->prio > active->prio) + do_failover = true; + + continue; +-- +2.35.1 + diff --git a/queue-6.1/bpf-always-use-maximal-size-for-copy_array.patch b/queue-6.1/bpf-always-use-maximal-size-for-copy_array.patch new file mode 100644 index 00000000000..4527b00cf07 --- /dev/null +++ b/queue-6.1/bpf-always-use-maximal-size-for-copy_array.patch @@ -0,0 +1,71 @@ +From 
6e8783225b0deb8e6de76cca54338f0cf441cf3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Dec 2022 10:28:44 -0800 +Subject: bpf: Always use maximal size for copy_array() + +From: Kees Cook + +[ Upstream commit 45435d8da71f9f3e6860e6e6ea9667b6ec17ec64 ] + +Instead of counting on prior allocations to have sized allocations to +the next kmalloc bucket size, always perform a krealloc that is at least +ksize(dst) in size (which is a no-op), so the size can be correctly +tracked by all the various allocation size trackers (KASAN, +__alloc_size, etc). + +Reported-by: Hyunwoo Kim +Link: https://lore.kernel.org/bpf/20221223094551.GA1439509@ubuntu +Fixes: ceb35b666d42 ("bpf/verifier: Use kmalloc_size_roundup() to match ksize() usage") +Cc: Alexei Starovoitov +Cc: Daniel Borkmann +Cc: John Fastabend +Cc: Andrii Nakryiko +Cc: Martin KaFai Lau +Cc: Song Liu +Cc: Yonghong Song +Cc: KP Singh +Cc: Stanislav Fomichev +Cc: Hao Luo +Cc: Jiri Olsa +Cc: bpf@vger.kernel.org +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221223182836.never.866-kees@kernel.org +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 242fe307032f..b4d5b343c191 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1000,6 +1000,8 @@ static void print_insn_state(struct bpf_verifier_env *env, + */ + static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) + { ++ size_t alloc_bytes; ++ void *orig = dst; + size_t bytes; + + if (ZERO_OR_NULL_PTR(src)) +@@ -1008,11 +1010,11 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + +- if (ksize(dst) < ksize(src)) { +- kfree(dst); +- dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags); +- if (!dst) +- return NULL; ++ 
alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes)); ++ dst = krealloc(orig, alloc_bytes, flags); ++ if (!dst) { ++ kfree(orig); ++ return NULL; + } + + memcpy(dst, src, bytes); +-- +2.35.1 + diff --git a/queue-6.1/bpf-pull-before-calling-skb_postpull_rcsum.patch b/queue-6.1/bpf-pull-before-calling-skb_postpull_rcsum.patch new file mode 100644 index 00000000000..4042e4b962c --- /dev/null +++ b/queue-6.1/bpf-pull-before-calling-skb_postpull_rcsum.patch @@ -0,0 +1,61 @@ +From 4ddb1e3e1817d359b2971b8a7f0c291575b29f3d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 16:47:00 -0800 +Subject: bpf: pull before calling skb_postpull_rcsum() + +From: Jakub Kicinski + +[ Upstream commit 54c3f1a81421f85e60ae2eaae7be3727a09916ee ] + +Anand hit a BUG() when pulling off headers on egress to a SW tunnel. +We get to skb_checksum_help() with an invalid checksum offset +(commit d7ea0d9df2a6 ("net: remove two BUG() from skb_checksum_help()") +converted those BUGs to WARN_ONs()). +He points out oddness in how skb_postpull_rcsum() gets used. 
+Indeed looks like we should pull before "postpull", otherwise +the CHECKSUM_PARTIAL fixup from skb_postpull_rcsum() will not +be able to do its job: + + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; + +Reported-by: Anand Parthasarathy +Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") +Signed-off-by: Jakub Kicinski +Acked-by: Stanislav Fomichev +Link: https://lore.kernel.org/r/20221220004701.402165-1-kuba@kernel.org +Signed-off-by: Martin KaFai Lau +Signed-off-by: Sasha Levin +--- + net/core/filter.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/net/core/filter.c b/net/core/filter.c +index a368edd9057c..0c2666e041d3 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -3184,15 +3184,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) + + static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) + { ++ void *old_data; ++ + /* skb_ensure_writable() is not needed here, as we're + * already working on an uncloned skb. 
+ */ + if (unlikely(!pskb_may_pull(skb, off + len))) + return -ENOMEM; + +- skb_postpull_rcsum(skb, skb->data + off, len); +- memmove(skb->data + len, skb->data, off); ++ old_data = skb->data; + __skb_pull(skb, len); ++ skb_postpull_rcsum(skb, old_data + off, len); ++ memmove(skb->data, old_data, off); + + return 0; + } +-- +2.35.1 + diff --git a/queue-6.1/btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch b/queue-6.1/btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch new file mode 100644 index 00000000000..33fe9ac607f --- /dev/null +++ b/queue-6.1/btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch @@ -0,0 +1,47 @@ +From e78ab9a5f5bdecc66b642b962fb92f875585c0d4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Dec 2022 21:01:43 +0100 +Subject: btrfs: fix an error handling path in btrfs_defrag_leaves() + +From: Christophe JAILLET + +[ Upstream commit db0a4a7b8e95f9312a59a67cbd5bc589f090e13d ] + +All error handling paths end to 'out', except this memory allocation +failure. + +This is spurious. So branch to the error handling path also in this case. 
+It will add a call to: + + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + +Fixes: 6702ed490ca0 ("Btrfs: Add run time btree defrag, and an ioctl to force btree defrag") +Signed-off-by: Christophe JAILLET +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/defrag.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c +index b6cf39f4e7e4..072ab9a1374b 100644 +--- a/fs/btrfs/defrag.c ++++ b/fs/btrfs/defrag.c +@@ -31,8 +31,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + goto out; + + path = btrfs_alloc_path(); +- if (!path) +- return -ENOMEM; ++ if (!path) { ++ ret = -ENOMEM; ++ goto out; ++ } + + level = btrfs_header_level(root->node); + +-- +2.35.1 + diff --git a/queue-6.1/btrfs-move-assert-helpers-out-of-ctree.h.patch b/queue-6.1/btrfs-move-assert-helpers-out-of-ctree.h.patch new file mode 100644 index 00000000000..f9ed910c51f --- /dev/null +++ b/queue-6.1/btrfs-move-assert-helpers-out-of-ctree.h.patch @@ -0,0 +1,97 @@ +From 25d536ab2905e7d98cd6848de0f45f12711b84ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 10:50:48 -0400 +Subject: btrfs: move assert helpers out of ctree.h + +From: Josef Bacik + +[ Upstream commit e118578a8df7941a9bbc568851997852e5bc7338 ] + +These call functions that aren't defined in, or will be moved out of, +ctree.h. Move them to super.c where the other assert/error message code +is defined. 
Drop the __noreturn attribute for btrfs_assertfail as +objtool does not like it and fails with warnings like + + fs/btrfs/dir-item.o: warning: objtool: .text.unlikely: unexpected end of section + fs/btrfs/xattr.o: warning: objtool: btrfs_setxattr() falls through to next function btrfs_setxattr_trans.cold() + fs/btrfs/xattr.o: warning: objtool: .text.unlikely: unexpected end of section + +Reviewed-by: Johannes Thumshirn +Reviewed-by: Anand Jain +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.h | 18 +++--------------- + fs/btrfs/super.c | 14 ++++++++++++++ + 2 files changed, 17 insertions(+), 15 deletions(-) + +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 329277a5c6e8..d7869188f8b2 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3609,18 +3609,11 @@ do { \ + } while (0) + + #ifdef CONFIG_BTRFS_ASSERT +-__cold __noreturn +-static inline void assertfail(const char *expr, const char *file, int line) +-{ +- pr_err("assertion failed: %s, in %s:%d\n", expr, file, line); +- BUG(); +-} ++void __cold btrfs_assertfail(const char *expr, const char *file, int line); + + #define ASSERT(expr) \ +- (likely(expr) ? (void)0 : assertfail(#expr, __FILE__, __LINE__)) +- ++ (likely(expr) ? (void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__)) + #else +-static inline void assertfail(const char *expr, const char* file, int line) { } + #define ASSERT(expr) (void)(expr) + #endif + +@@ -3680,12 +3673,7 @@ static inline unsigned long get_eb_page_index(unsigned long offset) + #define EXPORT_FOR_TESTS + #endif + +-__cold +-static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info) +-{ +- btrfs_err(fs_info, +-"Unsupported V0 extent filesystem detected. Aborting. 
Please re-create your filesystem with a newer kernel"); +-} ++void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info); + + __printf(5, 6) + __cold +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index adb0ce19a13c..7cdf27d807be 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -305,6 +305,20 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, + } + #endif + ++#ifdef CONFIG_BTRFS_ASSERT ++void __cold btrfs_assertfail(const char *expr, const char *file, int line) ++{ ++ pr_err("assertion failed: %s, in %s:%d\n", expr, file, line); ++ BUG(); ++} ++#endif ++ ++void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info) ++{ ++ btrfs_err(fs_info, ++"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel"); ++} ++ + #if BITS_PER_LONG == 32 + void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info) + { +-- +2.35.1 + diff --git a/queue-6.1/btrfs-move-btrfs_get_block_group-helper-out-of-disk-.patch b/queue-6.1/btrfs-move-btrfs_get_block_group-helper-out-of-disk-.patch new file mode 100644 index 00000000000..091063010a5 --- /dev/null +++ b/queue-6.1/btrfs-move-btrfs_get_block_group-helper-out-of-disk-.patch @@ -0,0 +1,71 @@ +From 338cc4cbd738e55ae3b8dbe02b50d0b4d80cee00 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Sep 2022 11:06:29 -0400 +Subject: btrfs: move btrfs_get_block_group helper out of disk-io.h + +From: Josef Bacik + +[ Upstream commit 51129b33d3911c7a36e643d47cf7c00fba3089fe ] + +This inline helper calls btrfs_fs_compat_ro(), which is defined in +another header. To avoid weird header dependency problems move this +helper into disk-io.c with the rest of the global root helpers. 
+ +Reviewed-by: Qu Wenruo +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/disk-io.c | 7 +++++++ + fs/btrfs/disk-io.h | 8 +------- + 2 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 6538f52262ca..a93fff4f2892 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -1167,6 +1167,13 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr) + return btrfs_global_root(fs_info, &key); + } + ++struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info) ++{ ++ if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) ++ return fs_info->block_group_root; ++ return btrfs_extent_root(fs_info, 0); ++} ++ + struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, + u64 objectid) + { +diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h +index 7322af63c0cc..ead1fa98ad3a 100644 +--- a/fs/btrfs/disk-io.h ++++ b/fs/btrfs/disk-io.h +@@ -75,6 +75,7 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *key); + struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr); + struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr); ++struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info); + + void btrfs_free_fs_info(struct btrfs_fs_info *fs_info); + int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); +@@ -106,13 +107,6 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root) + return NULL; + } + +-static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info) +-{ +- if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) +- return fs_info->block_group_root; +- return btrfs_extent_root(fs_info, 0); +-} +- + void btrfs_put_root(struct btrfs_root *root); + void 
btrfs_mark_buffer_dirty(struct extent_buffer *buf); + int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, +-- +2.35.1 + diff --git a/queue-6.1/btrfs-move-btrfs_print_data_csum_error-into-inode.c.patch b/queue-6.1/btrfs-move-btrfs_print_data_csum_error-into-inode.c.patch new file mode 100644 index 00000000000..a4df3fe236d --- /dev/null +++ b/queue-6.1/btrfs-move-btrfs_print_data_csum_error-into-inode.c.patch @@ -0,0 +1,100 @@ +From e2fe92702ae8bbcddb7edce39062555a7b4f0c64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Sep 2022 11:06:36 -0400 +Subject: btrfs: move btrfs_print_data_csum_error into inode.c + +From: Josef Bacik + +[ Upstream commit f60acad355cf14ccccf420e6ea0ddd6de87cb210 ] + +This isn't used outside of inode.c, there's no reason to define it in +btrfs_inode.h. Drop the inline and add __cold as it's for errors that +are not in any hot path. + +Reviewed-by: Qu Wenruo +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/btrfs_inode.h | 26 -------------------------- + fs/btrfs/inode.c | 26 ++++++++++++++++++++++++++ + 2 files changed, 26 insertions(+), 26 deletions(-) + +diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h +index 54c2ccb36b61..530a0ebfab3f 100644 +--- a/fs/btrfs/btrfs_inode.h ++++ b/fs/btrfs/btrfs_inode.h +@@ -410,30 +410,4 @@ static inline void btrfs_inode_split_flags(u64 inode_item_flags, + /* Array of bytes with variable length, hexadecimal format 0x1234 */ + #define CSUM_FMT "0x%*phN" + #define CSUM_FMT_VALUE(size, bytes) size, bytes +- +-static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, +- u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num) +-{ +- struct btrfs_root *root = inode->root; +- const u32 csum_size = root->fs_info->csum_size; +- +- /* Output minus 
objectid, which is more meaningful */ +- if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) +- btrfs_warn_rl(root->fs_info, +-"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", +- root->root_key.objectid, btrfs_ino(inode), +- logical_start, +- CSUM_FMT_VALUE(csum_size, csum), +- CSUM_FMT_VALUE(csum_size, csum_expected), +- mirror_num); +- else +- btrfs_warn_rl(root->fs_info, +-"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", +- root->root_key.objectid, btrfs_ino(inode), +- logical_start, +- CSUM_FMT_VALUE(csum_size, csum), +- CSUM_FMT_VALUE(csum_size, csum_expected), +- mirror_num); +-} +- + #endif +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index 0e516aefbf51..fafc8bc279c5 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -125,6 +125,32 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, + u64 ram_bytes, int compress_type, + int type); + ++static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode, ++ u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num) ++{ ++ struct btrfs_root *root = inode->root; ++ const u32 csum_size = root->fs_info->csum_size; ++ ++ /* Output without objectid, which is more meaningful */ ++ if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) { ++ btrfs_warn_rl(root->fs_info, ++"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", ++ root->root_key.objectid, btrfs_ino(inode), ++ logical_start, ++ CSUM_FMT_VALUE(csum_size, csum), ++ CSUM_FMT_VALUE(csum_size, csum_expected), ++ mirror_num); ++ } else { ++ btrfs_warn_rl(root->fs_info, ++"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", ++ root->root_key.objectid, btrfs_ino(inode), ++ logical_start, ++ CSUM_FMT_VALUE(csum_size, csum), ++ CSUM_FMT_VALUE(csum_size, csum_expected), ++ mirror_num); ++ } ++} ++ + /* + * btrfs_inode_lock - 
lock inode i_rwsem based on arguments passed + * +-- +2.35.1 + diff --git a/queue-6.1/btrfs-move-flush-related-definitions-to-space-info.h.patch b/queue-6.1/btrfs-move-flush-related-definitions-to-space-info.h.patch new file mode 100644 index 00000000000..1aaae4141b5 --- /dev/null +++ b/queue-6.1/btrfs-move-flush-related-definitions-to-space-info.h.patch @@ -0,0 +1,218 @@ +From 5bdaedb4ba20395cf5c08bb3845b74143f1ad833 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Sep 2022 11:06:35 -0400 +Subject: btrfs: move flush related definitions to space-info.h + +From: Josef Bacik + +[ Upstream commit f1e5c6185ca166cde0c7c2eeeab5d233ef315140 ] + +This code is used in space-info.c, move the definitions to space-info.h. + +Reviewed-by: Qu Wenruo +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.h | 59 ---------------------------------------- + fs/btrfs/delayed-inode.c | 1 + + fs/btrfs/inode-item.c | 1 + + fs/btrfs/props.c | 1 + + fs/btrfs/relocation.c | 1 + + fs/btrfs/space-info.h | 59 ++++++++++++++++++++++++++++++++++++++++ + 6 files changed, 63 insertions(+), 59 deletions(-) + +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 9e6d48ff4597..2abdf6bab66d 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -2920,65 +2920,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + + void btrfs_clear_space_info_full(struct btrfs_fs_info *info); + +-/* +- * Different levels for to flush space when doing space reservations. +- * +- * The higher the level, the more methods we try to reclaim space. 
+- */ +-enum btrfs_reserve_flush_enum { +- /* If we are in the transaction, we can't flush anything.*/ +- BTRFS_RESERVE_NO_FLUSH, +- +- /* +- * Flush space by: +- * - Running delayed inode items +- * - Allocating a new chunk +- */ +- BTRFS_RESERVE_FLUSH_LIMIT, +- +- /* +- * Flush space by: +- * - Running delayed inode items +- * - Running delayed refs +- * - Running delalloc and waiting for ordered extents +- * - Allocating a new chunk +- */ +- BTRFS_RESERVE_FLUSH_EVICT, +- +- /* +- * Flush space by above mentioned methods and by: +- * - Running delayed iputs +- * - Committing transaction +- * +- * Can be interrupted by a fatal signal. +- */ +- BTRFS_RESERVE_FLUSH_DATA, +- BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE, +- BTRFS_RESERVE_FLUSH_ALL, +- +- /* +- * Pretty much the same as FLUSH_ALL, but can also steal space from +- * global rsv. +- * +- * Can be interrupted by a fatal signal. +- */ +- BTRFS_RESERVE_FLUSH_ALL_STEAL, +-}; +- +-enum btrfs_flush_state { +- FLUSH_DELAYED_ITEMS_NR = 1, +- FLUSH_DELAYED_ITEMS = 2, +- FLUSH_DELAYED_REFS_NR = 3, +- FLUSH_DELAYED_REFS = 4, +- FLUSH_DELALLOC = 5, +- FLUSH_DELALLOC_WAIT = 6, +- FLUSH_DELALLOC_FULL = 7, +- ALLOC_CHUNK = 8, +- ALLOC_CHUNK_FORCE = 9, +- RUN_DELAYED_IPUTS = 10, +- COMMIT_TRANS = 11, +-}; +- + int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, + struct btrfs_block_rsv *rsv, + int nitems, bool use_global_rsv); +diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c +index cac5169eaf8d..a411f04a7b97 100644 +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -14,6 +14,7 @@ + #include "qgroup.h" + #include "locking.h" + #include "inode-item.h" ++#include "space-info.h" + + #define BTRFS_DELAYED_WRITEBACK 512 + #define BTRFS_DELAYED_BACKGROUND 128 +diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c +index 0eeb5ea87894..366f3a788c6a 100644 +--- a/fs/btrfs/inode-item.c ++++ b/fs/btrfs/inode-item.c +@@ -8,6 +8,7 @@ + #include "disk-io.h" + #include "transaction.h" + #include 
"print-tree.h" ++#include "space-info.h" + + struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, + int slot, const char *name, +diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c +index 055a631276ce..07f62e3ba6a5 100644 +--- a/fs/btrfs/props.c ++++ b/fs/btrfs/props.c +@@ -10,6 +10,7 @@ + #include "ctree.h" + #include "xattr.h" + #include "compression.h" ++#include "space-info.h" + + #define BTRFS_PROP_HANDLERS_HT_BITS 8 + static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); +diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c +index 666a37a0ee89..748c171a9488 100644 +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -27,6 +27,7 @@ + #include "subpage.h" + #include "zoned.h" + #include "inode-item.h" ++#include "space-info.h" + + /* + * Relocation overview +diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h +index ce66023a9eb8..7e17bb803436 100644 +--- a/fs/btrfs/space-info.h ++++ b/fs/btrfs/space-info.h +@@ -5,6 +5,65 @@ + + #include "volumes.h" + ++/* ++ * Different levels for to flush space when doing space reservations. ++ * ++ * The higher the level, the more methods we try to reclaim space. ++ */ ++enum btrfs_reserve_flush_enum { ++ /* If we are in the transaction, we can't flush anything.*/ ++ BTRFS_RESERVE_NO_FLUSH, ++ ++ /* ++ * Flush space by: ++ * - Running delayed inode items ++ * - Allocating a new chunk ++ */ ++ BTRFS_RESERVE_FLUSH_LIMIT, ++ ++ /* ++ * Flush space by: ++ * - Running delayed inode items ++ * - Running delayed refs ++ * - Running delalloc and waiting for ordered extents ++ * - Allocating a new chunk ++ */ ++ BTRFS_RESERVE_FLUSH_EVICT, ++ ++ /* ++ * Flush space by above mentioned methods and by: ++ * - Running delayed iputs ++ * - Committing transaction ++ * ++ * Can be interrupted by a fatal signal. 
++ */ ++ BTRFS_RESERVE_FLUSH_DATA, ++ BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE, ++ BTRFS_RESERVE_FLUSH_ALL, ++ ++ /* ++ * Pretty much the same as FLUSH_ALL, but can also steal space from ++ * global rsv. ++ * ++ * Can be interrupted by a fatal signal. ++ */ ++ BTRFS_RESERVE_FLUSH_ALL_STEAL, ++}; ++ ++enum btrfs_flush_state { ++ FLUSH_DELAYED_ITEMS_NR = 1, ++ FLUSH_DELAYED_ITEMS = 2, ++ FLUSH_DELAYED_REFS_NR = 3, ++ FLUSH_DELAYED_REFS = 4, ++ FLUSH_DELALLOC = 5, ++ FLUSH_DELALLOC_WAIT = 6, ++ FLUSH_DELALLOC_FULL = 7, ++ ALLOC_CHUNK = 8, ++ ALLOC_CHUNK_FORCE = 9, ++ RUN_DELAYED_IPUTS = 10, ++ COMMIT_TRANS = 11, ++}; ++ + struct btrfs_space_info { + spinlock_t lock; + +-- +2.35.1 + diff --git a/queue-6.1/btrfs-move-fs-wide-helpers-out-of-ctree.h.patch b/queue-6.1/btrfs-move-fs-wide-helpers-out-of-ctree.h.patch new file mode 100644 index 00000000000..6a75389bec5 --- /dev/null +++ b/queue-6.1/btrfs-move-fs-wide-helpers-out-of-ctree.h.patch @@ -0,0 +1,704 @@ +From ce1565843571a8604db5ebf3ac11d0b624d6b264 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 10:50:47 -0400 +Subject: btrfs: move fs wide helpers out of ctree.h + +From: Josef Bacik + +[ Upstream commit c7f13d428ea1bfe883f2741a9b5a5352d595eb09 ] + +We have several fs wide related helpers in ctree.h. The bulk of these +are the incompat flag test helpers, but there are things such as +btrfs_fs_closing() and the read only helpers that also aren't directly +related to the ctree code. Move these into a fs.h header, which will +serve as the location for file system wide related helpers. 
+ +Reviewed-by: Johannes Thumshirn +Reviewed-by: Anand Jain +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/Makefile | 2 +- + fs/btrfs/backref.c | 1 + + fs/btrfs/block-group.c | 1 + + fs/btrfs/ctree.h | 164 ------------------------------------- + fs/btrfs/disk-io.c | 1 + + fs/btrfs/extent-tree.c | 1 + + fs/btrfs/file-item.c | 1 + + fs/btrfs/file.c | 1 + + fs/btrfs/free-space-tree.c | 1 + + fs/btrfs/fs.c | 92 +++++++++++++++++++++ + fs/btrfs/fs.h | 85 +++++++++++++++++++ + fs/btrfs/inode.c | 1 + + fs/btrfs/ioctl.c | 1 + + fs/btrfs/props.c | 1 + + fs/btrfs/qgroup.c | 1 + + fs/btrfs/relocation.c | 1 + + fs/btrfs/scrub.c | 1 + + fs/btrfs/space-info.c | 1 + + fs/btrfs/super.c | 1 + + fs/btrfs/transaction.c | 1 + + fs/btrfs/tree-checker.c | 1 + + fs/btrfs/tree-log.c | 1 + + fs/btrfs/uuid-tree.c | 1 + + fs/btrfs/verity.c | 1 + + fs/btrfs/volumes.c | 1 + + fs/btrfs/zoned.c | 1 + + 26 files changed, 200 insertions(+), 165 deletions(-) + create mode 100644 fs/btrfs/fs.c + create mode 100644 fs/btrfs/fs.h + +diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile +index fa9ddcc9eb0b..eebb45c06485 100644 +--- a/fs/btrfs/Makefile ++++ b/fs/btrfs/Makefile +@@ -31,7 +31,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ + uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ + block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \ +- subpage.o tree-mod-log.o extent-io-tree.o ++ subpage.o tree-mod-log.o extent-io-tree.o fs.o + + btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o + btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o +diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c +index 18cf801ab590..cbab685fe373 100644 +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -15,6 +15,7 @@ + 
#include "locking.h" + #include "misc.h" + #include "tree-mod-log.h" ++#include "fs.h" + + /* Just an arbitrary number so we can be sure this happened */ + #define BACKREF_FOUND_SHARED 6 +diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c +index deebc8ddbd93..1fb8a4753735 100644 +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -17,6 +17,7 @@ + #include "discard.h" + #include "raid56.h" + #include "zoned.h" ++#include "fs.h" + + /* + * Return target flags in extended format or 0 if restripe for this chunk_type +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 2abdf6bab66d..329277a5c6e8 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3135,44 +3135,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *node, + struct extent_buffer *parent); +-static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) +-{ +- /* +- * Do it this way so we only ever do one test_bit in the normal case. +- */ +- if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { +- if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) +- return 2; +- return 1; +- } +- return 0; +-} +- +-/* +- * If we remount the fs to be R/O or umount the fs, the cleaner needn't do +- * anything except sleeping. This function is used to check the status of +- * the fs. +- * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, +- * since setting and checking for SB_RDONLY in the superblock's flags is not +- * atomic. 
+- */ +-static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) +-{ +- return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || +- btrfs_fs_closing(fs_info); +-} +- +-static inline void btrfs_set_sb_rdonly(struct super_block *sb) +-{ +- sb->s_flags |= SB_RDONLY; +- set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); +-} +- +-static inline void btrfs_clear_sb_rdonly(struct super_block *sb) +-{ +- sb->s_flags &= ~SB_RDONLY; +- clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); +-} + + /* root-item.c */ + int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, +@@ -3805,132 +3767,6 @@ do { \ + } while (0) + + +-/* compatibility and incompatibility defines */ +- +-#define btrfs_set_fs_incompat(__fs_info, opt) \ +- __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \ +- #opt) +- +-static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, +- u64 flag, const char* name) +-{ +- struct btrfs_super_block *disk_super; +- u64 features; +- +- disk_super = fs_info->super_copy; +- features = btrfs_super_incompat_flags(disk_super); +- if (!(features & flag)) { +- spin_lock(&fs_info->super_lock); +- features = btrfs_super_incompat_flags(disk_super); +- if (!(features & flag)) { +- features |= flag; +- btrfs_set_super_incompat_flags(disk_super, features); +- btrfs_info(fs_info, +- "setting incompat feature flag for %s (0x%llx)", +- name, flag); +- } +- spin_unlock(&fs_info->super_lock); +- } +-} +- +-#define btrfs_clear_fs_incompat(__fs_info, opt) \ +- __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \ +- #opt) +- +-static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, +- u64 flag, const char* name) +-{ +- struct btrfs_super_block *disk_super; +- u64 features; +- +- disk_super = fs_info->super_copy; +- features = btrfs_super_incompat_flags(disk_super); +- if (features & flag) { +- spin_lock(&fs_info->super_lock); +- features = btrfs_super_incompat_flags(disk_super); +- if 
(features & flag) { +- features &= ~flag; +- btrfs_set_super_incompat_flags(disk_super, features); +- btrfs_info(fs_info, +- "clearing incompat feature flag for %s (0x%llx)", +- name, flag); +- } +- spin_unlock(&fs_info->super_lock); +- } +-} +- +-#define btrfs_fs_incompat(fs_info, opt) \ +- __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) +- +-static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) +-{ +- struct btrfs_super_block *disk_super; +- disk_super = fs_info->super_copy; +- return !!(btrfs_super_incompat_flags(disk_super) & flag); +-} +- +-#define btrfs_set_fs_compat_ro(__fs_info, opt) \ +- __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \ +- #opt) +- +-static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, +- u64 flag, const char *name) +-{ +- struct btrfs_super_block *disk_super; +- u64 features; +- +- disk_super = fs_info->super_copy; +- features = btrfs_super_compat_ro_flags(disk_super); +- if (!(features & flag)) { +- spin_lock(&fs_info->super_lock); +- features = btrfs_super_compat_ro_flags(disk_super); +- if (!(features & flag)) { +- features |= flag; +- btrfs_set_super_compat_ro_flags(disk_super, features); +- btrfs_info(fs_info, +- "setting compat-ro feature flag for %s (0x%llx)", +- name, flag); +- } +- spin_unlock(&fs_info->super_lock); +- } +-} +- +-#define btrfs_clear_fs_compat_ro(__fs_info, opt) \ +- __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \ +- #opt) +- +-static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, +- u64 flag, const char *name) +-{ +- struct btrfs_super_block *disk_super; +- u64 features; +- +- disk_super = fs_info->super_copy; +- features = btrfs_super_compat_ro_flags(disk_super); +- if (features & flag) { +- spin_lock(&fs_info->super_lock); +- features = btrfs_super_compat_ro_flags(disk_super); +- if (features & flag) { +- features &= ~flag; +- btrfs_set_super_compat_ro_flags(disk_super, features); +- 
btrfs_info(fs_info, +- "clearing compat-ro feature flag for %s (0x%llx)", +- name, flag); +- } +- spin_unlock(&fs_info->super_lock); +- } +-} +- +-#define btrfs_fs_compat_ro(fs_info, opt) \ +- __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) +- +-static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) +-{ +- struct btrfs_super_block *disk_super; +- disk_super = fs_info->super_copy; +- return !!(btrfs_super_compat_ro_flags(disk_super) & flag); +-} +- + /* acl.c */ + #ifdef CONFIG_BTRFS_FS_POSIX_ACL + struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index a93fff4f2892..98d1cc51ba8f 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -43,6 +43,7 @@ + #include "space-info.h" + #include "zoned.h" + #include "subpage.h" ++#include "fs.h" + + #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ + BTRFS_HEADER_FLAG_RELOC |\ +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c +index 2801c991814f..bc010dbcb6b1 100644 +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -36,6 +36,7 @@ + #include "rcu-string.h" + #include "zoned.h" + #include "dev-replace.h" ++#include "fs.h" + + #undef SCRAMBLE_DELAYED_REFS + +diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c +index 6bb9fa961a6a..824ff54d8155 100644 +--- a/fs/btrfs/file-item.c ++++ b/fs/btrfs/file-item.c +@@ -16,6 +16,7 @@ + #include "volumes.h" + #include "print-tree.h" + #include "compression.h" ++#include "fs.h" + + #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) * 2) / \ +diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c +index 9bef8eaa074a..48b672220406 100644 +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -30,6 +30,7 @@ + #include "delalloc-space.h" + #include "reflink.h" + #include "subpage.h" ++#include "fs.h" + + static struct kmem_cache *btrfs_inode_defrag_cachep; + /* +diff --git 
a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c +index 367bcfcf68f5..bfc21eb8ec63 100644 +--- a/fs/btrfs/free-space-tree.c ++++ b/fs/btrfs/free-space-tree.c +@@ -11,6 +11,7 @@ + #include "free-space-tree.h" + #include "transaction.h" + #include "block-group.h" ++#include "fs.h" + + static int __add_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, +diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c +new file mode 100644 +index 000000000000..d4ba948eba56 +--- /dev/null ++++ b/fs/btrfs/fs.c +@@ -0,0 +1,92 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++#include "ctree.h" ++#include "fs.h" ++ ++void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name) ++{ ++ struct btrfs_super_block *disk_super; ++ u64 features; ++ ++ disk_super = fs_info->super_copy; ++ features = btrfs_super_incompat_flags(disk_super); ++ if (!(features & flag)) { ++ spin_lock(&fs_info->super_lock); ++ features = btrfs_super_incompat_flags(disk_super); ++ if (!(features & flag)) { ++ features |= flag; ++ btrfs_set_super_incompat_flags(disk_super, features); ++ btrfs_info(fs_info, ++ "setting incompat feature flag for %s (0x%llx)", ++ name, flag); ++ } ++ spin_unlock(&fs_info->super_lock); ++ } ++} ++ ++void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name) ++{ ++ struct btrfs_super_block *disk_super; ++ u64 features; ++ ++ disk_super = fs_info->super_copy; ++ features = btrfs_super_incompat_flags(disk_super); ++ if (features & flag) { ++ spin_lock(&fs_info->super_lock); ++ features = btrfs_super_incompat_flags(disk_super); ++ if (features & flag) { ++ features &= ~flag; ++ btrfs_set_super_incompat_flags(disk_super, features); ++ btrfs_info(fs_info, ++ "clearing incompat feature flag for %s (0x%llx)", ++ name, flag); ++ } ++ spin_unlock(&fs_info->super_lock); ++ } ++} ++ ++void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name) ++{ ++ struct 
btrfs_super_block *disk_super; ++ u64 features; ++ ++ disk_super = fs_info->super_copy; ++ features = btrfs_super_compat_ro_flags(disk_super); ++ if (!(features & flag)) { ++ spin_lock(&fs_info->super_lock); ++ features = btrfs_super_compat_ro_flags(disk_super); ++ if (!(features & flag)) { ++ features |= flag; ++ btrfs_set_super_compat_ro_flags(disk_super, features); ++ btrfs_info(fs_info, ++ "setting compat-ro feature flag for %s (0x%llx)", ++ name, flag); ++ } ++ spin_unlock(&fs_info->super_lock); ++ } ++} ++ ++void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name) ++{ ++ struct btrfs_super_block *disk_super; ++ u64 features; ++ ++ disk_super = fs_info->super_copy; ++ features = btrfs_super_compat_ro_flags(disk_super); ++ if (features & flag) { ++ spin_lock(&fs_info->super_lock); ++ features = btrfs_super_compat_ro_flags(disk_super); ++ if (features & flag) { ++ features &= ~flag; ++ btrfs_set_super_compat_ro_flags(disk_super, features); ++ btrfs_info(fs_info, ++ "clearing compat-ro feature flag for %s (0x%llx)", ++ name, flag); ++ } ++ spin_unlock(&fs_info->super_lock); ++ } ++} +diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h +new file mode 100644 +index 000000000000..8eda9ce0a904 +--- /dev/null ++++ b/fs/btrfs/fs.h +@@ -0,0 +1,85 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++#ifndef BTRFS_FS_H ++#define BTRFS_FS_H ++ ++/* Compatibility and incompatibility defines */ ++void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name); ++void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name); ++void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name); ++void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, ++ const char *name); ++ ++#define btrfs_set_fs_incompat(__fs_info, opt) \ ++ __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) ++ ++#define btrfs_clear_fs_incompat(__fs_info, opt) \ 
++ __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) ++ ++#define btrfs_fs_incompat(fs_info, opt) \ ++ __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) ++ ++#define btrfs_set_fs_compat_ro(__fs_info, opt) \ ++ __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) ++ ++#define btrfs_clear_fs_compat_ro(__fs_info, opt) \ ++ __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) ++ ++#define btrfs_fs_compat_ro(fs_info, opt) \ ++ __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) ++ ++static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) ++{ ++ struct btrfs_super_block *disk_super; ++ disk_super = fs_info->super_copy; ++ return !!(btrfs_super_incompat_flags(disk_super) & flag); ++} ++ ++static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) ++{ ++ struct btrfs_super_block *disk_super; ++ disk_super = fs_info->super_copy; ++ return !!(btrfs_super_compat_ro_flags(disk_super) & flag); ++} ++ ++static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) ++{ ++ /* Do it this way so we only ever do one test_bit in the normal case. */ ++ if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { ++ if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) ++ return 2; ++ return 1; ++ } ++ return 0; ++} ++ ++/* ++ * If we remount the fs to be R/O or umount the fs, the cleaner needn't do ++ * anything except sleeping. This function is used to check the status of ++ * the fs. ++ * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, ++ * since setting and checking for SB_RDONLY in the superblock's flags is not ++ * atomic. 
++ */ ++static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) ++{ ++ return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || ++ btrfs_fs_closing(fs_info); ++} ++ ++static inline void btrfs_set_sb_rdonly(struct super_block *sb) ++{ ++ sb->s_flags |= SB_RDONLY; ++ set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); ++} ++ ++static inline void btrfs_clear_sb_rdonly(struct super_block *sb) ++{ ++ sb->s_flags &= ~SB_RDONLY; ++ clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); ++} ++ ++#endif +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index fafc8bc279c5..cc996828e4a8 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -55,6 +55,7 @@ + #include "zoned.h" + #include "subpage.h" + #include "inode-item.h" ++#include "fs.h" + + struct btrfs_iget_args { + u64 ino; +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index fd1902573cde..8e3512be6fb1 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -50,6 +50,7 @@ + #include "delalloc-space.h" + #include "block-group.h" + #include "subpage.h" ++#include "fs.h" + + #ifdef CONFIG_64BIT + /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI +diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c +index 07f62e3ba6a5..ef17014221e2 100644 +--- a/fs/btrfs/props.c ++++ b/fs/btrfs/props.c +@@ -11,6 +11,7 @@ + #include "xattr.h" + #include "compression.h" + #include "space-info.h" ++#include "fs.h" + + #define BTRFS_PROP_HANDLERS_HT_BITS 8 + static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); +diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c +index b74105a10f16..e87a2f066f4d 100644 +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -24,6 +24,7 @@ + #include "block-group.h" + #include "sysfs.h" + #include "tree-mod-log.h" ++#include "fs.h" + + /* + * Helpers to access qgroup reservation +diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c +index 748c171a9488..9fc6c7ce7c42 100644 +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -28,6 +28,7 
@@ + #include "zoned.h" + #include "inode-item.h" + #include "space-info.h" ++#include "fs.h" + + /* + * Relocation overview +diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c +index 196c4c6ed1ed..3562834cc1bc 100644 +--- a/fs/btrfs/scrub.c ++++ b/fs/btrfs/scrub.c +@@ -21,6 +21,7 @@ + #include "raid56.h" + #include "block-group.h" + #include "zoned.h" ++#include "fs.h" + + /* + * This is only the first step towards a full-features scrub. It reads all +diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c +index f171bf875633..024e033883d0 100644 +--- a/fs/btrfs/space-info.c ++++ b/fs/btrfs/space-info.c +@@ -10,6 +10,7 @@ + #include "transaction.h" + #include "block-group.h" + #include "zoned.h" ++#include "fs.h" + + /* + * HOW DOES SPACE RESERVATION WORK +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index abfd7c897075..adb0ce19a13c 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -49,6 +49,7 @@ + #include "discard.h" + #include "qgroup.h" + #include "raid56.h" ++#include "fs.h" + #define CREATE_TRACE_POINTS + #include + +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index d1f1da6820fb..6cee7f2fa5ec 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -23,6 +23,7 @@ + #include "block-group.h" + #include "space-info.h" + #include "zoned.h" ++#include "fs.h" + + #define BTRFS_ROOT_TRANS_TAG 0 + +diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c +index 43f905ab0a18..862d67798de5 100644 +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -25,6 +25,7 @@ + #include "volumes.h" + #include "misc.h" + #include "btrfs_inode.h" ++#include "fs.h" + + /* + * Error message should follow the following format: +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index c3cf3dabe0b1..e294c38f9b19 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -21,6 +21,7 @@ + #include "space-info.h" + #include "zoned.h" + #include "inode-item.h" ++#include "fs.h" + + #define MAX_CONFLICT_INODES 10 + 
+diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c +index b458452a1aaf..2d7eb290fb9c 100644 +--- a/fs/btrfs/uuid-tree.c ++++ b/fs/btrfs/uuid-tree.c +@@ -9,6 +9,7 @@ + #include "transaction.h" + #include "disk-io.h" + #include "print-tree.h" ++#include "fs.h" + + + static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key) +diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c +index ee00e33c309e..ab0b39badbbe 100644 +--- a/fs/btrfs/verity.c ++++ b/fs/btrfs/verity.c +@@ -15,6 +15,7 @@ + #include "transaction.h" + #include "disk-io.h" + #include "locking.h" ++#include "fs.h" + + /* + * Implementation of the interface defined in struct fsverity_operations. +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index dba087ad40ea..762512be35e8 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -33,6 +33,7 @@ + #include "block-group.h" + #include "discard.h" + #include "zoned.h" ++#include "fs.h" + + static struct bio_set btrfs_bioset; + +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index c9e2b0c85309..2a7d856c232c 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -15,6 +15,7 @@ + #include "transaction.h" + #include "dev-replace.h" + #include "space-info.h" ++#include "fs.h" + + /* Maximum number of zones to report per blkdev_report_zones() call */ + #define BTRFS_REPORT_NR_ZONES 4096 +-- +2.35.1 + diff --git a/queue-6.1/btrfs-move-the-printk-helpers-out-of-ctree.h.patch b/queue-6.1/btrfs-move-the-printk-helpers-out-of-ctree.h.patch new file mode 100644 index 00000000000..7b65a963993 --- /dev/null +++ b/queue-6.1/btrfs-move-the-printk-helpers-out-of-ctree.h.patch @@ -0,0 +1,1006 @@ +From f8285fa41bee4b2346a277ca6da37205d2f81e7e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 10:50:49 -0400 +Subject: btrfs: move the printk helpers out of ctree.h + +From: Josef Bacik + +[ Upstream commit 9b569ea0be6fb27a4985acc9325896a3edc95ede ] + +We have a bunch of printk helpers that are in ctree.h. 
These have +nothing to do with ctree.c, so move them into their own header. +Subsequent patches will cleanup the printk helpers. + +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/backref.h | 1 + + fs/btrfs/check-integrity.c | 1 + + fs/btrfs/ctree.c | 1 + + fs/btrfs/ctree.h | 249 ---------------------------------- + fs/btrfs/delalloc-space.c | 1 + + fs/btrfs/delayed-inode.c | 1 + + fs/btrfs/delayed-ref.c | 1 + + fs/btrfs/dir-item.c | 1 + + fs/btrfs/extent-io-tree.c | 1 + + fs/btrfs/extent_map.c | 1 + + fs/btrfs/file-item.c | 1 + + fs/btrfs/free-space-cache.c | 1 + + fs/btrfs/free-space-tree.c | 1 + + fs/btrfs/fs.c | 1 + + fs/btrfs/inode-item.c | 1 + + fs/btrfs/lzo.c | 1 + + fs/btrfs/messages.h | 259 ++++++++++++++++++++++++++++++++++++ + fs/btrfs/ordered-data.c | 1 + + fs/btrfs/print-tree.c | 1 + + fs/btrfs/props.c | 1 + + fs/btrfs/raid56.c | 1 + + fs/btrfs/ref-verify.c | 1 + + fs/btrfs/reflink.c | 1 + + fs/btrfs/root-tree.c | 1 + + fs/btrfs/struct-funcs.c | 1 + + fs/btrfs/subpage.c | 1 + + fs/btrfs/super.c | 1 + + fs/btrfs/sysfs.c | 2 +- + fs/btrfs/tree-checker.c | 1 + + fs/btrfs/tree-log.h | 1 + + fs/btrfs/tree-mod-log.c | 1 + + fs/btrfs/ulist.c | 1 + + fs/btrfs/uuid-tree.c | 1 + + fs/btrfs/verity.c | 1 + + fs/btrfs/xattr.c | 1 + + fs/btrfs/zoned.h | 1 + + 36 files changed, 293 insertions(+), 250 deletions(-) + create mode 100644 fs/btrfs/messages.h + +diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h +index 8e69584d538d..e59236c3e5b7 100644 +--- a/fs/btrfs/backref.h ++++ b/fs/btrfs/backref.h +@@ -7,6 +7,7 @@ + #define BTRFS_BACKREF_H + + #include ++#include "messages.h" + #include "ulist.h" + #include "disk-io.h" + #include "extent_io.h" +diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c +index 98c6e5feab19..e8e1a92b30ac 100644 +--- 
a/fs/btrfs/check-integrity.c ++++ b/fs/btrfs/check-integrity.c +@@ -82,6 +82,7 @@ + #include + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "disk-io.h" + #include "transaction.h" +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index dcb510f38dda..49d487ca0ad4 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "disk-io.h" + #include "transaction.h" +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index d7869188f8b2..79835f2a2c30 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3444,179 +3444,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait); + char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, + u64 subvol_objectid); + +-static inline __printf(2, 3) __cold +-void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) +-{ +-} +- +-#ifdef CONFIG_PRINTK_INDEX +- +-#define btrfs_printk(fs_info, fmt, args...) \ +-do { \ +- printk_index_subsys_emit("%sBTRFS %s (device %s): ", NULL, fmt); \ +- _btrfs_printk(fs_info, fmt, ##args); \ +-} while (0) +- +-__printf(2, 3) +-__cold +-void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); +- +-#elif defined(CONFIG_PRINTK) +- +-#define btrfs_printk(fs_info, fmt, args...) \ +- _btrfs_printk(fs_info, fmt, ##args) +- +-__printf(2, 3) +-__cold +-void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); +- +-#else +- +-#define btrfs_printk(fs_info, fmt, args...) \ +- btrfs_no_printk(fs_info, fmt, ##args) +-#endif +- +-#define btrfs_emerg(fs_info, fmt, args...) \ +- btrfs_printk(fs_info, KERN_EMERG fmt, ##args) +-#define btrfs_alert(fs_info, fmt, args...) \ +- btrfs_printk(fs_info, KERN_ALERT fmt, ##args) +-#define btrfs_crit(fs_info, fmt, args...) \ +- btrfs_printk(fs_info, KERN_CRIT fmt, ##args) +-#define btrfs_err(fs_info, fmt, args...) 
\ +- btrfs_printk(fs_info, KERN_ERR fmt, ##args) +-#define btrfs_warn(fs_info, fmt, args...) \ +- btrfs_printk(fs_info, KERN_WARNING fmt, ##args) +-#define btrfs_notice(fs_info, fmt, args...) \ +- btrfs_printk(fs_info, KERN_NOTICE fmt, ##args) +-#define btrfs_info(fs_info, fmt, args...) \ +- btrfs_printk(fs_info, KERN_INFO fmt, ##args) +- +-/* +- * Wrappers that use printk_in_rcu +- */ +-#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args) +-#define btrfs_alert_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args) +-#define btrfs_crit_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args) +-#define btrfs_err_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args) +-#define btrfs_warn_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args) +-#define btrfs_notice_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args) +-#define btrfs_info_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args) +- +-/* +- * Wrappers that use a ratelimited printk_in_rcu +- */ +-#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args) +-#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args) +-#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args) +-#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args) +-#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args) +-#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args) +-#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) 
\ +- btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args) +- +-/* +- * Wrappers that use a ratelimited printk +- */ +-#define btrfs_emerg_rl(fs_info, fmt, args...) \ +- btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args) +-#define btrfs_alert_rl(fs_info, fmt, args...) \ +- btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args) +-#define btrfs_crit_rl(fs_info, fmt, args...) \ +- btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args) +-#define btrfs_err_rl(fs_info, fmt, args...) \ +- btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args) +-#define btrfs_warn_rl(fs_info, fmt, args...) \ +- btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args) +-#define btrfs_notice_rl(fs_info, fmt, args...) \ +- btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args) +-#define btrfs_info_rl(fs_info, fmt, args...) \ +- btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args) +- +-#if defined(CONFIG_DYNAMIC_DEBUG) +-#define btrfs_debug(fs_info, fmt, args...) \ +- _dynamic_func_call_no_desc(fmt, btrfs_printk, \ +- fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ +- _dynamic_func_call_no_desc(fmt, btrfs_printk_in_rcu, \ +- fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ +- _dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \ +- fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_rl(fs_info, fmt, args...) \ +- _dynamic_func_call_no_desc(fmt, btrfs_printk_ratelimited, \ +- fs_info, KERN_DEBUG fmt, ##args) +-#elif defined(DEBUG) +-#define btrfs_debug(fs_info, fmt, args...) \ +- btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_rl(fs_info, fmt, args...) 
\ +- btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args) +-#else +-#define btrfs_debug(fs_info, fmt, args...) \ +- btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ +- btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ +- btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) +-#define btrfs_debug_rl(fs_info, fmt, args...) \ +- btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) +-#endif +- +-#define btrfs_printk_in_rcu(fs_info, fmt, args...) \ +-do { \ +- rcu_read_lock(); \ +- btrfs_printk(fs_info, fmt, ##args); \ +- rcu_read_unlock(); \ +-} while (0) +- +-#define btrfs_no_printk_in_rcu(fs_info, fmt, args...) \ +-do { \ +- rcu_read_lock(); \ +- btrfs_no_printk(fs_info, fmt, ##args); \ +- rcu_read_unlock(); \ +-} while (0) +- +-#define btrfs_printk_ratelimited(fs_info, fmt, args...) \ +-do { \ +- static DEFINE_RATELIMIT_STATE(_rs, \ +- DEFAULT_RATELIMIT_INTERVAL, \ +- DEFAULT_RATELIMIT_BURST); \ +- if (__ratelimit(&_rs)) \ +- btrfs_printk(fs_info, fmt, ##args); \ +-} while (0) +- +-#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \ +-do { \ +- rcu_read_lock(); \ +- btrfs_printk_ratelimited(fs_info, fmt, ##args); \ +- rcu_read_unlock(); \ +-} while (0) +- +-#ifdef CONFIG_BTRFS_ASSERT +-void __cold btrfs_assertfail(const char *expr, const char *file, int line); +- +-#define ASSERT(expr) \ +- (likely(expr) ? 
(void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__)) +-#else +-#define ASSERT(expr) (void)(expr) +-#endif +- + #if BITS_PER_LONG == 32 + #define BTRFS_32BIT_MAX_FILE_SIZE (((u64)ULONG_MAX + 1) << PAGE_SHIFT) + /* +@@ -3673,88 +3500,12 @@ static inline unsigned long get_eb_page_index(unsigned long offset) + #define EXPORT_FOR_TESTS + #endif + +-void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info); +- +-__printf(5, 6) +-__cold +-void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function, +- unsigned int line, int errno, const char *fmt, ...); +- +-const char * __attribute_const__ btrfs_decode_error(int errno); +- +-__cold +-void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, +- const char *function, +- unsigned int line, int errno, bool first_hit); +- +-bool __cold abort_should_print_stack(int errno); +- +-/* +- * Call btrfs_abort_transaction as early as possible when an error condition is +- * detected, that way the exact stack trace is reported for some errors. +- */ +-#define btrfs_abort_transaction(trans, errno) \ +-do { \ +- bool first = false; \ +- /* Report first abort since mount */ \ +- if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ +- &((trans)->fs_info->fs_state))) { \ +- first = true; \ +- if (WARN(abort_should_print_stack(errno), \ +- KERN_DEBUG \ +- "BTRFS: Transaction aborted (error %d)\n", \ +- (errno))) { \ +- /* Stack trace printed. */ \ +- } else { \ +- btrfs_debug((trans)->fs_info, \ +- "Transaction aborted (error %d)", \ +- (errno)); \ +- } \ +- } \ +- __btrfs_abort_transaction((trans), __func__, \ +- __LINE__, (errno), first); \ +-} while (0) +- +-#ifdef CONFIG_PRINTK_INDEX +- +-#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) 
\ +-do { \ +- printk_index_subsys_emit( \ +- "BTRFS: error (device %s%s) in %s:%d: errno=%d %s", \ +- KERN_CRIT, fmt); \ +- __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ +- (errno), fmt, ##args); \ +-} while (0) +- +-#else +- +-#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \ +- __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ +- (errno), fmt, ##args) +- +-#endif +- + #define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \ + &(fs_info)->fs_state))) + #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ + (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ + &(fs_info)->fs_state))) + +-__printf(5, 6) +-__cold +-void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, +- unsigned int line, int errno, const char *fmt, ...); +-/* +- * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic +- * will panic(). Otherwise we BUG() here. +- */ +-#define btrfs_panic(fs_info, errno, fmt, args...) \ +-do { \ +- __btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \ +- BUG(); \ +-} while (0) +- +- + /* acl.c */ + #ifdef CONFIG_BTRFS_FS_POSIX_ACL + struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); +diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c +index 118b2e20b2e1..045545145a2b 100644 +--- a/fs/btrfs/delalloc-space.c ++++ b/fs/btrfs/delalloc-space.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + ++#include "messages.h" + #include "ctree.h" + #include "delalloc-space.h" + #include "block-rsv.h" +diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c +index a411f04a7b97..8cf5ee646147 100644 +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -6,6 +6,7 @@ + + #include + #include ++#include "messages.h" + #include "misc.h" + #include "delayed-inode.h" + #include "disk-io.h" +diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c +index 36a3debe9493..c775ff4f1cb1 100644 +--- a/fs/btrfs/delayed-ref.c ++++ 
b/fs/btrfs/delayed-ref.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "delayed-ref.h" + #include "transaction.h" +diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c +index 72fb2c518a2b..be5c1c2a8da5 100644 +--- a/fs/btrfs/dir-item.c ++++ b/fs/btrfs/dir-item.c +@@ -3,6 +3,7 @@ + * Copyright (C) 2007 Oracle. All rights reserved. + */ + ++#include "messages.h" + #include "ctree.h" + #include "disk-io.h" + #include "transaction.h" +diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c +index 09ae0e73e680..015851eb97fb 100644 +--- a/fs/btrfs/extent-io-tree.c ++++ b/fs/btrfs/extent-io-tree.c +@@ -2,6 +2,7 @@ + + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "extent-io-tree.h" + #include "btrfs_inode.h" +diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c +index 6092a4eedc92..ba8fb176601b 100644 +--- a/fs/btrfs/extent_map.c ++++ b/fs/btrfs/extent_map.c +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "volumes.h" + #include "extent_map.h" +diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c +index 824ff54d8155..675987e2d652 100644 +--- a/fs/btrfs/file-item.c ++++ b/fs/btrfs/file-item.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include "messages.h" + #include "misc.h" + #include "ctree.h" + #include "disk-io.h" +diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c +index f4023651dd68..f8ccc6cfd42b 100644 +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include "messages.h" + #include "misc.h" + #include "ctree.h" + #include "free-space-cache.h" +diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c +index bfc21eb8ec63..026214d74a02 100644 +--- a/fs/btrfs/free-space-tree.c ++++ b/fs/btrfs/free-space-tree.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include "messages.h" + #include 
"ctree.h" + #include "disk-io.h" + #include "locking.h" +diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c +index d4ba948eba56..a59504b59435 100644 +--- a/fs/btrfs/fs.c ++++ b/fs/btrfs/fs.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + ++#include "messages.h" + #include "ctree.h" + #include "fs.h" + +diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c +index 366f3a788c6a..b301d8e3df87 100644 +--- a/fs/btrfs/inode-item.c ++++ b/fs/btrfs/inode-item.c +@@ -3,6 +3,7 @@ + * Copyright (C) 2007 Oracle. All rights reserved. + */ + ++#include "messages.h" + #include "ctree.h" + #include "inode-item.h" + #include "disk-io.h" +diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c +index 89bc5f825e0a..6751874a3e69 100644 +--- a/fs/btrfs/lzo.c ++++ b/fs/btrfs/lzo.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include "messages.h" + #include "compression.h" + #include "ctree.h" + +diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h +new file mode 100644 +index 000000000000..ace5bb02820a +--- /dev/null ++++ b/fs/btrfs/messages.h +@@ -0,0 +1,259 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++#ifndef BTRFS_MESSAGES_H ++#define BTRFS_MESSAGES_H ++ ++#include ++ ++struct btrfs_fs_info; ++struct btrfs_trans_handle; ++ ++static inline __printf(2, 3) __cold ++void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) ++{ ++} ++ ++#ifdef CONFIG_PRINTK_INDEX ++ ++#define btrfs_printk(fs_info, fmt, args...) \ ++do { \ ++ printk_index_subsys_emit("%sBTRFS %s (device %s): ", NULL, fmt); \ ++ _btrfs_printk(fs_info, fmt, ##args); \ ++} while (0) ++ ++__printf(2, 3) ++__cold ++void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); ++ ++#elif defined(CONFIG_PRINTK) ++ ++#define btrfs_printk(fs_info, fmt, args...) \ ++ _btrfs_printk(fs_info, fmt, ##args) ++ ++__printf(2, 3) ++__cold ++void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); ++ ++#else ++ ++#define btrfs_printk(fs_info, fmt, args...) 
\ ++ btrfs_no_printk(fs_info, fmt, ##args) ++#endif ++ ++#define btrfs_emerg(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_EMERG fmt, ##args) ++#define btrfs_alert(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_ALERT fmt, ##args) ++#define btrfs_crit(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_CRIT fmt, ##args) ++#define btrfs_err(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_ERR fmt, ##args) ++#define btrfs_warn(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_WARNING fmt, ##args) ++#define btrfs_notice(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_NOTICE fmt, ##args) ++#define btrfs_info(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_INFO fmt, ##args) ++ ++/* ++ * Wrappers that use printk_in_rcu ++ */ ++#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args) ++#define btrfs_alert_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args) ++#define btrfs_crit_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args) ++#define btrfs_err_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args) ++#define btrfs_warn_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args) ++#define btrfs_notice_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args) ++#define btrfs_info_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args) ++ ++/* ++ * Wrappers that use a ratelimited printk_in_rcu ++ */ ++#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args) ++#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args) ++#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) 
\ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args) ++#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args) ++#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args) ++#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args) ++#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args) ++ ++/* ++ * Wrappers that use a ratelimited printk ++ */ ++#define btrfs_emerg_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args) ++#define btrfs_alert_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args) ++#define btrfs_crit_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args) ++#define btrfs_err_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args) ++#define btrfs_warn_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args) ++#define btrfs_notice_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args) ++#define btrfs_info_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args) ++ ++#if defined(CONFIG_DYNAMIC_DEBUG) ++#define btrfs_debug(fs_info, fmt, args...) \ ++ _dynamic_func_call_no_desc(fmt, btrfs_printk, \ ++ fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ ++ _dynamic_func_call_no_desc(fmt, btrfs_printk_in_rcu, \ ++ fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ ++ _dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \ ++ fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_rl(fs_info, fmt, args...) 
\ ++ _dynamic_func_call_no_desc(fmt, btrfs_printk_ratelimited, \ ++ fs_info, KERN_DEBUG fmt, ##args) ++#elif defined(DEBUG) ++#define btrfs_debug(fs_info, fmt, args...) \ ++ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_rl(fs_info, fmt, args...) \ ++ btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args) ++#else ++#define btrfs_debug(fs_info, fmt, args...) \ ++ btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ ++ btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ ++ btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) ++#define btrfs_debug_rl(fs_info, fmt, args...) \ ++ btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) ++#endif ++ ++#define btrfs_printk_in_rcu(fs_info, fmt, args...) \ ++do { \ ++ rcu_read_lock(); \ ++ btrfs_printk(fs_info, fmt, ##args); \ ++ rcu_read_unlock(); \ ++} while (0) ++ ++#define btrfs_no_printk_in_rcu(fs_info, fmt, args...) \ ++do { \ ++ rcu_read_lock(); \ ++ btrfs_no_printk(fs_info, fmt, ##args); \ ++ rcu_read_unlock(); \ ++} while (0) ++ ++#define btrfs_printk_ratelimited(fs_info, fmt, args...) \ ++do { \ ++ static DEFINE_RATELIMIT_STATE(_rs, \ ++ DEFAULT_RATELIMIT_INTERVAL, \ ++ DEFAULT_RATELIMIT_BURST); \ ++ if (__ratelimit(&_rs)) \ ++ btrfs_printk(fs_info, fmt, ##args); \ ++} while (0) ++ ++#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \ ++do { \ ++ rcu_read_lock(); \ ++ btrfs_printk_ratelimited(fs_info, fmt, ##args); \ ++ rcu_read_unlock(); \ ++} while (0) ++ ++#ifdef CONFIG_BTRFS_ASSERT ++void __cold btrfs_assertfail(const char *expr, const char *file, int line); ++ ++#define ASSERT(expr) \ ++ (likely(expr) ? 
(void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__)) ++#else ++#define ASSERT(expr) (void)(expr) ++#endif ++ ++void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info); ++ ++__printf(5, 6) ++__cold ++void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function, ++ unsigned int line, int errno, const char *fmt, ...); ++ ++const char * __attribute_const__ btrfs_decode_error(int errno); ++ ++__cold ++void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, ++ const char *function, ++ unsigned int line, int errno, bool first_hit); ++ ++bool __cold abort_should_print_stack(int errno); ++ ++/* ++ * Call btrfs_abort_transaction as early as possible when an error condition is ++ * detected, that way the exact stack trace is reported for some errors. ++ */ ++#define btrfs_abort_transaction(trans, errno) \ ++do { \ ++ bool first = false; \ ++ /* Report first abort since mount */ \ ++ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ ++ &((trans)->fs_info->fs_state))) { \ ++ first = true; \ ++ if (WARN(abort_should_print_stack(errno), \ ++ KERN_DEBUG \ ++ "BTRFS: Transaction aborted (error %d)\n", \ ++ (errno))) { \ ++ /* Stack trace printed. */ \ ++ } else { \ ++ btrfs_debug((trans)->fs_info, \ ++ "Transaction aborted (error %d)", \ ++ (errno)); \ ++ } \ ++ } \ ++ __btrfs_abort_transaction((trans), __func__, \ ++ __LINE__, (errno), first); \ ++} while (0) ++ ++#ifdef CONFIG_PRINTK_INDEX ++ ++#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \ ++do { \ ++ printk_index_subsys_emit( \ ++ "BTRFS: error (device %s%s) in %s:%d: errno=%d %s", \ ++ KERN_CRIT, fmt); \ ++ __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ ++ (errno), fmt, ##args); \ ++} while (0) ++ ++#else ++ ++#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) 
\ ++ __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ ++ (errno), fmt, ##args) ++ ++#endif ++ ++__printf(5, 6) ++__cold ++void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, ++ unsigned int line, int errno, const char *fmt, ...); ++/* ++ * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic ++ * will panic(). Otherwise we BUG() here. ++ */ ++#define btrfs_panic(fs_info, errno, fmt, args...) \ ++do { \ ++ __btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \ ++ BUG(); \ ++} while (0) ++ ++#endif +diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c +index e54f8280031f..cf6b2a466e59 100644 +--- a/fs/btrfs/ordered-data.c ++++ b/fs/btrfs/ordered-data.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include "messages.h" + #include "misc.h" + #include "ctree.h" + #include "transaction.h" +diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c +index dd8777872143..708facaede2c 100644 +--- a/fs/btrfs/print-tree.c ++++ b/fs/btrfs/print-tree.c +@@ -3,6 +3,7 @@ + * Copyright (C) 2007 Oracle. All rights reserved. 
+ */ + ++#include "messages.h" + #include "ctree.h" + #include "disk-io.h" + #include "print-tree.h" +diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c +index ef17014221e2..6e11eda7acd4 100644 +--- a/fs/btrfs/props.c ++++ b/fs/btrfs/props.c +@@ -4,6 +4,7 @@ + */ + + #include ++#include "messages.h" + #include "props.h" + #include "btrfs_inode.h" + #include "transaction.h" +diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c +index 82c8e991300e..d047032a5d46 100644 +--- a/fs/btrfs/raid56.c ++++ b/fs/btrfs/raid56.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include "messages.h" + #include "misc.h" + #include "ctree.h" + #include "disk-io.h" +diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c +index a248f46cfe72..f7535b8b62f5 100644 +--- a/fs/btrfs/ref-verify.c ++++ b/fs/btrfs/ref-verify.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "disk-io.h" + #include "locking.h" +diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c +index f50586ff85c8..6179864de6e7 100644 +--- a/fs/btrfs/reflink.c ++++ b/fs/btrfs/reflink.c +@@ -2,6 +2,7 @@ + + #include + #include ++#include "messages.h" + #include "compression.h" + #include "ctree.h" + #include "delalloc-space.h" +diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c +index e1f599d7a916..44c8c8ad0a16 100644 +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "transaction.h" + #include "disk-io.h" +diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c +index 12455b2b41de..6ba16c018d7f 100644 +--- a/fs/btrfs/struct-funcs.c ++++ b/fs/btrfs/struct-funcs.c +@@ -5,6 +5,7 @@ + + #include + ++#include "messages.h" + #include "ctree.h" + + static bool check_setget_bounds(const struct extent_buffer *eb, +diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c +index 9a176af847d7..dd46b978ac2c 100644 +--- a/fs/btrfs/subpage.c ++++ b/fs/btrfs/subpage.c +@@ -1,6 
+1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + + #include ++#include "messages.h" + #include "ctree.h" + #include "subpage.h" + #include "btrfs_inode.h" +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index 7cdf27d807be..501aa4b8d8b5 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include "messages.h" + #include "delayed-inode.h" + #include "ctree.h" + #include "disk-io.h" +diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c +index 74fef1f49c35..693b139d17da 100644 +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -10,7 +10,7 @@ + #include + #include + #include +- ++#include "messages.h" + #include "ctree.h" + #include "discard.h" + #include "disk-io.h" +diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c +index 862d67798de5..fa9536110d69 100644 +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "tree-checker.h" + #include "disk-io.h" +diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h +index aed1e05e9879..f5770829d075 100644 +--- a/fs/btrfs/tree-log.h ++++ b/fs/btrfs/tree-log.h +@@ -6,6 +6,7 @@ + #ifndef BTRFS_TREE_LOG_H + #define BTRFS_TREE_LOG_H + ++#include "messages.h" + #include "ctree.h" + #include "transaction.h" + +diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c +index 8a3a14686d3e..bf894de47731 100644 +--- a/fs/btrfs/tree-mod-log.c ++++ b/fs/btrfs/tree-mod-log.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + ++#include "messages.h" + #include "tree-mod-log.h" + #include "disk-io.h" + +diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c +index 3374c9e9be67..f2f20c8d84aa 100644 +--- a/fs/btrfs/ulist.c ++++ b/fs/btrfs/ulist.c +@@ -5,6 +5,7 @@ + */ + + #include ++#include "messages.h" + #include "ulist.h" + #include "ctree.h" + +diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c +index 2d7eb290fb9c..190f752a2e10 100644 +--- 
a/fs/btrfs/uuid-tree.c ++++ b/fs/btrfs/uuid-tree.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "transaction.h" + #include "disk-io.h" +diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c +index ab0b39badbbe..35445855df4d 100644 +--- a/fs/btrfs/verity.c ++++ b/fs/btrfs/verity.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "btrfs_inode.h" + #include "transaction.h" +diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c +index 5bb8d8c86311..d12903f01f83 100644 +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include "messages.h" + #include "ctree.h" + #include "btrfs_inode.h" + #include "transaction.h" +diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h +index 8bd16d40b7c6..f43990985d80 100644 +--- a/fs/btrfs/zoned.h ++++ b/fs/btrfs/zoned.h +@@ -5,6 +5,7 @@ + + #include + #include ++#include "messages.h" + #include "volumes.h" + #include "disk-io.h" + #include "block-group.h" +-- +2.35.1 + diff --git a/queue-6.1/btrfs-rename-struct-funcs.c-to-accessors.c.patch b/queue-6.1/btrfs-rename-struct-funcs.c-to-accessors.c.patch new file mode 100644 index 00000000000..0c0d7ff711b --- /dev/null +++ b/queue-6.1/btrfs-rename-struct-funcs.c-to-accessors.c.patch @@ -0,0 +1,57 @@ +From 6861093cb95084bdcaf3eec2a90aa65208d1f876 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 10:50:58 -0400 +Subject: btrfs: rename struct-funcs.c to accessors.c + +From: Josef Bacik + +[ Upstream commit 818fe33aed42ddd5052171328a3f708e98357e10 ] + +Rename struct-funcs.c to accessors.c so we can move the item accessors +out of ctree.h. accessors.c is a better description of the code that is +contained in these files. 
+ +Reviewed-by: Johannes Thumshirn +Reviewed-by: Anand Jain +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/Makefile | 2 +- + fs/btrfs/{struct-funcs.c => accessors.c} | 1 - + 2 files changed, 1 insertion(+), 2 deletions(-) + rename fs/btrfs/{struct-funcs.c => accessors.c} (99%) + +diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile +index eebb45c06485..76f90dcfb14d 100644 +--- a/fs/btrfs/Makefile ++++ b/fs/btrfs/Makefile +@@ -24,7 +24,7 @@ obj-$(CONFIG_BTRFS_FS) := btrfs.o + btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + file-item.o inode-item.o disk-io.o \ + transaction.o inode.o file.o tree-defrag.o \ +- extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ ++ extent_map.o sysfs.o accessors.o xattr.o ordered-data.o \ + extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ + export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ + compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ +diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/accessors.c +similarity index 99% +rename from fs/btrfs/struct-funcs.c +rename to fs/btrfs/accessors.c +index 6ba16c018d7f..118bfd1c0e3e 100644 +--- a/fs/btrfs/struct-funcs.c ++++ b/fs/btrfs/accessors.c +@@ -4,7 +4,6 @@ + */ + + #include +- + #include "messages.h" + #include "ctree.h" + +-- +2.35.1 + diff --git a/queue-6.1/btrfs-rename-tree-defrag.c-to-defrag.c.patch b/queue-6.1/btrfs-rename-tree-defrag.c-to-defrag.c.patch new file mode 100644 index 00000000000..6435c3e364c --- /dev/null +++ b/queue-6.1/btrfs-rename-tree-defrag.c-to-defrag.c.patch @@ -0,0 +1,46 @@ +From 77ecdb1cdc88d28e19778e517e54aa7a6df2c7f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Oct 2022 15:08:22 -0400 +Subject: btrfs: rename tree-defrag.c to defrag.c + +From: Josef Bacik + +[ Upstream commit 
778dd695dd4d5a21eff07bb1570b570da69dfbd9 ] + +This currently has only one helper in it, and it's for tree based +defrag. We have the various defrag code in 3 different places, so +rename this to defrag.c. Followup patches will move the code into this +new file. + +Reviewed-by: Johannes Thumshirn +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: db0a4a7b8e95 ("btrfs: fix an error handling path in btrfs_defrag_leaves()") +Signed-off-by: Sasha Levin +--- + fs/btrfs/Makefile | 2 +- + fs/btrfs/{tree-defrag.c => defrag.c} | 0 + 2 files changed, 1 insertion(+), 1 deletion(-) + rename fs/btrfs/{tree-defrag.c => defrag.c} (100%) + +diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile +index 76f90dcfb14d..a7885fff9105 100644 +--- a/fs/btrfs/Makefile ++++ b/fs/btrfs/Makefile +@@ -23,7 +23,7 @@ obj-$(CONFIG_BTRFS_FS) := btrfs.o + + btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + file-item.o inode-item.o disk-io.o \ +- transaction.o inode.o file.o tree-defrag.o \ ++ transaction.o inode.o file.o defrag.o \ + extent_map.o sysfs.o accessors.o xattr.o ordered-data.o \ + extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ + export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ +diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/defrag.c +similarity index 100% +rename from fs/btrfs/tree-defrag.c +rename to fs/btrfs/defrag.c +-- +2.35.1 + diff --git a/queue-6.1/caif-fix-memory-leak-in-cfctrl_linkup_request.patch b/queue-6.1/caif-fix-memory-leak-in-cfctrl_linkup_request.patch new file mode 100644 index 00000000000..5a16cfe2617 --- /dev/null +++ b/queue-6.1/caif-fix-memory-leak-in-cfctrl_linkup_request.patch @@ -0,0 +1,47 @@ +From 7619b63f606158db8916df6cf083e2bc54fbdaf2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Jan 2023 14:51:46 +0800 +Subject: caif: fix memory leak in cfctrl_linkup_request() + +From: Zhengchao Shao + +[ Upstream commit 
fe69230f05897b3de758427b574fc98025dfc907 ] + +When linktype is unknown or kzalloc failed in cfctrl_linkup_request(), +pkt is not released. Add release process to error path. + +Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack") +Fixes: 8d545c8f958f ("caif: Disconnect without waiting for response") +Signed-off-by: Zhengchao Shao +Reviewed-by: Jiri Pirko +Link: https://lore.kernel.org/r/20230104065146.1153009-1-shaozhengchao@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/caif/cfctrl.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c +index cc405d8c7c30..8480684f2762 100644 +--- a/net/caif/cfctrl.c ++++ b/net/caif/cfctrl.c +@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer, + default: + pr_warn("Request setup of bad link type = %d\n", + param->linktype); ++ cfpkt_destroy(pkt); + return -EINVAL; + } + req = kzalloc(sizeof(*req), GFP_KERNEL); +- if (!req) ++ if (!req) { ++ cfpkt_destroy(pkt); + return -ENOMEM; ++ } ++ + req->client_layer = user_layer; + req->cmd = CFCTRL_CMD_LINK_SETUP; + req->param = *param; +-- +2.35.1 + diff --git a/queue-6.1/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch b/queue-6.1/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch new file mode 100644 index 00000000000..0ef9e276f23 --- /dev/null +++ b/queue-6.1/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch @@ -0,0 +1,85 @@ +From 088bb22996bfc361116a6d54212a1ed887d47b94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Nov 2022 10:43:21 +0800 +Subject: ceph: switch to vfs_inode_has_locks() to fix file lock bug + +From: Xiubo Li + +[ Upstream commit 461ab10ef7e6ea9b41a0571a7fc6a72af9549a3c ] + +For the POSIX locks they are using the same owner, which is the +thread id. And multiple POSIX locks could be merged into single one, +so when checking whether the 'file' has locks may fail. 
+ +For a file where some openers use locking and others don't is a +really odd usage pattern though. Locks are like stoplights -- they +only work if everyone pays attention to them. + +Just switch ceph_get_caps() to check whether any locks are set on +the inode. If there are POSIX/OFD/FLOCK locks on the file at the +time, we should set CHECK_FILELOCK, regardless of what fd was used +to set the lock. + +Fixes: ff5d913dfc71 ("ceph: return -EIO if read/write against filp that lost file locks") +Signed-off-by: Xiubo Li +Reviewed-by: Jeff Layton +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/caps.c | 2 +- + fs/ceph/locks.c | 4 ---- + fs/ceph/super.h | 1 - + 3 files changed, 1 insertion(+), 6 deletions(-) + +diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c +index e54814d0c2f7..cd69bf267d1b 100644 +--- a/fs/ceph/caps.c ++++ b/fs/ceph/caps.c +@@ -2915,7 +2915,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got + + while (true) { + flags &= CEPH_FILE_MODE_MASK; +- if (atomic_read(&fi->num_locks)) ++ if (vfs_inode_has_locks(inode)) + flags |= CHECK_FILELOCK; + _got = 0; + ret = try_get_cap_refs(inode, need, want, endoff, +diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c +index 3e2843e86e27..b191426bf880 100644 +--- a/fs/ceph/locks.c ++++ b/fs/ceph/locks.c +@@ -32,18 +32,14 @@ void __init ceph_flock_init(void) + + static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) + { +- struct ceph_file_info *fi = dst->fl_file->private_data; + struct inode *inode = file_inode(dst->fl_file); + atomic_inc(&ceph_inode(inode)->i_filelock_ref); +- atomic_inc(&fi->num_locks); + } + + static void ceph_fl_release_lock(struct file_lock *fl) + { +- struct ceph_file_info *fi = fl->fl_file->private_data; + struct inode *inode = file_inode(fl->fl_file); + struct ceph_inode_info *ci = ceph_inode(inode); +- atomic_dec(&fi->num_locks); + if (atomic_dec_and_test(&ci->i_filelock_ref)) { + /* clear error 
when all locks are released */ + spin_lock(&ci->i_ceph_lock); +diff --git a/fs/ceph/super.h b/fs/ceph/super.h +index 40630e6f691c..ae4126f63410 100644 +--- a/fs/ceph/super.h ++++ b/fs/ceph/super.h +@@ -788,7 +788,6 @@ struct ceph_file_info { + struct list_head rw_contexts; + + u32 filp_gen; +- atomic_t num_locks; + }; + + struct ceph_dir_file_info { +-- +2.35.1 + diff --git a/queue-6.1/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch b/queue-6.1/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch new file mode 100644 index 00000000000..180ad39296a --- /dev/null +++ b/queue-6.1/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch @@ -0,0 +1,39 @@ +From 9e3123e133b7c166ec85574cd6a611db1220a2c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Jan 2023 12:53:35 +0300 +Subject: drivers/net/bonding/bond_3ad: return when there's no aggregator + +From: Daniil Tatianin + +[ Upstream commit 9c807965483f42df1d053b7436eedd6cf28ece6f ] + +Otherwise we would dereference a NULL aggregator pointer when calling +__set_agg_ports_ready on the line below. + +Found by Linux Verification Center (linuxtesting.org) with the SVACE +static analysis tool. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Daniil Tatianin +Reviewed-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_3ad.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index e58a1e0cadd2..9270977e6c7f 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -1540,6 +1540,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) + slave_err(bond->dev, port->slave->dev, + "Port %d did not find a suitable aggregator\n", + port->actor_port_number); ++ return; + } + } + /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE +-- +2.35.1 + diff --git a/queue-6.1/drm-amd-display-report-to-acpi-video-if-no-panels-we.patch b/queue-6.1/drm-amd-display-report-to-acpi-video-if-no-panels-we.patch new file mode 100644 index 00000000000..f50444a4d9e --- /dev/null +++ b/queue-6.1/drm-amd-display-report-to-acpi-video-if-no-panels-we.patch @@ -0,0 +1,48 @@ +From 12bb6676eb59e215076fa8072b5bbef98fa55deb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Dec 2022 10:42:06 -0600 +Subject: drm/amd/display: Report to ACPI video if no panels were found + +From: Mario Limonciello + +[ Upstream commit c573e240609ff781a0246c0c8c8351abd0475287 ] + +On desktop APUs amdgpu doesn't create a native backlight device +as no eDP panels are found. However if the BIOS has reported +backlight control methods in the ACPI tables then an acpi_video0 +backlight device will be made 8 seconds after boot. + +This has manifested in a power slider on a number of desktop APUs +ranging from Ryzen 5000 through Ryzen 7000 on various motherboard +manufacturers. To avoid this, report to the acpi video detection +that the system does not have any panel connected in the native +driver. + +Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786 +Reported-by: Hans de Goede +Signed-off-by: Mario Limonciello +Reviewed-by: Hans de Goede +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index c2c26fbea512..6f1cc5ce4c7a 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -4372,6 +4372,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) + amdgpu_set_panel_orientation(&aconnector->base); + } + ++ /* If we didn't find a panel, notify the acpi video detection */ ++ if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0) ++ acpi_video_report_nolcd(); ++ + /* Software is initialized. Now we can register interrupt handlers. */ + switch (adev->asic_type) { + #if defined(CONFIG_DRM_AMD_DC_SI) +-- +2.35.1 + diff --git a/queue-6.1/drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch b/queue-6.1/drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch new file mode 100644 index 00000000000..8665bbd236d --- /dev/null +++ b/queue-6.1/drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch @@ -0,0 +1,75 @@ +From ef451fff619412a1d8467456e5ed469e21d46c48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 10 Dec 2022 02:51:19 -0500 +Subject: drm/amdgpu: Fix size validation for non-exclusive domains (v4) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Luben Tuikov + +[ Upstream commit 7554886daa31eacc8e7fac9e15bbce67d10b8f1f ] + +Fix amdgpu_bo_validate_size() to check whether the TTM domain manager for the +requested memory exists, else we get a kernel oops when dereferencing "man". + +v2: Make the patch standalone, i.e. not dependent on local patches. +v3: Preserve old behaviour and just check that the manager pointer is not + NULL. +v4: Complain if GTT domain requested and it is uninitialized--most likely a + bug. 
+ +Cc: Alex Deucher +Cc: Christian König +Cc: AMD Graphics +Signed-off-by: Luben Tuikov +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +index 3df13d841e4d..3be3cba3a16d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +@@ -446,27 +446,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, + + /* + * If GTT is part of requested domains the check must succeed to +- * allow fall back to GTT ++ * allow fall back to GTT. + */ + if (domain & AMDGPU_GEM_DOMAIN_GTT) { + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + +- if (size < man->size) ++ if (man && size < man->size) + return true; +- else +- goto fail; +- } +- +- if (domain & AMDGPU_GEM_DOMAIN_VRAM) { ++ else if (!man) ++ WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); ++ goto fail; ++ } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + +- if (size < man->size) ++ if (man && size < man->size) + return true; +- else +- goto fail; ++ goto fail; + } + +- + /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ + return true; + +-- +2.35.1 + diff --git a/queue-6.1/drm-amdkfd-fix-double-release-compute-pasid.patch b/queue-6.1/drm-amdkfd-fix-double-release-compute-pasid.patch new file mode 100644 index 00000000000..bede2a846b6 --- /dev/null +++ b/queue-6.1/drm-amdkfd-fix-double-release-compute-pasid.patch @@ -0,0 +1,181 @@ +From 04227c837c494c27d0db63b349e75d35db5b092f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Dec 2022 00:50:03 -0500 +Subject: drm/amdkfd: Fix double release compute pasid + +From: Philip Yang + +[ Upstream commit 1a799c4c190ea9f0e81028e3eb3037ed0ab17ff5 ] + 
+If kfd_process_device_init_vm returns failure after vm is converted to +compute vm and vm->pasid set to compute pasid, KFD will not take +pdd->drm_file reference. As a result, the drm close file handler may be +called to release the compute pasid before the KFD process destroy worker +releases the same pasid and sets vm->pasid to zero; this generates the +WARNING backtrace and NULL pointer access below. + +Add helper amdgpu_amdkfd_gpuvm_set_vm_pasid and call it at the last step +of kfd_process_device_init_vm, to ensure vm pasid is the original pasid +if acquiring vm failed or is the compute pasid with pdd->drm_file +reference taken to avoid a double release of the same pasid. + + amdgpu: Failed to create process VM object + ida_free called for id=32770 which is not allocated. + WARNING: CPU: 57 PID: 72542 at ../lib/idr.c:522 ida_free+0x96/0x140 + RIP: 0010:ida_free+0x96/0x140 + Call Trace: + amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu] + amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu] + drm_file_free.part.13+0x216/0x270 [drm] + drm_close_helper.isra.14+0x60/0x70 [drm] + drm_release+0x6e/0xf0 [drm] + __fput+0xcc/0x280 + ____fput+0xe/0x20 + task_work_run+0x96/0xc0 + do_exit+0x3d0/0xc10 + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + RIP: 0010:ida_free+0x76/0x140 + Call Trace: + amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu] + amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu] + drm_file_free.part.13+0x216/0x270 [drm] + drm_close_helper.isra.14+0x60/0x70 [drm] + drm_release+0x6e/0xf0 [drm] + __fput+0xcc/0x280 + ____fput+0xe/0x20 + task_work_run+0x96/0xc0 + do_exit+0x3d0/0xc10 + +Signed-off-by: Philip Yang +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +- + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 39 +++++++++++++------ + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 ++++-- + 3 files changed, 40 insertions(+), 15 deletions(-) + +diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index 647220a8762d..30f145dc8724 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -265,8 +265,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_ + (&((struct amdgpu_fpriv *) \ + ((struct drm_file *)(drm_priv))->driver_priv)->vm) + ++int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, ++ struct file *filp, u32 pasid); + int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, +- struct file *filp, u32 pasid, ++ struct file *filp, + void **process_info, + struct dma_fence **ef); + void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index fe87b3402f06..29f045079a3e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -1473,10 +1473,9 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) + amdgpu_bo_unreserve(bo); + } + +-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, +- struct file *filp, u32 pasid, +- void **process_info, +- struct dma_fence **ef) ++int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, ++ struct file *filp, u32 pasid) ++ + { + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; +@@ -1487,10 +1486,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, + return ret; + avm = &drv_priv->vm; + +- /* Already a compute VM? */ +- if (avm->process_info) +- return -EINVAL; +- + /* Free the original amdgpu allocated pasid, + * will be replaced with kfd allocated pasid. 
+ */ +@@ -1499,14 +1494,36 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, + amdgpu_vm_set_pasid(adev, avm, 0); + } + +- /* Convert VM into a compute VM */ +- ret = amdgpu_vm_make_compute(adev, avm); ++ ret = amdgpu_vm_set_pasid(adev, avm, pasid); + if (ret) + return ret; + +- ret = amdgpu_vm_set_pasid(adev, avm, pasid); ++ return 0; ++} ++ ++int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, ++ struct file *filp, ++ void **process_info, ++ struct dma_fence **ef) ++{ ++ struct amdgpu_fpriv *drv_priv; ++ struct amdgpu_vm *avm; ++ int ret; ++ ++ ret = amdgpu_file_to_fpriv(filp, &drv_priv); + if (ret) + return ret; ++ avm = &drv_priv->vm; ++ ++ /* Already a compute VM? */ ++ if (avm->process_info) ++ return -EINVAL; ++ ++ /* Convert VM into a compute VM */ ++ ret = amdgpu_vm_make_compute(adev, avm); ++ if (ret) ++ return ret; ++ + /* Initialize KFD part of the VM and process info */ + ret = init_kfd_vm(avm, process_info, ef); + if (ret) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 9821fa9268d3..dd351105c1bc 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -1576,9 +1576,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, + p = pdd->process; + dev = pdd->dev; + +- ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( +- dev->adev, drm_file, p->pasid, +- &p->kgd_process_info, &p->ef); ++ ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file, ++ &p->kgd_process_info, ++ &p->ef); + if (ret) { + pr_err("Failed to create process VM object\n"); + return ret; +@@ -1593,10 +1593,16 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, + if (ret) + goto err_init_cwsr; + ++ ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid); ++ if (ret) ++ goto err_set_pasid; ++ + pdd->drm_file = drm_file; + + return 0; + ++err_set_pasid: ++ kfd_process_device_destroy_cwsr_dgpu(pdd); + 
err_init_cwsr: + kfd_process_device_destroy_ib_mem(pdd); + err_reserve_ib_mem: +-- +2.35.1 + diff --git a/queue-6.1/drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch b/queue-6.1/drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch new file mode 100644 index 00000000000..346b40a0653 --- /dev/null +++ b/queue-6.1/drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch @@ -0,0 +1,82 @@ +From 48c6d47302c6b736c81261d281a281643f80a504 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Dec 2022 10:15:17 -0500 +Subject: drm/amdkfd: Fix kfd_process_device_init_vm error handling + +From: Philip Yang + +[ Upstream commit 29d48b87db64b6697ddad007548e51d032081c59 ] + +Should only destroy the ib_mem and let process cleanup worker to free +the outstanding BOs. Reset the pointer in pdd->qpd structure, to avoid +NULL pointer access in process destroy worker. + + BUG: kernel NULL pointer dereference, address: 0000000000000010 + Call Trace: + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel+0x46/0xb0 [amdgpu] + kfd_process_device_destroy_cwsr_dgpu+0x40/0x70 [amdgpu] + kfd_process_destroy_pdds+0x71/0x190 [amdgpu] + kfd_process_wq_release+0x2a2/0x3b0 [amdgpu] + process_one_work+0x2a1/0x600 + worker_thread+0x39/0x3d0 + +Signed-off-by: Philip Yang +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 951b63677248..9821fa9268d3 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -689,13 +689,13 @@ void kfd_process_destroy_wq(void) + } + + static void kfd_process_free_gpuvm(struct kgd_mem *mem, +- struct kfd_process_device *pdd, void *kptr) ++ struct kfd_process_device *pdd, void **kptr) + { + struct kfd_dev *dev = pdd->dev; + +- if (kptr) { ++ if (kptr && *kptr) 
{ + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); +- kptr = NULL; ++ *kptr = NULL; + } + + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); +@@ -795,7 +795,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd) + if (!qpd->ib_kaddr || !qpd->ib_base) + return; + +- kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr); ++ kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); + } + + struct kfd_process *kfd_create_process(struct file *filep) +@@ -1277,7 +1277,7 @@ static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd) + if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) + return; + +- kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr); ++ kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); + } + + void kfd_process_set_trap_handler(struct qcm_process_device *qpd, +@@ -1598,8 +1598,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, + return 0; + + err_init_cwsr: ++ kfd_process_device_destroy_ib_mem(pdd); + err_reserve_ib_mem: +- kfd_process_device_free_bos(pdd); + pdd->drm_priv = NULL; + + return ret; +-- +2.35.1 + diff --git a/queue-6.1/drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch b/queue-6.1/drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch new file mode 100644 index 00000000000..ad3ce76b93d --- /dev/null +++ b/queue-6.1/drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch @@ -0,0 +1,67 @@ +From 03c814daee5bd87464a2a115ecb382b0adb8c016 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 00:56:41 +0800 +Subject: drm/i915/gvt: fix double free bug in split_2MB_gtt_entry + +From: Zheng Wang + +[ Upstream commit 4a61648af68f5ba4884f0e3b494ee1cabc4b6620 ] + +If intel_gvt_dma_map_guest_page failed, it will call +ppgtt_invalidate_spt, which will finally free the spt. +But the caller function ppgtt_populate_spt_by_guest_entry +does not notice that, it will free spt again in its error +path. 
+ +Fix this by canceling the mapping of DMA address and freeing sub_spt. +Besides, leave the handle of spt destroy to caller function instead +of callee function when error occurs. + +Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support") +Signed-off-by: Zheng Wang +Reviewed-by: Zhenyu Wang +Signed-off-by: Zhenyu Wang +Link: http://patchwork.freedesktop.org/patch/msgid/20221229165641.1192455-1-zyytlz.wz@163.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gvt/gtt.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c +index ce0eb03709c3..80c60754a5c1 100644 +--- a/drivers/gpu/drm/i915/gvt/gtt.c ++++ b/drivers/gpu/drm/i915/gvt/gtt.c +@@ -1214,10 +1214,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, + for_each_shadow_entry(sub_spt, &sub_se, sub_index) { + ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index, + PAGE_SIZE, &dma_addr); +- if (ret) { +- ppgtt_invalidate_spt(spt); +- return ret; +- } ++ if (ret) ++ goto err; + sub_se.val64 = se->val64; + + /* Copy the PAT field from PDE. */ +@@ -1236,6 +1234,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, + ops->set_pfn(se, sub_spt->shadow_page.mfn); + ppgtt_set_shadow_entry(spt, se, index); + return 0; ++err: ++ /* Cancel the existing addess mappings of DMA addr. */ ++ for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { ++ gvt_vdbg_mm("invalidate 4K entry\n"); ++ ppgtt_invalidate_pte(sub_spt, &sub_se); ++ } ++ /* Release the new allocated spt. 
*/ ++ trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, ++ sub_spt->guest_page.gfn, sub_spt->shadow_page.type); ++ ppgtt_free_spt(sub_spt); ++ return ret; + } + + static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, +-- +2.35.1 + diff --git a/queue-6.1/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch b/queue-6.1/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch new file mode 100644 index 00000000000..157035ddfc1 --- /dev/null +++ b/queue-6.1/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch @@ -0,0 +1,36 @@ +From 53fefdbb913a5e269514a76068a091e6b46b3ea7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Nov 2022 16:15:18 +0300 +Subject: drm/i915: unpin on error in intel_vgpu_shadow_mm_pin() + +From: Dan Carpenter + +[ Upstream commit 3792fc508c095abd84b10ceae12bd773e61fdc36 ] + +Call intel_vgpu_unpin_mm() on this error path. + +Fixes: 418741480809 ("drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.") +Signed-off-by: Dan Carpenter +Signed-off-by: Zhenyu Wang +Link: http://patchwork.freedesktop.org/patch/msgid/Y3OQ5tgZIVxyQ/WV@kili +Reviewed-by: Zhenyu Wang +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gvt/scheduler.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c +index d6fe94cd0fdb..8342d95f56cb 100644 +--- a/drivers/gpu/drm/i915/gvt/scheduler.c ++++ b/drivers/gpu/drm/i915/gvt/scheduler.c +@@ -696,6 +696,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) + + if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || + !workload->shadow_mm->ppgtt_mm.shadowed) { ++ intel_vgpu_unpin_mm(workload->shadow_mm); + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + return -EINVAL; + } +-- +2.35.1 + diff --git a/queue-6.1/drm-imx-ipuv3-plane-fix-overlay-plane-width.patch b/queue-6.1/drm-imx-ipuv3-plane-fix-overlay-plane-width.patch new file mode 100644 index 00000000000..53c381e90b5 --- /dev/null +++ 
b/queue-6.1/drm-imx-ipuv3-plane-fix-overlay-plane-width.patch @@ -0,0 +1,82 @@ +From fdcb266370be4190ec0c5fb8d8dae0bb9c0e4760 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Nov 2022 15:14:20 +0100 +Subject: drm/imx: ipuv3-plane: Fix overlay plane width + +From: Philipp Zabel + +[ Upstream commit 92d43bd3bc9728c1fb114d7011d46f5ea9489e28 ] + +ipu_src_rect_width() was introduced to support odd screen resolutions +such as 1366x768 by internally rounding up primary plane width to a +multiple of 8 and compensating with reduced horizontal blanking. +This also caused overlay plane width to be rounded up, which was not +intended. Fix overlay plane width by limiting the rounding up to the +primary plane. + +drm_rect_width(&new_state->src) >> 16 is the same value as +drm_rect_width(dst) because there is no plane scaling support. + +Fixes: 94dfec48fca7 ("drm/imx: Add 8 pixel alignment fix") +Reviewed-by: Lucas Stach +Link: https://lore.kernel.org/r/20221108141420.176696-1-p.zabel@pengutronix.de +Signed-off-by: Philipp Zabel +Link: https://patchwork.freedesktop.org/patch/msgid/20221108141420.176696-1-p.zabel@pengutronix.de +Tested-by: Ian Ray +(cherry picked from commit 4333472f8d7befe62359fecb1083cd57a6e07bfc) +Signed-off-by: Philipp Zabel +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/imx/ipuv3-plane.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c +index dba4f7d81d69..80142d9a4a55 100644 +--- a/drivers/gpu/drm/imx/ipuv3-plane.c ++++ b/drivers/gpu/drm/imx/ipuv3-plane.c +@@ -614,6 +614,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + break; + } + ++ if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG) ++ width = ipu_src_rect_width(new_state); ++ else ++ width = drm_rect_width(&new_state->src) >> 16; ++ + eba = drm_plane_state_to_eba(new_state, 0); + + /* +@@ -622,8 +627,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + */ + 
if (ipu_state->use_pre) { + axi_id = ipu_chan_assign_axi_id(ipu_plane->dma); +- ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, +- ipu_src_rect_width(new_state), ++ ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width, + drm_rect_height(&new_state->src) >> 16, + fb->pitches[0], fb->format->format, + fb->modifier, &eba); +@@ -678,9 +682,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + break; + } + +- ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8)); ++ ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width); + +- width = ipu_src_rect_width(new_state); + height = drm_rect_height(&new_state->src) >> 16; + info = drm_format_info(fb->format->format); + ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0], +@@ -744,8 +747,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16); + + ipu_cpmem_zero(ipu_plane->alpha_ch); +- ipu_cpmem_set_resolution(ipu_plane->alpha_ch, +- ipu_src_rect_width(new_state), ++ ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width, + drm_rect_height(&new_state->src) >> 16); + ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8); + ipu_cpmem_set_high_priority(ipu_plane->alpha_ch); +-- +2.35.1 + diff --git a/queue-6.1/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch b/queue-6.1/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch new file mode 100644 index 00000000000..1bfdad874da --- /dev/null +++ b/queue-6.1/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch @@ -0,0 +1,56 @@ +From 61d50856c783f1fbb593c7d347d9650d9a4f9122 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 09:43:05 +0100 +Subject: drm/meson: Reduce the FIFO lines held when AFBC is not used + +From: Carlo Caione + +[ Upstream commit 3b754ed6d1cd90017e66e5cc16f3923e4a952ffc ] + +Having a bigger number of FIFO lines held after vsync is only useful to +SoCs using AFBC to give time to the AFBC decoder to be reset, configured +and 
enabled again. + +For SoCs not using AFBC this, on the contrary, is causing issues on some +displays and a vertical offset of a few pixels in the displayed image. + +Conditionally increase the number of lines held after vsync only for +SoCs using AFBC, leaving the default value for all the others. + +Fixes: 24e0d4058eff ("drm/meson: hold 32 lines after vsync to give time for AFBC start") +Signed-off-by: Carlo Caione +Acked-by: Martin Blumenstingl +Acked-by: Neil Armstrong +[narmstrong: added fixes tag] +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20221216-afbc_s905x-v1-0-033bebf780d9@baylibre.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/meson/meson_viu.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c +index d4b907889a21..cd399b0b7181 100644 +--- a/drivers/gpu/drm/meson/meson_viu.c ++++ b/drivers/gpu/drm/meson/meson_viu.c +@@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv) + + /* Initialize OSD1 fifo control register */ + reg = VIU_OSD_DDR_PRIORITY_URGENT | +- VIU_OSD_HOLD_FIFO_LINES(31) | + VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */ + VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */ + VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */ + + if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) +- reg |= VIU_OSD_BURST_LENGTH_32; ++ reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31)); + else +- reg |= VIU_OSD_BURST_LENGTH_64; ++ reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4)); + + writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT)); + writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT)); +-- +2.35.1 + diff --git a/queue-6.1/drm-panfrost-fix-gem-handle-creation-ref-counting.patch b/queue-6.1/drm-panfrost-fix-gem-handle-creation-ref-counting.patch new file mode 100644 index 00000000000..e368e12b96c --- /dev/null +++ 
b/queue-6.1/drm-panfrost-fix-gem-handle-creation-ref-counting.patch @@ -0,0 +1,138 @@ +From 36926795b9293f7d99a5391697b7e496cb30c87f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 14:01:30 +0000 +Subject: drm/panfrost: Fix GEM handle creation ref-counting + +From: Steven Price + +[ Upstream commit 4217c6ac817451d5116687f3cc6286220dc43d49 ] + +panfrost_gem_create_with_handle() previously returned a BO but with the +only reference being from the handle, which user space could in theory +guess and release, causing a use-after-free. Additionally if the call to +panfrost_gem_mapping_get() in panfrost_ioctl_create_bo() failed then +a(nother) reference on the BO was dropped. + +The _create_with_handle() is a problematic pattern, so ditch it and +instead create the handle in panfrost_ioctl_create_bo(). If the call to +panfrost_gem_mapping_get() fails then this means that user space has +indeed gone behind our back and freed the handle. In which case just +return an error code. 
+ +Reported-by: Rob Clark +Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver") +Signed-off-by: Steven Price +Reviewed-by: Rob Clark +Link: https://patchwork.freedesktop.org/patch/msgid/20221219140130.410578-1-steven.price@arm.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panfrost/panfrost_drv.c | 27 ++++++++++++++++--------- + drivers/gpu/drm/panfrost/panfrost_gem.c | 16 +-------------- + drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +---- + 3 files changed, 20 insertions(+), 28 deletions(-) + +diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c +index 2fa5afe21288..919e6cc04982 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_drv.c ++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c +@@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + struct panfrost_gem_object *bo; + struct drm_panfrost_create_bo *args = data; + struct panfrost_gem_mapping *mapping; ++ int ret; + + if (!args->size || args->pad || + (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) +@@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + !(args->flags & PANFROST_BO_NOEXEC)) + return -EINVAL; + +- bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, +- &args->handle); ++ bo = panfrost_gem_create(dev, args->size, args->flags); + if (IS_ERR(bo)) + return PTR_ERR(bo); + ++ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); ++ if (ret) ++ goto out; ++ + mapping = panfrost_gem_mapping_get(bo, priv); +- if (!mapping) { +- drm_gem_object_put(&bo->base.base); +- return -EINVAL; ++ if (mapping) { ++ args->offset = mapping->mmnode.start << PAGE_SHIFT; ++ panfrost_gem_mapping_put(mapping); ++ } else { ++ /* This can only happen if the handle from ++ * drm_gem_handle_create() has already been guessed and freed ++ * by user space ++ */ ++ ret = -EINVAL; + } + +- args->offset = mapping->mmnode.start << PAGE_SHIFT; +- 
panfrost_gem_mapping_put(mapping); +- +- return 0; ++out: ++ drm_gem_object_put(&bo->base.base); ++ return ret; + } + + /** +diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c +index 293e799e2fe8..3c812fbd126f 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gem.c ++++ b/drivers/gpu/drm/panfrost/panfrost_gem.c +@@ -235,12 +235,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t + } + + struct panfrost_gem_object * +-panfrost_gem_create_with_handle(struct drm_file *file_priv, +- struct drm_device *dev, size_t size, +- u32 flags, +- uint32_t *handle) ++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) + { +- int ret; + struct drm_gem_shmem_object *shmem; + struct panfrost_gem_object *bo; + +@@ -256,16 +252,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv, + bo->noexec = !!(flags & PANFROST_BO_NOEXEC); + bo->is_heap = !!(flags & PANFROST_BO_HEAP); + +- /* +- * Allocate an id of idr table where the obj is registered +- * and handle has the id what user can see. +- */ +- ret = drm_gem_handle_create(file_priv, &shmem->base, handle); +- /* drop reference from allocate - handle holds it now. 
*/ +- drm_gem_object_put(&shmem->base); +- if (ret) +- return ERR_PTR(ret); +- + return bo; + } + +diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h +index 8088d5fd8480..ad2877eeeccd 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gem.h ++++ b/drivers/gpu/drm/panfrost/panfrost_gem.h +@@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, + struct sg_table *sgt); + + struct panfrost_gem_object * +-panfrost_gem_create_with_handle(struct drm_file *file_priv, +- struct drm_device *dev, size_t size, +- u32 flags, +- uint32_t *handle); ++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); + + int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); + void panfrost_gem_close(struct drm_gem_object *obj, +-- +2.35.1 + diff --git a/queue-6.1/drm-virtio-fix-memory-leak-in-virtio_gpu_object_crea.patch b/queue-6.1/drm-virtio-fix-memory-leak-in-virtio_gpu_object_crea.patch new file mode 100644 index 00000000000..4eea9164894 --- /dev/null +++ b/queue-6.1/drm-virtio-fix-memory-leak-in-virtio_gpu_object_crea.patch @@ -0,0 +1,57 @@ +From c7f8c8df50ff9599e5282042b63f3a624ff7b8fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 17:19:05 +0800 +Subject: drm/virtio: Fix memory leak in virtio_gpu_object_create() + +From: Xiu Jianfeng + +[ Upstream commit a764da46cd15f8b40292d2c0b29c4bf9a3e66c7e ] + +The virtio_gpu_object_shmem_init() will alloc memory and save it in +@ents, so when virtio_gpu_array_alloc() fails, this memory should be +freed, this patch fixes it. 
+ +Fixes: e7fef0923303 ("drm/virtio: Simplify error handling of virtio_gpu_object_create()") +Signed-off-by: Xiu Jianfeng +Reviewed-by: Dmitry Osipenko +Signed-off-by: Dmitry Osipenko +Link: https://patchwork.freedesktop.org/patch/msgid/20221109091905.55451-1-xiujianfeng@huawei.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/virtio/virtgpu_object.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c +index 8d7728181de0..c7e74cf13022 100644 +--- a/drivers/gpu/drm/virtio/virtgpu_object.c ++++ b/drivers/gpu/drm/virtio/virtgpu_object.c +@@ -184,7 +184,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object_array *objs = NULL; + struct drm_gem_shmem_object *shmem_obj; + struct virtio_gpu_object *bo; +- struct virtio_gpu_mem_entry *ents; ++ struct virtio_gpu_mem_entry *ents = NULL; + unsigned int nents; + int ret; + +@@ -210,7 +210,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, + ret = -ENOMEM; + objs = virtio_gpu_array_alloc(1); + if (!objs) +- goto err_put_id; ++ goto err_free_entry; + virtio_gpu_array_add_obj(objs, &bo->base.base); + + ret = virtio_gpu_array_lock_resv(objs); +@@ -239,6 +239,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, + + err_put_objs: + virtio_gpu_array_put_free(objs); ++err_free_entry: ++ kvfree(ents); + err_put_id: + virtio_gpu_resource_id_put(vgdev, bo->hw_res_handle); + err_free_gem: +-- +2.35.1 + diff --git a/queue-6.1/filelock-new-helper-vfs_inode_has_locks.patch b/queue-6.1/filelock-new-helper-vfs_inode_has_locks.patch new file mode 100644 index 00000000000..5459eac89a2 --- /dev/null +++ b/queue-6.1/filelock-new-helper-vfs_inode_has_locks.patch @@ -0,0 +1,89 @@ +From e559e8a462196a659fe7620c29952da7cfa50abe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Nov 2022 08:33:09 -0500 +Subject: filelock: new helper: vfs_inode_has_locks + +From: Jeff 
Layton + +[ Upstream commit ab1ddef98a715eddb65309ffa83267e4e84a571e ] + +Ceph has a need to know whether a particular inode has any locks set on +it. It's currently tracking that by a num_locks field in its +filp->private_data, but that's problematic as it tries to decrement this +field when releasing locks and that can race with the file being torn +down. + +Add a new vfs_inode_has_locks helper that just returns whether any locks +are currently held on the inode. + +Reviewed-by: Xiubo Li +Reviewed-by: Christoph Hellwig +Signed-off-by: Jeff Layton +Stable-dep-of: 461ab10ef7e6 ("ceph: switch to vfs_inode_has_locks() to fix file lock bug") +Signed-off-by: Sasha Levin +--- + fs/locks.c | 23 +++++++++++++++++++++++ + include/linux/fs.h | 6 ++++++ + 2 files changed, 29 insertions(+) + +diff --git a/fs/locks.c b/fs/locks.c +index 607f94a0e789..7dc129cc1a26 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -2669,6 +2669,29 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) + } + EXPORT_SYMBOL_GPL(vfs_cancel_lock); + ++/** ++ * vfs_inode_has_locks - are any file locks held on @inode? ++ * @inode: inode to check for locks ++ * ++ * Return true if there are any FL_POSIX or FL_FLOCK locks currently ++ * set on @inode. 
++ */ ++bool vfs_inode_has_locks(struct inode *inode) ++{ ++ struct file_lock_context *ctx; ++ bool ret; ++ ++ ctx = smp_load_acquire(&inode->i_flctx); ++ if (!ctx) ++ return false; ++ ++ spin_lock(&ctx->flc_lock); ++ ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); ++ spin_unlock(&ctx->flc_lock); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(vfs_inode_has_locks); ++ + #ifdef CONFIG_PROC_FS + #include + #include +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 6b115bce14b9..081d1f539628 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1170,6 +1170,7 @@ extern int locks_delete_block(struct file_lock *); + extern int vfs_test_lock(struct file *, struct file_lock *); + extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); + extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); ++bool vfs_inode_has_locks(struct inode *inode); + extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); + extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); + extern void lease_get_mtime(struct inode *, struct timespec64 *time); +@@ -1284,6 +1285,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) + return 0; + } + ++static inline bool vfs_inode_has_locks(struct inode *inode) ++{ ++ return false; ++} ++ + static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) + { + return -ENOLCK; +-- +2.35.1 + diff --git a/queue-6.1/fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch b/queue-6.1/fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch new file mode 100644 index 00000000000..097ef2056eb --- /dev/null +++ b/queue-6.1/fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch @@ -0,0 +1,51 @@ +From 8bf2fff696584a7be41651408ac8500f3f84dfb3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Jan 2023 23:05:33 +0900 +Subject: fs/ntfs3: don't hold ni_lock when calling 
truncate_setsize() + +From: Tetsuo Handa + +[ Upstream commit 0226635c304cfd5c9db9b78c259cb713819b057e ] + +syzbot is reporting hung task at do_user_addr_fault() [1], for there is +a silent deadlock between PG_locked bit and ni_lock lock. + +Since filemap_update_page() calls filemap_read_folio() after calling +folio_trylock() which will set PG_locked bit, ntfs_truncate() must not +call truncate_setsize() which will wait for PG_locked bit to be cleared +when holding ni_lock lock. + +Link: https://lore.kernel.org/all/00000000000060d41f05f139aa44@google.com/ +Link: https://syzkaller.appspot.com/bug?extid=bed15dbf10294aa4f2ae [1] +Reported-by: syzbot +Debugged-by: Linus Torvalds +Co-developed-by: Hillf Danton +Signed-off-by: Hillf Danton +Signed-off-by: Tetsuo Handa +Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + fs/ntfs3/file.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c +index 4f2ffc7ef296..f31c0389a2e7 100644 +--- a/fs/ntfs3/file.c ++++ b/fs/ntfs3/file.c +@@ -486,10 +486,10 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) + + new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); + +- ni_lock(ni); +- + truncate_setsize(inode, new_size); + ++ ni_lock(ni); ++ + down_write(&ni->file.run_lock); + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, + &new_valid, ni->mi.sbi->options->prealloc, NULL); +-- +2.35.1 + diff --git a/queue-6.1/gpio-pca953x-avoid-to-use-uninitialized-value-pinctr.patch b/queue-6.1/gpio-pca953x-avoid-to-use-uninitialized-value-pinctr.patch new file mode 100644 index 00000000000..73a33f58feb --- /dev/null +++ b/queue-6.1/gpio-pca953x-avoid-to-use-uninitialized-value-pinctr.patch @@ -0,0 +1,41 @@ +From 97cff9a623d521908d0c7b8fb337bbf764b02e13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 11 Dec 2022 00:05:58 +0200 +Subject: gpio: pca953x: 
avoid to use uninitialized value pinctrl + +From: Haibo Chen + +[ Upstream commit 90fee3dd5bfc1b9f4c8c0ba6cd2a35c9d79ca4de ] + +There is a variable pinctrl declared without initializer. And then +has the case (switch operation chose the default case) to directly +use this uninitialized value, this is not a safe behavior. So here +initialize the pinctrl as 0 to avoid this issue. +This is reported by Coverity. + +Fixes: 13c5d4ce8060 ("gpio: pca953x: Add support for PCAL6534") +Signed-off-by: Haibo Chen +Signed-off-by: Andy Shevchenko +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-pca953x.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c +index ebe1943b85dd..bf21803a0036 100644 +--- a/drivers/gpio/gpio-pca953x.c ++++ b/drivers/gpio/gpio-pca953x.c +@@ -473,6 +473,9 @@ static u8 pcal6534_recalc_addr(struct pca953x_chip *chip, int reg, int off) + case PCAL6524_DEBOUNCE: + pinctrl = ((reg & PCAL_PINCTRL_MASK) >> 1) + 0x1c; + break; ++ default: ++ pinctrl = 0; ++ break; + } + + return pinctrl + addr + (off / BANK_SZ); +-- +2.35.1 + diff --git a/queue-6.1/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch b/queue-6.1/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch new file mode 100644 index 00000000000..3eca6275a93 --- /dev/null +++ b/queue-6.1/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch @@ -0,0 +1,36 @@ +From 47f54c464082c997593a6b59a037eed67e0c1dd9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Jan 2023 12:20:39 +0400 +Subject: gpio: sifive: Fix refcount leak in sifive_gpio_probe + +From: Miaoqian Lin + +[ Upstream commit 694175cd8a1643cde3acb45c9294bca44a8e08e9 ] + +of_irq_find_parent() returns a node pointer with refcount incremented, +We should use of_node_put() on it when not needed anymore. +Add missing of_node_put() to avoid refcount leak. 
+ +Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs") +Signed-off-by: Miaoqian Lin +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-sifive.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c +index 238f3210970c..bc5660f61c57 100644 +--- a/drivers/gpio/gpio-sifive.c ++++ b/drivers/gpio/gpio-sifive.c +@@ -215,6 +215,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) + return -ENODEV; + } + parent = irq_find_host(irq_parent); ++ of_node_put(irq_parent); + if (!parent) { + dev_err(dev, "no IRQ parent domain\n"); + return -ENODEV; +-- +2.35.1 + diff --git a/queue-6.1/hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch b/queue-6.1/hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch new file mode 100644 index 00000000000..850eaf87881 --- /dev/null +++ b/queue-6.1/hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch @@ -0,0 +1,96 @@ +From 1189087ee5fe3663f17af994b08de51bc9088758 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Jan 2023 11:06:28 -0800 +Subject: hfs/hfsplus: avoid WARN_ON() for sanity check, use proper error + handling + +From: Linus Torvalds + +[ Upstream commit cb7a95af78d29442b8294683eca4897544b8ef46 ] + +Commit 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check") fixed +a build warning by turning a comment into a WARN_ON(), but it turns out +that syzbot then complains because it can trigger said warning with a +corrupted hfs image. + +The warning actually does warn about a bad situation, but we are much +better off just handling it as the error it is. So rather than warn +about us doing bad things, stop doing the bad things and return -EIO. + +While at it, also fix a memory leak that was introduced by an earlier +fix for a similar syzbot warning situation, and add a check for one case +that historically wasn't handled at all (ie neither comment nor +subsequent WARN_ON). 
+ +Reported-by: syzbot+7bb7cd3595533513a9e7@syzkaller.appspotmail.com +Fixes: 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check") +Fixes: 8d824e69d9f3 ("hfs: fix OOB Read in __hfs_brec_find") +Link: https://lore.kernel.org/lkml/000000000000dbce4e05f170f289@google.com/ +Tested-by: Michael Schmitz +Cc: Arnd Bergmann +Cc: Matthew Wilcox +Cc: Viacheslav Dubeyko +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + fs/hfs/inode.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c +index a0746be3c1de..80d17c520d0b 100644 +--- a/fs/hfs/inode.c ++++ b/fs/hfs/inode.c +@@ -458,15 +458,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + /* panic? */ + return -EIO; + ++ res = -EIO; + if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) +- return -EIO; ++ goto out; + fd.search_key->cat = HFS_I(main_inode)->cat_key; + if (hfs_brec_find(&fd)) +- /* panic? */ + goto out; + + if (S_ISDIR(main_inode->i_mode)) { +- WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)); ++ if (fd.entrylength < sizeof(struct hfs_cat_dir)) ++ goto out; + hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_dir)); + if (rec.type != HFS_CDR_DIR || +@@ -479,6 +480,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_dir)); + } else if (HFS_IS_RSRC(inode)) { ++ if (fd.entrylength < sizeof(struct hfs_cat_file)) ++ goto out; + hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + hfs_inode_write_fork(inode, rec.file.RExtRec, +@@ -486,7 +489,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + } else { +- WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file)); ++ if (fd.entrylength < sizeof(struct hfs_cat_file)) ++ goto out; + 
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + if (rec.type != HFS_CDR_FIL || +@@ -503,9 +507,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + } ++ res = 0; + out: + hfs_find_exit(&fd); +- return 0; ++ return res; + } + + static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, +-- +2.35.1 + diff --git a/queue-6.1/ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch b/queue-6.1/ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch new file mode 100644 index 00000000000..b1321180ab5 --- /dev/null +++ b/queue-6.1/ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch @@ -0,0 +1,53 @@ +From 456269a31d5dd4a048033eef697c54944f32fe48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 09:54:48 -0800 +Subject: ice: xsk: do not use xdp_return_frame() on tx_buf->raw_buf + +From: Maciej Fijalkowski + +[ Upstream commit 53fc61be273a1e76dd5e356f91805dce00ff2d2c ] + +Previously ice XDP xmit routine was changed in a way that it avoids +xdp_buff->xdp_frame conversion as it is simply not needed for handling +XDP_TX action and what is more it saves us CPU cycles. This routine is +re-used on ZC driver to handle XDP_TX action. + +Although for XDP_TX on Rx ZC xdp_buff that comes from xsk_buff_pool is +converted to xdp_frame, xdp_frame itself is not stored inside +ice_tx_buf, we only store raw data pointer. Casting this pointer to +xdp_frame and calling against it xdp_return_frame in +ice_clean_xdp_tx_buf() results in undefined behavior. + +To fix this, simply call page_frag_free() on tx_buf->raw_buf. +Later intention is to remove the buff->frame conversion in order to +simplify the codebase and improve XDP_TX performance on ZC. 
+ +Fixes: 126cdfe1007a ("ice: xsk: Improve AF_XDP ZC Tx and use batching API") +Reported-and-tested-by: Robin Cowley +Signed-off-by: Maciej Fijalkowski +Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) +Signed-off-by: Tony Nguyen +Reviewed-by: Piotr Raczynski +Link: https://lore.kernel.org/r/20221220175448.693999-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c +index 056c904b83cc..79fa65d1cf20 100644 +--- a/drivers/net/ethernet/intel/ice/ice_xsk.c ++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c +@@ -772,7 +772,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) + static void + ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) + { +- xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); ++ page_frag_free(tx_buf->raw_buf); + xdp_ring->xdp_tx_active--; + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); +-- +2.35.1 + diff --git a/queue-6.1/io_uring-cancel-re-grab-ctx-mutex-after-finishing-wa.patch b/queue-6.1/io_uring-cancel-re-grab-ctx-mutex-after-finishing-wa.patch new file mode 100644 index 00000000000..a98d45a42e9 --- /dev/null +++ b/queue-6.1/io_uring-cancel-re-grab-ctx-mutex-after-finishing-wa.patch @@ -0,0 +1,62 @@ +From 1b2af895065c732348964d5c6dac2f1ca08f4891 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Dec 2022 07:11:33 -0700 +Subject: io_uring/cancel: re-grab ctx mutex after finishing wait + +From: Jens Axboe + +[ Upstream commit 23fffb2f09ce1145cbd751801d45ba74acaa6542 ] + +If we have a signal pending during cancelations, it'll cause the +task_work run to return an error. 
Since we didn't run task_work, the +current task is left in TASK_INTERRUPTIBLE state when we need to +re-grab the ctx mutex, and the kernel will rightfully complain about +that. + +Move the lock grabbing for the error cases outside the loop to avoid +that issue. + +Reported-by: syzbot+7df055631cd1be4586fd@syzkaller.appspotmail.com +Link: https://lore.kernel.org/io-uring/0000000000003a14a905f05050b0@google.com/ +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/cancel.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/io_uring/cancel.c b/io_uring/cancel.c +index 2291a53cdabd..b4f5dfacc0c3 100644 +--- a/io_uring/cancel.c ++++ b/io_uring/cancel.c +@@ -288,24 +288,23 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) + + ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); + ++ mutex_unlock(&ctx->uring_lock); + if (ret != -EALREADY) + break; + +- mutex_unlock(&ctx->uring_lock); + ret = io_run_task_work_sig(ctx); +- if (ret < 0) { +- mutex_lock(&ctx->uring_lock); ++ if (ret < 0) + break; +- } + ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); +- mutex_lock(&ctx->uring_lock); + if (!ret) { + ret = -ETIME; + break; + } ++ mutex_lock(&ctx->uring_lock); + } while (1); + + finish_wait(&ctx->cq_wait, &wait); ++ mutex_lock(&ctx->uring_lock); + + if (ret == -ENOENT || ret > 0) + ret = 0; +-- +2.35.1 + diff --git a/queue-6.1/io_uring-check-for-valid-register-opcode-earlier.patch b/queue-6.1/io_uring-check-for-valid-register-opcode-earlier.patch new file mode 100644 index 00000000000..70f4abae092 --- /dev/null +++ b/queue-6.1/io_uring-check-for-valid-register-opcode-earlier.patch @@ -0,0 +1,45 @@ +From 5fbf27b9b4abbc6049b89a3e162d7209be2cce86 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Dec 2022 06:37:08 -0700 +Subject: io_uring: check for valid register opcode earlier + +From: Jens Axboe + +[ Upstream commit 343190841a1f22b96996d9f8cfab902a4d1bfd0e ] + +We only check the register opcode value 
inside the restricted ring +section, move it into the main io_uring_register() function instead +and check it up front. + +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 71f1cabb9f3d..1bc68dfda340 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3897,8 +3897,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, + return -EEXIST; + + if (ctx->restricted) { +- if (opcode >= IORING_REGISTER_LAST) +- return -EINVAL; + opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); + if (!test_bit(opcode, ctx->restrictions.register_op)) + return -EACCES; +@@ -4054,6 +4052,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, + long ret = -EBADF; + struct fd f; + ++ if (opcode >= IORING_REGISTER_LAST) ++ return -EINVAL; ++ + f = fdget(fd); + if (!f.file) + return -EBADF; +-- +2.35.1 + diff --git a/queue-6.1/kunit-alloc_string_stream_fragment-error-handling-bu.patch b/queue-6.1/kunit-alloc_string_stream_fragment-error-handling-bu.patch new file mode 100644 index 00000000000..10e8393c9b2 --- /dev/null +++ b/queue-6.1/kunit-alloc_string_stream_fragment-error-handling-bu.patch @@ -0,0 +1,43 @@ +From ff721f4ccf263a3265125ecc219a588e6f3895e3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Oct 2022 07:42:41 -0700 +Subject: kunit: alloc_string_stream_fragment error handling bug fix + +From: YoungJun.park + +[ Upstream commit 93ef83050e597634d2c7dc838a28caf5137b9404 ] + +When it fails to allocate fragment, it does not free and return error. +And check the pointer inappropriately. 
+ +Fixed merge conflicts with +commit 618887768bb7 ("kunit: update NULL vs IS_ERR() tests") +Shuah Khan + +Signed-off-by: YoungJun.park +Reviewed-by: David Gow +Signed-off-by: Shuah Khan +Signed-off-by: Sasha Levin +--- + lib/kunit/string-stream.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/lib/kunit/string-stream.c b/lib/kunit/string-stream.c +index a608746020a9..7aeabe1a3dc5 100644 +--- a/lib/kunit/string-stream.c ++++ b/lib/kunit/string-stream.c +@@ -23,8 +23,10 @@ static struct string_stream_fragment *alloc_string_stream_fragment( + return ERR_PTR(-ENOMEM); + + frag->fragment = kunit_kmalloc(test, len, gfp); +- if (!frag->fragment) ++ if (!frag->fragment) { ++ kunit_kfree(test, frag); + return ERR_PTR(-ENOMEM); ++ } + + return frag; + } +-- +2.35.1 + diff --git a/queue-6.1/mptcp-fix-deadlock-in-fastopen-error-path.patch b/queue-6.1/mptcp-fix-deadlock-in-fastopen-error-path.patch new file mode 100644 index 00000000000..4e5a36ef327 --- /dev/null +++ b/queue-6.1/mptcp-fix-deadlock-in-fastopen-error-path.patch @@ -0,0 +1,152 @@ +From 1526d97b93cf7b5552ee597aa9e179102bb63e69 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 11:52:14 -0800 +Subject: mptcp: fix deadlock in fastopen error path + +From: Paolo Abeni + +[ Upstream commit 7d803344fdc3e38079fabcf38b1e4cb6f8faa655 ] + +MatM reported a deadlock at fastopening time: + +INFO: task syz-executor.0:11454 blocked for more than 143 seconds. + Tainted: G S 6.1.0-rc5-03226-gdb0157db5153 #1 +"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
+task:syz-executor.0 state:D stack:25104 pid:11454 ppid:424 flags:0x00004006 +Call Trace: + + context_switch kernel/sched/core.c:5191 [inline] + __schedule+0x5c2/0x1550 kernel/sched/core.c:6503 + schedule+0xe8/0x1c0 kernel/sched/core.c:6579 + __lock_sock+0x142/0x260 net/core/sock.c:2896 + lock_sock_nested+0xdb/0x100 net/core/sock.c:3466 + __mptcp_close_ssk+0x1a3/0x790 net/mptcp/protocol.c:2328 + mptcp_destroy_common+0x16a/0x650 net/mptcp/protocol.c:3171 + mptcp_disconnect+0xb8/0x450 net/mptcp/protocol.c:3019 + __inet_stream_connect+0x897/0xa40 net/ipv4/af_inet.c:720 + tcp_sendmsg_fastopen+0x3dd/0x740 net/ipv4/tcp.c:1200 + mptcp_sendmsg_fastopen net/mptcp/protocol.c:1682 [inline] + mptcp_sendmsg+0x128a/0x1a50 net/mptcp/protocol.c:1721 + inet6_sendmsg+0x11f/0x150 net/ipv6/af_inet6.c:663 + sock_sendmsg_nosec net/socket.c:714 [inline] + sock_sendmsg+0xf7/0x190 net/socket.c:734 + ____sys_sendmsg+0x336/0x970 net/socket.c:2476 + ___sys_sendmsg+0x122/0x1c0 net/socket.c:2530 + __sys_sendmmsg+0x18d/0x460 net/socket.c:2616 + __do_sys_sendmmsg net/socket.c:2645 [inline] + __se_sys_sendmmsg net/socket.c:2642 [inline] + __x64_sys_sendmmsg+0x9d/0x110 net/socket.c:2642 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7f5920a75e7d +RSP: 002b:00007f59201e8028 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 +RAX: ffffffffffffffda RBX: 00007f5920bb4f80 RCX: 00007f5920a75e7d +RDX: 0000000000000001 RSI: 0000000020002940 RDI: 0000000000000005 +RBP: 00007f5920ae7593 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000020004050 R11: 0000000000000246 R12: 0000000000000000 +R13: 000000000000000b R14: 00007f5920bb4f80 R15: 00007f59201c8000 + + +In the error path, tcp_sendmsg_fastopen() ends-up calling +mptcp_disconnect(), and the latter tries to close each +subflow, acquiring the socket lock on each of them. 
+ +At fastopen time, we have a single subflow, and such subflow +socket lock is already held by the called, causing the deadlock. + +We already track the 'fastopen in progress' status inside the msk +socket. Use it to address the issue, making mptcp_disconnect() a +no op when invoked from the fastopen (error) path and doing the +relevant cleanup after releasing the subflow socket lock. + +While at the above, rename the fastopen status bit to something +more meaningful. + +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/321 +Fixes: fa9e57468aa1 ("mptcp: fix abba deadlock on fastopen") +Reported-by: Mat Martineau +Reviewed-by: Mat Martineau +Signed-off-by: Paolo Abeni +Signed-off-by: Mat Martineau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 18 +++++++++++++++--- + net/mptcp/protocol.h | 2 +- + 2 files changed, 16 insertions(+), 4 deletions(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 1dbc62537259..e64ea2ca1c7a 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1673,6 +1673,8 @@ static void mptcp_set_nospace(struct sock *sk) + set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); + } + ++static int mptcp_disconnect(struct sock *sk, int flags); ++ + static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, + size_t len, int *copied_syn) + { +@@ -1683,9 +1685,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh + lock_sock(ssk); + msg->msg_flags |= MSG_DONTWAIT; + msk->connect_flags = O_NONBLOCK; +- msk->is_sendmsg = 1; ++ msk->fastopening = 1; + ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); +- msk->is_sendmsg = 0; ++ msk->fastopening = 0; + msg->msg_flags = saved_flags; + release_sock(ssk); + +@@ -1699,6 +1701,8 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh + */ + if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) + *copied_syn = 0; ++ } 
else if (ret && ret != -EINPROGRESS) { ++ mptcp_disconnect(sk, 0); + } + + return ret; +@@ -3000,6 +3004,14 @@ static int mptcp_disconnect(struct sock *sk, int flags) + { + struct mptcp_sock *msk = mptcp_sk(sk); + ++ /* We are on the fastopen error path. We can't call straight into the ++ * subflows cleanup code due to lock nesting (we are already under ++ * msk->firstsocket lock). Do nothing and leave the cleanup to the ++ * caller. ++ */ ++ if (msk->fastopening) ++ return 0; ++ + inet_sk_state_store(sk, TCP_CLOSE); + + mptcp_stop_timer(sk); +@@ -3566,7 +3578,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) + /* if reaching here via the fastopen/sendmsg path, the caller already + * acquired the subflow socket lock, too. + */ +- if (msk->is_sendmsg) ++ if (msk->fastopening) + err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1); + else + err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags); +diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h +index 6a09ab99a12d..380bddbc52d4 100644 +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -286,7 +286,7 @@ struct mptcp_sock { + u8 recvmsg_inq:1, + cork:1, + nodelay:1, +- is_sendmsg:1; ++ fastopening:1; + int connect_flags; + struct work_struct work; + struct sk_buff *ooo_last_skb; +-- +2.35.1 + diff --git a/queue-6.1/mptcp-fix-lockdep-false-positive.patch b/queue-6.1/mptcp-fix-lockdep-false-positive.patch new file mode 100644 index 00000000000..97ca3dd25a7 --- /dev/null +++ b/queue-6.1/mptcp-fix-lockdep-false-positive.patch @@ -0,0 +1,160 @@ +From 635d909ddd53062e5800684c5a379feb32897e8e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 11:52:15 -0800 +Subject: mptcp: fix lockdep false positive + +From: Paolo Abeni + +[ Upstream commit fec3adfd754ccc99a7230e8ab9f105b65fb07bcc ] + +MattB reported a lockdep splat in the mptcp listener code cleanup: + + WARNING: possible circular locking dependency detected + 
packetdrill/14278 is trying to acquire lock: + ffff888017d868f0 ((work_completion)(&msk->work)){+.+.}-{0:0}, at: __flush_work (kernel/workqueue.c:3069) + + but task is already holding lock: + ffff888017d84130 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close (net/mptcp/protocol.c:2973) + + which lock already depends on the new lock. + + the existing dependency chain (in reverse order) is: + + -> #1 (sk_lock-AF_INET){+.+.}-{0:0}: + __lock_acquire (kernel/locking/lockdep.c:5055) + lock_acquire (kernel/locking/lockdep.c:466) + lock_sock_nested (net/core/sock.c:3463) + mptcp_worker (net/mptcp/protocol.c:2614) + process_one_work (kernel/workqueue.c:2294) + worker_thread (include/linux/list.h:292) + kthread (kernel/kthread.c:376) + ret_from_fork (arch/x86/entry/entry_64.S:312) + + -> #0 ((work_completion)(&msk->work)){+.+.}-{0:0}: + check_prev_add (kernel/locking/lockdep.c:3098) + validate_chain (kernel/locking/lockdep.c:3217) + __lock_acquire (kernel/locking/lockdep.c:5055) + lock_acquire (kernel/locking/lockdep.c:466) + __flush_work (kernel/workqueue.c:3070) + __cancel_work_timer (kernel/workqueue.c:3160) + mptcp_cancel_work (net/mptcp/protocol.c:2758) + mptcp_subflow_queue_clean (net/mptcp/subflow.c:1817) + __mptcp_close_ssk (net/mptcp/protocol.c:2363) + mptcp_destroy_common (net/mptcp/protocol.c:3170) + mptcp_destroy (include/net/sock.h:1495) + __mptcp_destroy_sock (net/mptcp/protocol.c:2886) + __mptcp_close (net/mptcp/protocol.c:2959) + mptcp_close (net/mptcp/protocol.c:2974) + inet_release (net/ipv4/af_inet.c:432) + __sock_release (net/socket.c:651) + sock_close (net/socket.c:1367) + __fput (fs/file_table.c:320) + task_work_run (kernel/task_work.c:181 (discriminator 1)) + exit_to_user_mode_prepare (include/linux/resume_user_mode.h:49) + syscall_exit_to_user_mode (kernel/entry/common.c:130) + do_syscall_64 (arch/x86/entry/common.c:87) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) + + other info that might help us debug this: + + Possible unsafe 
locking scenario: + + CPU0 CPU1 + ---- ---- + lock(sk_lock-AF_INET); + lock((work_completion)(&msk->work)); + lock(sk_lock-AF_INET); + lock((work_completion)(&msk->work)); + + *** DEADLOCK *** + +The report is actually a false positive, since the only existing lock +nesting is the msk socket lock acquired by the mptcp work. +cancel_work_sync() is invoked without the relevant socket lock being +held, but under a different (the msk listener) socket lock. + +We could silence the splat adding a per workqueue dynamic lockdep key, +but that looks overkill. Instead just tell lockdep the msk socket lock +is not held around cancel_work_sync(). + +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/322 +Fixes: 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") +Reported-by: Matthieu Baerts +Reviewed-by: Mat Martineau +Signed-off-by: Paolo Abeni +Signed-off-by: Mat Martineau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 2 +- + net/mptcp/protocol.h | 2 +- + net/mptcp/subflow.c | 19 +++++++++++++++++-- + 3 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index e64ea2ca1c7a..e97465f0c667 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2371,7 +2371,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + /* otherwise tcp will dispose of the ssk and subflow ctx */ + if (ssk->sk_state == TCP_LISTEN) { + tcp_set_state(ssk, TCP_CLOSE); +- mptcp_subflow_queue_clean(ssk); ++ mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); + } + __tcp_close(ssk, 0); +diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h +index 380bddbc52d4..62e9ff237b6e 100644 +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -614,7 +614,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow); + void __mptcp_subflow_send_ack(struct sock *ssk); + void mptcp_subflow_reset(struct sock *ssk); 
+-void mptcp_subflow_queue_clean(struct sock *ssk); ++void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); + void mptcp_sock_graft(struct sock *sk, struct socket *parent); + struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); + bool __mptcp_close(struct sock *sk, long timeout); +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 613f515fedf0..9d3701fdb293 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1733,7 +1733,7 @@ static void subflow_state_change(struct sock *sk) + } + } + +-void mptcp_subflow_queue_clean(struct sock *listener_ssk) ++void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) + { + struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; + struct mptcp_sock *msk, *next, *head = NULL; +@@ -1782,8 +1782,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) + + do_cancel_work = __mptcp_close(sk, 0); + release_sock(sk); +- if (do_cancel_work) ++ if (do_cancel_work) { ++ /* lockdep will report a false positive ABBA deadlock ++ * between cancel_work_sync and the listener socket. ++ * The involved locks belong to different sockets WRT ++ * the existing AB chain. ++ * Using a per socket key is problematic as key ++ * deregistration requires process context and must be ++ * performed at socket disposal time, in atomic ++ * context. ++ * Just tell lockdep to consider the listener socket ++ * released here. 
++ */ ++ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); + mptcp_cancel_work(sk); ++ mutex_acquire(&listener_sk->sk_lock.dep_map, ++ SINGLE_DEPTH_NESTING, 0, _RET_IP_); ++ } + sock_put(sk); + } + +-- +2.35.1 + diff --git a/queue-6.1/net-amd-xgbe-add-missed-tasklet_kill.patch b/queue-6.1/net-amd-xgbe-add-missed-tasklet_kill.patch new file mode 100644 index 00000000000..006e7362027 --- /dev/null +++ b/queue-6.1/net-amd-xgbe-add-missed-tasklet_kill.patch @@ -0,0 +1,71 @@ +From 218db834a39ddd3b834c0a21f77419232111ba0f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Dec 2022 16:14:47 +0800 +Subject: net: amd-xgbe: add missed tasklet_kill + +From: Jiguang Xiao + +[ Upstream commit d530ece70f16f912e1d1bfeea694246ab78b0a4b ] + +The driver does not call tasklet_kill in several places. +Add the calls to fix it. + +Fixes: 85b85c853401 ("amd-xgbe: Re-issue interrupt if interrupt status not cleared") +Signed-off-by: Jiguang Xiao +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 3 +++ + drivers/net/ethernet/amd/xgbe/xgbe-i2c.c | 4 +++- + drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 +++- + 3 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +index 7b666106feee..614c0278419b 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) + + devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + ++ tasklet_kill(&pdata->tasklet_dev); ++ tasklet_kill(&pdata->tasklet_ecc); ++ + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) + devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +index 22d4fc547a0a..a9ccc4258ee5 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c ++++ 
b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) + xgbe_i2c_disable(pdata); + xgbe_i2c_clear_all_interrupts(pdata); + +- if (pdata->dev_irq != pdata->i2c_irq) ++ if (pdata->dev_irq != pdata->i2c_irq) { + devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); ++ tasklet_kill(&pdata->tasklet_i2c); ++ } + } + + static int xgbe_i2c_start(struct xgbe_prv_data *pdata) +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +index 4e97b4869522..0c5c1b155683 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +@@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) + /* Disable auto-negotiation */ + xgbe_an_disable_all(pdata); + +- if (pdata->dev_irq != pdata->an_irq) ++ if (pdata->dev_irq != pdata->an_irq) { + devm_free_irq(pdata->dev, pdata->an_irq, pdata); ++ tasklet_kill(&pdata->tasklet_an); ++ } + + pdata->phy_if.phy_impl.stop(pdata); + +-- +2.35.1 + diff --git a/queue-6.1/net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch b/queue-6.1/net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch new file mode 100644 index 00000000000..3cb875d485e --- /dev/null +++ b/queue-6.1/net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch @@ -0,0 +1,55 @@ +From c459b9c01a4f02052538e19d3528765d9b3e7089 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 22:34:05 +0800 +Subject: net: dsa: mv88e6xxx: depend on PTP conditionally + +From: Johnny S. Lee + +[ Upstream commit 30e725537546248bddc12eaac2fe0a258917f190 ] + +PTP hardware timestamping related objects are not linked when PTP +support for MV88E6xxx (NET_DSA_MV88E6XXX_PTP) is disabled, therefore +NET_DSA_MV88E6XXX should not depend on PTP_1588_CLOCK_OPTIONAL +regardless of NET_DSA_MV88E6XXX_PTP. 
+ +Instead, condition more strictly on how NET_DSA_MV88E6XXX_PTP's +dependencies are met, making sure that it cannot be enabled when +NET_DSA_MV88E6XXX=y and PTP_1588_CLOCK=m. + +In other words, this commit allows NET_DSA_MV88E6XXX to be built-in +while PTP_1588_CLOCK is a module, as long as NET_DSA_MV88E6XXX_PTP is +prevented from being enabled. + +Fixes: e5f31552674e ("ethernet: fix PTP_1588_CLOCK dependencies") +Signed-off-by: Johnny S. Lee +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/Kconfig | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig +index 7a2445a34eb7..e3181d5471df 100644 +--- a/drivers/net/dsa/mv88e6xxx/Kconfig ++++ b/drivers/net/dsa/mv88e6xxx/Kconfig +@@ -2,7 +2,6 @@ + config NET_DSA_MV88E6XXX + tristate "Marvell 88E6xxx Ethernet switch fabric support" + depends on NET_DSA +- depends on PTP_1588_CLOCK_OPTIONAL + select IRQ_DOMAIN + select NET_DSA_TAG_EDSA + select NET_DSA_TAG_DSA +@@ -13,7 +12,8 @@ config NET_DSA_MV88E6XXX + config NET_DSA_MV88E6XXX_PTP + bool "PTP support for Marvell 88E6xxx" + default n +- depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK ++ depends on (NET_DSA_MV88E6XXX = y && PTP_1588_CLOCK = y) || \ ++ (NET_DSA_MV88E6XXX = m && PTP_1588_CLOCK) + help + Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch + chips that support it. 
+-- +2.35.1 + diff --git a/queue-6.1/net-ena-account-for-the-number-of-processed-bytes-in.patch b/queue-6.1/net-ena-account-for-the-number-of-processed-bytes-in.patch new file mode 100644 index 00000000000..3c8c6c554d1 --- /dev/null +++ b/queue-6.1/net-ena-account-for-the-number-of-processed-bytes-in.patch @@ -0,0 +1,36 @@ +From 448bf03765298da6b5992fa4f07690e8d08059ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 07:30:07 +0000 +Subject: net: ena: Account for the number of processed bytes in XDP + +From: David Arinzon + +[ Upstream commit c7f5e34d906320fdc996afa616676161c029cc02 ] + +The size of packets that were forwarded or dropped by XDP wasn't added +to the total processed bytes statistic. + +Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") +Signed-off-by: Shay Agroskin +Signed-off-by: David Arinzon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index e313bb45319c..69f2364b8468 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1719,6 +1719,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + } + if (xdp_verdict != XDP_PASS) { + xdp_flags |= xdp_verdict; ++ total_len += ena_rx_ctx.ena_bufs[0].len; + res_budget--; + continue; + } +-- +2.35.1 + diff --git a/queue-6.1/net-ena-don-t-register-memory-info-on-xdp-exchange.patch b/queue-6.1/net-ena-don-t-register-memory-info-on-xdp-exchange.patch new file mode 100644 index 00000000000..82c40a92f43 --- /dev/null +++ b/queue-6.1/net-ena-don-t-register-memory-info-on-xdp-exchange.patch @@ -0,0 +1,50 @@ +From ef2bd38b98500c94b67ee1f42252edc4112b372b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 07:30:06 +0000 +Subject: net: ena: Don't register memory info on XDP 
exchange + +From: David Arinzon + +[ Upstream commit 9c9e539956fa67efb8a65e32b72a853740b33445 ] + +Since the queues aren't destroyed when we only exchange XDP programs, +there's no need to re-register them again. + +Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") +Signed-off-by: Shay Agroskin +Signed-off-by: David Arinzon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 5a454b58498f..e313bb45319c 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -512,16 +512,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, int count) + { ++ struct bpf_prog *old_bpf_prog; + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; +- xchg(&rx_ring->xdp_bpf_prog, prog); +- if (prog) { ++ old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); ++ ++ if (!old_bpf_prog && prog) { + ena_xdp_register_rxq_info(rx_ring); + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; +- } else { ++ } else if (old_bpf_prog && !prog) { + ena_xdp_unregister_rxq_info(rx_ring); + rx_ring->rx_headroom = NET_SKB_PAD; + } +-- +2.35.1 + diff --git a/queue-6.1/net-ena-fix-rx_copybreak-value-update.patch b/queue-6.1/net-ena-fix-rx_copybreak-value-update.patch new file mode 100644 index 00000000000..8d5b7b252d7 --- /dev/null +++ b/queue-6.1/net-ena-fix-rx_copybreak-value-update.patch @@ -0,0 +1,94 @@ +From c56df2b9357df39b7881928d8cd7af6e0dd7f835 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 07:30:09 +0000 +Subject: net: ena: Fix rx_copybreak value update + +From: David Arinzon + +[ Upstream commit c7062aaee099f2f43d6f07a71744b44b94b94b34 ] + +Make the upper bound on 
rx_copybreak tighter, by +making sure it is smaller than the minimum of mtu and +ENA_PAGE_SIZE. With the current upper bound of mtu, +rx_copybreak can be larger than a page. Such large +rx_copybreak will not bring any performance benefit to +the user and therefore makes no sense. + +In addition, the value update was only reflected in +the adapter structure, but not applied for each ring, +causing it to not take effect. + +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") +Signed-off-by: Osama Abboud +Signed-off-by: Arthur Kiyanovski +Signed-off-by: David Arinzon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 6 +----- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 18 ++++++++++++++++++ + drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 ++ + 3 files changed, 21 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c +index 98d6386b7f39..444ccef76da2 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -887,11 +887,7 @@ static int ena_set_tunable(struct net_device *netdev, + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + len = *(u32 *)data; +- if (len > adapter->netdev->mtu) { +- ret = -EINVAL; +- break; +- } +- adapter->rx_copybreak = len; ++ ret = ena_set_rx_copybreak(adapter, len); + break; + default: + ret = -EINVAL; +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 821355c5db10..663f9cd3babf 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2814,6 +2814,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, + return dev_was_up ? 
ena_up(adapter) : 0; + } + ++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak) ++{ ++ struct ena_ring *rx_ring; ++ int i; ++ ++ if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE)) ++ return -EINVAL; ++ ++ adapter->rx_copybreak = rx_copybreak; ++ ++ for (i = 0; i < adapter->num_io_queues; i++) { ++ rx_ring = &adapter->rx_ring[i]; ++ rx_ring->rx_copybreak = rx_copybreak; ++ } ++ ++ return 0; ++} ++ + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) + { + struct ena_com_dev *ena_dev = adapter->ena_dev; +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h +index 290ae9bf47ee..f9d862b630fa 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -392,6 +392,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, + + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); + ++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak); ++ + int ena_get_sset_count(struct net_device *netdev, int sset); + + static inline void ena_reset_device(struct ena_adapter *adapter, +-- +2.35.1 + diff --git a/queue-6.1/net-ena-fix-toeplitz-initial-hash-value.patch b/queue-6.1/net-ena-fix-toeplitz-initial-hash-value.patch new file mode 100644 index 00000000000..01046048e86 --- /dev/null +++ b/queue-6.1/net-ena-fix-toeplitz-initial-hash-value.patch @@ -0,0 +1,72 @@ +From 0a284a09c0dcc50d8a5dc4bab70ff17c3eb0354b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 07:30:05 +0000 +Subject: net: ena: Fix toeplitz initial hash value + +From: David Arinzon + +[ Upstream commit 332b49ff637d6c1a75b971022a8b992cf3c57db1 ] + +On driver initialization, RSS hash initial value is set to zero, +instead of the default value. This happens because we pass NULL as +the RSS key parameter, which caused us to never initialize +the RSS hash value. 
+ +This patch fixes it by making sure the initial value is set, no matter +what the value of the RSS key is. + +Fixes: 91a65b7d3ed8 ("net: ena: fix potential crash when rxfh key is NULL") +Signed-off-by: Nati Koler +Signed-off-by: David Arinzon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 29 +++++++---------------- + 1 file changed, 9 insertions(+), 20 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c +index 8c8b4c88c7de..451c3a1b6255 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -2400,29 +2400,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + return -EOPNOTSUPP; + } + +- switch (func) { +- case ENA_ADMIN_TOEPLITZ: +- if (key) { +- if (key_len != sizeof(hash_key->key)) { +- netdev_err(ena_dev->net_device, +- "key len (%u) doesn't equal the supported size (%zu)\n", +- key_len, sizeof(hash_key->key)); +- return -EINVAL; +- } +- memcpy(hash_key->key, key, key_len); +- rss->hash_init_val = init_val; +- hash_key->key_parts = key_len / sizeof(hash_key->key[0]); ++ if ((func == ENA_ADMIN_TOEPLITZ) && key) { ++ if (key_len != sizeof(hash_key->key)) { ++ netdev_err(ena_dev->net_device, ++ "key len (%u) doesn't equal the supported size (%zu)\n", ++ key_len, sizeof(hash_key->key)); ++ return -EINVAL; + } +- break; +- case ENA_ADMIN_CRC32: +- rss->hash_init_val = init_val; +- break; +- default: +- netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n", +- func); +- return -EINVAL; ++ memcpy(hash_key->key, key, key_len); ++ hash_key->key_parts = key_len / sizeof(hash_key->key[0]); + } + ++ rss->hash_init_val = init_val; + old_func = rss->hash_func; + rss->hash_func = func; + rc = ena_com_set_hash_function(ena_dev); +-- +2.35.1 + diff --git a/queue-6.1/net-ena-set-default-value-for-rx-interrupt-moderatio.patch 
b/queue-6.1/net-ena-set-default-value-for-rx-interrupt-moderatio.patch new file mode 100644 index 00000000000..1c388c23ab7 --- /dev/null +++ b/queue-6.1/net-ena-set-default-value-for-rx-interrupt-moderatio.patch @@ -0,0 +1,42 @@ +From 177f94b9931aa78cc2607cc320bacbee80615366 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 07:30:10 +0000 +Subject: net: ena: Set default value for RX interrupt moderation + +From: David Arinzon + +[ Upstream commit e712f3e4920b3a1a5e6b536827d118e14862896c ] + +RX ring can be NULL in XDP use cases where only TX queues +are configured. In this scenario, the RX interrupt moderation +value sent to the device remains in its default value of 0. + +In this change, setting the default value of the RX interrupt +moderation to be the same as of the TX. + +Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") +Signed-off-by: David Arinzon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 663f9cd3babf..c4aff712b5d8 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1823,8 +1823,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) + static void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) + { ++ u32 rx_interval = tx_ring->smoothed_interval; + struct ena_eth_io_intr_reg intr_reg; +- u32 rx_interval = 0; ++ + /* Rx ring can be NULL when for XDP tx queues which don't have an + * accompanying rx_ring pair. 
+ */ +-- +2.35.1 + diff --git a/queue-6.1/net-ena-update-numa-tph-hint-register-upon-numa-node.patch b/queue-6.1/net-ena-update-numa-tph-hint-register-upon-numa-node.patch new file mode 100644 index 00000000000..d8dcef41df1 --- /dev/null +++ b/queue-6.1/net-ena-update-numa-tph-hint-register-upon-numa-node.patch @@ -0,0 +1,155 @@ +From f727b36f59b206597e57cb2371d41ed545a8a20b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 07:30:11 +0000 +Subject: net: ena: Update NUMA TPH hint register upon NUMA node update + +From: David Arinzon + +[ Upstream commit a8ee104f986e720cea52133885cc822d459398c7 ] + +The device supports a PCIe optimization hint, which indicates on +which NUMA the queue is currently processed. This hint is utilized +by PCIe in order to reduce its access time by accessing the +correct NUMA resources and maintaining cache coherence. + +The driver calls the register update for the hint (called TPH - +TLP Processing Hint) during the NAPI loop. + +Though the update is expected upon a NUMA change (when a queue +is moved from one NUMA to the other), the current logic performs +a register update when the queue is moved to a different CPU, +but the CPU is not necessarily in a different NUMA. + +The changes include: +1. Performing the TPH update only when the queue has switched +a NUMA node. +2. Moving the TPH update call to be triggered only when NAPI was +scheduled from interrupt context, as opposed to a busy-polling loop. +This is due to the fact that during busy-polling, the frequency +of CPU switches for a particular queue is significantly higher, +thus, the likelihood to switch NUMA is much higher. Therefore, +providing the frequent updates to the device upon a NUMA update +are unlikely to be beneficial. + +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") +Signed-off-by: David Arinzon +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 27 +++++++++++++------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 +++-- + 2 files changed, 22 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index c4aff712b5d8..5ce01ac72637 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -680,6 +680,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, + ring->ena_dev = adapter->ena_dev; + ring->per_napi_packets = 0; + ring->cpu = 0; ++ ring->numa_node = 0; + ring->no_interrupt_event_cnt = 0; + u64_stats_init(&ring->syncp); + } +@@ -783,6 +784,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + tx_ring->cpu = ena_irq->cpu; ++ tx_ring->numa_node = node; + return 0; + + err_push_buf_intermediate_buf: +@@ -915,6 +917,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + rx_ring->cpu = ena_irq->cpu; ++ rx_ring->numa_node = node; + + return 0; + } +@@ -1863,20 +1866,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, + if (likely(tx_ring->cpu == cpu)) + goto out; + ++ tx_ring->cpu = cpu; ++ if (rx_ring) ++ rx_ring->cpu = cpu; ++ + numa_node = cpu_to_node(cpu); ++ ++ if (likely(tx_ring->numa_node == numa_node)) ++ goto out; ++ + put_cpu(); + + if (numa_node != NUMA_NO_NODE) { + ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); +- if (rx_ring) ++ tx_ring->numa_node = numa_node; ++ if (rx_ring) { ++ rx_ring->numa_node = numa_node; + ena_com_update_numa_node(rx_ring->ena_com_io_cq, + numa_node); ++ } + } + +- tx_ring->cpu = cpu; +- if (rx_ring) +- rx_ring->cpu = cpu; +- + return; + out: + put_cpu(); +@@ -1997,11 +2007,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget) + 
if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) + ena_adjust_adaptive_rx_intr_moderation(ena_napi); + ++ ena_update_ring_numa_node(tx_ring, rx_ring); + ena_unmask_interrupt(tx_ring, rx_ring); + } + +- ena_update_ring_numa_node(tx_ring, rx_ring); +- + ret = rx_work_done; + } else { + ret = budget; +@@ -2386,7 +2395,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; + ctx.queue_size = tx_ring->ring_size; +- ctx.numa_node = cpu_to_node(tx_ring->cpu); ++ ctx.numa_node = tx_ring->numa_node; + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { +@@ -2454,7 +2463,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; + ctx.queue_size = rx_ring->ring_size; +- ctx.numa_node = cpu_to_node(rx_ring->cpu); ++ ctx.numa_node = rx_ring->numa_node; + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h +index f9d862b630fa..2cb141079474 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -262,9 +262,11 @@ struct ena_ring { + bool disable_meta_caching; + u16 no_interrupt_event_cnt; + +- /* cpu for TPH */ ++ /* cpu and NUMA for TPH */ + int cpu; +- /* number of tx/rx_buffer_info's entries */ ++ int numa_node; ++ ++ /* number of tx/rx_buffer_info's entries */ + int ring_size; + + enum ena_admin_placement_policy_type tx_mem_queue_type; +-- +2.35.1 + diff --git a/queue-6.1/net-ena-use-bitmask-to-indicate-packet-redirection.patch b/queue-6.1/net-ena-use-bitmask-to-indicate-packet-redirection.patch new file mode 100644 index 00000000000..83edac1b864 --- /dev/null +++ b/queue-6.1/net-ena-use-bitmask-to-indicate-packet-redirection.patch @@ -0,0 +1,193 @@ +From 
76874b9c4e9855661c7d43d95078c6cea25731de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 07:30:08 +0000 +Subject: net: ena: Use bitmask to indicate packet redirection + +From: David Arinzon + +[ Upstream commit 59811faa2c54dbcf44d575b5a8f6e7077da88dc2 ] + +Redirecting packets with XDP Redirect is done in two phases: +1. A packet is passed by the driver to the kernel using + xdp_do_redirect(). +2. After finishing polling for new packets the driver lets the kernel + know that it can now process the redirected packet using + xdp_do_flush_map(). + The packets' redirection is handled in the napi context of the + queue that called xdp_do_redirect() + +To avoid calling xdp_do_flush_map() each time the driver first checks +whether any packets were redirected, using + xdp_flags |= xdp_verdict; +and + if (xdp_flags & XDP_REDIRECT) + xdp_do_flush_map() + +essentially treating XDP instructions as a bitmask, which isn't the case: + enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, + XDP_TX, + XDP_REDIRECT, + }; + +Given the current possible values of xdp_action, the current design +doesn't have a bug (since XDP_REDIRECT = 100b), but it is still +flawed. + +This patch makes the driver use a bitmask instead, to avoid future +issues. + +Fixes: a318c70ad152 ("net: ena: introduce XDP redirect implementation") +Signed-off-by: Shay Agroskin +Signed-off-by: David Arinzon +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 26 ++++++++++++-------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 9 +++++++ + 2 files changed, 25 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 69f2364b8468..821355c5db10 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -374,9 +374,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n, + + static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) + { ++ u32 verdict = ENA_XDP_PASS; + struct bpf_prog *xdp_prog; + struct ena_ring *xdp_ring; +- u32 verdict = XDP_PASS; + struct xdp_frame *xdpf; + u64 *xdp_stat; + +@@ -393,7 +393,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) + if (unlikely(!xdpf)) { + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; +- verdict = XDP_ABORTED; ++ verdict = ENA_XDP_DROP; + break; + } + +@@ -409,29 +409,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) + + spin_unlock(&xdp_ring->xdp_tx_lock); + xdp_stat = &rx_ring->rx_stats.xdp_tx; ++ verdict = ENA_XDP_TX; + break; + case XDP_REDIRECT: + if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { + xdp_stat = &rx_ring->rx_stats.xdp_redirect; ++ verdict = ENA_XDP_REDIRECT; + break; + } + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; +- verdict = XDP_ABORTED; ++ verdict = ENA_XDP_DROP; + break; + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; ++ verdict = ENA_XDP_DROP; + break; + case XDP_DROP: + xdp_stat = &rx_ring->rx_stats.xdp_drop; ++ verdict = ENA_XDP_DROP; + break; + case XDP_PASS: + xdp_stat = &rx_ring->rx_stats.xdp_pass; ++ verdict = ENA_XDP_PASS; + break; + 
default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_invalid; ++ verdict = ENA_XDP_DROP; + } + + ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); +@@ -1621,12 +1627,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) + * we expect, then we simply drop it + */ + if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) +- return XDP_DROP; ++ return ENA_XDP_DROP; + + ret = ena_xdp_execute(rx_ring, xdp); + + /* The xdp program might expand the headers */ +- if (ret == XDP_PASS) { ++ if (ret == ENA_XDP_PASS) { + rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; + } +@@ -1665,7 +1671,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq); + + do { +- xdp_verdict = XDP_PASS; ++ xdp_verdict = ENA_XDP_PASS; + skb = NULL; + ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; + ena_rx_ctx.max_bufs = rx_ring->sgl_size; +@@ -1693,7 +1699,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + + /* allocate skb and fill it */ +- if (xdp_verdict == XDP_PASS) ++ if (xdp_verdict == ENA_XDP_PASS) + skb = ena_rx_skb(rx_ring, + rx_ring->ena_bufs, + ena_rx_ctx.descs, +@@ -1711,13 +1717,13 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + /* Packets was passed for transmission, unmap it + * from RX side. 
+ */ +- if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) { ++ if (xdp_verdict & ENA_XDP_FORWARDED) { + ena_unmap_rx_buff(rx_ring, + &rx_ring->rx_buffer_info[req_id]); + rx_ring->rx_buffer_info[req_id].page = NULL; + } + } +- if (xdp_verdict != XDP_PASS) { ++ if (xdp_verdict != ENA_XDP_PASS) { + xdp_flags |= xdp_verdict; + total_len += ena_rx_ctx.ena_bufs[0].len; + res_budget--; +@@ -1763,7 +1769,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + ena_refill_rx_bufs(rx_ring, refill_required); + } + +- if (xdp_flags & XDP_REDIRECT) ++ if (xdp_flags & ENA_XDP_REDIRECT) + xdp_do_flush_map(); + + return work_done; +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h +index 1bdce99bf688..290ae9bf47ee 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -409,6 +409,15 @@ enum ena_xdp_errors_t { + ENA_XDP_NO_ENOUGH_QUEUES, + }; + ++enum ENA_XDP_ACTIONS { ++ ENA_XDP_PASS = 0, ++ ENA_XDP_TX = BIT(0), ++ ENA_XDP_REDIRECT = BIT(1), ++ ENA_XDP_DROP = BIT(2) ++}; ++ ++#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) ++ + static inline bool ena_xdp_present(struct ena_adapter *adapter) + { + return !!adapter->xdp_bpf_prog; +-- +2.35.1 + diff --git a/queue-6.1/net-hns3-add-interrupts-re-initialization-while-doin.patch b/queue-6.1/net-hns3-add-interrupts-re-initialization-while-doin.patch new file mode 100644 index 00000000000..b6a06027895 --- /dev/null +++ b/queue-6.1/net-hns3-add-interrupts-re-initialization-while-doin.patch @@ -0,0 +1,43 @@ +From 8f9e5a1040b4b223b9dde5824d0a387d75a30dda Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 14:43:41 +0800 +Subject: net: hns3: add interrupts re-initialization while doing VF FLR + +From: Jie Wang + +[ Upstream commit 09e6b30eeb254f1818a008cace3547159e908dfd ] + +Currently keep alive message between PF and VF may be lost and the VF is +unalive in PF. 
So the VF will not do reset during PF FLR reset process. +This would make the allocated interrupt resources of VF invalid and VF +wouldn't receive or respond to PF any more. + +So this patch adds VF interrupts re-initialization during VF FLR for VF +recovery in above cases. + +Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR") +Signed-off-by: Jie Wang +Signed-off-by: Hao Lan +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +index db6f7cdba958..081bd2c3f289 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +@@ -2767,7 +2767,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev) + struct pci_dev *pdev = hdev->pdev; + int ret = 0; + +- if (hdev->reset_type == HNAE3_VF_FULL_RESET && ++ if ((hdev->reset_type == HNAE3_VF_FULL_RESET || ++ hdev->reset_type == HNAE3_FLR_RESET) && + test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) { + hclgevf_misc_irq_uninit(hdev); + hclgevf_uninit_msi(hdev); +-- +2.35.1 + diff --git a/queue-6.1/net-hns3-fix-miss-l3e-checking-for-rx-packet.patch b/queue-6.1/net-hns3-fix-miss-l3e-checking-for-rx-packet.patch new file mode 100644 index 00000000000..9052aefde76 --- /dev/null +++ b/queue-6.1/net-hns3-fix-miss-l3e-checking-for-rx-packet.patch @@ -0,0 +1,69 @@ +From 3ad232b7ef9f1fc2f3bd0ba5539ae5db400f369c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 14:43:42 +0800 +Subject: net: hns3: fix miss L3E checking for rx packet + +From: Jian Shen + +[ Upstream commit 7d89b53cea1a702f97117fb4361523519bb1e52c ] + +For device supports RXD advanced layout, the driver will +return directly if the hardware finish the checksum +calculate.
It cause missing L3E checking for ip packets. +Fixes it. + +Fixes: 1ddc028ac849 ("net: hns3: refactor out RX completion checksum") +Signed-off-by: Jian Shen +Signed-off-by: Hao Lan +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index 028577943ec5..248f15dac86b 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -3855,18 +3855,16 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) + return 0; + } + +-static bool hns3_checksum_complete(struct hns3_enet_ring *ring, ++static void hns3_checksum_complete(struct hns3_enet_ring *ring, + struct sk_buff *skb, u32 ptype, u16 csum) + { + if (ptype == HNS3_INVALID_PTYPE || + hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE) +- return false; ++ return; + + hns3_ring_stats_update(ring, csum_complete); + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)csum); +- +- return true; + } + + static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info, +@@ -3926,8 +3924,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, + ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, + HNS3_RXD_PTYPE_S); + +- if (hns3_checksum_complete(ring, skb, ptype, csum)) +- return; ++ hns3_checksum_complete(ring, skb, ptype, csum); + + /* check if hardware has done checksum */ + if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) +@@ -3936,6 +3933,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, + if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) | + BIT(HNS3_RXD_OL3E_B) | + BIT(HNS3_RXD_OL4E_B)))) { ++ skb->ip_summed = CHECKSUM_NONE; + hns3_ring_stats_update(ring, l3l4_csum_err); + + return; +-- +2.35.1 + diff 
--git a/queue-6.1/net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch b/queue-6.1/net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch new file mode 100644 index 00000000000..5eaaff5da48 --- /dev/null +++ b/queue-6.1/net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch @@ -0,0 +1,134 @@ +From 4bc9c0168c940dbf5bdbc1a0dd8afc502b64355e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 14:43:43 +0800 +Subject: net: hns3: fix VF promisc mode not update when mac table full + +From: Jian Shen + +[ Upstream commit 8ee57c7b8406c7aa8ca31e014440c87c6383f429 ] + +Currently, it missed set HCLGE_VPORT_STATE_PROMISC_CHANGE +flag for VF when vport->overflow_promisc_flags changed. +So the VF won't check whether to update promisc mode in +this case. So add it. + +Fixes: 1e6e76101fd9 ("net: hns3: configure promisc mode for VF asynchronously") +Signed-off-by: Jian Shen +Signed-off-by: Hao Lan +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_main.c | 75 +++++++++++-------- + 1 file changed, 43 insertions(+), 32 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index 4e54f91f7a6c..6c2742f59c77 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -12754,60 +12754,71 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable) + return ret; + } + +-static void hclge_sync_promisc_mode(struct hclge_dev *hdev) ++static int hclge_sync_vport_promisc_mode(struct hclge_vport *vport) + { +- struct hclge_vport *vport = &hdev->vport[0]; + struct hnae3_handle *handle = &vport->nic; ++ struct hclge_dev *hdev = vport->back; ++ bool uc_en = false; ++ bool mc_en = false; + u8 tmp_flags; ++ bool bc_en; + int ret; +- u16 i; + + if (vport->last_promisc_flags != vport->overflow_promisc_flags) { + set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, 
&vport->state); + vport->last_promisc_flags = vport->overflow_promisc_flags; + } + +- if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) { ++ if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, ++ &vport->state)) ++ return 0; ++ ++ /* for PF */ ++ if (!vport->vport_id) { + tmp_flags = handle->netdev_flags | vport->last_promisc_flags; + ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE, + tmp_flags & HNAE3_MPE); +- if (!ret) { +- clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, +- &vport->state); ++ if (!ret) + set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, + &vport->state); +- } ++ else ++ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, ++ &vport->state); ++ return ret; + } + +- for (i = 1; i < hdev->num_alloc_vport; i++) { +- bool uc_en = false; +- bool mc_en = false; +- bool bc_en; ++ /* for VF */ ++ if (vport->vf_info.trusted) { ++ uc_en = vport->vf_info.request_uc_en > 0 || ++ vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE; ++ mc_en = vport->vf_info.request_mc_en > 0 || ++ vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE; ++ } ++ bc_en = vport->vf_info.request_bc_en > 0; + +- vport = &hdev->vport[i]; ++ ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, ++ mc_en, bc_en); ++ if (ret) { ++ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); ++ return ret; ++ } ++ hclge_set_vport_vlan_fltr_change(vport); + +- if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, +- &vport->state)) +- continue; ++ return 0; ++} + +- if (vport->vf_info.trusted) { +- uc_en = vport->vf_info.request_uc_en > 0 || +- vport->overflow_promisc_flags & +- HNAE3_OVERFLOW_UPE; +- mc_en = vport->vf_info.request_mc_en > 0 || +- vport->overflow_promisc_flags & +- HNAE3_OVERFLOW_MPE; +- } +- bc_en = vport->vf_info.request_bc_en > 0; ++static void hclge_sync_promisc_mode(struct hclge_dev *hdev) ++{ ++ struct hclge_vport *vport; ++ int ret; ++ u16 i; + +- ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, +- mc_en, bc_en); +- if (ret) { +- 
set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, +- &vport->state); ++ for (i = 0; i < hdev->num_alloc_vport; i++) { ++ vport = &hdev->vport[i]; ++ ++ ret = hclge_sync_vport_promisc_mode(vport); ++ if (ret) + return; +- } +- hclge_set_vport_vlan_fltr_change(vport); + } + } + +-- +2.35.1 + diff --git a/queue-6.1/net-hns3-refine-the-handling-for-vf-heartbeat.patch b/queue-6.1/net-hns3-refine-the-handling-for-vf-heartbeat.patch new file mode 100644 index 00000000000..3ccf9f945f0 --- /dev/null +++ b/queue-6.1/net-hns3-refine-the-handling-for-vf-heartbeat.patch @@ -0,0 +1,311 @@ +From 85eba771de4da653e6d57b8f95b0d18c74aed6f3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Dec 2022 14:27:49 +0800 +Subject: net: hns3: refine the handling for VF heartbeat + +From: Jian Shen + +[ Upstream commit fec7352117fa301bfbc31bacc14bb9a579376b36 ] + +Currently, the PF check the VF alive by the KEEP_ALVE +mailbox from VF. VF keep sending the mailbox per 2 +seconds. Once PF lost the mailbox for more than 8 +seconds, it will regards the VF is abnormal, and stop +notifying the state change to VF, include link state, +vf mac, reset, even though it receives the KEEP_ALIVE +mailbox again. It's inreasonable. + +This patch fixes it. PF will record the state change which +need to notify VF when lost the VF's KEEP_ALIVE mailbox. +And notify VF when receive the mailbox again. Introduce a +new flag HCLGE_VPORT_STATE_INITED, used to distinguish the +case whether VF driver loaded or not. For VF will query +these states when initializing, so it's unnecessary to +notify it in this case. + +Fixes: aa5c4f175be6 ("net: hns3: add reset handling for VF when doing PF reset") +Signed-off-by: Jian Shen +Signed-off-by: Hao Lan +Reported-by: kernel test robot +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_main.c | 57 +++++++++++---- + .../hisilicon/hns3/hns3pf/hclge_main.h | 7 ++ + .../hisilicon/hns3/hns3pf/hclge_mbx.c | 71 ++++++++++++++++--- + 3 files changed, 112 insertions(+), 23 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index 6c2742f59c77..07ad5f35219e 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -3910,9 +3910,17 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) + return ret; + } + +- if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) ++ if (!reset || ++ !test_bit(HCLGE_VPORT_STATE_INITED, &vport->state)) + continue; + ++ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) && ++ hdev->reset_type == HNAE3_FUNC_RESET) { ++ set_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, ++ &vport->need_notify); ++ continue; ++ } ++ + /* Inform VF to process the reset. + * hclge_inform_reset_assert_to_vf may fail if VF + * driver is not loaded. 
+@@ -4609,18 +4617,25 @@ static void hclge_reset_service_task(struct hclge_dev *hdev) + + static void hclge_update_vport_alive(struct hclge_dev *hdev) + { ++#define HCLGE_ALIVE_SECONDS_NORMAL 8 ++ ++ unsigned long alive_time = HCLGE_ALIVE_SECONDS_NORMAL * HZ; + int i; + + /* start from vport 1 for PF is always alive */ + for (i = 1; i < hdev->num_alloc_vport; i++) { + struct hclge_vport *vport = &hdev->vport[i]; + +- if (time_after(jiffies, vport->last_active_jiffies + 8 * HZ)) ++ if (!test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) || ++ !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) ++ continue; ++ if (time_after(jiffies, vport->last_active_jiffies + ++ alive_time)) { + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); +- +- /* If vf is not alive, set to default value */ +- if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) +- vport->mps = HCLGE_MAC_DEFAULT_FRAME; ++ dev_warn(&hdev->pdev->dev, ++ "VF %u heartbeat timeout\n", ++ i - HCLGE_VF_VPORT_START_NUM); ++ } + } + } + +@@ -8064,9 +8079,11 @@ int hclge_vport_start(struct hclge_vport *vport) + { + struct hclge_dev *hdev = vport->back; + ++ set_bit(HCLGE_VPORT_STATE_INITED, &vport->state); + set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); + vport->last_active_jiffies = jiffies; ++ vport->need_notify = 0; + + if (test_bit(vport->vport_id, hdev->vport_config_block)) { + if (vport->vport_id) { +@@ -8084,7 +8101,9 @@ int hclge_vport_start(struct hclge_vport *vport) + + void hclge_vport_stop(struct hclge_vport *vport) + { ++ clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); ++ vport->need_notify = 0; + } + + static int hclge_client_start(struct hnae3_handle *handle) +@@ -9208,7 +9227,8 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, + return 0; + } + +- dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n", ++ dev_info(&hdev->pdev->dev, ++ "MAC of VF %d has been 
set to %s, will be active after VF reset\n", + vf, format_mac_addr); + return 0; + } +@@ -10465,12 +10485,16 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, + * for DEVICE_VERSION_V3, vf doesn't need to know about the port based + * VLAN state. + */ +- if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && +- test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) +- (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], +- vport->vport_id, +- state, &vlan_info); +- ++ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) { ++ if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) ++ (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], ++ vport->vport_id, ++ state, ++ &vlan_info); ++ else ++ set_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, ++ &vport->need_notify); ++ } + return 0; + } + +@@ -11941,7 +11965,7 @@ static void hclge_reset_vport_state(struct hclge_dev *hdev) + int i; + + for (i = 0; i < hdev->num_alloc_vport; i++) { +- hclge_vport_stop(vport); ++ clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + vport++; + } + } +@@ -12955,6 +12979,11 @@ static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid) + struct hclge_vlan_info vlan_info; + int ret; + ++ clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); ++ clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); ++ vport->need_notify = 0; ++ vport->mps = 0; ++ + /* after disable sriov, clean VF rate configured by PF */ + ret = hclge_tm_qs_shaper_cfg(vport, 0); + if (ret) +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +index 495b639b0dc2..13f23d606e77 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +@@ -995,9 +995,15 @@ enum HCLGE_VPORT_STATE { + HCLGE_VPORT_STATE_MAC_TBL_CHANGE, + HCLGE_VPORT_STATE_PROMISC_CHANGE, + HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, ++ HCLGE_VPORT_STATE_INITED, + HCLGE_VPORT_STATE_MAX + }; + ++enum 
HCLGE_VPORT_NEED_NOTIFY { ++ HCLGE_VPORT_NEED_NOTIFY_RESET, ++ HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, ++}; ++ + struct hclge_vlan_info { + u16 vlan_proto; /* so far support 802.1Q only */ + u16 qos; +@@ -1044,6 +1050,7 @@ struct hclge_vport { + struct hnae3_handle roce; + + unsigned long state; ++ unsigned long need_notify; + unsigned long last_active_jiffies; + u32 mps; /* Max packet size */ + struct hclge_vf_info vf_info; +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +index a7b06c63143c..04ff9bf12185 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +@@ -124,17 +124,26 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, + return status; + } + ++static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type) ++{ ++ __le16 msg_data; ++ u8 dest_vfid; ++ ++ dest_vfid = (u8)vport->vport_id; ++ msg_data = cpu_to_le16(reset_type); ++ ++ /* send this requested info to VF */ ++ return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), ++ HCLGE_MBX_ASSERTING_RESET, dest_vfid); ++} ++ + int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) + { + struct hclge_dev *hdev = vport->back; +- __le16 msg_data; + u16 reset_type; +- u8 dest_vfid; + + BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX); + +- dest_vfid = (u8)vport->vport_id; +- + if (hdev->reset_type == HNAE3_FUNC_RESET) + reset_type = HNAE3_VF_PF_FUNC_RESET; + else if (hdev->reset_type == HNAE3_FLR_RESET) +@@ -142,11 +151,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) + else + reset_type = HNAE3_VF_FUNC_RESET; + +- msg_data = cpu_to_le16(reset_type); +- +- /* send this requested info to VF */ +- return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), +- HCLGE_MBX_ASSERTING_RESET, dest_vfid); ++ return hclge_inform_vf_reset(vport, reset_type); + } + + static void 
hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head) +@@ -652,9 +657,56 @@ static int hclge_reset_vf(struct hclge_vport *vport) + return hclge_func_reset_cmd(hdev, vport->vport_id); + } + ++static void hclge_notify_vf_config(struct hclge_vport *vport) ++{ ++ struct hclge_dev *hdev = vport->back; ++ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); ++ struct hclge_port_base_vlan_config *vlan_cfg; ++ int ret; ++ ++ hclge_push_vf_link_status(vport); ++ if (test_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, &vport->need_notify)) { ++ ret = hclge_inform_vf_reset(vport, HNAE3_VF_PF_FUNC_RESET); ++ if (ret) { ++ dev_err(&hdev->pdev->dev, ++ "failed to inform VF %u reset!", ++ vport->vport_id - HCLGE_VF_VPORT_START_NUM); ++ return; ++ } ++ vport->need_notify = 0; ++ return; ++ } ++ ++ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && ++ test_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify)) { ++ vlan_cfg = &vport->port_base_vlan_cfg; ++ ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0], ++ vport->vport_id, ++ vlan_cfg->state, ++ &vlan_cfg->vlan_info); ++ if (ret) { ++ dev_err(&hdev->pdev->dev, ++ "failed to inform VF %u port base vlan!", ++ vport->vport_id - HCLGE_VF_VPORT_START_NUM); ++ return; ++ } ++ clear_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify); ++ } ++} ++ + static void hclge_vf_keep_alive(struct hclge_vport *vport) + { ++ struct hclge_dev *hdev = vport->back; ++ + vport->last_active_jiffies = jiffies; ++ ++ if (test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) && ++ !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { ++ set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); ++ dev_info(&hdev->pdev->dev, "VF %u is alive!", ++ vport->vport_id - HCLGE_VF_VPORT_START_NUM); ++ hclge_notify_vf_config(vport); ++ } + } + + static int hclge_set_vf_mtu(struct hclge_vport *vport, +@@ -954,6 +1006,7 @@ static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param) + hclge_rm_vport_all_mac_table(param->vport, true, + 
HCLGE_MAC_ADDR_MC); + hclge_rm_vport_all_vlan_table(param->vport, true); ++ param->vport->mps = 0; + return 0; + } + +-- +2.35.1 + diff --git a/queue-6.1/net-lan966x-fix-configuration-of-the-pcs.patch b/queue-6.1/net-lan966x-fix-configuration-of-the-pcs.patch new file mode 100644 index 00000000000..e297e856afb --- /dev/null +++ b/queue-6.1/net-lan966x-fix-configuration-of-the-pcs.patch @@ -0,0 +1,49 @@ +From be5beda2fdb3de7b572d943efc153b203a14f892 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Dec 2022 10:33:15 +0100 +Subject: net: lan966x: Fix configuration of the PCS + +From: Horatiu Vultur + +[ Upstream commit d717f9474e3fb7e6bd3e43ca16e131f04320ed6f ] + +When the PCS was taken out of reset, we were changing by mistake also +the speed to 100 Mbit. But in case the link was going down, the link +up routine was setting correctly the link speed. If the link was not +getting down then the speed was forced to run at 100 even if the +speed was something else. +On lan966x, to set the speed link to 1G or 2.5G a value of 1 needs to be +written in DEV_CLOCK_CFG_LINK_SPEED. This is similar to the procedure in +lan966x_port_init. 
+ +The issue was reproduced using 1000base-x sfp module using the commands: +ip link set dev eth2 up +ip link addr add 10.97.10.2/24 dev eth2 +ethtool -s eth2 speed 1000 autoneg off + +Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") +Signed-off-by: Horatiu Vultur +Reviewed-by: Piotr Raczynski +Link: https://lore.kernel.org/r/20221221093315.939133-1-horatiu.vultur@microchip.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +index 1a61c6cdb077..0050fcb988b7 100644 +--- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c ++++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +@@ -381,7 +381,7 @@ int lan966x_port_pcs_set(struct lan966x_port *port, + } + + /* Take PCS out of reset */ +- lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(2) | ++ lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(LAN966X_SPEED_1000) | + DEV_CLOCK_CFG_PCS_RX_RST_SET(0) | + DEV_CLOCK_CFG_PCS_TX_RST_SET(0), + DEV_CLOCK_CFG_LINK_SPEED | +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch b/queue-6.1/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch new file mode 100644 index 00000000000..7673b4bbe79 --- /dev/null +++ b/queue-6.1/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch @@ -0,0 +1,39 @@ +From f90a32ba04992510d8dc7cd0566e65d490ea5bc5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 12:51:52 +0200 +Subject: net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error + path + +From: Jiri Pirko + +[ Upstream commit 2a35b2c2e6a252eda2134aae6a756861d9299531 ] + +There are two cleanup calls missing in mlx5_init_once() error path. +Add them making the error path flow to be the same as +mlx5_cleanup_once(). 
+ +Fixes: 52ec462eca9b ("net/mlx5: Add reserved-gids support") +Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section") +Signed-off-by: Jiri Pirko +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index e58775a7d955..6776bf5b8d55 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1051,6 +1051,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) + err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); + mlx5_vxlan_destroy(dev->vxlan); ++ mlx5_cleanup_clock(dev); ++ mlx5_cleanup_reserved_gids(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); + err_events_cleanup: +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5-avoid-recovery-in-probe-flows.patch b/queue-6.1/net-mlx5-avoid-recovery-in-probe-flows.patch new file mode 100644 index 00000000000..6e918ab1428 --- /dev/null +++ b/queue-6.1/net-mlx5-avoid-recovery-in-probe-flows.patch @@ -0,0 +1,49 @@ +From 5b4b5d2ed3fa3fe39f274090d5f744c942e203c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Nov 2022 13:34:12 +0200 +Subject: net/mlx5: Avoid recovery in probe flows + +From: Shay Drory + +[ Upstream commit 9078e843efec530f279a155f262793c58b0746bd ] + +Currently, recovery is done without considering whether the device is +still in probe flow. +This may lead to recovery before device have finished probed +successfully. e.g.: while mlx5_init_one() is running. Recovery flow is +using functionality that is loaded only by mlx5_init_one(), and there +is no point in running recovery without mlx5_init_one() finished +successfully. + +Fix it by waiting for probe flow to finish and checking whether the +device is probed before trying to perform recovery. 
+ +Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/health.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c +index 86ed87d704f7..96417c5feed7 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c +@@ -674,6 +674,12 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) + dev = container_of(priv, struct mlx5_core_dev, priv); + devlink = priv_to_devlink(dev); + ++ mutex_lock(&dev->intf_state_mutex); ++ if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { ++ mlx5_core_err(dev, "health works are not permitted at this stage\n"); ++ return; ++ } ++ mutex_unlock(&dev->intf_state_mutex); + enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + devl_lock(devlink); +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch b/queue-6.1/net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch new file mode 100644 index 00000000000..d2c56692320 --- /dev/null +++ b/queue-6.1/net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch @@ -0,0 +1,261 @@ +From b83f949cdaaa2e1254606e2ea156fde300380100 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Dec 2022 10:42:15 +0200 +Subject: net/mlx5: E-Switch, properly handle ingress tagged packets on VST + +From: Moshe Shemesh + +[ Upstream commit 1f0ae22ab470946143485a02cc1cd7e05c0f9120 ] + +Fix SRIOV VST mode behavior to insert cvlan when a guest tag is already +present in the frame. Previous VST mode behavior was to drop packets or +override existing tag, depending on the device version. 
+ +In this patch we fix this behavior by correctly building the HW steering +rule with a push vlan action, or for older devices we ask the FW to stack +the vlan when a vlan is already present. + +Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes") +Fixes: dfcb1ed3c331 ("net/mlx5: E-Switch, Vport ingress/egress ACLs rules for VST mode") +Signed-off-by: Moshe Shemesh +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 7 +++- + .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 33 ++++++++++++++++--- + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 30 ++++++++++++----- + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 ++++ + include/linux/mlx5/device.h | 5 +++ + include/linux/mlx5/mlx5_ifc.h | 3 +- + 6 files changed, 68 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +index 60a73990017c..6b4c9ffad95b 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +@@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) + int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) + { ++ bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_fc *drop_counter = NULL; +@@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + */ + int table_size = 2; + int dest_num = 0; ++ int actions_flag; + int err = 0; + + if (vport->egress.legacy.drop_counter) { +@@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + vport->vport, vport->info.vlan, vport->info.qos); + + /* Allowed vlan rule */ ++ actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW; ++ if 
(vst_mode_steering) ++ actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, +- MLX5_FLOW_CONTEXT_ACTION_ALLOW); ++ actions_flag); + if (err) + goto out; + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +index b1a5199260f6..093ed86a0acd 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +@@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) + int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) + { ++ bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec = NULL; + struct mlx5_fc *counter = NULL; ++ bool vst_check_cvlan = false; ++ bool vst_push_cvlan = false; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. 
+@@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + goto out; + } + +- if (vport->info.vlan || vport->info.qos) ++ if ((vport->info.vlan || vport->info.qos)) { ++ if (vst_mode_steering) ++ vst_push_cvlan = true; ++ else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always)) ++ vst_check_cvlan = true; ++ } ++ ++ if (vst_check_cvlan || vport->info.spoofchk) ++ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; ++ ++ /* Create ingress allow rule */ ++ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; ++ if (vst_push_cvlan) { ++ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; ++ flow_act.vlan[0].prio = vport->info.qos; ++ flow_act.vlan[0].vid = vport->info.vlan; ++ flow_act.vlan[0].ethtype = ETH_P_8021Q; ++ } ++ ++ if (vst_check_cvlan) + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + +@@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + ether_addr_copy(smac_v, vport->info.mac); + } + +- /* Create ingress allow rule */ +- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; +- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { +@@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + goto out; + } + ++ if (!vst_check_cvlan && !vport->info.spoofchk) ++ goto out; ++ + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + /* Attach drop flow counter */ +@@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + return 0; + + out: +- esw_acl_ingress_lgcy_cleanup(esw, vport); ++ if (err) ++ esw_acl_ingress_lgcy_cleanup(esw, vport); + kvfree(spec); + return err; + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +index 374e3fbdc2cf..788a6ab5c463 100644 +--- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, + esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { +- /* insert only if no vlan in packet */ +- MLX5_SET(modify_esw_vport_context_in, in, +- esw_vport_context.vport_cvlan_insert, 1); +- ++ if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) { ++ /* insert either if vlan exist in packet or not */ ++ MLX5_SET(modify_esw_vport_context_in, in, ++ esw_vport_context.vport_cvlan_insert, ++ MLX5_VPORT_CVLAN_INSERT_ALWAYS); ++ } else { ++ /* insert only if no vlan in packet */ ++ MLX5_SET(modify_esw_vport_context_in, in, ++ esw_vport_context.vport_cvlan_insert, ++ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN); ++ } + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, +@@ -774,6 +781,7 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, + + static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) + { ++ bool vst_mode_steering = esw_vst_mode_is_steering(esw); + u16 vport_num = vport->vport; + int flags; + int err; +@@ -800,8 +808,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) + + flags = (vport->info.vlan || vport->info.qos) ? 
+ SET_VLAN_STRIP | SET_VLAN_INSERT : 0; +- modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, +- vport->info.qos, flags); ++ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) ++ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, ++ vport->info.qos, flags); + + return 0; + } +@@ -1805,6 +1814,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos, u8 set_flags) + { + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); ++ bool vst_mode_steering = esw_vst_mode_is_steering(esw); + int err = 0; + + if (IS_ERR(evport)) +@@ -1812,9 +1822,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + if (vlan > 4095 || qos > 7) + return -EINVAL; + +- err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); +- if (err) +- return err; ++ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) { ++ err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); ++ if (err) ++ return err; ++ } + + evport->info.vlan = vlan; + evport->info.qos = qos; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +index 3029bc1c0dd0..5db76af35d3f 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +@@ -518,6 +518,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos, u8 set_flags); + ++static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw) ++{ ++ return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) && ++ MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan)); ++} ++ + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, + u8 vlan_depth) + { +diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h +index 1ff91cb79ded..2383076a7306 100644 +--- a/include/linux/mlx5/device.h ++++ 
b/include/linux/mlx5/device.h +@@ -1085,6 +1085,11 @@ enum { + MLX5_VPORT_ADMIN_STATE_AUTO = 0x2, + }; + ++enum { ++ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN = 0x1, ++ MLX5_VPORT_CVLAN_INSERT_ALWAYS = 0x3, ++}; ++ + enum { + MLX5_L3_PROT_TYPE_IPV4 = 0, + MLX5_L3_PROT_TYPE_IPV6 = 1, +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index 5a4e914e2a6f..e45bdec73baf 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -907,7 +907,8 @@ struct mlx5_ifc_e_switch_cap_bits { + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert_if_not_exist[0x1]; + u8 vport_cvlan_insert_overwrite[0x1]; +- u8 reserved_at_5[0x2]; ++ u8 reserved_at_5[0x1]; ++ u8 vport_cvlan_insert_always[0x1]; + u8 esw_shared_ingress_acl[0x1]; + u8 esw_uplink_ingress_acl[0x1]; + u8 root_ft_on_other_esw[0x1]; +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch b/queue-6.1/net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch new file mode 100644 index 00000000000..a1f8eb02300 --- /dev/null +++ b/queue-6.1/net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch @@ -0,0 +1,42 @@ +From 1ba8d29d8b0273d2ff517154dab9893183a5a812 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 18 Dec 2022 12:42:14 +0200 +Subject: net/mlx5: Fix io_eq_size and event_eq_size params validation + +From: Shay Drory + +[ Upstream commit 44aee8ea15ac205490a41b00cbafcccbf9f7f82b ] + +io_eq_size and event_eq_size params are of param type +DEVLINK_PARAM_TYPE_U32. But, the validation callback is addressing them +as DEVLINK_PARAM_TYPE_U16. + +This cause mismatch in validation in big-endian systems, in which +values in range were rejected while 268500991 was accepted. +Fix it by checking the U32 value in the validation callback. 
+ +Fixes: 0844fa5f7b89 ("net/mlx5: Let user configure io_eq_size param") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +index 66c6a7017695..9e4e8d551884 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +@@ -563,7 +563,7 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) + { +- return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL; ++ return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; + } + + static const struct devlink_param mlx5_devlink_params[] = { +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5-fix-roce-setting-at-hca-level.patch b/queue-6.1/net-mlx5-fix-roce-setting-at-hca-level.patch new file mode 100644 index 00000000000..f87cd2bf2ef --- /dev/null +++ b/queue-6.1/net-mlx5-fix-roce-setting-at-hca-level.patch @@ -0,0 +1,55 @@ +From 5fd7e8b22ecb4d663b3c5b1af4ee681f1f40cbdb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 14:42:59 +0200 +Subject: net/mlx5: Fix RoCE setting at HCA level + +From: Shay Drory + +[ Upstream commit c4ad5f2bdad56265b23d3635494ecdb205431807 ] + +mlx5 PF can disable RoCE for its VFs and SFs. In such case RoCE is +marked as unsupported on those VFs/SFs. +The cited patch added an option for disable (and enable) RoCE at HCA +level. However, that commit didn't check whether RoCE is supported on +the HCA and enabled user to try and set RoCE to on. +Fix it by checking whether the HCA supports RoCE. 
+ +Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +index 9e4e8d551884..97e9ec44a759 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +@@ -468,7 +468,7 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + bool new_state = val.vbool; + + if (new_state && !MLX5_CAP_GEN(dev, roce) && +- !MLX5_CAP_GEN(dev, roce_rw_supported)) { ++ !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); + return -EOPNOTSUPP; + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 6776bf5b8d55..00758312df06 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -614,7 +614,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) + MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, + MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + +- if (MLX5_CAP_GEN(dev, roce_rw_supported)) ++ if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) + MLX5_SET(cmd_hca_cap, set_hca_cap, roce, + mlx5_is_roce_on(dev)); + +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch b/queue-6.1/net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch new file mode 100644 index 00000000000..5f35d31e617 --- /dev/null +++ b/queue-6.1/net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch @@ -0,0 +1,66 @@ +From 
2d0ade18178e138473288eb4eebedc92d1d7cef2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Dec 2022 14:28:34 +0200 +Subject: net/mlx5: Lag, fix failure to cancel delayed bond work + +From: Eli Cohen + +[ Upstream commit 4d1c1379d71777ddeda3e54f8fc26e9ecbfd1009 ] + +Commit 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings") +accidentally removed a call to cancel delayed bond work thus it may +cause queued delay to expire and fall on an already destroyed work +queue. + +Fix by restoring the call cancel_delayed_work_sync() before +destroying the workqueue. + +This prevents call trace such as this: + +[ 329.230417] BUG: kernel NULL pointer dereference, address: 0000000000000000 + [ 329.231444] #PF: supervisor write access in kernel mode + [ 329.232233] #PF: error_code(0x0002) - not-present page + [ 329.233007] PGD 0 P4D 0 + [ 329.233476] Oops: 0002 [#1] SMP + [ 329.234012] CPU: 5 PID: 145 Comm: kworker/u20:4 Tainted: G OE 6.0.0-rc5_mlnx #1 + [ 329.235282] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + [ 329.236868] Workqueue: mlx5_cmd_0000:08:00.1 cmd_work_handler [mlx5_core] + [ 329.237886] RIP: 0010:_raw_spin_lock+0xc/0x20 + [ 329.238585] Code: f0 0f b1 17 75 02 f3 c3 89 c6 e9 6f 3c 5f ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 31 c0 ba 01 00 00 00 0f b1 17 75 02 f3 c3 89 c6 e9 45 3c 5f ff 0f 1f 44 00 00 0f 1f + [ 329.241156] RSP: 0018:ffffc900001b0e98 EFLAGS: 00010046 + [ 329.241940] RAX: 0000000000000000 RBX: ffffffff82374ae0 RCX: 0000000000000000 + [ 329.242954] RDX: 0000000000000001 RSI: 0000000000000014 RDI: 0000000000000000 + [ 329.243974] RBP: ffff888106ccf000 R08: ffff8881004000c8 R09: ffff888100400000 + [ 329.244990] R10: 0000000000000000 R11: ffffffff826669f8 R12: 0000000000002000 + [ 329.246009] R13: 0000000000000005 R14: ffff888100aa7ce0 R15: ffff88852ca80000 + [ 329.247030] FS: 0000000000000000(0000) GS:ffff88852ca80000(0000) knlGS:0000000000000000 + [ 
329.248260] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [ 329.249111] CR2: 0000000000000000 CR3: 000000016d675001 CR4: 0000000000770ee0 + [ 329.250133] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [ 329.251152] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [ 329.252176] PKRU: 55555554 + +Fixes: 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings") +Signed-off-by: Eli Cohen +Reviewed-by: Maor Dickman +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +index 32c3e0a649a7..ad32b80e8501 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +@@ -228,6 +228,7 @@ static void mlx5_ldev_free(struct kref *ref) + if (ldev->nb.notifier_call) + unregister_netdevice_notifier_net(&init_net, &ldev->nb); + mlx5_lag_mp_cleanup(ldev); ++ cancel_delayed_work_sync(&ldev->bond_work); + destroy_workqueue(ldev->wq); + mlx5_lag_mpesw_cleanup(ldev); + mutex_destroy(&ldev->lock); +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch b/queue-6.1/net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch new file mode 100644 index 00000000000..6197b0b7e17 --- /dev/null +++ b/queue-6.1/net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch @@ -0,0 +1,56 @@ +From 2344a3ab8333cb5c159f5a626c3f6a5b700c1b63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Dec 2022 09:22:50 +0800 +Subject: net/mlx5e: Always clear dest encap in neigh-update-del + +From: Chris Mi + +[ Upstream commit 2951b2e142ecf6e0115df785ba91e91b6da74602 ] + +The cited commit introduced a bug for multiple encapsulations flow. +If one dest encap becomes invalid, the flow is set slow path flag. 
+But when other dests encap become invalid, they are not cleared due +to slow path flag of the flow. When neigh-update-add is running, it +will use invalid encap. + +Fix it by checking slow path flag after clearing dest encap. + +Fixes: 9a5f9cc794e1 ("net/mlx5e: Fix possible use-after-free deleting fdb rule") +Signed-off-by: Chris Mi +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +index ff73d25bc6eb..2aaf8ab857b8 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +@@ -222,7 +222,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + int err; + + list_for_each_entry(flow, flow_list, tmp_list) { +- if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) ++ if (!mlx5e_is_offloaded_flow(flow)) + continue; + + attr = mlx5e_tc_get_encap_attr(flow); +@@ -231,6 +231,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + ++ /* Clear pkt_reformat before checking slow path flag. Because ++ * in next iteration, the same flow is already set slow path ++ * flag, but still need to clear the pkt_reformat. 
++ */ ++ if (flow_flag_test(flow, SLOW)) ++ continue; ++ + /* update from encap rule to slow path rule */ + spec = &flow->attr->parse_attr->spec; + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5e-ct-fix-ct-debugfs-folder-name.patch b/queue-6.1/net-mlx5e-ct-fix-ct-debugfs-folder-name.patch new file mode 100644 index 00000000000..96daffae290 --- /dev/null +++ b/queue-6.1/net-mlx5e-ct-fix-ct-debugfs-folder-name.patch @@ -0,0 +1,46 @@ +From 68be07bc14b933de1585535ca1fd7a093c403702 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Nov 2022 13:54:29 +0800 +Subject: net/mlx5e: CT: Fix ct debugfs folder name + +From: Chris Mi + +[ Upstream commit 849190e3e4ccf452fbe2240eace30a9ca83fb8d2 ] + +Need to use sprintf to build a string instead of sscanf. Otherwise +dirname is null and both "ct_nic" and "ct_fdb" won't be created. +But its redundant anyway as driver could be in switchdev mode but +still add nic rules. So use "ct" as folder name. + +Fixes: 77422a8f6f61 ("net/mlx5e: CT: Add ct driver counters") +Signed-off-by: Chris Mi +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +index 864ce0c393e6..f01f7dfdbcf8 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +@@ -2080,14 +2080,9 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, + static void + mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) + { +- bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB; + struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; +- char dirname[16] = {}; + +- if (sscanf(dirname, "ct_%s", is_fdb ? 
"fdb" : "nic") < 0) +- return; +- +- ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev)); ++ ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev)); + debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, + &ct_dbgfs->stats.offloaded); + debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch b/queue-6.1/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch new file mode 100644 index 00000000000..8da7c5de2a2 --- /dev/null +++ b/queue-6.1/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch @@ -0,0 +1,48 @@ +From f89713bd7279018c47fd8b7f8901228ef25f15fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Dec 2022 16:02:57 +0200 +Subject: net/mlx5e: Fix hw mtu initializing at XDP SQ allocation + +From: Adham Faris + +[ Upstream commit 1e267ab88dc44c48f556218f7b7f14c76f7aa066 ] + +Current xdp xmit functions logic (mlx5e_xmit_xdp_frame_mpwqe or +mlx5e_xmit_xdp_frame), validates xdp packet length by comparing it to +hw mtu (configured at xdp sq allocation) before xmiting it. This check +does not account for ethernet fcs length (calculated and filled by the +nic). Hence, when we try sending packets with length > (hw-mtu - +ethernet-fcs-size), the device port drops it and tx_errors_phy is +incremented. Desired behavior is to catch these packets and drop them +by the driver. + +Fix this behavior in XDP SQ allocation function (mlx5e_alloc_xdpsq) by +subtracting ethernet FCS header size (4 Bytes) from current hw mtu +value, since ethernet FCS is calculated and written to ethernet frames +by the nic. 
+ +Fixes: d8bec2b29a82 ("net/mlx5e: Support bpf_xdp_adjust_head()") +Signed-off-by: Adham Faris +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 5e41dfdf79c8..951ede433813 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -1298,7 +1298,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; +- sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); ++ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; + sq->xsk_pool = xsk_pool; + + sq->stats = sq->xsk_pool ? +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5e-fix-rx-reporter-for-xsk-rqs.patch b/queue-6.1/net-mlx5e-fix-rx-reporter-for-xsk-rqs.patch new file mode 100644 index 00000000000..e720663453d --- /dev/null +++ b/queue-6.1/net-mlx5e-fix-rx-reporter-for-xsk-rqs.patch @@ -0,0 +1,41 @@ +From 28cc66eabf3177c684622afa56551dba5d68fe86 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 27 Nov 2022 09:21:28 +0200 +Subject: net/mlx5e: Fix RX reporter for XSK RQs + +From: Tariq Toukan + +[ Upstream commit f8c18a5749cf917096f75dd59885b7a0fe9298ba ] + +RX reporter mistakenly reads from the regular (inactive) RQ +when XSK RQ is active. Fix it here. 
+ +Fixes: 3db4c85cde7a ("net/mlx5e: xsk: Use queue indices starting from 0 for XSK queues") +Signed-off-by: Tariq Toukan +Reviewed-by: Gal Pressman +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +index 5f6f95ad6888..1ae15b8536a8 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +@@ -459,7 +459,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + goto unlock; + + for (i = 0; i < priv->channels.num; i++) { +- struct mlx5e_rq *rq = &priv->channels.c[i]->rq; ++ struct mlx5e_channel *c = priv->channels.c[i]; ++ struct mlx5e_rq *rq; ++ ++ rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ? ++ &c->xskrq : &c->rq; + + err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); + if (err) +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch b/queue-6.1/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch new file mode 100644 index 00000000000..1271c52f0c3 --- /dev/null +++ b/queue-6.1/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch @@ -0,0 +1,45 @@ +From d9820b5c609fb6cb6e1dc546f245bf70913e85cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Nov 2022 15:24:21 +0200 +Subject: net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by + default + +From: Dragos Tatulea + +[ Upstream commit b12d581e83e3ae1080c32ab83f123005bd89a840 ] + +mlx5e_build_nic_params will turn CQE compression on if the hardware +capability is enabled and the slow_pci_heuristic condition is detected. +As IPoIB doesn't support CQE compression, make sure to disable the +feature in the IPoIB profile init. 
+ +Please note that the feature is not exposed to the user for IPoIB +interfaces, so it can't be subsequently turned on. + +Fixes: b797a684b0dd ("net/mlx5e: Enable CQE compression when PCI is slower than link") +Signed-off-by: Dragos Tatulea +Reviewed-by: Gal Pressman +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +index 4e3a75496dd9..84f5352b0ce1 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +@@ -71,6 +71,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, + params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; + params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; + params->tunneled_offload_en = false; ++ ++ /* CQE compression is not supported for IPoIB */ ++ params->rx_cqe_compress_def = false; ++ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + } + + /* Called directly after IPoIB netdevice was created to initialize SW structs */ +-- +2.35.1 + diff --git a/queue-6.1/net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch b/queue-6.1/net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch new file mode 100644 index 00000000000..e3dd8f87419 --- /dev/null +++ b/queue-6.1/net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch @@ -0,0 +1,50 @@ +From 674c33d35519ca4bbe6dfebdaa6c0d1cb2125cf5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Aug 2021 14:45:17 +0300 +Subject: net/mlx5e: Set geneve_tlv_option_0_exist when matching on geneve + option + +From: Maor Dickman + +[ Upstream commit e54638a8380bd9c146a883035fffd0a821813682 ] + +The cited patch added support of matching on geneve option by setting +geneve_tlv_option_0_data mask and key but didn't set 
geneve_tlv_option_0_exist +bit which is required on some HWs when matching geneve_tlv_option_0_data parameter, +this may cause in some cases for packets to wrongly match on rules with different +geneve option. + +Example of such case is packet with geneve_tlv_object class=789 and data=456 +will wrongly match on rule with match geneve_tlv_object class=123 and data=456. + +Fix it by setting geneve_tlv_option_0_exist bit when supported by the HW when matching +on geneve_tlv_option_0_data parameter. + +Fixes: 9272e3df3023 ("net/mlx5e: Geneve, Add support for encap/decap flows offload") +Signed-off-by: Maor Dickman +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +index f5b26f5a7de4..054d80c4e65c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +@@ -273,6 +273,11 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); ++ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ++ ft_field_support.geneve_tlv_option_0_exist)) { ++ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist); ++ MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist); ++ } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + +-- +2.35.1 + diff --git a/queue-6.1/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch b/queue-6.1/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch new file mode 100644 index 00000000000..cd2b6bd1613 --- /dev/null +++ b/queue-6.1/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch @@ 
-0,0 +1,35 @@ +From b52209350509e75626033f5ac5ab9258b992f3d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 10:29:25 +0400 +Subject: net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe + +From: Miaoqian Lin + +[ Upstream commit d039535850ee47079d59527e96be18d8e0daa84b ] + +of_phy_find_device() returns device node with refcount incremented. +Call put_device() to release it when not needed anymore. + +Fixes: ab4e6ee578e8 ("net: phy: xgmiitorgmii: Check phy_driver ready before accessing") +Signed-off-by: Miaoqian Lin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/xilinx_gmii2rgmii.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c +index 8dcb49ed1f3d..7fd9fe6a602b 100644 +--- a/drivers/net/phy/xilinx_gmii2rgmii.c ++++ b/drivers/net/phy/xilinx_gmii2rgmii.c +@@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) + + if (!priv->phy_dev->drv) { + dev_info(dev, "Attached phy not ready\n"); ++ put_device(&priv->phy_dev->mdio.dev); + return -EPROBE_DEFER; + } + +-- +2.35.1 + diff --git a/queue-6.1/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch b/queue-6.1/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch new file mode 100644 index 00000000000..9a5c43ba7bf --- /dev/null +++ b/queue-6.1/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch @@ -0,0 +1,42 @@ +From bb4b29c2ece3acb7444f4d84206ec2420dccc0cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Jan 2023 16:57:43 -0500 +Subject: net: sched: atm: dont intepret cls results when asked to drop + +From: Jamal Hadi Salim + +[ Upstream commit a2965c7be0522eaa18808684b7b82b248515511b ] + +If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume +res.class contains a valid pointer +Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent") + +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S.
Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_atm.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c +index f52255fea652..4a981ca90b0b 100644 +--- a/net/sched/sch_atm.c ++++ b/net/sched/sch_atm.c +@@ -393,10 +393,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + result = tcf_classify(skb, NULL, fl, &res, true); + if (result < 0) + continue; ++ if (result == TC_ACT_SHOT) ++ goto done; ++ + flow = (struct atm_flow_data *)res.class; + if (!flow) + flow = lookup_flow(sch, res.classid); +- goto done; ++ goto drop; + } + } + flow = NULL; +-- +2.35.1 + diff --git a/queue-6.1/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch b/queue-6.1/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch new file mode 100644 index 00000000000..0ce3337a77b --- /dev/null +++ b/queue-6.1/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch @@ -0,0 +1,147 @@ +From ce75e6e0db053fe4fd10b9930d80f5a620ac7a85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Jan 2023 16:57:44 -0500 +Subject: net: sched: cbq: dont intepret cls results when asked to drop + +From: Jamal Hadi Salim + +[ Upstream commit caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 ] + +If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that +res.class contains a valid pointer + +Sample splat reported by Kyle Zeng + +[ 5.405624] 0: reclassify loop, rule prio 0, protocol 800 +[ 5.406326] ================================================================== +[ 5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0 +[ 5.407987] Read of size 1 at addr ffff88800e3122aa by task poc/299 +[ 5.408731] +[ 5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15 +[ 5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), +BIOS 1.15.0-1 04/01/2014 +[ 5.410439] Call Trace: +[ 5.410764] dump_stack+0x87/0xcd +[ 5.411153] print_address_description+0x7a/0x6b0 +[ 5.411687] ? 
vprintk_func+0xb9/0xc0 +[ 5.411905] ? printk+0x76/0x96 +[ 5.412110] ? cbq_enqueue+0x54b/0xea0 +[ 5.412323] kasan_report+0x17d/0x220 +[ 5.412591] ? cbq_enqueue+0x54b/0xea0 +[ 5.412803] __asan_report_load1_noabort+0x10/0x20 +[ 5.413119] cbq_enqueue+0x54b/0xea0 +[ 5.413400] ? __kasan_check_write+0x10/0x20 +[ 5.413679] __dev_queue_xmit+0x9c0/0x1db0 +[ 5.413922] dev_queue_xmit+0xc/0x10 +[ 5.414136] ip_finish_output2+0x8bc/0xcd0 +[ 5.414436] __ip_finish_output+0x472/0x7a0 +[ 5.414692] ip_finish_output+0x5c/0x190 +[ 5.414940] ip_output+0x2d8/0x3c0 +[ 5.415150] ? ip_mc_finish_output+0x320/0x320 +[ 5.415429] __ip_queue_xmit+0x753/0x1760 +[ 5.415664] ip_queue_xmit+0x47/0x60 +[ 5.415874] __tcp_transmit_skb+0x1ef9/0x34c0 +[ 5.416129] tcp_connect+0x1f5e/0x4cb0 +[ 5.416347] tcp_v4_connect+0xc8d/0x18c0 +[ 5.416577] __inet_stream_connect+0x1ae/0xb40 +[ 5.416836] ? local_bh_enable+0x11/0x20 +[ 5.417066] ? lock_sock_nested+0x175/0x1d0 +[ 5.417309] inet_stream_connect+0x5d/0x90 +[ 5.417548] ? __inet_stream_connect+0xb40/0xb40 +[ 5.417817] __sys_connect+0x260/0x2b0 +[ 5.418037] __x64_sys_connect+0x76/0x80 +[ 5.418267] do_syscall_64+0x31/0x50 +[ 5.418477] entry_SYSCALL_64_after_hwframe+0x61/0xc6 +[ 5.418770] RIP: 0033:0x473bb7 +[ 5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 +00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 +00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 +24 89 +[ 5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX: +000000000000002a +[ 5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7 +[ 5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007 +[ 5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004 +[ 5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 +[ 5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002 +[ 5.422471] +[ 5.422562] Allocated by task 299: +[ 5.422782] 
__kasan_kmalloc+0x12d/0x160 +[ 5.423007] kasan_kmalloc+0x5/0x10 +[ 5.423208] kmem_cache_alloc_trace+0x201/0x2e0 +[ 5.423492] tcf_proto_create+0x65/0x290 +[ 5.423721] tc_new_tfilter+0x137e/0x1830 +[ 5.423957] rtnetlink_rcv_msg+0x730/0x9f0 +[ 5.424197] netlink_rcv_skb+0x166/0x300 +[ 5.424428] rtnetlink_rcv+0x11/0x20 +[ 5.424639] netlink_unicast+0x673/0x860 +[ 5.424870] netlink_sendmsg+0x6af/0x9f0 +[ 5.425100] __sys_sendto+0x58d/0x5a0 +[ 5.425315] __x64_sys_sendto+0xda/0xf0 +[ 5.425539] do_syscall_64+0x31/0x50 +[ 5.425764] entry_SYSCALL_64_after_hwframe+0x61/0xc6 +[ 5.426065] +[ 5.426157] The buggy address belongs to the object at ffff88800e312200 +[ 5.426157] which belongs to the cache kmalloc-128 of size 128 +[ 5.426955] The buggy address is located 42 bytes to the right of +[ 5.426955] 128-byte region [ffff88800e312200, ffff88800e312280) +[ 5.427688] The buggy address belongs to the page: +[ 5.427992] page:000000009875fabc refcount:1 mapcount:0 +mapping:0000000000000000 index:0x0 pfn:0xe312 +[ 5.428562] flags: 0x100000000000200(slab) +[ 5.428812] raw: 0100000000000200 dead000000000100 dead000000000122 +ffff888007843680 +[ 5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff +ffff88800e312401 +[ 5.429875] page dumped because: kasan: bad access detected +[ 5.430214] page->mem_cgroup:ffff88800e312401 +[ 5.430471] +[ 5.430564] Memory state around the buggy address: +[ 5.430846] ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc +fc fc fc fc +[ 5.431267] ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00 +00 00 00 fc +[ 5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc +fc fc fc fc +[ 5.432123] ^ +[ 5.432391] ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00 +00 00 00 fc +[ 5.432810] ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc +fc fc fc fc +[ 5.433229] ================================================================== +[ 5.433648] Disabling lock debugging due to kernel taint + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") 
+Reported-by: Kyle Zeng +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_cbq.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c +index 6568e17c4c63..36db5f6782f2 100644 +--- a/net/sched/sch_cbq.c ++++ b/net/sched/sch_cbq.c +@@ -230,6 +230,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) + result = tcf_classify(skb, NULL, fl, &res, true); + if (!fl || result < 0) + goto fallback; ++ if (result == TC_ACT_SHOT) ++ return NULL; + + cl = (void *)res.class; + if (!cl) { +@@ -250,8 +252,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) + case TC_ACT_TRAP: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + fallthrough; +- case TC_ACT_SHOT: +- return NULL; + case TC_ACT_RECLASSIFY: + return cbq_reclassify(skb, cl); + } +-- +2.35.1 + diff --git a/queue-6.1/net-sched-fix-memory-leak-in-tcindex_set_parms.patch b/queue-6.1/net-sched-fix-memory-leak-in-tcindex_set_parms.patch new file mode 100644 index 00000000000..29ce72e3c73 --- /dev/null +++ b/queue-6.1/net-sched-fix-memory-leak-in-tcindex_set_parms.patch @@ -0,0 +1,150 @@ +From 17f7acf21aec1852cf5825b0ba2d101b16ad6cc6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 11:51:19 +0800 +Subject: net: sched: fix memory leak in tcindex_set_parms + +From: Hawkins Jiawei + +[ Upstream commit 399ab7fe0fa0d846881685fd4e57e9a8ef7559f7 ] + +Syzkaller reports a memory leak as follows: +==================================== +BUG: memory leak +unreferenced object 0xffff88810c287f00 (size 256): + comm "syz-executor105", pid 3600, jiffies 4294943292 (age 12.990s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046 + [] kmalloc include/linux/slab.h:576 [inline] + [] kmalloc_array include/linux/slab.h:627 [inline] + [] kcalloc include/linux/slab.h:659 [inline] + [] tcf_exts_init include/net/pkt_cls.h:250 [inline] + [] tcindex_set_parms+0xa7/0xbe0 net/sched/cls_tcindex.c:342 + [] tcindex_change+0xdf/0x120 net/sched/cls_tcindex.c:553 + [] tc_new_tfilter+0x4f2/0x1100 net/sched/cls_api.c:2147 + [] rtnetlink_rcv_msg+0x4dc/0x5d0 net/core/rtnetlink.c:6082 + [] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2540 + [] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] + [] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345 + [] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921 + [] sock_sendmsg_nosec net/socket.c:714 [inline] + [] sock_sendmsg+0x56/0x80 net/socket.c:734 + [] ____sys_sendmsg+0x178/0x410 net/socket.c:2482 + [] ___sys_sendmsg+0xa8/0x110 net/socket.c:2536 + [] __sys_sendmmsg+0x105/0x330 net/socket.c:2622 + [] __do_sys_sendmmsg net/socket.c:2651 [inline] + [] __se_sys_sendmmsg net/socket.c:2648 [inline] + [] __x64_sys_sendmmsg+0x24/0x30 net/socket.c:2648 + [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] + [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + [] entry_SYSCALL_64_after_hwframe+0x63/0xcd +==================================== + +Kernel uses tcindex_change() to change an existing +filter properties. + +Yet the problem is that, during the process of changing, +if `old_r` is retrieved from `p->perfect`, then +kernel uses tcindex_alloc_perfect_hash() to newly +allocate filter results, uses tcindex_filter_result_init() +to clear the old filter result, without destroying +its tcf_exts structure, which triggers the above memory leak. + +To be more specific, there are only two source for the `old_r`, +according to the tcindex_lookup(). `old_r` is retrieved from +`p->perfect`, or `old_r` is retrieved from `p->h`. 
+ + * If `old_r` is retrieved from `p->perfect`, kernel uses +tcindex_alloc_perfect_hash() to newly allocate the +filter results. Then `r` is assigned with `cp->perfect + handle`, +which is newly allocated. So condition `old_r && old_r != r` is +true in this situation, and kernel uses tcindex_filter_result_init() +to clear the old filter result, without destroying +its tcf_exts structure + + * If `old_r` is retrieved from `p->h`, then `p->perfect` is NULL +according to the tcindex_lookup(). Considering that `cp->h` +is directly copied from `p->h` and `p->perfect` is NULL, +`r` is assigned with `tcindex_lookup(cp, handle)`, whose value +should be the same as `old_r`, so condition `old_r && old_r != r` +is false in this situation, kernel ignores using +tcindex_filter_result_init() to clear the old filter result. + +So only when `old_r` is retrieved from `p->perfect` does kernel use +tcindex_filter_result_init() to clear the old filter result, which +triggers the above memory leak. + +Considering that there already exists a tc_filter_wq workqueue +to destroy the old tcindex_data by tcindex_partial_destroy_work() +at the end of tcindex_set_parms(), this patch solves +this memory leak bug by removing this old filter result +clearing part and delegating it to the tc_filter_wq workqueue. + +Note that this patch doesn't introduce any other issues. If +`old_r` is retrieved from `p->perfect`, this patch just +delegates old filter result clearing part to the +tc_filter_wq workqueue; If `old_r` is retrieved from `p->h`, +kernel doesn't reach the old filter result clearing part, so +removing this part has no effect. 
+ +[Thanks to the suggestion from Jakub Kicinski, Cong Wang, Paolo Abeni +and Dmitry Vyukov] + +Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") +Link: https://lore.kernel.org/all/0000000000001de5c505ebc9ec59@google.com/ +Reported-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com +Tested-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com +Cc: Cong Wang +Cc: Jakub Kicinski +Cc: Paolo Abeni +Cc: Dmitry Vyukov +Acked-by: Paolo Abeni +Signed-off-by: Hawkins Jiawei +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_tcindex.c | 12 ++---------- + 1 file changed, 2 insertions(+), 10 deletions(-) + +diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c +index 1c9eeb98d826..4bdcbee4bec5 100644 +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -332,7 +332,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + struct tcindex_filter_result *r, struct nlattr **tb, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) + { +- struct tcindex_filter_result new_filter_result, *old_r = r; ++ struct tcindex_filter_result new_filter_result; + struct tcindex_data *cp = NULL, *oldp; + struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; +@@ -401,7 +401,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + err = tcindex_filter_result_init(&new_filter_result, cp, net); + if (err < 0) + goto errout_alloc; +- if (old_r) ++ if (r) + cr = r->res; + + err = -EBUSY; +@@ -478,14 +478,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + tcf_bind_filter(tp, &cr, base); + } + +- if (old_r && old_r != r) { +- err = tcindex_filter_result_init(old_r, cp, net); +- if (err < 0) { +- kfree(f); +- goto errout_alloc; +- } +- } +- + oldp = p; + r->res = cr; + tcf_exts_change(&r->exts, &e); +-- +2.35.1 + diff --git a/queue-6.1/net-sparx5-fix-reading-of-the-mac-address.patch 
b/queue-6.1/net-sparx5-fix-reading-of-the-mac-address.patch new file mode 100644 index 00000000000..b7ceef671aa --- /dev/null +++ b/queue-6.1/net-sparx5-fix-reading-of-the-mac-address.patch @@ -0,0 +1,40 @@ +From 6993e24ff30b4358f078ba35158acb5868681a8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Jan 2023 13:12:15 +0100 +Subject: net: sparx5: Fix reading of the MAC address + +From: Horatiu Vultur + +[ Upstream commit 588ab2dc25f60efeb516b4abedb6c551949cc185 ] + +There is an issue with the checking of the return value of +'of_get_mac_address', which returns 0 on success and negative value on +failure. The driver interpreted the result the opposite way. Therefore +if there was a MAC address defined in the DT, then the driver was +generating a random MAC address otherwise it would use address 0. +Fix this by checking correctly the return value of 'of_get_mac_address' + +Fixes: b74ef9f9cb91 ("net: sparx5: Do not use mac_addr uninitialized in mchp_sparx5_probe()") +Signed-off-by: Horatiu Vultur +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +index b6bbb3c9bd7a..3423c95cc84a 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +@@ -824,7 +824,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) + if (err) + goto cleanup_config; + +- if (!of_get_mac_address(np, sparx5->base_mac)) { ++ if (of_get_mac_address(np, sparx5->base_mac)) { + dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n"); + eth_random_addr(sparx5->base_mac); + sparx5->base_mac[5] = 0; +-- +2.35.1 + diff --git a/queue-6.1/net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch b/queue-6.1/net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch new file mode 100644 index 00000000000..8a6f830d45e --- /dev/null +++ b/queue-6.1/net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch @@ -0,0 +1,84 @@ +From c719077aea4f6719ead557362743fa1c8471d1f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 12:19:17 +0100 +Subject: net/ulp: prevent ULP without clone op from entering the LISTEN status + +From: Paolo Abeni + +[ Upstream commit 2c02d41d71f90a5168391b6a5f2954112ba2307c ] + +When an ULP-enabled socket enters the LISTEN status, the listener ULP data +pointer is copied inside the child/accepted sockets by sk_clone_lock(). + +The relevant ULP can take care of de-duplicating the context pointer via +the clone() operation, but only MPTCP and SMC implement such op. + +Other ULPs may end-up with a double-free at socket disposal time. + +We can't simply clear the ULP data at clone time, as TLS replaces the +socket ops with custom ones assuming a valid TLS ULP context is +available. + +Instead completely prevent clone-less ULP sockets from entering the +LISTEN status. 
+ +Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") +Reported-by: slipper +Signed-off-by: Paolo Abeni +Link: https://lore.kernel.org/r/4b80c3d1dbe3d0ab072f80450c202d9bc88b4b03.1672740602.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ + net/ipv4/tcp_ulp.c | 4 ++++ + 2 files changed, 18 insertions(+) + +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 0465ada82799..647b3c6b575e 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -1200,12 +1200,26 @@ void inet_csk_prepare_forced_close(struct sock *sk) + } + EXPORT_SYMBOL(inet_csk_prepare_forced_close); + ++static int inet_ulp_can_listen(const struct sock *sk) ++{ ++ const struct inet_connection_sock *icsk = inet_csk(sk); ++ ++ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone) ++ return -EINVAL; ++ ++ return 0; ++} ++ + int inet_csk_listen_start(struct sock *sk) + { + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); + int err; + ++ err = inet_ulp_can_listen(sk); ++ if (unlikely(err)) ++ return err; ++ + reqsk_queue_alloc(&icsk->icsk_accept_queue); + + sk->sk_ack_backlog = 0; +diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c +index 9ae50b1bd844..05b6077b9f2c 100644 +--- a/net/ipv4/tcp_ulp.c ++++ b/net/ipv4/tcp_ulp.c +@@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + ++ err = -EINVAL; ++ if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) ++ goto out_err; ++ + err = ulp_ops->init(sk); + if (err) + goto out_err; +-- +2.35.1 + diff --git a/queue-6.1/net-vrf-determine-the-dst-using-the-original-ifindex.patch b/queue-6.1/net-vrf-determine-the-dst-using-the-original-ifindex.patch new file mode 100644 index 00000000000..287c2cd5ce8 --- /dev/null +++ 
b/queue-6.1/net-vrf-determine-the-dst-using-the-original-ifindex.patch @@ -0,0 +1,66 @@ +From ddec7e46d00ed9cf16e5c228c9cfff8a27a0b7bc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 18:18:25 +0100 +Subject: net: vrf: determine the dst using the original ifindex for multicast + +From: Antoine Tenart + +[ Upstream commit f2575c8f404911da83f25b688e12afcf4273e640 ] + +Multicast packets received on an interface bound to a VRF are marked as +belonging to the VRF and the skb device is updated to point to the VRF +device itself. This was fine even when a route was associated to a +device as when performing a fib table lookup 'oif' in fib6_table_lookup +(coming from 'skb->dev->ifindex' in ip6_route_input) was set to 0 when +FLOWI_FLAG_SKIP_NH_OIF was set. + +With commit 40867d74c374 ("net: Add l3mdev index to flow struct and +avoid oif reset for port devices") this is no longer true and multicast +traffic is not received on the original interface. + +Instead of adding back a similar check in fib6_table_lookup determine +the dst using the original ifindex for multicast VRF traffic. To make +things consistent across the function do the above for all strict +packets, which was the logic before commit 6f12fa775530 ("vrf: mark skb +for multicast or link-local as enslaved to VRF"). Note that reverting to +this behavior should be fine as the change was about marking packets +belonging to the VRF, not about their dst. 
+ +Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") +Reported-by: Jianlin Shi +Signed-off-by: Antoine Tenart +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20221220171825.1172237-1-atenart@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/vrf.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index badf6f09ae51..f6dcec66f0a4 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -1385,8 +1385,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + + /* loopback, multicast & non-ND link-local traffic; do not push through + * packet taps again. Reset pkt_type for upper layers to process skb. +- * For strict packets with a source LLA, determine the dst using the +- * original ifindex. ++ * For non-loopback strict packets, determine the dst using the original ++ * ifindex. + */ + if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { + skb->dev = vrf_dev; +@@ -1395,7 +1395,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + + if (skb->pkt_type == PACKET_LOOPBACK) + skb->pkt_type = PACKET_HOST; +- else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ++ else + vrf_ip6_input_dst(skb, vrf_dev, orig_iif); + + goto out; +-- +2.35.1 + diff --git a/queue-6.1/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch b/queue-6.1/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch new file mode 100644 index 00000000000..2a9c1fdee38 --- /dev/null +++ b/queue-6.1/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch @@ -0,0 +1,109 @@ +From 87690dfb100229e0e229eafdb718e7928487a428 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 13:24:37 +0100 +Subject: netfilter: ipset: fix hash:net,port,net hang with /0 subnet +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jozsef 
Kadlecsik + +[ Upstream commit a31d47be64b9b74f8cfedffe03e0a8a1f9e51f23 ] + +The hash:net,port,net set type supports /0 subnets. However, the patch +commit 5f7b51bf09baca8e titled "netfilter: ipset: Limit the maximal range +of consecutive elements to add/delete" did not take into account it and +resulted in an endless loop. The bug is actually older but the patch +5f7b51bf09baca8e brings it out earlier. + +Handle /0 subnets properly in hash:net,port,net set types. + +Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") +Reported-by: Марк Коренберг +Signed-off-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipset/ip_set_hash_netportnet.c | 40 ++++++++++---------- + 1 file changed, 21 insertions(+), 19 deletions(-) + +diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c +index 19bcdb3141f6..005a7ce87217 100644 +--- a/net/netfilter/ipset/ip_set_hash_netportnet.c ++++ b/net/netfilter/ipset/ip_set_hash_netportnet.c +@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); + } + ++static u32 ++hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr) ++{ ++ if (from == 0 && to == UINT_MAX) { ++ *cidr = 0; ++ return to; ++ } ++ return ip_set_range_to_cidr(from, to, cidr); ++} ++ + static int + hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netportnet4 *h = set->data; ++ struct hash_netportnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; +- u32 ip2_from = 0, ip2_to = 0, ip2, ipn; +- u64 n = 0, m = 0; ++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; + bool with_ports = false; + int 
ret; + +@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); +- n++; +- } while (ipn++ < ip_to); +- ipn = ip2_from; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); +- m++; +- } while (ipn++ < ip2_to); +- +- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip[0]); +@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + + do { + e.ip[0] = htonl(ip); +- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ++ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]); + for (; p <= port_to; p++) { + e.port = htons(p); + do { ++ i++; + e.ip[1] = htonl(ip2); +- ip2 = ip_set_range_to_cidr(ip2, ip2_to, +- &e.cidr[1]); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netportnet4_data_next(&h->next, ++ &e); ++ return -ERANGE; ++ } ++ ip2 = hash_netportnet4_range_to_cidr(ip2, ++ ip2_to, &e.cidr[1]); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +-- +2.35.1 + diff --git a/queue-6.1/netfilter-ipset-rework-long-task-execution-when-addi.patch b/queue-6.1/netfilter-ipset-rework-long-task-execution-when-addi.patch new file mode 100644 index 00000000000..b2d28610947 --- /dev/null +++ b/queue-6.1/netfilter-ipset-rework-long-task-execution-when-addi.patch @@ -0,0 +1,462 @@ +From 72c067783c164274e5929ecba60c80733bf5daca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 13:24:38 +0100 +Subject: netfilter: ipset: Rework long task execution when adding/deleting + entries + +From: Jozsef Kadlecsik + +[ Upstream commit 5e29dc36bd5e2166b834ceb19990d9e68a734d7d ] + +When adding/deleting large number of elements in one step in ipset, it can +take a reasonable amount of time and can result in soft lockup errors. 
The +patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of +consecutive elements to add/delete") tried to fix it by limiting the max +elements to process at all. However it was not enough, it is still possible +that we get hung tasks. Lowering the limit is not reasonable, so the +approach in this patch is as follows: rely on the method used at resizing +sets and save the state when we reach a smaller internal batch limit, +unlock/lock and proceed from the saved state. Thus we can avoid long +continuous tasks and at the same time removed the limit to add/delete large +number of elements in one step. + +The nfnl mutex is held during the whole operation which prevents one to +issue other ipset commands in parallel. + +Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") +Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com +Signed-off-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/linux/netfilter/ipset/ip_set.h | 2 +- + net/netfilter/ipset/ip_set_core.c | 7 ++++--- + net/netfilter/ipset/ip_set_hash_ip.c | 14 ++++++------- + net/netfilter/ipset/ip_set_hash_ipmark.c | 13 ++++++------ + net/netfilter/ipset/ip_set_hash_ipport.c | 13 ++++++------ + net/netfilter/ipset/ip_set_hash_ipportip.c | 13 ++++++------ + net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 +++++++----- + net/netfilter/ipset/ip_set_hash_net.c | 17 +++++++-------- + net/netfilter/ipset/ip_set_hash_netiface.c | 15 ++++++-------- + net/netfilter/ipset/ip_set_hash_netnet.c | 23 +++++++-------------- + net/netfilter/ipset/ip_set_hash_netport.c | 19 +++++++---------- + 11 files changed, 68 insertions(+), 81 deletions(-) + +diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h +index ada1296c87d5..72f5ebc5c97a 100644 +--- a/include/linux/netfilter/ipset/ip_set.h ++++ b/include/linux/netfilter/ipset/ip_set.h +@@ -197,7 +197,7 @@ struct ip_set_region 
{ + }; + + /* Max range where every element is added/deleted in one step */ +-#define IPSET_MAX_RANGE (1<<20) ++#define IPSET_MAX_RANGE (1<<14) + + /* The max revision number supported by any set type + 1 */ + #define IPSET_REVISION_MAX 9 +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index e7ba5b6dd2b7..46ebee9400da 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, + ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); + ip_set_unlock(set); + retried = true; +- } while (ret == -EAGAIN && +- set->variant->resize && +- (ret = set->variant->resize(set, retried)) == 0); ++ } while (ret == -ERANGE || ++ (ret == -EAGAIN && ++ set->variant->resize && ++ (ret = set->variant->resize(set, retried)) == 0)); + + if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) + return 0; +diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c +index 75d556d71652..24adcdd7a0b1 100644 +--- a/net/netfilter/ipset/ip_set_hash_ip.c ++++ b/net/netfilter/ipset/ip_set_hash_ip.c +@@ -98,11 +98,11 @@ static int + hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ip4 *h = set->data; ++ struct hash_ip4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ip4_elem e = { 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, hosts; ++ u32 ip = 0, ip_to = 0, hosts, i = 0; + int ret = 0; + + if (tb[IPSET_ATTR_LINENO]) +@@ -147,14 +147,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + + hosts = h->netmask == 32 ? 
1 : 2 << (32 - h->netmask - 1); + +- /* 64bit division is not allowed on 32bit */ +- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); +- for (; ip <= ip_to;) { ++ for (; ip <= ip_to; i++) { + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ip4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c +index 153de3457423..a22ec1a6f6ec 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipmark.c ++++ b/net/netfilter/ipset/ip_set_hash_ipmark.c +@@ -97,11 +97,11 @@ static int + hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipmark4 *h = set->data; ++ struct hash_ipmark4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0; ++ u32 ip, ip_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + ip_set_mask_from_to(ip, ip_to, cidr); + } + +- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); +- for (; ip <= ip_to; ip++) { ++ for (; ip <= ip_to; ip++, i++) { + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipmark4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c +index 7303138e46be..10481760a9b2 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipport.c ++++ b/net/netfilter/ipset/ip_set_hash_ipport.c +@@ -105,11 +105,11 @@ static int + hash_ipport4_uadt(struct ip_set *set, 
struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipport4 *h = set->data; ++ struct hash_ipport4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem e = { .ip = 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0, p = 0, port, port_to; ++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; + bool with_ports = false; + int ret; + +@@ -173,17 +173,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); + for (; ip <= ip_to; ip++) { + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.ip = htonl(ip); + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipport4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c +index 334fb1ad0e86..39a01934b153 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipportip.c ++++ b/net/netfilter/ipset/ip_set_hash_ipportip.c +@@ -108,11 +108,11 @@ static int + hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipportip4 *h = set->data; ++ struct hash_ipportip4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem e = { .ip = 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0, p = 0, port, port_to; ++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; + bool with_ports = false; + int ret; + +@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - 
port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); + for (; ip <= ip_to; ip++) { + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.ip = htonl(ip); + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipportip4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c +index 7df94f437f60..5c6de605a9fb 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c ++++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c +@@ -160,12 +160,12 @@ static int + hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipportnet4 *h = set->data; ++ struct hash_ipportnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; +- u32 ip2_from = 0, ip2_to = 0, ip2; ++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; + bool with_ports = false; + u8 cidr; + int ret; +@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); +@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + for (; p <= port_to; p++) { + e.port = htons(p); + do { ++ i++; + e.ip2 = htonl(ip2); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); + e.cidr = cidr - 1; ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipportnet4_data_next(&h->next, ++ &e); ++ return -ERANGE; ++ } + ret = 
adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c +index 1422739d9aa2..ce0a9ce5a91f 100644 +--- a/net/netfilter/ipset/ip_set_hash_net.c ++++ b/net/netfilter/ipset/ip_set_hash_net.c +@@ -136,11 +136,11 @@ static int + hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_net4 *h = set->data; ++ struct hash_net4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem e = { .cidr = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, ipn, n = 0; ++ u32 ip = 0, ip_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) + ip = ntohl(h->next.ip); + do { ++ i++; + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_net4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c +index 9810f5bf63f5..031073286236 100644 +--- a/net/netfilter/ipset/ip_set_hash_netiface.c ++++ b/net/netfilter/ipset/ip_set_hash_netiface.c +@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, ipn, n = 0; ++ u32 ip = 0, ip_to = 0, i = 0; + int ret; + + if 
(tb[IPSET_ATTR_LINENO]) +@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip, ip_to, e.cidr); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) + ip = ntohl(h->next.ip); + do { ++ i++; + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netiface4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); + +diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c +index 3d09eefe998a..c07b70bf32db 100644 +--- a/net/netfilter/ipset/ip_set_hash_netnet.c ++++ b/net/netfilter/ipset/ip_set_hash_netnet.c +@@ -163,13 +163,12 @@ static int + hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netnet4 *h = set->data; ++ struct hash_netnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0; +- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; +- u64 n = 0, m = 0; ++ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -245,19 +244,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); +- n++; +- } while (ipn++ < ip_to); +- ipn = ip2_from; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); +- m++; +- } while (ipn++ < ip2_to); +- +- if (n*m > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip[0]); +@@ -270,7 +256,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + e.ip[0] = htonl(ip); + ip = 
ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + do { ++ i++; + e.ip[1] = htonl(ip2); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netnet4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c +index 09cf72eb37f8..d1a0628df4ef 100644 +--- a/net/netfilter/ipset/ip_set_hash_netport.c ++++ b/net/netfilter/ipset/ip_set_hash_netport.c +@@ -154,12 +154,11 @@ static int + hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netport4 *h = set->data; ++ struct hash_netport4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; +- u64 n = 0; ++ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0; + bool with_ports = false; + u8 cidr; + int ret; +@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip, ip_to, e.cidr + 1); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip); +@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + e.ip = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr = cidr - 1; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netport4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +-- +2.35.1 + diff --git 
a/queue-6.1/netfilter-nf_tables-add-function-to-create-set-state.patch b/queue-6.1/netfilter-nf_tables-add-function-to-create-set-state.patch new file mode 100644 index 00000000000..222a974c2b6 --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-add-function-to-create-set-state.patch @@ -0,0 +1,185 @@ +From 60a23ceee12cf0743663238309cb8e586c6c2690 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 18:00:10 +0100 +Subject: netfilter: nf_tables: add function to create set stateful expressions + +From: Pablo Neira Ayuso + +[ Upstream commit a8fe4154fa5a1bae590b243ed60f871e5a5e1378 ] + +Add a helper function to allocate and initialize the stateful expressions +that are defined in a set. + +This patch allows to reuse this code from the set update path, to check +that type of the update matches the existing set in the kernel. + +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: f6594c372afd ("netfilter: nf_tables: perform type checking for existing sets") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 106 ++++++++++++++++++++++------------ + 1 file changed, 68 insertions(+), 38 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index a31f8dc40646..9f35a249c2c3 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4340,6 +4340,59 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, + return err; + } + ++static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, ++ const struct nlattr * const *nla, ++ struct nft_expr **exprs, int *num_exprs, ++ u32 flags) ++{ ++ struct nft_expr *expr; ++ int err, i; ++ ++ if (nla[NFTA_SET_EXPR]) { ++ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]); ++ if (IS_ERR(expr)) { ++ err = PTR_ERR(expr); ++ goto err_set_expr_alloc; ++ } ++ exprs[0] = expr; ++ (*num_exprs)++; ++ } else if (nla[NFTA_SET_EXPRESSIONS]) { ++ struct nlattr *tmp; ++ int left; ++ ++ if (!(flags & NFT_SET_EXPR)) { ++ err = -EINVAL; 
++ goto err_set_expr_alloc; ++ } ++ i = 0; ++ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { ++ if (i == NFT_SET_EXPR_MAX) { ++ err = -E2BIG; ++ goto err_set_expr_alloc; ++ } ++ if (nla_type(tmp) != NFTA_LIST_ELEM) { ++ err = -EINVAL; ++ goto err_set_expr_alloc; ++ } ++ expr = nft_set_elem_expr_alloc(ctx, set, tmp); ++ if (IS_ERR(expr)) { ++ err = PTR_ERR(expr); ++ goto err_set_expr_alloc; ++ } ++ exprs[i++] = expr; ++ (*num_exprs)++; ++ } ++ } ++ ++ return 0; ++ ++err_set_expr_alloc: ++ for (i = 0; i < *num_exprs; i++) ++ nft_expr_destroy(ctx, exprs[i]); ++ ++ return err; ++} ++ + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) + { +@@ -4347,7 +4400,6 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; + const struct nft_set_ops *ops; +- struct nft_expr *expr = NULL; + struct net *net = info->net; + struct nft_set_desc desc; + struct nft_table *table; +@@ -4355,6 +4407,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + struct nft_set *set; + struct nft_ctx ctx; + size_t alloc_size; ++ int num_exprs = 0; + char *name; + int err, i; + u16 udlen; +@@ -4481,6 +4534,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + return PTR_ERR(set); + } + } else { ++ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {}; ++ + if (info->nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); + return -EEXIST; +@@ -4488,6 +4543,13 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (info->nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + ++ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); ++ if (err < 0) ++ return err; ++ ++ for (i = 0; i < num_exprs; i++) ++ nft_expr_destroy(&ctx, exprs[i]); ++ + return 0; + } + +@@ -4555,43 +4617,11 @@ static 
int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (err < 0) + goto err_set_init; + +- if (nla[NFTA_SET_EXPR]) { +- expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); +- if (IS_ERR(expr)) { +- err = PTR_ERR(expr); +- goto err_set_expr_alloc; +- } +- set->exprs[0] = expr; +- set->num_exprs++; +- } else if (nla[NFTA_SET_EXPRESSIONS]) { +- struct nft_expr *expr; +- struct nlattr *tmp; +- int left; +- +- if (!(flags & NFT_SET_EXPR)) { +- err = -EINVAL; +- goto err_set_expr_alloc; +- } +- i = 0; +- nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { +- if (i == NFT_SET_EXPR_MAX) { +- err = -E2BIG; +- goto err_set_expr_alloc; +- } +- if (nla_type(tmp) != NFTA_LIST_ELEM) { +- err = -EINVAL; +- goto err_set_expr_alloc; +- } +- expr = nft_set_elem_expr_alloc(&ctx, set, tmp); +- if (IS_ERR(expr)) { +- err = PTR_ERR(expr); +- goto err_set_expr_alloc; +- } +- set->exprs[i++] = expr; +- set->num_exprs++; +- } +- } ++ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags); ++ if (err < 0) ++ goto err_set_destroy; + ++ set->num_exprs = num_exprs; + set->handle = nf_tables_alloc_handle(table); + + err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); +@@ -4605,7 +4635,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + err_set_expr_alloc: + for (i = 0; i < set->num_exprs; i++) + nft_expr_destroy(&ctx, set->exprs[i]); +- ++err_set_destroy: + ops->destroy(set); + err_set_init: + kfree(set->name); +-- +2.35.1 + diff --git a/queue-6.1/netfilter-nf_tables-consolidate-set-description.patch b/queue-6.1/netfilter-nf_tables-consolidate-set-description.patch new file mode 100644 index 00000000000..92adbb0fbfd --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-consolidate-set-description.patch @@ -0,0 +1,225 @@ +From d56b981e52b792fc938558dbfe2b9e90ee6a3f74 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 20:07:52 +0100 +Subject: netfilter: nf_tables: consolidate set description + 
+From: Pablo Neira Ayuso + +[ Upstream commit bed4a63ea4ae77cfe5aae004ef87379f0655260a ] + +Add the following fields to the set description: + +- key type +- data type +- object type +- policy +- gc_int: garbage collection interval) +- timeout: element timeout + +This prepares for stricter set type checks on updates in a follow up +patch. + +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: f6594c372afd ("netfilter: nf_tables: perform type checking for existing sets") +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 12 +++++++ + net/netfilter/nf_tables_api.c | 58 +++++++++++++++---------------- + 2 files changed, 40 insertions(+), 30 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index cdb7db9b0e25..ddcdde230747 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -311,17 +311,29 @@ struct nft_set_iter { + /** + * struct nft_set_desc - description of set elements + * ++ * @ktype: key type + * @klen: key length ++ * @dtype: data type + * @dlen: data length ++ * @objtype: object type ++ * @flags: flags + * @size: number of set elements ++ * @policy: set policy ++ * @gc_int: garbage collector interval + * @field_len: length of each field in concatenation, bytes + * @field_count: number of concatenated fields in element + * @expr: set must support for expressions + */ + struct nft_set_desc { ++ u32 ktype; + unsigned int klen; ++ u32 dtype; + unsigned int dlen; ++ u32 objtype; + unsigned int size; ++ u32 policy; ++ u32 gc_int; ++ u64 timeout; + u8 field_len[NFT_REG32_COUNT]; + u8 field_count; + bool expr; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 7a09421f19e1..a31f8dc40646 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3732,8 +3732,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) + static const struct nft_set_ops * + nft_select_set_ops(const 
struct nft_ctx *ctx, + const struct nlattr * const nla[], +- const struct nft_set_desc *desc, +- enum nft_set_policies policy) ++ const struct nft_set_desc *desc) + { + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + const struct nft_set_ops *ops, *bops; +@@ -3762,7 +3761,7 @@ nft_select_set_ops(const struct nft_ctx *ctx, + if (!ops->estimate(desc, flags, &est)) + continue; + +- switch (policy) { ++ switch (desc->policy) { + case NFT_SET_POL_PERFORMANCE: + if (est.lookup < best.lookup) + break; +@@ -4344,7 +4343,6 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) + { +- u32 ktype, dtype, flags, policy, gc_int, objtype; + struct netlink_ext_ack *extack = info->extack; + u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; +@@ -4357,10 +4355,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + struct nft_set *set; + struct nft_ctx ctx; + size_t alloc_size; +- u64 timeout; + char *name; + int err, i; + u16 udlen; ++ u32 flags; + u64 size; + + if (nla[NFTA_SET_TABLE] == NULL || +@@ -4371,10 +4369,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + + memset(&desc, 0, sizeof(desc)); + +- ktype = NFT_DATA_VALUE; ++ desc.ktype = NFT_DATA_VALUE; + if (nla[NFTA_SET_KEY_TYPE] != NULL) { +- ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); +- if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) ++ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); ++ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + return -EINVAL; + } + +@@ -4399,17 +4397,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + return -EOPNOTSUPP; + } + +- dtype = 0; ++ desc.dtype = 0; + if (nla[NFTA_SET_DATA_TYPE] != NULL) { + if (!(flags & NFT_SET_MAP)) + return -EINVAL; + +- dtype = 
ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); +- if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && +- dtype != NFT_DATA_VERDICT) ++ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); ++ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && ++ desc.dtype != NFT_DATA_VERDICT) + return -EINVAL; + +- if (dtype != NFT_DATA_VERDICT) { ++ if (desc.dtype != NFT_DATA_VERDICT) { + if (nla[NFTA_SET_DATA_LEN] == NULL) + return -EINVAL; + desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); +@@ -4424,34 +4422,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (!(flags & NFT_SET_OBJECT)) + return -EINVAL; + +- objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); +- if (objtype == NFT_OBJECT_UNSPEC || +- objtype > NFT_OBJECT_MAX) ++ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); ++ if (desc.objtype == NFT_OBJECT_UNSPEC || ++ desc.objtype > NFT_OBJECT_MAX) + return -EOPNOTSUPP; + } else if (flags & NFT_SET_OBJECT) + return -EINVAL; + else +- objtype = NFT_OBJECT_UNSPEC; ++ desc.objtype = NFT_OBJECT_UNSPEC; + +- timeout = 0; ++ desc.timeout = 0; + if (nla[NFTA_SET_TIMEOUT] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + +- err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); ++ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); + if (err) + return err; + } +- gc_int = 0; ++ desc.gc_int = 0; + if (nla[NFTA_SET_GC_INTERVAL] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; +- gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); ++ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + } + +- policy = NFT_SET_POL_PERFORMANCE; ++ desc.policy = NFT_SET_POL_PERFORMANCE; + if (nla[NFTA_SET_POLICY] != NULL) +- policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); ++ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + + if (nla[NFTA_SET_DESC] != NULL) { + err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); +@@ 
-4496,7 +4494,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENOENT; + +- ops = nft_select_set_ops(&ctx, nla, &desc, policy); ++ ops = nft_select_set_ops(&ctx, nla, &desc); + if (IS_ERR(ops)) + return PTR_ERR(ops); + +@@ -4536,18 +4534,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + set->table = table; + write_pnet(&set->net, net); + set->ops = ops; +- set->ktype = ktype; ++ set->ktype = desc.ktype; + set->klen = desc.klen; +- set->dtype = dtype; +- set->objtype = objtype; ++ set->dtype = desc.dtype; ++ set->objtype = desc.objtype; + set->dlen = desc.dlen; + set->flags = flags; + set->size = desc.size; +- set->policy = policy; ++ set->policy = desc.policy; + set->udlen = udlen; + set->udata = udata; +- set->timeout = timeout; +- set->gc_int = gc_int; ++ set->timeout = desc.timeout; ++ set->gc_int = desc.gc_int; + + set->field_count = desc.field_count; + for (i = 0; i < desc.field_count; i++) +-- +2.35.1 + diff --git a/queue-6.1/netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch b/queue-6.1/netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch new file mode 100644 index 00000000000..849975ba8af --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch @@ -0,0 +1,209 @@ +From 95aede729f87fd4012a877f2c29e343effd3ae74 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 20:10:12 +0100 +Subject: netfilter: nf_tables: honor set timeout and garbage collection + updates + +From: Pablo Neira Ayuso + +[ Upstream commit 123b99619cca94bdca0bf7bde9abe28f0a0dfe06 ] + +Set timeout and garbage collection interval updates are ignored on +updates. Add transaction to update global set element timeout and +garbage collection interval. 
+ +Fixes: 96518518cc41 ("netfilter: add nftables") +Suggested-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 13 ++++++- + net/netfilter/nf_tables_api.c | 63 ++++++++++++++++++++++--------- + 2 files changed, 57 insertions(+), 19 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index ddcdde230747..1daededfa75e 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -592,7 +592,9 @@ void *nft_set_catchall_gc(const struct nft_set *set); + + static inline unsigned long nft_set_gc_interval(const struct nft_set *set) + { +- return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; ++ u32 gc_int = READ_ONCE(set->gc_int); ++ ++ return gc_int ? msecs_to_jiffies(gc_int) : HZ; + } + + /** +@@ -1563,6 +1565,9 @@ struct nft_trans_rule { + struct nft_trans_set { + struct nft_set *set; + u32 set_id; ++ u32 gc_int; ++ u64 timeout; ++ bool update; + bool bound; + }; + +@@ -1572,6 +1577,12 @@ struct nft_trans_set { + (((struct nft_trans_set *)trans->data)->set_id) + #define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) ++#define nft_trans_set_update(trans) \ ++ (((struct nft_trans_set *)trans->data)->update) ++#define nft_trans_set_timeout(trans) \ ++ (((struct nft_trans_set *)trans->data)->timeout) ++#define nft_trans_set_gc_int(trans) \ ++ (((struct nft_trans_set *)trans->data)->gc_int) + + struct nft_trans_chain { + bool update; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 6e68cab474c2..3ba8c291fcaa 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) + return 0; + } + +-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, +- struct nft_set *set) ++static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, ++ struct 
nft_set *set, ++ const struct nft_set_desc *desc) + { + struct nft_trans *trans; + +@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + if (trans == NULL) + return -ENOMEM; + +- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { ++ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { + nft_trans_set_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); + nft_activate_next(ctx->net, set); + } + nft_trans_set(trans) = set; ++ if (desc) { ++ nft_trans_set_update(trans) = true; ++ nft_trans_set_gc_int(trans) = desc->gc_int; ++ nft_trans_set_timeout(trans) = desc->timeout; ++ } + nft_trans_commit_list_add_tail(ctx->net, trans); + + return 0; + } + ++static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, ++ struct nft_set *set) ++{ ++ return __nft_trans_set_add(ctx, msg_type, set, NULL); ++} ++ + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) + { + int err; +@@ -3996,8 +4008,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb, + static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + const struct nft_set *set, u16 event, u16 flags) + { +- struct nlmsghdr *nlh; ++ u64 timeout = READ_ONCE(set->timeout); ++ u32 gc_int = READ_ONCE(set->gc_int); + u32 portid = ctx->portid; ++ struct nlmsghdr *nlh; + struct nlattr *nest; + u32 seq = ctx->seq; + int i; +@@ -4033,13 +4047,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) + goto nla_put_failure; + +- if (set->timeout && ++ if (timeout && + nla_put_be64(skb, NFTA_SET_TIMEOUT, +- nf_jiffies64_to_msecs(set->timeout), ++ nf_jiffies64_to_msecs(timeout), + NFTA_SET_PAD)) + goto nla_put_failure; +- if (set->gc_int && +- nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) ++ if (gc_int && ++ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int))) + goto nla_put_failure; + + if 
(set->policy != NFT_SET_POL_PERFORMANCE) { +@@ -4584,7 +4598,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + for (i = 0; i < num_exprs; i++) + nft_expr_destroy(&ctx, exprs[i]); + +- return err; ++ if (err < 0) ++ return err; ++ ++ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc); + } + + if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) +@@ -6022,7 +6039,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + return err; + } else if (set->flags & NFT_SET_TIMEOUT && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { +- timeout = set->timeout; ++ timeout = READ_ONCE(set->timeout); + } + + expiration = 0; +@@ -6123,7 +6140,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + if (err < 0) + goto err_parse_key_end; + +- if (timeout != set->timeout) { ++ if (timeout != READ_ONCE(set->timeout)) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + if (err < 0) + goto err_parse_key_end; +@@ -9039,14 +9056,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + break; + case NFT_MSG_NEWSET: +- nft_clear(net, nft_trans_set(trans)); +- /* This avoids hitting -EBUSY when deleting the table +- * from the transaction. +- */ +- if (nft_set_is_anonymous(nft_trans_set(trans)) && +- !list_empty(&nft_trans_set(trans)->bindings)) +- trans->ctx.table->use--; ++ if (nft_trans_set_update(trans)) { ++ struct nft_set *set = nft_trans_set(trans); + ++ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans)); ++ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans)); ++ } else { ++ nft_clear(net, nft_trans_set(trans)); ++ /* This avoids hitting -EBUSY when deleting the table ++ * from the transaction. 
++ */ ++ if (nft_set_is_anonymous(nft_trans_set(trans)) && ++ !list_empty(&nft_trans_set(trans)->bindings)) ++ trans->ctx.table->use--; ++ } + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_NEWSET, GFP_KERNEL); + nft_trans_destroy(trans); +@@ -9268,6 +9291,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSET: ++ if (nft_trans_set_update(trans)) { ++ nft_trans_destroy(trans); ++ break; ++ } + trans->ctx.table->use--; + if (nft_trans_set_bound(trans)) { + nft_trans_destroy(trans); +-- +2.35.1 + diff --git a/queue-6.1/netfilter-nf_tables-perform-type-checking-for-existi.patch b/queue-6.1/netfilter-nf_tables-perform-type-checking-for-existi.patch new file mode 100644 index 00000000000..30ca8adf22b --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-perform-type-checking-for-existi.patch @@ -0,0 +1,89 @@ +From e837038ae4041b4f8656ce23a8ca06a7897702dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 20:09:00 +0100 +Subject: netfilter: nf_tables: perform type checking for existing sets + +From: Pablo Neira Ayuso + +[ Upstream commit f6594c372afd5cec8b1e9ee9ea8f8819d59c6fb1 ] + +If a ruleset declares a set name that matches an existing set in the +kernel, then validate that this declaration really refers to the same +set, otherwise bail out with EEXIST. + +Currently, the kernel reports success when adding a set that already +exists in the kernel. This usually results in EINVAL errors at a later +stage, when the user adds elements to the set, if the set declaration +mismatches the existing set representation in the kernel. + +Add a new function to check that the set declaration really refers to +the same existing set in the kernel. 
+ +Fixes: 96518518cc41 ("netfilter: add nftables") +Reported-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 36 ++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 9f35a249c2c3..6e68cab474c2 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4393,6 +4393,34 @@ static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, + return err; + } + ++static bool nft_set_is_same(const struct nft_set *set, ++ const struct nft_set_desc *desc, ++ struct nft_expr *exprs[], u32 num_exprs, u32 flags) ++{ ++ int i; ++ ++ if (set->ktype != desc->ktype || ++ set->dtype != desc->dtype || ++ set->flags != flags || ++ set->klen != desc->klen || ++ set->dlen != desc->dlen || ++ set->field_count != desc->field_count || ++ set->num_exprs != num_exprs) ++ return false; ++ ++ for (i = 0; i < desc->field_count; i++) { ++ if (set->field_len[i] != desc->field_len[i]) ++ return false; ++ } ++ ++ for (i = 0; i < num_exprs; i++) { ++ if (set->exprs[i]->ops != exprs[i]->ops) ++ return false; ++ } ++ ++ return true; ++} ++ + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) + { +@@ -4547,10 +4575,16 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (err < 0) + return err; + ++ err = 0; ++ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) { ++ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); ++ err = -EEXIST; ++ } ++ + for (i = 0; i < num_exprs; i++) + nft_expr_destroy(&ctx, exprs[i]); + +- return 0; ++ return err; + } + + if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) +-- +2.35.1 + diff --git a/queue-6.1/nfc-fix-potential-resource-leaks.patch b/queue-6.1/nfc-fix-potential-resource-leaks.patch new file mode 100644 index 00000000000..643a9d287a1 --- /dev/null 
+++ b/queue-6.1/nfc-fix-potential-resource-leaks.patch @@ -0,0 +1,127 @@ +From 40c982678109468ae6190c8432cda8b83b28ff50 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Dec 2022 11:37:18 +0400 +Subject: nfc: Fix potential resource leaks + +From: Miaoqian Lin + +[ Upstream commit df49908f3c52d211aea5e2a14a93bbe67a2cb3af ] + +nfc_get_device() takes a reference on the device; add the missing +nfc_put_device() to release it when it is no longer needed. +Also fix the style warning by using error EOPNOTSUPP instead of +ENOTSUPP. + +Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation") +Fixes: 29e76924cf08 ("nfc: netlink: Add capability to reply to vendor_cmd with data") +Signed-off-by: Miaoqian Lin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/nfc/netlink.c | 52 ++++++++++++++++++++++++++++++++++------------- + 1 file changed, 38 insertions(+), 14 deletions(-) + +diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c +index 9d91087b9399..1fc339084d89 100644 +--- a/net/nfc/netlink.c ++++ b/net/nfc/netlink.c +@@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) + u32 dev_idx, se_idx; + u8 *apdu; + size_t apdu_len; ++ int rc; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX] || +@@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) + if (!dev) + return -ENODEV; + +- if (!dev->ops || !dev->ops->se_io) +- return -ENOTSUPP; ++ if (!dev->ops || !dev->ops->se_io) { ++ rc = -EOPNOTSUPP; ++ goto put_dev; ++ } + + apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); +- if (apdu_len == 0) +- return -EINVAL; ++ if (apdu_len == 0) { ++ rc = -EINVAL; ++ goto put_dev; ++ } + + apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); +- if (!apdu) +- return -EINVAL; ++ if (!apdu) { ++ rc = -EINVAL; ++ goto put_dev; ++ } + + ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); +- if (!ctx) +- return -ENOMEM; ++ if (!ctx) { ++ rc = -ENOMEM; ++ goto put_dev; ++ } + + 
ctx->dev_idx = dev_idx; + ctx->se_idx = se_idx; + +- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); ++ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); ++ ++put_dev: ++ nfc_put_device(dev); ++ return rc; + } + + static int nfc_genl_vendor_cmd(struct sk_buff *skb, +@@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, + subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); + + dev = nfc_get_device(dev_idx); +- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) ++ if (!dev) + return -ENODEV; + ++ if (!dev->vendor_cmds || !dev->n_vendor_cmds) { ++ err = -ENODEV; ++ goto put_dev; ++ } ++ + if (info->attrs[NFC_ATTR_VENDOR_DATA]) { + data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); + data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); +- if (data_len == 0) +- return -EINVAL; ++ if (data_len == 0) { ++ err = -EINVAL; ++ goto put_dev; ++ } + } else { + data = NULL; + data_len = 0; +@@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, + dev->cur_cmd_info = info; + err = cmd->doit(dev, data, data_len); + dev->cur_cmd_info = NULL; +- return err; ++ goto put_dev; + } + +- return -EOPNOTSUPP; ++ err = -EOPNOTSUPP; ++ ++put_dev: ++ nfc_put_device(dev); ++ return err; + } + + /* message building helper */ +-- +2.35.1 + diff --git a/queue-6.1/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch b/queue-6.1/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch new file mode 100644 index 00000000000..e35d4a21f36 --- /dev/null +++ b/queue-6.1/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch @@ -0,0 +1,42 @@ +From d8e1f5bd4597e2178be04567f382c90bcd92bb90 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 09:51:30 -0500 +Subject: nfsd: shut down the NFSv4 state objects before the filecache + +From: Jeff Layton + +[ Upstream commit 789e1e10f214c00ca18fc6610824c5b9876ba5f2 ] + +Currently, we shut down the filecache before trying to clean up the +stateids that depend 
on it. This leads to the kernel trying to free an +nfsd_file twice, and a refcount overput on the nf_mark. + +Change the shutdown procedure to tear down all of the stateids prior +to shutting down the filecache. + +Reported-and-tested-by: Wang Yugui +Signed-off-by: Jeff Layton +Fixes: 5e113224c17e ("nfsd: nfsd_file cache entries should be per net namespace") +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfssvc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c +index bfbd9f672f59..8b1afde19211 100644 +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -447,8 +447,8 @@ static void nfsd_shutdown_net(struct net *net) + { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + +- nfsd_file_cache_shutdown_net(net); + nfs4_state_shutdown_net(net); ++ nfsd_file_cache_shutdown_net(net); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = false; +-- +2.35.1 + diff --git a/queue-6.1/nvme-also-return-i-o-command-effects-from-nvme_comma.patch b/queue-6.1/nvme-also-return-i-o-command-effects-from-nvme_comma.patch new file mode 100644 index 00000000000..6ce11765795 --- /dev/null +++ b/queue-6.1/nvme-also-return-i-o-command-effects-from-nvme_comma.patch @@ -0,0 +1,81 @@ +From f6a0d624bffcaf07bd740a632b313a500f94d5ec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Dec 2022 10:12:17 +0100 +Subject: nvme: also return I/O command effects from nvme_command_effects + +From: Christoph Hellwig + +[ Upstream commit 831ed60c2aca2d7c517b2da22897a90224a97d27 ] + +To be able to use the Commands Supported and Effects Log for allowing +unprivileged passthrough, it needs to be correctly reported for I/O +commands as well. Return the I/O command effects from +nvme_command_effects, and also add a default list of effects for the +NVM command set. For other command sets, the Commands Supported and +Effects log is required to be present already. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Keith Busch +Reviewed-by: Kanchan Joshi +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/core.c | 32 ++++++++++++++++++++++++++------ + 1 file changed, 26 insertions(+), 6 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 108b5022cead..1ded96d1bfd2 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -1069,6 +1069,18 @@ static u32 nvme_known_admin_effects(u8 opcode) + return 0; + } + ++static u32 nvme_known_nvm_effects(u8 opcode) ++{ ++ switch (opcode) { ++ case nvme_cmd_write: ++ case nvme_cmd_write_zeroes: ++ case nvme_cmd_write_uncor: ++ return NVME_CMD_EFFECTS_LBCC; ++ default: ++ return 0; ++ } ++} ++ + u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) + { + u32 effects = 0; +@@ -1076,16 +1088,24 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) + if (ns) { + if (ns->head->effects) + effects = le32_to_cpu(ns->head->effects->iocs[opcode]); ++ if (ns->head->ids.csi == NVME_CAP_CSS_NVM) ++ effects |= nvme_known_nvm_effects(opcode); + if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) + dev_warn_once(ctrl->device, +- "IO command:%02x has unhandled effects:%08x\n", ++ "IO command:%02x has unusual effects:%08x\n", + opcode, effects); +- return 0; +- } + +- if (ctrl->effects) +- effects = le32_to_cpu(ctrl->effects->acs[opcode]); +- effects |= nvme_known_admin_effects(opcode); ++ /* ++ * NVME_CMD_EFFECTS_CSE_MASK causes a freeze all I/O queues, ++ * which would deadlock when done on an I/O command. Note that ++ * We already warn about an unusual effect above. 
++ */ ++ effects &= ~NVME_CMD_EFFECTS_CSE_MASK; ++ } else { ++ if (ctrl->effects) ++ effects = le32_to_cpu(ctrl->effects->acs[opcode]); ++ effects |= nvme_known_admin_effects(opcode); ++ } + + return effects; + } +-- +2.35.1 + diff --git a/queue-6.1/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch b/queue-6.1/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch new file mode 100644 index 00000000000..b18ea5fa5ed --- /dev/null +++ b/queue-6.1/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch @@ -0,0 +1,81 @@ +From 29662d52cff9ee724cbc515992438a0b2f20ccc1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 09:57:21 +0800 +Subject: nvme: fix multipath crash caused by flush request when blktrace is + enabled + +From: Yanjun Zhang + +[ Upstream commit 3659fb5ac29a5e6102bebe494ac789fd47fb78f4 ] + +The flush request initialized by blk_kick_flush has NULL bio, +and it may be dealt with nvme_end_req during io completion. +When blktrace is enabled, nvme_trace_bio_complete with multipath +activated trying to access NULL pointer bio from flush request +results in the following crash: + +[ 2517.831677] BUG: kernel NULL pointer dereference, address: 000000000000001a +[ 2517.835213] #PF: supervisor read access in kernel mode +[ 2517.838724] #PF: error_code(0x0000) - not-present page +[ 2517.842222] PGD 7b2d51067 P4D 0 +[ 2517.845684] Oops: 0000 [#1] SMP NOPTI +[ 2517.849125] CPU: 2 PID: 732 Comm: kworker/2:1H Kdump: loaded Tainted: G S 5.15.67-0.cl9.x86_64 #1 +[ 2517.852723] Hardware name: XFUSION 2288H V6/BC13MBSBC, BIOS 1.13 07/27/2022 +[ 2517.856358] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] +[ 2517.859993] RIP: 0010:blk_add_trace_bio_complete+0x6/0x30 +[ 2517.863628] Code: 1f 44 00 00 48 8b 46 08 31 c9 ba 04 00 10 00 48 8b 80 50 03 00 00 48 8b 78 50 e9 e5 fe ff ff 0f 1f 44 00 00 41 54 49 89 f4 55 <0f> b6 7a 1a 48 89 d5 e8 3e 1c 2b 00 48 89 ee 4c 89 e7 5d 89 c1 ba +[ 2517.871269] RSP: 0018:ff7f6a008d9dbcd0 EFLAGS: 00010286 +[ 
2517.875081] RAX: ff3d5b4be00b1d50 RBX: 0000000002040002 RCX: ff3d5b0a270f2000 +[ 2517.878966] RDX: 0000000000000000 RSI: ff3d5b0b021fb9f8 RDI: 0000000000000000 +[ 2517.882849] RBP: ff3d5b0b96a6fa00 R08: 0000000000000001 R09: 0000000000000000 +[ 2517.886718] R10: 000000000000000c R11: 000000000000000c R12: ff3d5b0b021fb9f8 +[ 2517.890575] R13: 0000000002000000 R14: ff3d5b0b021fb1b0 R15: 0000000000000018 +[ 2517.894434] FS: 0000000000000000(0000) GS:ff3d5b42bfc80000(0000) knlGS:0000000000000000 +[ 2517.898299] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 2517.902157] CR2: 000000000000001a CR3: 00000004f023e005 CR4: 0000000000771ee0 +[ 2517.906053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 2517.909930] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 2517.913761] PKRU: 55555554 +[ 2517.917558] Call Trace: +[ 2517.921294] +[ 2517.924982] nvme_complete_rq+0x1c3/0x1e0 [nvme_core] +[ 2517.928715] nvme_tcp_recv_pdu+0x4d7/0x540 [nvme_tcp] +[ 2517.932442] nvme_tcp_recv_skb+0x4f/0x240 [nvme_tcp] +[ 2517.936137] ? nvme_tcp_recv_pdu+0x540/0x540 [nvme_tcp] +[ 2517.939830] tcp_read_sock+0x9c/0x260 +[ 2517.943486] nvme_tcp_try_recv+0x65/0xa0 [nvme_tcp] +[ 2517.947173] nvme_tcp_io_work+0x64/0x90 [nvme_tcp] +[ 2517.950834] process_one_work+0x1e8/0x390 +[ 2517.954473] worker_thread+0x53/0x3c0 +[ 2517.958069] ? process_one_work+0x390/0x390 +[ 2517.961655] kthread+0x10c/0x130 +[ 2517.965211] ? set_kthread_struct+0x40/0x40 +[ 2517.968760] ret_from_fork+0x1f/0x30 +[ 2517.972285] + +To avoid this situation, add a NULL check for req->bio before +calling trace_block_bio_complete. 
+ +Signed-off-by: Yanjun Zhang +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/nvme.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h +index 8a0db9e06dc6..cbda8a19409b 100644 +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -888,7 +888,7 @@ static inline void nvme_trace_bio_complete(struct request *req) + { + struct nvme_ns *ns = req->q->queuedata; + +- if (req->cmd_flags & REQ_NVME_MPATH) ++ if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio) + trace_block_bio_complete(ns->head->disk->queue, req->bio); + } + +-- +2.35.1 + diff --git a/queue-6.1/nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch b/queue-6.1/nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch new file mode 100644 index 00000000000..39f07890996 --- /dev/null +++ b/queue-6.1/nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch @@ -0,0 +1,75 @@ +From fa428f4d953a62ebc3f6e4108660e2feb613e0f3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Dec 2022 15:20:04 +0100 +Subject: nvmet: use NVME_CMD_EFFECTS_CSUPP instead of open coding it + +From: Christoph Hellwig + +[ Upstream commit 61f37154c599cf9f2f84dcbd9be842f8645a7099 ] + +Use NVME_CMD_EFFECTS_CSUPP instead of open coding it and assign a +single value to multiple array entries instead of repeated assignments. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Keith Busch +Reviewed-by: Sagi Grimberg +Reviewed-by: Kanchan Joshi +Reviewed-by: Chaitanya Kulkarni +Signed-off-by: Sasha Levin +--- + drivers/nvme/target/admin-cmd.c | 35 ++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 16 deletions(-) + +diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c +index c8a061ce3ee5..76ceaadd6eea 100644 +--- a/drivers/nvme/target/admin-cmd.c ++++ b/drivers/nvme/target/admin-cmd.c +@@ -164,26 +164,29 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) + + static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) + { +- log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0); +- +- log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); ++ log->acs[nvme_admin_get_log_page] = ++ log->acs[nvme_admin_identify] = ++ log->acs[nvme_admin_abort_cmd] = ++ log->acs[nvme_admin_set_features] = ++ log->acs[nvme_admin_get_features] = ++ log->acs[nvme_admin_async_event] = ++ log->acs[nvme_admin_keep_alive] = ++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); ++ ++ log->iocs[nvme_cmd_read] = ++ log->iocs[nvme_cmd_write] = ++ log->iocs[nvme_cmd_flush] = ++ log->iocs[nvme_cmd_dsm] = ++ log->iocs[nvme_cmd_write_zeroes] = ++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); + } + + static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log) + { +- log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0); +- 
log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0); ++ log->iocs[nvme_cmd_zone_append] = ++ log->iocs[nvme_cmd_zone_mgmt_send] = ++ log->iocs[nvme_cmd_zone_mgmt_recv] = ++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); + } + + static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) +-- +2.35.1 + diff --git a/queue-6.1/octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch b/queue-6.1/octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch new file mode 100644 index 00000000000..c4f8680de8d --- /dev/null +++ b/queue-6.1/octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch @@ -0,0 +1,111 @@ +From 79ba79f56c445673dd67d31c491ad94cb3e4fee4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 09:20:12 +0530 +Subject: octeontx2-pf: Fix lmtst ID used in aura free + +From: Geetha sowjanya + +[ Upstream commit 4af1b64f80fbe1275fb02c5f1c0cef099a4a231f ] + +Current code uses per_cpu pointer to get the lmtst_id mapped to +the core on which aura_free() is executed. Using per_cpu pointer +without preemption disable causing mismatch between lmtst_id and +core on which pointer gets freed. This patch fixes the issue by +disabling preemption around aura_free. + +Fixes: ef6c8da71eaf ("octeontx2-pf: cn10K: Reserve LMTST lines per core") +Signed-off-by: Sunil Goutham +Signed-off-by: Geetha sowjanya +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../marvell/octeontx2/nic/otx2_common.c | 30 +++++++++++++------ + 1 file changed, 21 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +index 9e10e7471b88..88f8772a61cd 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -1012,6 +1012,7 @@ static void otx2_pool_refill_task(struct work_struct *work) + rbpool = cq->rbpool; + free_ptrs = cq->pool_ptrs; + ++ get_cpu(); + while (cq->pool_ptrs) { + if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) { + /* Schedule a WQ if we fails to free atleast half of the +@@ -1031,6 +1032,7 @@ static void otx2_pool_refill_task(struct work_struct *work) + pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); + cq->pool_ptrs--; + } ++ put_cpu(); + cq->refill_task_sched = false; + } + +@@ -1368,6 +1370,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) + if (err) + goto fail; + ++ get_cpu(); + /* Allocate pointers and free them to aura/pool */ + for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); +@@ -1376,18 +1379,24 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) + sq = &qset->sq[qidx]; + sq->sqb_count = 0; + sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); +- if (!sq->sqb_ptrs) +- return -ENOMEM; ++ if (!sq->sqb_ptrs) { ++ err = -ENOMEM; ++ goto err_mem; ++ } + + for (ptr = 0; ptr < num_sqbs; ptr++) { +- if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) +- return -ENOMEM; ++ err = otx2_alloc_rbuf(pfvf, pool, &bufptr); ++ if (err) ++ goto err_mem; + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); + sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; + } + } + +- return 0; ++err_mem: ++ put_cpu(); ++ return err ? 
-ENOMEM : 0; ++ + fail: + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + otx2_aura_pool_free(pfvf); +@@ -1426,18 +1435,21 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) + if (err) + goto fail; + ++ get_cpu(); + /* Allocate pointers and free them to aura/pool */ + for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { + pool = &pfvf->qset.pool[pool_id]; + for (ptr = 0; ptr < num_ptrs; ptr++) { +- if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) +- return -ENOMEM; ++ err = otx2_alloc_rbuf(pfvf, pool, &bufptr); ++ if (err) ++ goto err_mem; + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, + bufptr + OTX2_HEAD_ROOM); + } + } +- +- return 0; ++err_mem: ++ put_cpu(); ++ return err ? -ENOMEM : 0; + fail: + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + otx2_aura_pool_free(pfvf); +-- +2.35.1 + diff --git a/queue-6.1/perf-lock-contention-fix-core-dump-related-to-not-fi.patch b/queue-6.1/perf-lock-contention-fix-core-dump-related-to-not-fi.patch new file mode 100644 index 00000000000..ee2e57804e2 --- /dev/null +++ b/queue-6.1/perf-lock-contention-fix-core-dump-related-to-not-fi.patch @@ -0,0 +1,143 @@ +From 7516ba3569b0d898f2d672aa28024f503b128fba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 11:26:27 +0100 +Subject: perf lock contention: Fix core dump related to not finding the + "__sched_text_end" symbol on s/390 + +From: Thomas Richter + +[ Upstream commit d8d85ce86dc82de4f88b821a78f533b9d5b22a45 ] + +The test case perf lock contention dumps core on s390. 
Run the following +commands: + + # ./perf lock record -- ./perf bench sched messaging + # Running 'sched/messaging' benchmark: + # 20 sender and receiver processes per group + # 10 groups == 400 processes run + + Total time: 2.799 [sec] + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.073 MB perf.data (100 samples) ] + # + # ./perf lock contention + Segmentation fault (core dumped) + # + +The function call stack is lengthy, here are the top 5 functions: + + # gdb ./perf core.24048 + GNU gdb (GDB) Fedora Linux 12.1-6.fc37 + Core was generated by `./perf lock contention'. + Program terminated with signal SIGSEGV, Segmentation fault. + #0 0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356 + 3356 machine->sched.text_end = kmap->unmap_ip(kmap, sym->start); + + (gdb) where + #0 0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356 + #1 0x000000000109f244 in callchain_id (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:957 + #2 0x000000000109e094 in get_key_by_aggr_mode (key=0x3ffea4f7290, addr=27758136, evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:586 + #3 0x000000000109f4d0 in report_lock_contention_begin_event (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1004 + #4 0x00000000010a00ae in evsel__process_contention_begin (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1254 + #5 0x00000000010a0e14 in process_sample_event (tool=0x3ffea4f8480, event=0x3ff85601ef8, sample=0x3ffea4f77d0, evsel=0x30313e0, machine=0x3029e28) at builtin-lock.c:1464 + ..... 
+ +The issue is in function machine__is_lock_function() in file +./util/machine.c line 3355: + + /* should not fail from here */ + sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap); + machine->sched.text_end = kmap->unmap_ip(kmap, sym->start) + +On s390 the symbol __sched_text_end is *NOT* in the symbol list and the +resulting pointer sym is set to NULL. The sym->start is then a NULL pointer +access and generates the core dump. + +The reason why __sched_text_end is not in the symbol list on s390 is +simple: + +When the symbol list is created at perf start up with function calls + + dso__load + +--> dso__load_vmlinux_path + +--> dso__load_vmlinux + +--> dso__load_sym + +--> dso__load_sym_internal (reads kernel symbols) + +--> symbols__fixup_end + +--> symbols__fixup_duplicate + +The issue is in function symbols__fixup_duplicate(). It deletes all +symbols which have the same address. On s390: + + # nm -g ~/linux/vmlinux| fgrep c68390 + 0000000000c68390 T __cpuidle_text_start + 0000000000c68390 T __sched_text_end + # + +two symbols have identical addresses and __sched_text_end is considered +duplicate (in ascending sort order) and removed from the symbol list. +Therefore it is missing and an invalid pointer reference occurs. The +code checks for symbol __sched_text_start and when it exists assumes +symbol __sched_text_end is also in the symbol table. However this is not +the case on s390. + +Same situation exists for symbol __lock_text_start: + +0000000000c68770 T __cpuidle_text_end +0000000000c68770 T __lock_text_start + +This symbol is also removed from the symbol table but used in function +machine__is_lock_function(). + +To fix this and keep duplicate symbols in the symbol table, set +symbol_conf.allow_aliases to true. This prevents the removal of +duplicate symbols in function symbols__fixup_duplicate(). 
+ +Output After: + + # ./perf lock contention + contended total wait max wait avg wait type caller + + 48 124.39 ms 123.99 ms 2.59 ms rwsem:W unlink_anon_vmas+0x24a + 47 83.68 ms 83.26 ms 1.78 ms rwsem:W free_pgtables+0x132 + 5 41.22 us 10.55 us 8.24 us rwsem:W free_pgtables+0x140 + 4 40.12 us 20.55 us 10.03 us rwsem:W copy_process+0x1ac8 + # + +Fixes: 0d2997f750d1de39 ("perf lock: Look up callchain for the contended locks") +Signed-off-by: Thomas Richter +Acked-by: Namhyung Kim +Cc: Heiko Carstens +Cc: Sumanth Korikkar +Cc: Sven Schnelle +Cc: Vasily Gorbik +Link: https://lore.kernel.org/r/20221230102627.2410847-1-tmricht@linux.ibm.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/builtin-lock.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c +index 66520712a167..470106643ed5 100644 +--- a/tools/perf/builtin-lock.c ++++ b/tools/perf/builtin-lock.c +@@ -1582,6 +1582,7 @@ static int __cmd_report(bool display_info) + + /* for lock function check */ + symbol_conf.sort_by_name = true; ++ symbol_conf.allow_aliases = true; + symbol__init(&session->header.env); + + if (!perf_session__has_traces(session, "lock record")) +@@ -1660,6 +1661,7 @@ static int __cmd_contention(int argc, const char **argv) + + /* for lock function check */ + symbol_conf.sort_by_name = true; ++ symbol_conf.allow_aliases = true; + symbol__init(&session->header.env); + + if (use_bpf) { +-- +2.35.1 + diff --git a/queue-6.1/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch b/queue-6.1/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch new file mode 100644 index 00000000000..045db9a28dd --- /dev/null +++ b/queue-6.1/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch @@ -0,0 +1,92 @@ +From 32e303867fe237cf92d512d614ede36b72b16fcb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 5 Nov 2022 12:01:14 +0900 +Subject: perf probe: Fix to get the DW_AT_decl_file and 
DW_AT_call_file as + unsinged data + +From: Masami Hiramatsu (Google) + +[ Upstream commit a9dfc46c67b52ad43b8e335e28f4cf8002c67793 ] + +DWARF version 5 standard Sec 2.14 says that + + Any debugging information entry representing the declaration of an object, + module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and + DW_AT_decl_column attributes, each of whose value is an unsigned integer + constant. + +So it should be an unsigned integer data. Also, even though the standard +doesn't clearly say the DW_AT_call_file is signed or unsigned, the +elfutils (eu-readelf) interprets it as unsigned integer data and it is +natural to handle it as unsigned integer data as same as DW_AT_decl_file. +This changes the DW_AT_call_file as unsigned integer data too. + +Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances") +Signed-off-by: Masami Hiramatsu +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Masami Hiramatsu +Cc: Peter Zijlstra +Cc: stable@vger.kernel.org +Cc: Steven Rostedt (VMware) +Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3 +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/dwarf-aux.c | 21 ++++----------------- + 1 file changed, 4 insertions(+), 17 deletions(-) + +diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c +index a07efbadb775..623527edeac1 100644 +--- a/tools/perf/util/dwarf-aux.c ++++ b/tools/perf/util/dwarf-aux.c +@@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, + return 0; + } + +-/* Get attribute and translate it as a sdata */ +-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, +- Dwarf_Sword *result) +-{ +- Dwarf_Attribute attr; +- +- if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || +- dwarf_formsdata(&attr, result) != 0) +- return -ENOENT; +- +- return 0; +-} +- + /** + * 
die_is_signed_type - Check whether a type DIE is signed or not + * @tp_die: a DIE of a type +@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) + /* Get the call file index number in CU DIE */ + static int die_get_call_fileno(Dwarf_Die *in_die) + { +- Dwarf_Sword idx; ++ Dwarf_Word idx; + +- if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) ++ if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; +@@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die) + /* Get the declared file index number in CU DIE */ + static int die_get_decl_fileno(Dwarf_Die *pdie) + { +- Dwarf_Sword idx; ++ Dwarf_Word idx; + +- if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) ++ if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; +-- +2.35.1 + diff --git a/queue-6.1/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch b/queue-6.1/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch new file mode 100644 index 00000000000..affed2996de --- /dev/null +++ b/queue-6.1/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch @@ -0,0 +1,54 @@ +From a7050fad07ade3448cacc5e94e2cefff27c48b31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Nov 2022 22:48:39 +0900 +Subject: perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor + +From: Masami Hiramatsu (Google) + +[ Upstream commit f828929ab7f0dc3353e4a617f94f297fa8f3dec3 ] + +Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute +accessor functions, so that it can find the specified attribute from +abstract origin DIE etc. 
+ +Signed-off-by: Masami Hiramatsu +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Peter Zijlstra +Cc: Steven Rostedt (VMware) +Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3 +Signed-off-by: Arnaldo Carvalho de Melo +Stable-dep-of: a9dfc46c67b5 ("perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data") +Signed-off-by: Sasha Levin +--- + tools/perf/util/dwarf-aux.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c +index 609ca1671501..a07efbadb775 100644 +--- a/tools/perf/util/dwarf-aux.c ++++ b/tools/perf/util/dwarf-aux.c +@@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, + { + Dwarf_Attribute attr; + +- if (dwarf_attr(tp_die, attr_name, &attr) == NULL || ++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || + dwarf_formudata(&attr, result) != 0) + return -ENOENT; + +@@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, + { + Dwarf_Attribute attr; + +- if (dwarf_attr(tp_die, attr_name, &attr) == NULL || ++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || + dwarf_formsdata(&attr, result) != 0) + return -ENOENT; + +-- +2.35.1 + diff --git a/queue-6.1/perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch b/queue-6.1/perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch new file mode 100644 index 00000000000..d767281ff97 --- /dev/null +++ b/queue-6.1/perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch @@ -0,0 +1,146 @@ +From 0537a9c8ebb0edc82b4eae0ef6c293db924f695e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 22:44:02 -0800 +Subject: perf stat: Fix handling of --for-each-cgroup with --bpf-counters to + match non BPF mode + +From: Namhyung Kim + +[ Upstream commit 54b353a20c7e8be98414754f5aff98c8a68fcc1f ] + +The 
--for-each-cgroup can have the same cgroup multiple times, but this +confuses BPF counters (since they have the same cgroup id), making only +the last cgroup events to be counted. + +Let's check the cgroup name before adding a new entry to the cgroups +list. + +Before: + + $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 + + Performance counter stats for 'system wide': + + msec cpu-clock / + context-switches / + cpu-migrations / + page-faults / + cycles / + instructions / + branches / + branch-misses / + 8,016.04 msec cpu-clock / # 7.998 CPUs utilized + 6,152 context-switches / # 767.461 /sec + 250 cpu-migrations / # 31.187 /sec + 442 page-faults / # 55.139 /sec + 613,111,487 cycles / # 0.076 GHz + 280,599,604 instructions / # 0.46 insn per cycle + 57,692,724 branches / # 7.197 M/sec + 3,385,168 branch-misses / # 5.87% of all branches + + 1.002220125 seconds time elapsed + +After it becomes similar to the non-BPF mode: + + $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 + + Performance counter stats for 'system wide': + + 8,013.38 msec cpu-clock / # 7.998 CPUs utilized + 6,859 context-switches / # 855.944 /sec + 334 cpu-migrations / # 41.680 /sec + 345 page-faults / # 43.053 /sec + 782,326,119 cycles / # 0.098 GHz + 471,645,724 instructions / # 0.60 insn per cycle + 94,963,430 branches / # 11.851 M/sec + 3,685,511 branch-misses / # 3.88% of all branches + + 1.001864539 seconds time elapsed + +Committer notes: + +As a reminder, to test with BPF counters one has to use BUILD_BPF_SKEL=1 +in the make command line and have clang/llvm installed when building +perf, otherwise the --bpf-counters option will not be available: + + # perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 + Error: unknown option `bpf-counters' + + Usage: perf stat [] [] + + -a, --all-cpus system-wide collection from all CPUs + + # + +Fixes: bb1c15b60b981d10 ("perf stat: Support regex pattern in --for-each-cgroup") +Signed-off-by: Namhyung Kim +Tested-by: 
Arnaldo Carvalho de Melo +Cc: Adrian Hunter +Cc: bpf@vger.kernel.org +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Song Liu +Link: https://lore.kernel.org/r/20230104064402.1551516-5-namhyung@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/cgroup.c | 23 ++++++++++++++++++----- + 1 file changed, 18 insertions(+), 5 deletions(-) + +diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c +index e99b41f9be45..cd978c240e0d 100644 +--- a/tools/perf/util/cgroup.c ++++ b/tools/perf/util/cgroup.c +@@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus + return 0; + } + ++static int check_and_add_cgroup_name(const char *fpath) ++{ ++ struct cgroup_name *cn; ++ ++ list_for_each_entry(cn, &cgroup_list, list) { ++ if (!strcmp(cn->name, fpath)) ++ return 0; ++ } ++ ++ /* pretend if it's added by ftw() */ ++ return add_cgroup_name(fpath, NULL, FTW_D, NULL); ++} ++ + static void release_cgroup_list(void) + { + struct cgroup_name *cn; +@@ -242,7 +255,7 @@ static int list_cgroups(const char *str) + struct cgroup_name *cn; + char *s; + +- /* use given name as is - for testing purpose */ ++ /* use given name as is when no regex is given */ + for (;;) { + p = strchr(str, ','); + e = p ? 
p : eos; +@@ -253,13 +266,13 @@ static int list_cgroups(const char *str) + s = strndup(str, e - str); + if (!s) + return -1; +- /* pretend if it's added by ftw() */ +- ret = add_cgroup_name(s, NULL, FTW_D, NULL); ++ ++ ret = check_and_add_cgroup_name(s); + free(s); +- if (ret) ++ if (ret < 0) + return -1; + } else { +- if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) ++ if (check_and_add_cgroup_name("/") < 0) + return -1; + } + +-- +2.35.1 + diff --git a/queue-6.1/perf-stat-fix-handling-of-unsupported-cgroup-events-.patch b/queue-6.1/perf-stat-fix-handling-of-unsupported-cgroup-events-.patch new file mode 100644 index 00000000000..38372d77fee --- /dev/null +++ b/queue-6.1/perf-stat-fix-handling-of-unsupported-cgroup-events-.patch @@ -0,0 +1,89 @@ +From 485e4e36f0512e34ada9834c0f3a8024afb8f3d7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 22:44:01 -0800 +Subject: perf stat: Fix handling of unsupported cgroup events when using BPF + counters + +From: Namhyung Kim + +[ Upstream commit 2d656b0f81b22101db0447f890e39fdd736b745e ] + +When --for-each-cgroup option is used, it fails when any of events is +not supported and exits immediately. This is not how 'perf stat' +handles unsupported events. 
+ +Let's ignore the failure and proceed with others so that the output is +similar to when BPF counters are not used: + +Before: + + $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1 + Failed to open first cgroup events + $ + +After it shows output similar to when --bpf-counters isn't specified: + + $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1 + + Performance counter stats for 'system wide': + + L1-icache-loads system.slice + 29,892,418 L1-dcache-loads system.slice + L1-icache-loads user.slice + 52,497,220 L1-dcache-loads user.slice + $ + +Fixes: 944138f048f7d759 ("perf stat: Enable BPF counter with --for-each-cgroup") +Signed-off-by: Namhyung Kim +Tested-by: Arnaldo Carvalho de Melo +Cc: Adrian Hunter +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Peter Zijlstra +Cc: Song Liu +Link: https://lore.kernel.org/r/20230104064402.1551516-4-namhyung@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/bpf_counter_cgroup.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c +index 3c2df7522f6f..1c82377ed78b 100644 +--- a/tools/perf/util/bpf_counter_cgroup.c ++++ b/tools/perf/util/bpf_counter_cgroup.c +@@ -116,27 +116,19 @@ static int bperf_load_program(struct evlist *evlist) + + /* open single copy of the events w/o cgroup */ + err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1); +- if (err) { +- pr_err("Failed to open first cgroup events\n"); +- goto out; +- } ++ if (err == 0) ++ evsel->supported = true; + + map_fd = bpf_map__fd(skel->maps.events); + perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) { + int fd = FD(evsel, j); + __u32 idx = evsel->core.idx * total_cpus + cpu.cpu; + +- err = bpf_map_update_elem(map_fd, &idx, &fd, +- BPF_ANY); +- if (err < 0) 
{ +- pr_err("Failed to update perf_event fd\n"); +- goto out; +- } ++ bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); + } + + evsel->cgrp = leader_cgrp; + } +- evsel->supported = true; + + if (evsel->cgrp == cgrp) + continue; +-- +2.35.1 + diff --git a/queue-6.1/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch b/queue-6.1/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch new file mode 100644 index 00000000000..3d240c01974 --- /dev/null +++ b/queue-6.1/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch @@ -0,0 +1,52 @@ +From dde371d1094d87fa457a1ebbc51d167ef8c2fa9b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 13:09:00 +0400 +Subject: perf tools: Fix resources leak in perf_data__open_dir() + +From: Miaoqian Lin + +[ Upstream commit 0a6564ebd953c4590663c9a3c99a3ea9920ade6f ] + +In perf_data__open_dir(), opendir() opens the directory stream. Add +missing closedir() to release it after use. + +Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function") +Reviewed-by: Adrian Hunter +Signed-off-by: Miaoqian Lin +Cc: Alexander Shishkin +Cc: Alexey Bayduraev +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/data.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c +index a7f68c309545..fc16299c915f 100644 +--- a/tools/perf/util/data.c ++++ b/tools/perf/util/data.c +@@ -132,6 +132,7 @@ int perf_data__open_dir(struct perf_data *data) + file->size = st.st_size; + } + ++ closedir(dir); + if (!files) + return -EINVAL; + +@@ -140,6 +141,7 @@ int perf_data__open_dir(struct perf_data *data) + return 0; + + out_err: ++ closedir(dir); + close_dir(files, nr); + return ret; + } +-- +2.35.1 + diff --git a/queue-6.1/phy-qcom-qmp-combo-fix-broken-power-on.patch 
b/queue-6.1/phy-qcom-qmp-combo-fix-broken-power-on.patch new file mode 100644 index 00000000000..61e4e5bd6fc --- /dev/null +++ b/queue-6.1/phy-qcom-qmp-combo-fix-broken-power-on.patch @@ -0,0 +1,96 @@ +From c0edde1806ccdbd8d300a06c87c92cafbebb4f60 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Nov 2022 09:13:44 +0100 +Subject: phy: qcom-qmp-combo: fix broken power on + +From: Johan Hovold + +[ Upstream commit 7a7d86d14d073dfa3429c550667a8e78b99edbd4 ] + +The PHY is powered on during phy-init by setting the SW_PWRDN bit in the +COM_POWER_DOWN_CTRL register and then setting the same bit in the +PCS_POWER_DOWN_CONTROL register that belongs to the USB part of the +PHY. + +Currently, whether power on succeeds depends on probe order and having +the USB part of the PHY be initialised first. In case the DP part of the +PHY is instead initialised first, the intended power on of the USB block +results in a corrupted DP_PHY register (e.g. DP_PHY_AUX_CFG8). + +Add a pointer to the USB part of the PHY to the driver data and use that +to power on the PHY also if the DP part of the PHY is initialised first. 
+ +Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") +Cc: stable@vger.kernel.org # 5.10 +Reviewed-by: Dmitry Baryshkov +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20221114081346.5116-5-johan+linaro@kernel.org +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +index 91f8ee79000d..adcda7762acf 100644 +--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c ++++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +@@ -955,6 +955,7 @@ struct qcom_qmp { + struct regulator_bulk_data *vregs; + + struct qmp_phy **phys; ++ struct qmp_phy *usb_phy; + + struct mutex phy_mutex; + int init_count; +@@ -1978,7 +1979,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) + { + struct qcom_qmp *qmp = qphy->qmp; + const struct qmp_phy_cfg *cfg = qphy->cfg; +- void __iomem *pcs = qphy->pcs; ++ struct qmp_phy *usb_phy = qmp->usb_phy; + void __iomem *dp_com = qmp->dp_com; + int ret; + +@@ -2031,13 +2032,13 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) + qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); + qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); + +- if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) +- qphy_setbits(pcs, +- cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], +- cfg->pwrdn_ctrl); ++ if (usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) ++ qphy_setbits(usb_phy->pcs, ++ usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], ++ usb_phy->cfg->pwrdn_ctrl); + else +- qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, +- cfg->pwrdn_ctrl); ++ qphy_setbits(usb_phy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, ++ usb_phy->cfg->pwrdn_ctrl); + + mutex_unlock(&qmp->phy_mutex); + +@@ -2925,6 +2926,8 @@ static int qmp_combo_probe(struct platform_device *pdev) + goto err_node_put; + } + ++ qmp->usb_phy = qmp->phys[id]; ++ + /* + * 
Register the pipe clock provided by phy. + * See function description to see details of this pipe clock. +@@ -2940,6 +2943,9 @@ static int qmp_combo_probe(struct platform_device *pdev) + id++; + } + ++ if (!qmp->usb_phy) ++ return -EINVAL; ++ + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + + return PTR_ERR_OR_ZERO(phy_provider); +-- +2.35.1 + diff --git a/queue-6.1/qed-allow-sleep-in-qed_mcp_trace_dump.patch b/queue-6.1/qed-allow-sleep-in-qed_mcp_trace_dump.patch new file mode 100644 index 00000000000..dc5353ddd00 --- /dev/null +++ b/queue-6.1/qed-allow-sleep-in-qed_mcp_trace_dump.patch @@ -0,0 +1,168 @@ +From 4346eaf3906d0aff094f87f8b611229bc8905296 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 16:30:21 -0700 +Subject: qed: allow sleep in qed_mcp_trace_dump() + +From: Caleb Sander + +[ Upstream commit 5401c3e0992860b11fb4b25796e4c4f1921740df ] + +By default, qed_mcp_cmd_and_union() delays 10us at a time in a loop +that can run 500K times, so calls to qed_mcp_nvm_rd_cmd() +may block the current thread for over 5s. +We observed thread scheduling delays over 700ms in production, +with stacktraces pointing to this code as the culprit. + +qed_mcp_trace_dump() is called from ethtool, so sleeping is permitted. +It already can sleep in qed_mcp_halt(), which calls qed_mcp_cmd(). +Add a "can sleep" parameter to qed_find_nvram_image() and +qed_nvram_read() so they can sleep during qed_mcp_trace_dump(). +qed_mcp_trace_get_meta_info() and qed_mcp_trace_read_meta(), +called only by qed_mcp_trace_dump(), allow these functions to sleep. +I can't tell if the other caller (qed_grc_dump_mcp_hw_dump()) can sleep, +so keep b_can_sleep set to false when it calls these functions. 
+ +An example stacktrace from a custom warning we added to the kernel +showing a thread that has not scheduled despite long needing resched: +[ 2745.362925,17] ------------[ cut here ]------------ +[ 2745.362941,17] WARNING: CPU: 23 PID: 5640 at arch/x86/kernel/irq.c:233 do_IRQ+0x15e/0x1a0() +[ 2745.362946,17] Thread not rescheduled for 744 ms after irq 99 +[ 2745.362956,17] Modules linked in: ... +[ 2745.363339,17] CPU: 23 PID: 5640 Comm: lldpd Tainted: P O 4.4.182+ #202104120910+6d1da174272d.61x +[ 2745.363343,17] Hardware name: FOXCONN MercuryB/Quicksilver Controller, BIOS H11P1N09 07/08/2020 +[ 2745.363346,17] 0000000000000000 ffff885ec07c3ed8 ffffffff8131eb2f ffff885ec07c3f20 +[ 2745.363358,17] ffffffff81d14f64 ffff885ec07c3f10 ffffffff81072ac2 ffff88be98ed0000 +[ 2745.363369,17] 0000000000000063 0000000000000174 0000000000000074 0000000000000000 +[ 2745.363379,17] Call Trace: +[ 2745.363382,17] [] dump_stack+0x8e/0xcf +[ 2745.363393,17] [] warn_slowpath_common+0x82/0xc0 +[ 2745.363398,17] [] warn_slowpath_fmt+0x4c/0x50 +[ 2745.363404,17] [] ? rcu_irq_exit+0xae/0xc0 +[ 2745.363408,17] [] do_IRQ+0x15e/0x1a0 +[ 2745.363413,17] [] common_interrupt+0x89/0x89 +[ 2745.363416,17] [] ? delay_tsc+0x24/0x50 +[ 2745.363425,17] [] __udelay+0x34/0x40 +[ 2745.363457,17] [] qed_mcp_cmd_and_union+0x36f/0x7d0 [qed] +[ 2745.363473,17] [] qed_mcp_nvm_rd_cmd+0x4d/0x90 [qed] +[ 2745.363490,17] [] qed_mcp_trace_dump+0x4a7/0x630 [qed] +[ 2745.363504,17] [] ? qed_fw_asserts_dump+0x1d6/0x1f0 [qed] +[ 2745.363520,17] [] qed_dbg_mcp_trace_get_dump_buf_size+0x37/0x80 [qed] +[ 2745.363536,17] [] qed_dbg_feature_size+0x61/0xa0 [qed] +[ 2745.363551,17] [] qed_dbg_all_data_size+0x247/0x260 [qed] +[ 2745.363560,17] [] qede_get_regs_len+0x30/0x40 [qede] +[ 2745.363566,17] [] ethtool_get_drvinfo+0xe3/0x190 +[ 2745.363570,17] [] dev_ethtool+0x1362/0x2140 +[ 2745.363575,17] [] ? finish_task_switch+0x76/0x260 +[ 2745.363580,17] [] ? __schedule+0x3c6/0x9d0 +[ 2745.363585,17] [] ? 
hrtimer_start_range_ns+0x1d0/0x370 +[ 2745.363589,17] [] ? dev_get_by_name_rcu+0x6b/0x90 +[ 2745.363594,17] [] dev_ioctl+0xe8/0x710 +[ 2745.363599,17] [] sock_do_ioctl+0x48/0x60 +[ 2745.363603,17] [] sock_ioctl+0x1c7/0x280 +[ 2745.363608,17] [] ? seccomp_phase1+0x83/0x220 +[ 2745.363612,17] [] do_vfs_ioctl+0x2b3/0x4e0 +[ 2745.363616,17] [] SyS_ioctl+0x41/0x70 +[ 2745.363619,17] [] entry_SYSCALL_64_fastpath+0x1e/0x79 +[ 2745.363622,17] ---[ end trace f6954aa440266421 ]--- + +Fixes: c965db4446291 ("qed: Add support for debug data collection") +Signed-off-by: Caleb Sander +Acked-by: Alok Prasad +Link: https://lore.kernel.org/r/20230103233021.1457646-1-csander@purestorage.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qlogic/qed/qed_debug.c | 28 +++++++++++++++------ + 1 file changed, 20 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c +index 86ecb080b153..cdcead614e9f 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c +@@ -1832,7 +1832,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 image_type, + u32 *nvram_offset_bytes, +- u32 *nvram_size_bytes) ++ u32 *nvram_size_bytes, ++ bool b_can_sleep) + { + u32 ret_mcp_resp, ret_mcp_param, ret_txn_size; + struct mcp_file_att file_att; +@@ -1846,7 +1847,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, + &ret_mcp_resp, + &ret_mcp_param, + &ret_txn_size, +- (u32 *)&file_att, false); ++ (u32 *)&file_att, ++ b_can_sleep); + + /* Check response */ + if (nvm_result || (ret_mcp_resp & FW_MSG_CODE_MASK) != +@@ -1873,7 +1875,9 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, + static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_bytes, +- u32 nvram_size_bytes, u32 *ret_buf) ++ u32 
nvram_size_bytes, ++ u32 *ret_buf, ++ bool b_can_sleep) + { + u32 ret_mcp_resp, ret_mcp_param, ret_read_size, bytes_to_copy; + s32 bytes_left = nvram_size_bytes; +@@ -1899,7 +1903,7 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, + &ret_mcp_resp, + &ret_mcp_param, &ret_read_size, + (u32 *)((u8 *)ret_buf + read_offset), +- false)) ++ b_can_sleep)) + return DBG_STATUS_NVRAM_READ_FAILED; + + /* Check response */ +@@ -3380,7 +3384,8 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn, + p_ptt, + NVM_TYPE_HW_DUMP_OUT, + &hw_dump_offset_bytes, +- &hw_dump_size_bytes); ++ &hw_dump_size_bytes, ++ false); + if (status != DBG_STATUS_OK) + return 0; + +@@ -3397,7 +3402,9 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn, + status = qed_nvram_read(p_hwfn, + p_ptt, + hw_dump_offset_bytes, +- hw_dump_size_bytes, dump_buf + offset); ++ hw_dump_size_bytes, ++ dump_buf + offset, ++ false); + if (status != DBG_STATUS_OK) { + DP_NOTICE(p_hwfn, + "Failed to read MCP HW Dump image from NVRAM\n"); +@@ -4123,7 +4130,9 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn, + return qed_find_nvram_image(p_hwfn, + p_ptt, + nvram_image_type, +- trace_meta_offset, trace_meta_size); ++ trace_meta_offset, ++ trace_meta_size, ++ true); + } + + /* Reads the MCP Trace meta data from NVRAM into the specified buffer */ +@@ -4139,7 +4148,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, + /* Read meta data from NVRAM */ + status = qed_nvram_read(p_hwfn, + p_ptt, +- nvram_offset_in_bytes, size_in_bytes, buf); ++ nvram_offset_in_bytes, ++ size_in_bytes, ++ buf, ++ true); + if (status != DBG_STATUS_OK) + return status; + +-- +2.35.1 + diff --git a/queue-6.1/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch b/queue-6.1/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch new file mode 100644 index 00000000000..236cab26633 --- /dev/null +++ 
b/queue-6.1/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch @@ -0,0 +1,103 @@ +From 20e21ce844f6c04c4311d8dd6a1c9356348c9ba7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 14:52:28 +0300 +Subject: qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure + +From: Daniil Tatianin + +[ Upstream commit 13a7c8964afcd8ca43c0b6001ebb0127baa95362 ] + +adapter->dcb would get silently freed inside qlcnic_dcb_enable() in +case qlcnic_dcb_attach() would return an error, which always happens +under OOM conditions. This would lead to use-after-free because both +of the existing callers invoke qlcnic_dcb_get_info() on the obtained +pointer, which is potentially freed at that point. + +Propagate errors from qlcnic_dcb_enable(), and instead free the dcb +pointer at callsite using qlcnic_dcb_free(). This also removes the now +unused qlcnic_clear_dcb_ops() helper, which was a simple wrapper around +kfree() also causing memory leaks for partially initialized dcb. + +Found by Linux Verification Center (linuxtesting.org) with the SVACE +static analysis tool. + +Fixes: 3c44bba1d270 ("qlcnic: Disable DCB operations from SR-IOV VFs") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Daniil Tatianin +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 8 +++++++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h | 10 ++-------- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 +++++++- + 3 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +index dbb800769cb6..c95d56e56c59 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +@@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter) + goto disable_mbx_intr; + + qlcnic_83xx_clear_function_resources(adapter); +- qlcnic_dcb_enable(adapter->dcb); ++ ++ err = qlcnic_dcb_enable(adapter->dcb); ++ if (err) { ++ qlcnic_dcb_free(adapter->dcb); ++ goto disable_mbx_intr; ++ } ++ + qlcnic_83xx_initialize_nic(adapter, 1); + qlcnic_dcb_get_info(adapter->dcb); + +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +index 7519773eaca6..22afa2be85fd 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +@@ -41,11 +41,6 @@ struct qlcnic_dcb { + unsigned long state; + }; + +-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb) +-{ +- kfree(dcb); +-} +- + static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) + { + if (dcb && dcb->ops->get_hw_capability) +@@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb) + dcb->ops->init_dcbnl_ops(dcb); + } + +-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb) ++static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb) + { +- if (dcb && qlcnic_dcb_attach(dcb)) +- qlcnic_clear_dcb_ops(dcb); ++ return dcb ? 
qlcnic_dcb_attach(dcb) : 0; + } + #endif +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +index 28476b982bab..44dac3c0908e 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +@@ -2599,7 +2599,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + "Device does not support MSI interrupts\n"); + + if (qlcnic_82xx_check(adapter)) { +- qlcnic_dcb_enable(adapter->dcb); ++ err = qlcnic_dcb_enable(adapter->dcb); ++ if (err) { ++ qlcnic_dcb_free(adapter->dcb); ++ dev_err(&pdev->dev, "Failed to enable DCB\n"); ++ goto err_out_free_hw; ++ } ++ + qlcnic_dcb_get_info(adapter->dcb); + err = qlcnic_setup_intr(adapter); + +-- +2.35.1 + diff --git a/queue-6.1/rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch b/queue-6.1/rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch new file mode 100644 index 00000000000..0c10ab6f79f --- /dev/null +++ b/queue-6.1/rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch @@ -0,0 +1,112 @@ +From 3c4abc537b07bdda8b549a4ff2196f42425073d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Dec 2022 14:56:09 +0200 +Subject: RDMA/mlx5: Fix mlx5_ib_get_hw_stats when used for device + +From: Shay Drory + +[ Upstream commit 38b50aa44495d5eb4218f0b82fc2da76505cec53 ] + +Currently, when mlx5_ib_get_hw_stats() is used for device (port_num = 0), +there is a special handling in order to use the correct counters, but, +port_num is being passed down the stack without any change. Also, some +functions assume that port_num >=1. As a result, the following oops can +occur. 
+ + BUG: unable to handle page fault for address: ffff89510294f1a8 + #PF: supervisor write access in kernel mode + #PF: error_code(0x0002) - not-present page + PGD 0 P4D 0 + Oops: 0002 [#1] SMP + CPU: 8 PID: 1382 Comm: devlink Tainted: G W 6.1.0-rc4_for_upstream_base_2022_11_10_16_12 #1 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + RIP: 0010:_raw_spin_lock+0xc/0x20 + Call Trace: + + mlx5_ib_get_native_port_mdev+0x73/0xe0 [mlx5_ib] + do_get_hw_stats.constprop.0+0x109/0x160 [mlx5_ib] + mlx5_ib_get_hw_stats+0xad/0x180 [mlx5_ib] + ib_setup_device_attrs+0xf0/0x290 [ib_core] + ib_register_device+0x3bb/0x510 [ib_core] + ? atomic_notifier_chain_register+0x67/0x80 + __mlx5_ib_add+0x2b/0x80 [mlx5_ib] + mlx5r_probe+0xb8/0x150 [mlx5_ib] + ? auxiliary_match_id+0x6a/0x90 + auxiliary_bus_probe+0x3c/0x70 + ? driver_sysfs_add+0x6b/0x90 + really_probe+0xcd/0x380 + __driver_probe_device+0x80/0x170 + driver_probe_device+0x1e/0x90 + __device_attach_driver+0x7d/0x100 + ? driver_allows_async_probing+0x60/0x60 + ? driver_allows_async_probing+0x60/0x60 + bus_for_each_drv+0x7b/0xc0 + __device_attach+0xbc/0x200 + bus_probe_device+0x87/0xa0 + device_add+0x404/0x940 + ? dev_set_name+0x53/0x70 + __auxiliary_device_add+0x43/0x60 + add_adev+0x99/0xe0 [mlx5_core] + mlx5_attach_device+0xc8/0x120 [mlx5_core] + mlx5_load_one_devl_locked+0xb2/0xe0 [mlx5_core] + devlink_reload+0x133/0x250 + devlink_nl_cmd_reload+0x480/0x570 + ? devlink_nl_pre_doit+0x44/0x2b0 + genl_family_rcv_msg_doit.isra.0+0xc2/0x110 + genl_rcv_msg+0x180/0x2b0 + ? devlink_nl_cmd_region_read_dumpit+0x540/0x540 + ? devlink_reload+0x250/0x250 + ? devlink_put+0x50/0x50 + ? genl_family_rcv_msg_doit.isra.0+0x110/0x110 + netlink_rcv_skb+0x54/0x100 + genl_rcv+0x24/0x40 + netlink_unicast+0x1f6/0x2c0 + netlink_sendmsg+0x237/0x490 + sock_sendmsg+0x33/0x40 + __sys_sendto+0x103/0x160 + ? handle_mm_fault+0x10e/0x290 + ? 
do_user_addr_fault+0x1c0/0x5f0 + __x64_sys_sendto+0x25/0x30 + do_syscall_64+0x3d/0x90 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +Fix it by setting port_num to 1 in order to get device status and remove +unused variable. + +Fixes: aac4492ef23a ("IB/mlx5: Update counter implementation for dual port RoCE") +Link: https://lore.kernel.org/r/98b82994c3cd3fa593b8a75ed3f3901e208beb0f.1672231736.git.leonro@nvidia.com +Signed-off-by: Shay Drory +Reviewed-by: Patrisious Haddad +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/counters.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c +index 945758f39523..3e1272695d99 100644 +--- a/drivers/infiniband/hw/mlx5/counters.c ++++ b/drivers/infiniband/hw/mlx5/counters.c +@@ -278,7 +278,6 @@ static int do_get_hw_stats(struct ib_device *ibdev, + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); + struct mlx5_core_dev *mdev; + int ret, num_counters; +- u32 mdev_port_num; + + if (!stats) + return -EINVAL; +@@ -299,8 +298,9 @@ static int do_get_hw_stats(struct ib_device *ibdev, + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { +- mdev = mlx5_ib_get_native_port_mdev(dev, port_num, +- &mdev_port_num); ++ if (!port_num) ++ port_num = 1; ++ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL); + if (!mdev) { + /* If port is not affiliated yet, its in down state + * which doesn't have any counters yet, so it would be +-- +2.35.1 + diff --git a/queue-6.1/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch b/queue-6.1/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch new file mode 100644 index 00000000000..5ba0a1c995e --- /dev/null +++ b/queue-6.1/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch @@ -0,0 +1,95 @@ +From 048be1a5e44d33bdf29fa27bfd421a54b844084a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Dec 2022 14:56:10 
+0200 +Subject: RDMA/mlx5: Fix validation of max_rd_atomic caps for DC + +From: Maor Gottlieb + +[ Upstream commit 8de8482fe5732fbef4f5af82bc0c0362c804cd1f ] + +Currently, when modifying DC, we validate max_rd_atomic user attribute +against the RC cap, validate against DC. RC and DC QP types have different +device limitations. + +This can cause userspace created DC QPs to malfunction. + +Fixes: c32a4f296e1d ("IB/mlx5: Add support for DC Initiator QP") +Link: https://lore.kernel.org/r/0c5aee72cea188c3bb770f4207cce7abc9b6fc74.1672231736.git.leonro@nvidia.com +Signed-off-by: Maor Gottlieb +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++---------- + 1 file changed, 35 insertions(+), 14 deletions(-) + +diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c +index 40d9410ec303..cf953d23d18d 100644 +--- a/drivers/infiniband/hw/mlx5/qp.c ++++ b/drivers/infiniband/hw/mlx5/qp.c +@@ -4502,6 +4502,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + return false; + } + ++static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr, ++ int attr_mask, enum ib_qp_type qp_type) ++{ ++ int log_max_ra_res; ++ int log_max_ra_req; ++ ++ if (qp_type == MLX5_IB_QPT_DCI) { ++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_res_dc); ++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_req_dc); ++ } else { ++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_res_qp); ++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_req_qp); ++ } ++ ++ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && ++ attr->max_rd_atomic > log_max_ra_res) { ++ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", ++ attr->max_rd_atomic); ++ return false; ++ } ++ ++ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && ++ attr->max_dest_rd_atomic > log_max_ra_req) { ++ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", ++ attr->max_dest_rd_atomic); ++ 
return false; ++ } ++ return true; ++} ++ + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) + { +@@ -4589,21 +4623,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + goto out; + } + +- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && +- attr->max_rd_atomic > +- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { +- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", +- attr->max_rd_atomic); +- goto out; +- } +- +- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && +- attr->max_dest_rd_atomic > +- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { +- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", +- attr->max_dest_rd_atomic); ++ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type)) + goto out; +- } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; +-- +2.35.1 + diff --git a/queue-6.1/selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch b/queue-6.1/selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch new file mode 100644 index 00000000000..b117d00fb02 --- /dev/null +++ b/queue-6.1/selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch @@ -0,0 +1,62 @@ +From e9ae3ec87bcf6f869522a11c27a17b9ae84b327a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 17:18:28 +0800 +Subject: selftests: net: fix cleanup_v6() for arp_ndisc_evict_nocarrier + +From: Po-Hsu Lin + +[ Upstream commit 9c4d7f45d60745a1cea0e841fa5e3444c398d2f1 ] + +The cleanup_v6() will cause the arp_ndisc_evict_nocarrier script exit +with 255 (No such file or directory), even the tests are good: + + # selftests: net: arp_ndisc_evict_nocarrier.sh + # run arp_evict_nocarrier=1 test + # RTNETLINK answers: File exists + # ok + # run arp_evict_nocarrier=0 test + # RTNETLINK answers: File exists + # ok + # run all.arp_evict_nocarrier=0 test + # RTNETLINK answers: File exists + # ok + # run ndisc_evict_nocarrier=1 test + # ok + # run ndisc_evict_nocarrier=0 test + # ok + # run 
all.ndisc_evict_nocarrier=0 test + # ok + not ok 1 selftests: net: arp_ndisc_evict_nocarrier.sh # exit=255 + +This is because it's trying to modify the parameter for ipv4 instead. + +Also, tests for ipv6 (run_ndisc_evict_nocarrier_enabled() and +run_ndisc_evict_nocarrier_disabled() are working on veth1, reflect +this fact in cleanup_v6(). + +Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier") +Signed-off-by: Po-Hsu Lin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +index b5af08af8559..b4ec1eeee6c9 100755 +--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh ++++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +@@ -24,8 +24,8 @@ cleanup_v6() + ip netns del me + ip netns del peer + +- sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1 +- sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 ++ sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 ++ sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + } + + create_ns() +-- +2.35.1 + diff --git a/queue-6.1/selftests-net-return-non-zero-for-failures-reported-.patch b/queue-6.1/selftests-net-return-non-zero-for-failures-reported-.patch new file mode 100644 index 00000000000..90c61c6e24d --- /dev/null +++ b/queue-6.1/selftests-net-return-non-zero-for-failures-reported-.patch @@ -0,0 +1,100 @@ +From 3c446ce8ee0a1b11671c542e1dcbd47d4416fc3b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 17:18:29 +0800 +Subject: selftests: net: return non-zero for failures reported in + arp_ndisc_evict_nocarrier + +From: Po-Hsu Lin + +[ Upstream commit 1856628baa17032531916984808d1bdfd62700d4 ] + +Return non-zero return value if there is any failure 
reported in this +script during the test. Otherwise it can only reflect the status of +the last command. + +Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier") +Signed-off-by: Po-Hsu Lin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../selftests/net/arp_ndisc_evict_nocarrier.sh | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +index b4ec1eeee6c9..4a110bb01e53 100755 +--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh ++++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +@@ -18,6 +18,7 @@ readonly V4_ADDR1=10.0.10.2 + readonly V6_ADDR0=2001:db8:91::1 + readonly V6_ADDR1=2001:db8:91::2 + nsid=100 ++ret=0 + + cleanup_v6() + { +@@ -61,7 +62,7 @@ setup_v6() { + if [ $? -ne 0 ]; then + cleanup_v6 + echo "failed" +- exit ++ exit 1 + fi + + # Set veth2 down, which will put veth1 in NOCARRIER state +@@ -88,7 +89,7 @@ setup_v4() { + if [ $? -ne 0 ]; then + cleanup_v4 + echo "failed" +- exit ++ exit 1 + fi + + # Set veth1 down, which will put veth0 in NOCARRIER state +@@ -115,6 +116,7 @@ run_arp_evict_nocarrier_enabled() { + + if [ $? -eq 0 ];then + echo "failed" ++ ret=1 + else + echo "ok" + fi +@@ -134,6 +136,7 @@ run_arp_evict_nocarrier_disabled() { + echo "ok" + else + echo "failed" ++ ret=1 + fi + + cleanup_v4 +@@ -164,6 +167,7 @@ run_ndisc_evict_nocarrier_enabled() { + + if [ $? 
-eq 0 ];then + echo "failed" ++ ret=1 + else + echo "ok" + fi +@@ -182,6 +186,7 @@ run_ndisc_evict_nocarrier_disabled() { + echo "ok" + else + echo "failed" ++ ret=1 + fi + + cleanup_v6 +@@ -198,6 +203,7 @@ run_ndisc_evict_nocarrier_disabled_all() { + echo "ok" + else + echo "failed" ++ ret=1 + fi + + cleanup_v6 +@@ -218,3 +224,4 @@ if [ "$(id -u)" -ne 0 ];then + fi + + run_all_tests ++exit $ret +-- +2.35.1 + diff --git a/queue-6.1/series b/queue-6.1/series index fad2dc6a733..6caae907b1b 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -5,3 +5,129 @@ cifs-refcount-only-the-selected-iface-during-interface-update.patch usb-dwc3-gadget-ignore-end-transfer-delay-on-teardown.patch btrfs-fix-off-by-one-in-delalloc-search-during-lseek.patch btrfs-fix-compat_ro-checks-against-remount.patch +perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch +perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch +phy-qcom-qmp-combo-fix-broken-power-on.patch +btrfs-move-btrfs_get_block_group-helper-out-of-disk-.patch +btrfs-move-flush-related-definitions-to-space-info.h.patch +btrfs-move-btrfs_print_data_csum_error-into-inode.c.patch +btrfs-move-fs-wide-helpers-out-of-ctree.h.patch +btrfs-move-assert-helpers-out-of-ctree.h.patch +btrfs-move-the-printk-helpers-out-of-ctree.h.patch +btrfs-rename-struct-funcs.c-to-accessors.c.patch +btrfs-rename-tree-defrag.c-to-defrag.c.patch +btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch +sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch +wifi-ath9k-use-proper-statements-in-conditionals.patch +bpf-pull-before-calling-skb_postpull_rcsum.patch +drm-panfrost-fix-gem-handle-creation-ref-counting.patch +netfilter-nf_tables-consolidate-set-description.patch +netfilter-nf_tables-add-function-to-create-set-state.patch +netfilter-nf_tables-perform-type-checking-for-existi.patch +ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch +net-vrf-determine-the-dst-using-the-original-ifindex.patch 
+vmxnet3-correctly-report-csum_level-for-encapsulated.patch +mptcp-fix-deadlock-in-fastopen-error-path.patch +mptcp-fix-lockdep-false-positive.patch +netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch +bonding-fix-lockdep-splat-in-bond_miimon_commit.patch +net-lan966x-fix-configuration-of-the-pcs.patch +veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch +nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch +net-hns3-add-interrupts-re-initialization-while-doin.patch +net-hns3-fix-miss-l3e-checking-for-rx-packet.patch +net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch +net-sched-fix-memory-leak-in-tcindex_set_parms.patch +qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch +net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch +nfc-fix-potential-resource-leaks.patch +bnxt_en-simplify-bnxt_xdp_buff_init.patch +bnxt_en-fix-xdp-rx-path.patch +bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch +bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch +vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch +vdpa-mlx5-fix-wrong-mac-address-deletion.patch +vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch +vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch +vringh-fix-range-used-in-iotlb_translate.patch +vhost-fix-range-used-in-translate_desc.patch +vhost-vdpa-fix-an-iotlb-memory-leak.patch +vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch +virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch +vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch +vdpasim-fix-memory-leak-when-freeing-iotlbs.patch +net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch +net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch +net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch +net-mlx5-avoid-recovery-in-probe-flows.patch +net-mlx5-fix-roce-setting-at-hca-level.patch +net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch +net-mlx5e-fix-rx-reporter-for-xsk-rqs.patch 
+net-mlx5e-ct-fix-ct-debugfs-folder-name.patch +net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch +net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch +net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch +net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch +bpf-always-use-maximal-size-for-copy_array.patch +tcp-add-time_wait-sockets-in-bhash2.patch +net-hns3-refine-the-handling-for-vf-heartbeat.patch +net-amd-xgbe-add-missed-tasklet_kill.patch +net-ena-fix-toeplitz-initial-hash-value.patch +net-ena-don-t-register-memory-info-on-xdp-exchange.patch +net-ena-account-for-the-number-of-processed-bytes-in.patch +net-ena-use-bitmask-to-indicate-packet-redirection.patch +net-ena-fix-rx_copybreak-value-update.patch +net-ena-set-default-value-for-rx-interrupt-moderatio.patch +net-ena-update-numa-tph-hint-register-upon-numa-node.patch +net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch +gpio-pca953x-avoid-to-use-uninitialized-value-pinctr.patch +rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch +rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch +selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch +selftests-net-return-non-zero-for-failures-reported-.patch +drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch +filelock-new-helper-vfs_inode_has_locks.patch +ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch +gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch +net-sched-atm-dont-intepret-cls-results-when-asked-t.patch +net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch +vxlan-fix-memory-leaks-in-error-path.patch +net-sparx5-fix-reading-of-the-mac-address.patch +netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch +netfilter-ipset-rework-long-task-execution-when-addi.patch +drm-virtio-fix-memory-leak-in-virtio_gpu_object_crea.patch +perf-tools-fix-resources-leak-in-perf_data__open_dir.patch +drm-imx-ipuv3-plane-fix-overlay-plane-width.patch 
+fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch +drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch +octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch +usb-rndis_host-secure-rndis_query-check-against-int-.patch +perf-lock-contention-fix-core-dump-related-to-not-fi.patch +perf-stat-fix-handling-of-unsupported-cgroup-events-.patch +perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch +drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch +drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch +ublk-honor-io_uring_f_nonblock-for-handling-control-.patch +qed-allow-sleep-in-qed_mcp_trace_dump.patch +net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch +caif-fix-memory-leak-in-cfctrl_linkup_request.patch +udf-fix-extension-of-the-last-extent-in-the-file.patch +usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch +hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch +asoc-sof-revert-core-unregister-clients-and-machine-.patch +9p-client-fix-data-race-on-req-status.patch +asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch +asoc-sof-mediatek-initialize-panic_info-to-zero.patch +drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch +drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch +drm-amdkfd-fix-double-release-compute-pasid.patch +io_uring-cancel-re-grab-ctx-mutex-after-finishing-wa.patch +nvme-fix-multipath-crash-caused-by-flush-request-whe.patch +acpi-video-allow-gpu-drivers-to-report-no-panels.patch +drm-amd-display-report-to-acpi-video-if-no-panels-we.patch +acpi-video-don-t-enable-fallback-path-for-creating-a.patch +io_uring-check-for-valid-register-opcode-earlier.patch +kunit-alloc_string_stream_fragment-error-handling-bu.patch +nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch +nvme-also-return-i-o-command-effects-from-nvme_comma.patch +asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch diff --git a/queue-6.1/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch 
b/queue-6.1/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch new file mode 100644 index 00000000000..d67582474d5 --- /dev/null +++ b/queue-6.1/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch @@ -0,0 +1,133 @@ +From 9b27c100f0365f75b0db83c2b3d6f975a3e79a6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Dec 2022 13:14:31 +0900 +Subject: SUNRPC: ensure the matching upcall is in-flight upon downcall + +From: minoura makoto + +[ Upstream commit b18cba09e374637a0a3759d856a6bca94c133952 ] + +Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid +but different gss service") introduced `auth` argument to +__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL +since it (and auth->service) was not (yet) determined. + +When multiple upcalls with the same uid and different service are +ongoing, it could happen that __gss_find_upcall(), which returns the +first match found in the pipe->in_downcall list, could not find the +correct gss_msg corresponding to the downcall we are looking for. +Moreover, it might return a msg which is not sent to rpc.gssd yet. + +We could see mount.nfs process hung in D state with multiple mount.nfs +are executed in parallel. The call trace below is of CentOS 7.9 +kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/ +elrepo kernel-ml-6.0.7-1.el7. 
+ +PID: 71258 TASK: ffff91ebd4be0000 CPU: 36 COMMAND: "mount.nfs" + #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f + #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9 + #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss] + #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc +[sunrpc] + #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss] + #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc] + #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc] + #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc] + #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc] + #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc] + +The scenario is like this. Let's say there are two upcalls for +services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe. + +When rpc.gssd reads pipe to get the upcall msg corresponding to +service B from pipe->pipe and then writes the response, in +gss_pipe_downcall the msg corresponding to service A will be picked +because only uid is used to find the msg and it is before the one for +B in pipe->in_downcall. And the process waiting for the msg +corresponding to service A will be woken up. + +Actual scheduling of that process might be after rpc.gssd processes the +next msg. In rpc_pipe_generic_upcall it clears msg->errno (for A). +The process is scheduled to see gss_msg->ctx == NULL and +gss_msg->msg.errno == 0, therefore it cannot break the loop in +gss_create_upcall and is never woken up after that. + +This patch adds a simple check to ensure that a msg which is not +sent to rpc.gssd yet is not chosen as the matching upcall upon +receiving a downcall. 
+ +Signed-off-by: minoura makoto +Signed-off-by: Hiroshi Shimamoto +Tested-by: Hiroshi Shimamoto +Cc: Trond Myklebust +Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + include/linux/sunrpc/rpc_pipe_fs.h | 5 +++++ + net/sunrpc/auth_gss/auth_gss.c | 19 +++++++++++++++++-- + 2 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h +index cd188a527d16..3b35b6f6533a 100644 +--- a/include/linux/sunrpc/rpc_pipe_fs.h ++++ b/include/linux/sunrpc/rpc_pipe_fs.h +@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); + extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); + ++/* returns true if the msg is in-flight, i.e., already eaten by the peer */ ++static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { ++ return (msg->copied != 0 && list_empty(&msg->list)); ++} ++ + struct rpc_clnt; + extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); + extern int rpc_remove_client_dir(struct rpc_clnt *); +diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c +index 7bb247c51e2f..2d7b1e03110a 100644 +--- a/net/sunrpc/auth_gss/auth_gss.c ++++ b/net/sunrpc/auth_gss/auth_gss.c +@@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth + list_for_each_entry(pos, &pipe->in_downcall, list) { + if (!uid_eq(pos->uid, uid)) + continue; +- if (auth && pos->auth->service != auth->service) ++ if (pos->auth->service != auth->service) + continue; + refcount_inc(&pos->count); + return pos; +@@ -686,6 +686,21 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) + return err; + } + ++static struct gss_upcall_msg * ++gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) ++{ ++ struct 
gss_upcall_msg *pos; ++ list_for_each_entry(pos, &pipe->in_downcall, list) { ++ if (!uid_eq(pos->uid, uid)) ++ continue; ++ if (!rpc_msg_is_inflight(&pos->msg)) ++ continue; ++ refcount_inc(&pos->count); ++ return pos; ++ } ++ return NULL; ++} ++ + #define MSG_BUF_MAXSIZE 1024 + + static ssize_t +@@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) + err = -ENOENT; + /* Find a matching upcall */ + spin_lock(&pipe->lock); +- gss_msg = __gss_find_upcall(pipe, uid, NULL); ++ gss_msg = gss_find_downcall(pipe, uid); + if (gss_msg == NULL) { + spin_unlock(&pipe->lock); + goto err_put_ctx; +-- +2.35.1 + diff --git a/queue-6.1/tcp-add-time_wait-sockets-in-bhash2.patch b/queue-6.1/tcp-add-time_wait-sockets-in-bhash2.patch new file mode 100644 index 00000000000..858419c04e1 --- /dev/null +++ b/queue-6.1/tcp-add-time_wait-sockets-in-bhash2.patch @@ -0,0 +1,262 @@ +From ed2489dd50ee9dbe98e8d5a64e80692fcee4c4de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Dec 2022 22:27:52 +0900 +Subject: tcp: Add TIME_WAIT sockets in bhash2. + +From: Kuniyuki Iwashima + +[ Upstream commit 936a192f974018b4f6040f6f77b1cc1e75bd8666 ] + +Jiri Slaby reported regression of bind() with a simple repro. [0] + +The repro creates a TIME_WAIT socket and tries to bind() a new socket +with the same local address and port. Before commit 28044fc1d495 ("net: +Add a bhash2 table hashed by port and address"), the bind() failed with +-EADDRINUSE, but now it succeeds. + +The cited commit should have put TIME_WAIT sockets into bhash2; otherwise, +inet_bhash2_conflict() misses TIME_WAIT sockets when validating bind() +requests if the address is not a wildcard one. + +The straight option is to move sk_bind2_node from struct sock to struct +sock_common to add twsk to bhash2 as implemented as RFC. [1] However, the +binary layout change in the struct sock could affect performances moving +hot fields on different cachelines. 
+ +To avoid that, we add another TIME_WAIT list in inet_bind2_bucket and check +it while validating bind(). + +[0]: https://lore.kernel.org/netdev/6b971a4e-c7d8-411e-1f92-fda29b5b2fb9@kernel.org/ +[1]: https://lore.kernel.org/netdev/20221221151258.25748-2-kuniyu@amazon.com/ + +Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") +Reported-by: Jiri Slaby +Suggested-by: Paolo Abeni +Signed-off-by: Kuniyuki Iwashima +Acked-by: Joanne Koong +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/net/inet_hashtables.h | 4 ++++ + include/net/inet_timewait_sock.h | 5 +++++ + net/ipv4/inet_connection_sock.c | 26 ++++++++++++++++++++++---- + net/ipv4/inet_hashtables.c | 8 +++++--- + net/ipv4/inet_timewait_sock.c | 31 +++++++++++++++++++++++++++++-- + 5 files changed, 65 insertions(+), 9 deletions(-) + +diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h +index 69174093078f..99bd823e97f6 100644 +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -108,6 +108,10 @@ struct inet_bind2_bucket { + struct hlist_node node; + /* List of sockets hashed to this bucket */ + struct hlist_head owners; ++ /* bhash has twsk in owners, but bhash2 has twsk in ++ * deathrow not to add a member in struct sock_common. 
++ */ ++ struct hlist_head deathrow; + }; + + static inline struct net *ib_net(const struct inet_bind_bucket *ib) +diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h +index 5b47545f22d3..4a8e578405cb 100644 +--- a/include/net/inet_timewait_sock.h ++++ b/include/net/inet_timewait_sock.h +@@ -73,9 +73,14 @@ struct inet_timewait_sock { + u32 tw_priority; + struct timer_list tw_timer; + struct inet_bind_bucket *tw_tb; ++ struct inet_bind2_bucket *tw_tb2; ++ struct hlist_node tw_bind2_node; + }; + #define tw_tclass tw_tos + ++#define twsk_for_each_bound_bhash2(__tw, list) \ ++ hlist_for_each_entry(__tw, list, tw_bind2_node) ++ + static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) + { + return (struct inet_timewait_sock *)sk; +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 4a34bc7cb15e..0465ada82799 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, + return false; + } + ++static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, ++ kuid_t sk_uid, bool relax, ++ bool reuseport_cb_ok, bool reuseport_ok) ++{ ++ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) ++ return false; ++ ++ return inet_bind_conflict(sk, sk2, sk_uid, relax, ++ reuseport_cb_ok, reuseport_ok); ++} ++ + static bool inet_bhash2_conflict(const struct sock *sk, + const struct inet_bind2_bucket *tb2, + kuid_t sk_uid, + bool relax, bool reuseport_cb_ok, + bool reuseport_ok) + { ++ struct inet_timewait_sock *tw2; + struct sock *sk2; + + sk_for_each_bound_bhash2(sk2, &tb2->owners) { +- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) +- continue; ++ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, ++ reuseport_cb_ok, reuseport_ok)) ++ return true; ++ } + +- if (inet_bind_conflict(sk, sk2, sk_uid, relax, +- reuseport_cb_ok, reuseport_ok)) ++ 
twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { ++ sk2 = (struct sock *)tw2; ++ ++ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, ++ reuseport_cb_ok, reuseport_ok)) + return true; + } ++ + return false; + } + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 3cec471a2cd2..67f5e5440802 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, + #endif + tb->rcv_saddr = sk->sk_rcv_saddr; + INIT_HLIST_HEAD(&tb->owners); ++ INIT_HLIST_HEAD(&tb->deathrow); + hlist_add_head(&tb->node, &head->chain); + } + +@@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, + /* Caller must hold hashbucket lock for this tb with local BH disabled */ + void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) + { +- if (hlist_empty(&tb->owners)) { ++ if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) { + __hlist_del(&tb->node); + kmem_cache_free(cachep, tb); + } +@@ -1103,15 +1104,16 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, tb2, port); + +- spin_unlock(&head2->lock); +- + if (sk_unhashed(sk)) { + inet_sk(sk)->inet_sport = htons(port); + inet_ehash_nolisten(sk, (struct sock *)tw, NULL); + } + if (tw) + inet_twsk_bind_unhash(tw, hinfo); ++ ++ spin_unlock(&head2->lock); + spin_unlock(&head->lock); ++ + if (tw) + inet_twsk_deschedule_put(tw); + local_bh_enable(); +diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c +index 66fc940f9521..1d77d992e6e7 100644 +--- a/net/ipv4/inet_timewait_sock.c ++++ b/net/ipv4/inet_timewait_sock.c +@@ -29,6 +29,7 @@ + void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo) + { ++ struct inet_bind2_bucket *tb2 = tw->tw_tb2; + struct inet_bind_bucket *tb = tw->tw_tb; + + if (!tb) +@@ -37,6 
+38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); ++ ++ __hlist_del(&tw->tw_bind2_node); ++ tw->tw_tb2 = NULL; ++ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); ++ + __sock_put((struct sock *)tw); + } + +@@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) + { + struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); +- struct inet_bind_hashbucket *bhead; ++ struct inet_bind_hashbucket *bhead, *bhead2; + + spin_lock(lock); + sk_nulls_del_node_init_rcu((struct sock *)tw); +@@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) + /* Disassociate with bind bucket. */ + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, + hashinfo->bhash_size)]; ++ bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw, ++ twsk_net(tw), tw->tw_num); + + spin_lock(&bhead->lock); ++ spin_lock(&bhead2->lock); + inet_twsk_bind_unhash(tw, hashinfo); ++ spin_unlock(&bhead2->lock); + spin_unlock(&bhead->lock); + + refcount_dec(&tw->tw_dr->tw_refcount); +@@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + hlist_add_head(&tw->tw_bind_node, list); + } + ++static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, ++ struct hlist_head *list) ++{ ++ hlist_add_head(&tw->tw_bind2_node, list); ++} ++ + /* + * Enter the time wait state. This is called with locally disabled BH. 
+ * Essentially we whip up a timewait bucket, copy the relevant info into it +@@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); +- struct inet_bind_hashbucket *bhead; ++ struct inet_bind_hashbucket *bhead, *bhead2; ++ + /* Step 1: Put TW into bind hash. Original socket stays there too. + Note, that any socket with inet->num != 0 MUST be bound in + binding cache, even if it is closed. + */ + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, + hashinfo->bhash_size)]; ++ bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); ++ + spin_lock(&bhead->lock); ++ spin_lock(&bhead2->lock); ++ + tw->tw_tb = icsk->icsk_bind_hash; + WARN_ON(!icsk->icsk_bind_hash); + inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); ++ ++ tw->tw_tb2 = icsk->icsk_bind2_hash; ++ WARN_ON(!icsk->icsk_bind2_hash); ++ inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow); ++ ++ spin_unlock(&bhead2->lock); + spin_unlock(&bhead->lock); + + spin_lock(lock); +-- +2.35.1 + diff --git a/queue-6.1/ublk-honor-io_uring_f_nonblock-for-handling-control-.patch b/queue-6.1/ublk-honor-io_uring_f_nonblock-for-handling-control-.patch new file mode 100644 index 00000000000..06686a6abad --- /dev/null +++ b/queue-6.1/ublk-honor-io_uring_f_nonblock-for-handling-control-.patch @@ -0,0 +1,39 @@ +From 7792d8a5f96f7277285ffe44e94a95e1ac4e933f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Jan 2023 21:32:35 +0800 +Subject: ublk: honor IO_URING_F_NONBLOCK for handling control command + +From: Ming Lei + +[ Upstream commit fa8e442e832a3647cdd90f3e606c473a51bc1b26 ] + +Most of control command handlers may sleep, so return -EAGAIN in case +of IO_URING_F_NONBLOCK to defer the handling into io wq context. 
+ +Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") +Reported-by: Jens Axboe +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20230104133235.836536-1-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/ublk_drv.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c +index e9de9d846b73..17b677b5d3b2 100644 +--- a/drivers/block/ublk_drv.c ++++ b/drivers/block/ublk_drv.c +@@ -1992,6 +1992,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + int ret = -EINVAL; + ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ + ublk_ctrl_cmd_dump(cmd); + + if (!(issue_flags & IO_URING_F_SQE128)) +-- +2.35.1 + diff --git a/queue-6.1/udf-fix-extension-of-the-last-extent-in-the-file.patch b/queue-6.1/udf-fix-extension-of-the-last-extent-in-the-file.patch new file mode 100644 index 00000000000..67b2eb77629 --- /dev/null +++ b/queue-6.1/udf-fix-extension-of-the-last-extent-in-the-file.patch @@ -0,0 +1,37 @@ +From 0a34407a4363ac1ba758597a6b5b6c1c3fc459ee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Dec 2022 17:45:51 +0100 +Subject: udf: Fix extension of the last extent in the file + +From: Jan Kara + +[ Upstream commit 83c7423d1eb6806d13c521d1002cc1a012111719 ] + +When extending the last extent in the file within the last block, we +wrongly computed the length of the last extent. This is mostly a +cosmetical problem since the extent does not contain any data and the +length will be fixed up by following operations but still. 
+ +Fixes: 1f3868f06855 ("udf: Fix extending file within last block") +Signed-off-by: Jan Kara +Signed-off-by: Sasha Levin +--- + fs/udf/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/udf/inode.c b/fs/udf/inode.c +index f713d108f21d..e92a16435a29 100644 +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -600,7 +600,7 @@ static void udf_do_extend_final_block(struct inode *inode, + */ + if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) + return; +- added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen; ++ added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + last_ext->extLength += added_bytes; + UDF_I(inode)->i_lenExtents += added_bytes; + +-- +2.35.1 + diff --git a/queue-6.1/usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch b/queue-6.1/usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch new file mode 100644 index 00000000000..a9b63866209 --- /dev/null +++ b/queue-6.1/usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch @@ -0,0 +1,42 @@ +From 4046aa4fd3a73810da200fce0422c769b108940d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 13:17:46 +0100 +Subject: usb: dwc3: xilinx: include linux/gpio/consumer.h + +From: Arnd Bergmann + +[ Upstream commit e498a04443240c15c3c857165f7b652b87f4fd96 ] + +The newly added gpio consumer calls cause a build failure in configurations +that fail to include the right header implicitly: + +drivers/usb/dwc3/dwc3-xilinx.c: In function 'dwc3_xlnx_init_zynqmp': +drivers/usb/dwc3/dwc3-xilinx.c:207:22: error: implicit declaration of function 'devm_gpiod_get_optional'; did you mean 'devm_clk_get_optional'? 
[-Werror=implicit-function-declaration] + 207 | reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); + | ^~~~~~~~~~~~~~~~~~~~~~~ + | devm_clk_get_optional + +Fixes: ca05b38252d7 ("usb: dwc3: xilinx: Add gpio-reset support") +Signed-off-by: Arnd Bergmann +Link: https://lore.kernel.org/r/20230103121755.956027-1-arnd@kernel.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/dwc3/dwc3-xilinx.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c +index 8607d4c23283..0745e9f11b2e 100644 +--- a/drivers/usb/dwc3/dwc3-xilinx.c ++++ b/drivers/usb/dwc3/dwc3-xilinx.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include <linux/gpio/consumer.h> + #include + #include + #include +-- +2.35.1 + diff --git a/queue-6.1/usb-rndis_host-secure-rndis_query-check-against-int-.patch b/queue-6.1/usb-rndis_host-secure-rndis_query-check-against-int-.patch new file mode 100644 index 00000000000..19aa0b6d56e --- /dev/null +++ b/queue-6.1/usb-rndis_host-secure-rndis_query-check-against-int-.patch @@ -0,0 +1,43 @@ +From 1d980d064d0d1e38ba6d5eb733c0a51af23b550f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 10:17:09 +0100 +Subject: usb: rndis_host: Secure rndis_query check against int overflow + +From: Szymon Heidrich + +[ Upstream commit c7dd13805f8b8fc1ce3b6d40f6aff47e66b72ad2 ] + +Variables off and len typed as uint32 in rndis_query function +are controlled by incoming RNDIS response message thus their +value may be manipulated. Setting off to an unexpectedly large +value will cause the sum with len and 8 to overflow and pass +the implemented validation step. Consequently the response +pointer will be referring to a location past the expected +buffer boundaries allowing information leakage e.g. via +RNDIS_OID_802_3_PERMANENT_ADDRESS OID. + +Fixes: ddda08624013 ("USB: rndis_host, various cleanups") +Signed-off-by: Szymon Heidrich +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/usb/rndis_host.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c +index f79333fe1783..7b3739b29c8f 100644 +--- a/drivers/net/usb/rndis_host.c ++++ b/drivers/net/usb/rndis_host.c +@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf, + + off = le32_to_cpu(u.get_c->offset); + len = le32_to_cpu(u.get_c->len); +- if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE)) ++ if (unlikely((off > CONTROL_BUFFER_SIZE - 8) || ++ (len > CONTROL_BUFFER_SIZE - 8 - off))) + goto response_error; + + if (*reply_len != -1 && len != *reply_len) +-- +2.35.1 + diff --git a/queue-6.1/vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch b/queue-6.1/vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch new file mode 100644 index 00000000000..cd80e6f864d --- /dev/null +++ b/queue-6.1/vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch @@ -0,0 +1,49 @@ +From 9d93552c0fb14607bb20e0cfe5759e33efdf66a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Nov 2022 15:17:52 +0200 +Subject: vdpa/mlx5: Fix rule forwarding VLAN to TIR + +From: Eli Cohen + +[ Upstream commit a6ce72c0fb6041f9871f880b2d02b294f7f49cb4 ] + +Set the VLAN id to the header values field instead of overwriting the +headers criteria field. + +Before this fix, VLAN filtering would not really work and tagged packets +would be forwarded unfiltered to the TIR. + +Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support") +Acked-by: Jason Wang +Signed-off-by: Eli Cohen +Message-Id: <20221114131759.57883-2-elic@nvidia.com> +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c +index 90913365def4..3fb06dcee943 100644 +--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c ++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c +@@ -1468,11 +1468,13 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); + eth_broadcast_addr(dmac_c); + ether_addr_copy(dmac_v, mac); +- MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); ++ if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { ++ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); ++ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); ++ } + if (tagged) { + MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); +- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); +- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); + } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; +-- +2.35.1 + diff --git a/queue-6.1/vdpa-mlx5-fix-wrong-mac-address-deletion.patch b/queue-6.1/vdpa-mlx5-fix-wrong-mac-address-deletion.patch new file mode 100644 index 00000000000..33fd46a2db4 --- /dev/null +++ b/queue-6.1/vdpa-mlx5-fix-wrong-mac-address-deletion.patch @@ -0,0 +1,38 @@ +From e8a3cd0f3ef1e114c7307a07a22bd768c03f2995 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Nov 2022 15:17:54 +0200 +Subject: vdpa/mlx5: Fix wrong mac address deletion + +From: Eli Cohen + +[ Upstream commit 1ab53760d322c82fb4cb5e81b5817065801e3ec4 ] + +Delete the old MAC from the table and not the new one which is not there +yet. 
+ +Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support") +Acked-by: Jason Wang +Signed-off-by: Eli Cohen +Message-Id: <20221114131759.57883-4-elic@nvidia.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c +index 3fb06dcee943..444d6572b2d0 100644 +--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c ++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c +@@ -1686,7 +1686,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) + + /* Need recreate the flow table entry, so that the packet could forward back + */ +- mac_vlan_del(ndev, ndev->config.mac, 0, false); ++ mac_vlan_del(ndev, mac_back, 0, false); + + if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { + mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); +-- +2.35.1 + diff --git a/queue-6.1/vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch b/queue-6.1/vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch new file mode 100644 index 00000000000..21d2bded3f2 --- /dev/null +++ b/queue-6.1/vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch @@ -0,0 +1,58 @@ +From 20d944a5b618498a7d02160f638fb83fafc989fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Dec 2022 20:08:13 +0800 +Subject: vdpa/vp_vdpa: fix kfree a wrong pointer in vp_vdpa_remove + +From: Rong Wang + +[ Upstream commit ed843d6ed7310a27cf7c8ee0a82a482eed0cb4a6 ] + +In vp_vdpa_remove(), the code kfree(&vp_vdpa_mgtdev->mgtdev.id_table) uses +a reference of pointer as the argument of kfree, which is the wrong pointer +and then may hit crash like this: + +Unable to handle kernel paging request at virtual address 00ffff003363e30c +Internal error: Oops: 96000004 [#1] SMP +Call trace: + rb_next+0x20/0x5c + ext4_readdir+0x494/0x5c4 [ext4] + iterate_dir+0x168/0x1b4 + __se_sys_getdents64+0x68/0x170 + 
__arm64_sys_getdents64+0x24/0x30 + el0_svc_common.constprop.0+0x7c/0x1bc + do_el0_svc+0x2c/0x94 + el0_svc+0x20/0x30 + el0_sync_handler+0xb0/0xb4 + el0_sync+0x160/0x180 +Code: 54000220 f9400441 b4000161 aa0103e0 (f9400821) +SMP: stopping secondary CPUs +Starting crashdump kernel... + +Fixes: ffbda8e9df10 ("vdpa/vp_vdpa : add vdpa tool support in vp_vdpa") +Signed-off-by: Rong Wang +Signed-off-by: Nanyong Sun +Message-Id: <20221207120813.2837529-1-sunnanyong@huawei.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Cindy Lu +Acked-by: Jason Wang +Signed-off-by: Sasha Levin +--- + drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c +index d448db0c4de3..8fe267ca3e76 100644 +--- a/drivers/vdpa/virtio_pci/vp_vdpa.c ++++ b/drivers/vdpa/virtio_pci/vp_vdpa.c +@@ -647,7 +647,7 @@ static void vp_vdpa_remove(struct pci_dev *pdev) + mdev = vp_vdpa_mgtdev->mdev; + vp_modern_remove(mdev); + vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev); +- kfree(&vp_vdpa_mgtdev->mgtdev.id_table); ++ kfree(vp_vdpa_mgtdev->mgtdev.id_table); + kfree(mdev); + kfree(vp_vdpa_mgtdev); + } +-- +2.35.1 + diff --git a/queue-6.1/vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch b/queue-6.1/vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch new file mode 100644 index 00000000000..3974197ca96 --- /dev/null +++ b/queue-6.1/vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch @@ -0,0 +1,103 @@ +From 62e2b172deba739f1b337554d04398d235e76ba2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Nov 2022 16:23:48 +0800 +Subject: vdpa_sim: fix possible memory leak in vdpasim_net_init() and + vdpasim_blk_init() + +From: ruanjinjie + +[ Upstream commit aeca7ff254843d49a8739f07f7dab1341450111d ] + +Inject fault while probing module, if device_register() fails in +vdpasim_net_init() or vdpasim_blk_init(), but the refcount of kobject is +not decreased to 0, the name 
allocated in dev_set_name() is leaked. +Fix this by calling put_device(), so that name can be freed in +callback function kobject_cleanup(). + +(vdpa_sim_net) +unreferenced object 0xffff88807eebc370 (size 16): + comm "modprobe", pid 3848, jiffies 4362982860 (age 18.153s) + hex dump (first 16 bytes): + 76 64 70 61 73 69 6d 5f 6e 65 74 00 6b 6b 6b a5 vdpasim_net.kkk. + backtrace: + [] __kmalloc_node_track_caller+0x4e/0x150 + [] kstrdup+0x33/0x60 + [] kobject_set_name_vargs+0x41/0x110 + [] dev_set_name+0xab/0xe0 + [] device_add+0xe3/0x1a80 + [] 0xffffffffa0270013 + [] do_one_initcall+0x87/0x2e0 + [] do_init_module+0x1ab/0x640 + [] load_module+0x5d00/0x77f0 + [] __do_sys_finit_module+0x110/0x1b0 + [] do_syscall_64+0x35/0x80 + [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +(vdpa_sim_blk) +unreferenced object 0xffff8881070c1250 (size 16): + comm "modprobe", pid 6844, jiffies 4364069319 (age 17.572s) + hex dump (first 16 bytes): + 76 64 70 61 73 69 6d 5f 62 6c 6b 00 6b 6b 6b a5 vdpasim_blk.kkk. + backtrace: + [] __kmalloc_node_track_caller+0x4e/0x150 + [] kstrdup+0x33/0x60 + [] kobject_set_name_vargs+0x41/0x110 + [] dev_set_name+0xab/0xe0 + [] device_add+0xe3/0x1a80 + [] 0xffffffffa0220013 + [] do_one_initcall+0x87/0x2e0 + [] do_init_module+0x1ab/0x640 + [] load_module+0x5d00/0x77f0 + [] __do_sys_finit_module+0x110/0x1b0 + [] do_syscall_64+0x35/0x80 + [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +Fixes: 899c4d187f6a ("vdpa_sim_blk: add support for vdpa management tool") +Fixes: a3c06ae158dd ("vdpa_sim_net: Add support for user supported devices") + +Signed-off-by: ruanjinjie +Reviewed-by: Stefano Garzarella +Message-Id: <20221110082348.4105476-1-ruanjinjie@huawei.com> +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +Signed-off-by: Sasha Levin +--- + drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 4 +++- + drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 4 +++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +index c6db1a1baf76..f745926237a8 100644 +--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c ++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +@@ -427,8 +427,10 @@ static int __init vdpasim_blk_init(void) + int ret; + + ret = device_register(&vdpasim_blk_mgmtdev); +- if (ret) ++ if (ret) { ++ put_device(&vdpasim_blk_mgmtdev); + return ret; ++ } + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) +diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +index c3cb225ea469..11f5a121df24 100644 +--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c ++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +@@ -305,8 +305,10 @@ static int __init vdpasim_net_init(void) + int ret; + + ret = device_register(&vdpasim_net_mgmtdev); +- if (ret) ++ if (ret) { ++ put_device(&vdpasim_net_mgmtdev); + return ret; ++ } + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) +-- +2.35.1 + diff --git a/queue-6.1/vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch b/queue-6.1/vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch new file mode 100644 index 00000000000..2d53894a327 --- /dev/null +++ b/queue-6.1/vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch @@ -0,0 +1,52 @@ +From 7a1397c45e36ca747dccf8015925a81aebeea4f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Nov 2022 15:13:35 +0100 +Subject: vdpa_sim: fix vringh initialization in vdpasim_queue_ready() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Stefano Garzarella + +[ Upstream commit 794ec498c9fa79e6bfd71b931410d5897a9c00d4 ] + +When we initialize vringh, we should pass the features and the +number of elements in the virtqueue negotiated with 
the driver, +otherwise operations with vringh may fail. + +This was discovered in a case where the driver sets a number of +elements in the virtqueue different from the value returned by +.get_vq_num_max(). + +In vdpasim_vq_reset() it is safe to initialize the vringh with +default values, since the virtqueue will not be used until +vdpasim_queue_ready() is called again. + +Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator") +Signed-off-by: Stefano Garzarella +Message-Id: <20221110141335.62171-1-sgarzare@redhat.com> +Signed-off-by: Michael S. Tsirkin +Acked-by: Jason Wang +Acked-by: Eugenio Pérez +Signed-off-by: Sasha Levin +--- + drivers/vdpa/vdpa_sim/vdpa_sim.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c +index b071f0d842fb..b20689f8fe89 100644 +--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c ++++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c +@@ -67,8 +67,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) + { + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + +- vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, +- VDPASIM_QUEUE_MAX, false, ++ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, + (struct vring_desc *)(uintptr_t)vq->desc_addr, + (struct vring_avail *) + (uintptr_t)vq->driver_addr, +-- +2.35.1 + diff --git a/queue-6.1/vdpasim-fix-memory-leak-when-freeing-iotlbs.patch b/queue-6.1/vdpasim-fix-memory-leak-when-freeing-iotlbs.patch new file mode 100644 index 00000000000..1ad83242ff1 --- /dev/null +++ b/queue-6.1/vdpasim-fix-memory-leak-when-freeing-iotlbs.patch @@ -0,0 +1,43 @@ +From 97a39e06491b1fb52999c16c07f313e85343502b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Dec 2022 17:07:17 +0800 +Subject: vdpasim: fix memory leak when freeing IOTLBs + +From: Jason Wang + +[ Upstream commit 0b7a04a30eef20e6b24926a45c0ce7906ae85bd6 ] + +After commit bda324fd037a ("vdpasim: control virtqueue support"), 
+vdpasim->iommu became an array of IOTLB, so we should clean the +mappings of each IOTLB one by one instead of just deleting the ranges +in the first IOTLB which may leak maps. + +Fixes: bda324fd037a ("vdpasim: control virtqueue support") +Cc: Gautam Dawar +Signed-off-by: Jason Wang +Message-Id: <20221213090717.61529-1-jasowang@redhat.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Gautam Dawar +Signed-off-by: Sasha Levin +--- + drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c +index b20689f8fe89..cb88891b44a8 100644 +--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c ++++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c +@@ -689,7 +689,9 @@ static void vdpasim_free(struct vdpa_device *vdpa) + } + + kvfree(vdpasim->buffer); +- vhost_iotlb_free(vdpasim->iommu); ++ for (i = 0; i < vdpasim->dev_attr.nas; i++) ++ vhost_iotlb_reset(&vdpasim->iommu[i]); ++ kfree(vdpasim->iommu); + kfree(vdpasim->vqs); + kfree(vdpasim->config); + } +-- +2.35.1 + diff --git a/queue-6.1/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch b/queue-6.1/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch new file mode 100644 index 00000000000..f5148a5d94e --- /dev/null +++ b/queue-6.1/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch @@ -0,0 +1,88 @@ +From a4864ebdfdc09c7d6d44cc994270d79f85aa470e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 12:59:03 -0600 +Subject: veth: Fix race with AF_XDP exposing old or uninitialized descriptors + +From: Shawn Bohrer + +[ Upstream commit fa349e396e4886d742fd6501c599ec627ef1353b ] + +When AF_XDP is used on a veth interface the RX ring is updated in two +steps. veth_xdp_rcv() removes packet descriptors from the FILL ring +fills them and places them in the RX ring updating the cached_prod +pointer. 
Later xdp_do_flush() syncs the RX ring prod pointer with the +cached_prod pointer allowing user-space to see the recently filled in +descriptors. The rings are intended to be SPSC, however the existing +order in veth_poll allows the xdp_do_flush() to run concurrently with +another CPU creating a race condition that allows user-space to see old +or uninitialized descriptors in the RX ring. This bug has been observed +in production systems. + +To summarize, we are expecting this ordering: + +CPU 0 __xsk_rcv_zc() +CPU 0 __xsk_map_flush() +CPU 2 __xsk_rcv_zc() +CPU 2 __xsk_map_flush() + +But we are seeing this order: + +CPU 0 __xsk_rcv_zc() +CPU 2 __xsk_rcv_zc() +CPU 0 __xsk_map_flush() +CPU 2 __xsk_map_flush() + +This occurs because we rely on NAPI to ensure that only one napi_poll +handler is running at a time for the given veth receive queue. +napi_schedule_prep() will prevent multiple instances from getting +scheduled. However calling napi_complete_done() signals that this +napi_poll is complete and allows subsequent calls to +napi_schedule_prep() and __napi_schedule() to succeed in scheduling a +concurrent napi_poll before the xdp_do_flush() has been called. For the +veth driver a concurrent call to napi_schedule_prep() and +__napi_schedule() can occur on a different CPU because the veth xmit +path can additionally schedule a napi_poll creating the race. + +The fix as suggested by Magnus Karlsson, is to simply move the +xdp_do_flush() call before napi_complete_done(). This syncs the +producer ring pointers before another instance of napi_poll can be +scheduled on another CPU. It will also slightly improve performance by +moving the flush closer to when the descriptors were placed in the +RX ring. 
+ +Fixes: d1396004dd86 ("veth: Add XDP TX and REDIRECT") +Suggested-by: Magnus Karlsson +Signed-off-by: Shawn Bohrer +Link: https://lore.kernel.org/r/20221220185903.1105011-1-sbohrer@cloudflare.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/veth.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 09682ea3354e..bd385ccd0d18 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -974,6 +974,9 @@ static int veth_poll(struct napi_struct *napi, int budget) + xdp_set_return_frame_no_direct(); + done = veth_xdp_rcv(rq, budget, &bq, &stats); + ++ if (stats.xdp_redirect > 0) ++ xdp_do_flush(); ++ + if (done < budget && napi_complete_done(napi, done)) { + /* Write rx_notify_masked before reading ptr_ring */ + smp_store_mb(rq->rx_notify_masked, false); +@@ -987,8 +990,6 @@ static int veth_poll(struct napi_struct *napi, int budget) + + if (stats.xdp_tx > 0) + veth_xdp_flush(rq, &bq); +- if (stats.xdp_redirect > 0) +- xdp_do_flush(); + xdp_clear_return_frame_no_direct(); + + return done; +-- +2.35.1 + diff --git a/queue-6.1/vhost-fix-range-used-in-translate_desc.patch b/queue-6.1/vhost-fix-range-used-in-translate_desc.patch new file mode 100644 index 00000000000..3ee3999892f --- /dev/null +++ b/queue-6.1/vhost-fix-range-used-in-translate_desc.patch @@ -0,0 +1,55 @@ +From f64e78e0f1066c54afe9d2793b8720ee9ffbdf46 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 11:25:03 +0100 +Subject: vhost: fix range used in translate_desc() + +From: Stefano Garzarella + +[ Upstream commit 98047313cdb46828093894d0ac8b1183b8b317f9 ] + +vhost_iotlb_itree_first() requires `start` and `last` parameters +to search for a mapping that overlaps the range. 
+ +In translate_desc() we cyclically call vhost_iotlb_itree_first(), +incrementing `addr` by the amount already translated, so rightly +we move the `start` parameter passed to vhost_iotlb_itree_first(), +but we should hold the `last` parameter constant. + +Let's fix it by saving the `last` parameter value before incrementing +`addr` in the loop. + +Fixes: a9709d6874d5 ("vhost: convert pre sorted vhost memory array to interval tree") +Acked-by: Jason Wang +Signed-off-by: Stefano Garzarella +Message-Id: <20221109102503.18816-3-sgarzare@redhat.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vhost/vhost.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c +index 40097826cff0..3c2359570df9 100644 +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -2053,7 +2053,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + struct vhost_dev *dev = vq->dev; + struct vhost_iotlb *umem = dev->iotlb ? 
dev->iotlb : dev->umem; + struct iovec *_iov; +- u64 s = 0; ++ u64 s = 0, last = addr + len - 1; + int ret = 0; + + while ((u64)len > s) { +@@ -2063,7 +2063,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + break; + } + +- map = vhost_iotlb_itree_first(umem, addr, addr + len - 1); ++ map = vhost_iotlb_itree_first(umem, addr, last); + if (map == NULL || map->start > addr) { + if (umem != dev->iotlb) { + ret = -EFAULT; +-- +2.35.1 + diff --git a/queue-6.1/vhost-vdpa-fix-an-iotlb-memory-leak.patch b/queue-6.1/vhost-vdpa-fix-an-iotlb-memory-leak.patch new file mode 100644 index 00000000000..bc88492a99d --- /dev/null +++ b/queue-6.1/vhost-vdpa-fix-an-iotlb-memory-leak.patch @@ -0,0 +1,102 @@ +From 6db7d7a9c1377e3b9d2cdf358d139717f9d744cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 16:42:13 +0100 +Subject: vhost-vdpa: fix an iotlb memory leak + +From: Stefano Garzarella + +[ Upstream commit c070c1912a83432530cbb4271d5b9b11fa36b67a ] + +Before commit 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") +we called vhost_vdpa_iotlb_unmap(v, iotlb, 0ULL, 0ULL - 1) during +release to free all the resources allocated when processing user IOTLB +messages through vhost_vdpa_process_iotlb_update(). +That commit changed the handling of IOTLB a bit, and we accidentally +removed some code called during the release. + +We partially fixed this with commit 037d4305569a ("vhost-vdpa: call +vhost_vdpa_cleanup during the release") but a potential memory leak is +still there as showed by kmemleak if the application does not send +VHOST_IOTLB_INVALIDATE or crashes: + + unreferenced object 0xffff888007fbaa30 (size 16): + comm "blkio-bench", pid 914, jiffies 4294993521 (age 885.500s) + hex dump (first 16 bytes): + 40 73 41 07 80 88 ff ff 00 00 00 00 00 00 00 00 @sA............. 
+ backtrace: + [<0000000087736d2a>] kmem_cache_alloc_trace+0x142/0x1c0 + [<0000000060740f50>] vhost_vdpa_process_iotlb_msg+0x68c/0x901 [vhost_vdpa] + [<0000000083e8e205>] vhost_chr_write_iter+0xc0/0x4a0 [vhost] + [<000000008f2f414a>] vhost_vdpa_chr_write_iter+0x18/0x20 [vhost_vdpa] + [<00000000de1cd4a0>] vfs_write+0x216/0x4b0 + [<00000000a2850200>] ksys_write+0x71/0xf0 + [<00000000de8e720b>] __x64_sys_write+0x19/0x20 + [<0000000018b12cbb>] do_syscall_64+0x3f/0x90 + [<00000000986ec465>] entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Let's fix this calling vhost_vdpa_iotlb_unmap() on the whole range in +vhost_vdpa_remove_as(). We move that call before vhost_dev_cleanup() +since we need a valid v->vdev.mm in vhost_vdpa_pa_unmap(). +vhost_iotlb_reset() call can be removed, since vhost_vdpa_iotlb_unmap() +on the whole range removes all the entries. + +The kmemleak log reported was observed with a vDPA device that has `use_va` +set to true (e.g. VDUSE). This patch has been tested with both types of +devices. + +Fixes: 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release") +Fixes: 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") +Signed-off-by: Stefano Garzarella +Message-Id: <20221109154213.146789-1-sgarzare@redhat.com> +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +Signed-off-by: Sasha Levin +--- + drivers/vhost/vdpa.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c +index 166044642fd5..b08e07fc7d1f 100644 +--- a/drivers/vhost/vdpa.c ++++ b/drivers/vhost/vdpa.c +@@ -65,6 +65,10 @@ static DEFINE_IDA(vhost_vdpa_ida); + + static dev_t vhost_vdpa_major; + ++static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, ++ struct vhost_iotlb *iotlb, ++ u64 start, u64 last); ++ + static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) + { + struct vhost_vdpa_as *as = container_of(iotlb, struct +@@ -135,7 +139,7 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) + return -EINVAL; + + hlist_del(&as->hash_link); +- vhost_iotlb_reset(&as->iotlb); ++ vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1); + kfree(as); + + return 0; +@@ -1162,14 +1166,14 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v) + struct vhost_vdpa_as *as; + u32 asid; + +- vhost_dev_cleanup(&v->vdev); +- kfree(v->vdev.vqs); +- + for (asid = 0; asid < v->vdpa->nas; asid++) { + as = asid_to_as(v, asid); + if (as) + vhost_vdpa_remove_as(v, asid); + } ++ ++ vhost_dev_cleanup(&v->vdev); ++ kfree(v->vdev.vqs); + } + + static int vhost_vdpa_open(struct inode *inode, struct file *filep) +-- +2.35.1 + diff --git a/queue-6.1/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch b/queue-6.1/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch new file mode 100644 index 00000000000..c8bb90d6d33 --- /dev/null +++ b/queue-6.1/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch @@ -0,0 +1,64 @@ +From ed3259c7df91d9d048e8b40611605e2813465ff7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Nov 2022 10:17:05 +0000 +Subject: vhost/vsock: Fix error handling in vhost_vsock_init() + +From: Yuan Can + +[ Upstream commit 7a4efe182ca61fb3e5307e69b261c57cbf434cd4 ] + +A problem about modprobe vhost_vsock failed is triggered with the 
+following log given: + +modprobe: ERROR: could not insert 'vhost_vsock': Device or resource busy + +The reason is that vhost_vsock_init() returns misc_register() directly +without checking its return value, if misc_register() failed, it returns +without calling vsock_core_unregister() on vhost_transport, resulting the +vhost_vsock can never be installed later. +A simple call graph is shown as below: + + vhost_vsock_init() + vsock_core_register() # register vhost_transport + misc_register() + device_create_with_groups() + device_create_groups_vargs() + dev = kzalloc(...) # OOM happened + # return without unregister vhost_transport + +Fix by calling vsock_core_unregister() when misc_register() returns error. + +Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko") +Signed-off-by: Yuan Can +Message-Id: <20221108101705.45981-1-yuancan@huawei.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Signed-off-by: Sasha Levin +--- + drivers/vhost/vsock.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c +index 5703775af129..10a7d23731fe 100644 +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -959,7 +959,14 @@ static int __init vhost_vsock_init(void) + VSOCK_TRANSPORT_F_H2G); + if (ret < 0) + return ret; +- return misc_register(&vhost_vsock_misc); ++ ++ ret = misc_register(&vhost_vsock_misc); ++ if (ret) { ++ vsock_core_unregister(&vhost_transport.transport); ++ return ret; ++ } ++ ++ return 0; + }; + + static void __exit vhost_vsock_exit(void) +-- +2.35.1 + diff --git a/queue-6.1/virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch b/queue-6.1/virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch new file mode 100644 index 00000000000..321428f5aa8 --- /dev/null +++ b/queue-6.1/virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch @@ -0,0 +1,43 @@ +From 42b70154eb29cbe760f1895742c7cff189d7e11c Mon Sep 17 00:00:00 2001 +From: 
Sasha Levin +Date: Mon, 14 Nov 2022 11:07:40 +0000 +Subject: virtio-crypto: fix memory leak in + virtio_crypto_alg_skcipher_close_session() + +From: Wei Yongjun + +[ Upstream commit b1d65f717cd6305a396a8738e022c6f7c65cfbe8 ] + +'vc_ctrl_req' is alloced in virtio_crypto_alg_skcipher_close_session(), +and should be freed in the invalid ctrl_status->status error handling +case. Otherwise there is a memory leak. + +Fixes: 0756ad15b1fe ("virtio-crypto: use private buffer for control request") +Signed-off-by: Wei Yongjun +Message-Id: <20221114110740.537276-1-weiyongjun@huaweicloud.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Gonglei +Acked-by: zhenwei pi +Acked-by: Jason Wang +Signed-off-by: Sasha Levin +--- + drivers/crypto/virtio/virtio_crypto_skcipher_algs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c +index e553ccadbcbc..e5876286828b 100644 +--- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c ++++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c +@@ -239,7 +239,8 @@ static int virtio_crypto_alg_skcipher_close_session( + pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", + ctrl_status->status, destroy_session->session_id); + +- return -EINVAL; ++ err = -EINVAL; ++ goto out; + } + + err = 0; +-- +2.35.1 + diff --git a/queue-6.1/vmxnet3-correctly-report-csum_level-for-encapsulated.patch b/queue-6.1/vmxnet3-correctly-report-csum_level-for-encapsulated.patch new file mode 100644 index 00000000000..1b89facdb67 --- /dev/null +++ b/queue-6.1/vmxnet3-correctly-report-csum_level-for-encapsulated.patch @@ -0,0 +1,55 @@ +From d6f57bc638238ad86bb9396f7b08a82382db8888 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 12:25:55 -0800 +Subject: vmxnet3: correctly report csum_level for encapsulated packet + +From: Ronak Doshi + +[ Upstream commit 3d8f2c4269d08f8793e946279dbdf5e972cc4911 
] + +Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload +support") added support for encapsulation offload. However, the +patch did not report correctly the csum_level for encapsulated packet. + +This patch fixes this issue by reporting correct csum level for the +encapsulated packet. + +Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") +Signed-off-by: Ronak Doshi +Acked-by: Peng Li +Link: https://lore.kernel.org/r/20221220202556.24421-1-doshir@vmware.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c +index 6f1e560fb15c..56267c327f0b 100644 +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -1288,6 +1288,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, + (le32_to_cpu(gdesc->dword[3]) & + VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { + skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if ((le32_to_cpu(gdesc->dword[0]) & ++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { ++ skb->csum_level = 1; ++ } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); +@@ -1297,6 +1301,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, + } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & + (1 << VMXNET3_RCD_TUC_SHIFT))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if ((le32_to_cpu(gdesc->dword[0]) & ++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { ++ skb->csum_level = 1; ++ } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); +-- +2.35.1 + diff --git a/queue-6.1/vringh-fix-range-used-in-iotlb_translate.patch b/queue-6.1/vringh-fix-range-used-in-iotlb_translate.patch new file mode 100644 index 00000000000..6a67f410119 --- /dev/null +++ 
b/queue-6.1/vringh-fix-range-used-in-iotlb_translate.patch @@ -0,0 +1,56 @@ +From 4b0f4d642ca28fa39796c2bf56d49bd6d9f551f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 11:25:02 +0100 +Subject: vringh: fix range used in iotlb_translate() + +From: Stefano Garzarella + +[ Upstream commit f85efa9b0f5381874f727bd98f56787840313f0b ] + +vhost_iotlb_itree_first() requires `start` and `last` parameters +to search for a mapping that overlaps the range. + +In iotlb_translate() we cyclically call vhost_iotlb_itree_first(), +incrementing `addr` by the amount already translated, so rightly +we move the `start` parameter passed to vhost_iotlb_itree_first(), +but we should hold the `last` parameter constant. + +Let's fix it by saving the `last` parameter value before incrementing +`addr` in the loop. + +Fixes: 9ad9c49cfe97 ("vringh: IOTLB support") +Acked-by: Jason Wang +Signed-off-by: Stefano Garzarella +Message-Id: <20221109102503.18816-2-sgarzare@redhat.com> +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vhost/vringh.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c +index 11f59dd06a74..828c29306565 100644 +--- a/drivers/vhost/vringh.c ++++ b/drivers/vhost/vringh.c +@@ -1102,7 +1102,7 @@ static int iotlb_translate(const struct vringh *vrh, + struct vhost_iotlb_map *map; + struct vhost_iotlb *iotlb = vrh->iotlb; + int ret = 0; +- u64 s = 0; ++ u64 s = 0, last = addr + len - 1; + + spin_lock(vrh->iotlb_lock); + +@@ -1114,8 +1114,7 @@ static int iotlb_translate(const struct vringh *vrh, + break; + } + +- map = vhost_iotlb_itree_first(iotlb, addr, +- addr + len - 1); ++ map = vhost_iotlb_itree_first(iotlb, addr, last); + if (!map || map->start > addr) { + ret = -EINVAL; + break; +-- +2.35.1 + diff --git a/queue-6.1/vxlan-fix-memory-leaks-in-error-path.patch b/queue-6.1/vxlan-fix-memory-leaks-in-error-path.patch new file mode 100644 index 00000000000..0d61052a5c1 --- /dev/null +++ b/queue-6.1/vxlan-fix-memory-leaks-in-error-path.patch @@ -0,0 +1,115 @@ +From e4c7d0f13f84c32c85a7262aedcdf38b73e28c98 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Jan 2023 08:55:56 +0200 +Subject: vxlan: Fix memory leaks in error path + +From: Ido Schimmel + +[ Upstream commit 06bf62944144a92d83dd14fd1378d2a288259561 ] + +The memory allocated by vxlan_vnigroup_init() is not freed in the error +path, leading to memory leaks [1]. Fix by calling +vxlan_vnigroup_uninit() in the error path. 
+ +The leaks can be reproduced by annotating gro_cells_init() with +ALLOW_ERROR_INJECTION() and then running: + + # echo "100" > /sys/kernel/debug/fail_function/probability + # echo "1" > /sys/kernel/debug/fail_function/times + # echo "gro_cells_init" > /sys/kernel/debug/fail_function/inject + # printf %#x -12 > /sys/kernel/debug/fail_function/gro_cells_init/retval + # ip link add name vxlan0 type vxlan dstport 4789 external vnifilter + RTNETLINK answers: Cannot allocate memory + +[1] +unreferenced object 0xffff88810db84a00 (size 512): + comm "ip", pid 330, jiffies 4295010045 (age 66.016s) + hex dump (first 32 bytes): + f8 d5 76 0e 81 88 ff ff 01 00 00 00 00 00 00 02 ..v............. + 03 00 04 00 48 00 00 00 00 00 00 01 04 00 01 00 ....H........... + backtrace: + [] kmalloc_trace+0x2a/0x60 + [] vxlan_vnigroup_init+0x4c/0x160 + [] vxlan_init+0x1ae/0x280 + [] register_netdevice+0x57a/0x16d0 + [] __vxlan_dev_create+0x7c7/0xa50 + [] vxlan_newlink+0xd6/0x130 + [] __rtnl_newlink+0x112b/0x18a0 + [] rtnl_newlink+0x6c/0xa0 + [] rtnetlink_rcv_msg+0x43f/0xd40 + [] netlink_rcv_skb+0x170/0x440 + [] netlink_unicast+0x53f/0x810 + [] netlink_sendmsg+0x958/0xe70 + [] ____sys_sendmsg+0x78f/0xa90 + [] ___sys_sendmsg+0x13a/0x1e0 + [] __sys_sendmsg+0x11c/0x1f0 + [] do_syscall_64+0x38/0x80 +unreferenced object 0xffff88810e76d5f8 (size 192): + comm "ip", pid 330, jiffies 4295010045 (age 66.016s) + hex dump (first 32 bytes): + 04 00 00 00 00 00 00 00 db e1 4f e7 00 00 00 00 ..........O..... + 08 d6 76 0e 81 88 ff ff 08 d6 76 0e 81 88 ff ff ..v.......v..... 
+ backtrace: + [] __kmalloc_node+0x4e/0x90 + [] kvmalloc_node+0xa6/0x1f0 + [] bucket_table_alloc.isra.0+0x83/0x460 + [] rhashtable_init+0x43b/0x7c0 + [] vxlan_vnigroup_init+0x6c/0x160 + [] vxlan_init+0x1ae/0x280 + [] register_netdevice+0x57a/0x16d0 + [] __vxlan_dev_create+0x7c7/0xa50 + [] vxlan_newlink+0xd6/0x130 + [] __rtnl_newlink+0x112b/0x18a0 + [] rtnl_newlink+0x6c/0xa0 + [] rtnetlink_rcv_msg+0x43f/0xd40 + [] netlink_rcv_skb+0x170/0x440 + [] netlink_unicast+0x53f/0x810 + [] netlink_sendmsg+0x958/0xe70 + [] ____sys_sendmsg+0x78f/0xa90 + +Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") +Signed-off-by: Ido Schimmel +Reviewed-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/vxlan/vxlan_core.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c +index 6ab669dcd1c6..d4be39b19a6b 100644 +--- a/drivers/net/vxlan/vxlan_core.c ++++ b/drivers/net/vxlan/vxlan_core.c +@@ -2917,16 +2917,23 @@ static int vxlan_init(struct net_device *dev) + vxlan_vnigroup_init(vxlan); + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); +- if (!dev->tstats) +- return -ENOMEM; ++ if (!dev->tstats) { ++ err = -ENOMEM; ++ goto err_vnigroup_uninit; ++ } + + err = gro_cells_init(&vxlan->gro_cells, dev); +- if (err) { +- free_percpu(dev->tstats); +- return err; +- } ++ if (err) ++ goto err_free_percpu; + + return 0; ++ ++err_free_percpu: ++ free_percpu(dev->tstats); ++err_vnigroup_uninit: ++ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) ++ vxlan_vnigroup_uninit(vxlan); ++ return err; + } + + static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) +-- +2.35.1 + diff --git a/queue-6.1/wifi-ath9k-use-proper-statements-in-conditionals.patch b/queue-6.1/wifi-ath9k-use-proper-statements-in-conditionals.patch new file mode 100644 index 00000000000..53d0ea17373 --- /dev/null +++ 
b/queue-6.1/wifi-ath9k-use-proper-statements-in-conditionals.patch @@ -0,0 +1,68 @@ +From e59e36223cbc4db7c0d610eb8b6fae798081682a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Dec 2022 17:55:42 +0100 +Subject: wifi: ath9k: use proper statements in conditionals +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Arnd Bergmann + +[ Upstream commit b7dc753fe33a707379e2254317794a4dad6c0fe2 ] + +A previous cleanup patch accidentally broke some conditional +expressions by replacing the safe "do {} while (0)" constructs +with empty macros. gcc points this out when extra warnings +are enabled: + +drivers/net/wireless/ath/ath9k/hif_usb.c: In function 'ath9k_skb_queue_complete': +drivers/net/wireless/ath/ath9k/hif_usb.c:251:57: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] + 251 | TX_STAT_INC(hif_dev, skb_failed); + +Make both sets of macros proper expressions again. + +Fixes: d7fc76039b74 ("ath9k: htc: clean up statistics macros") +Signed-off-by: Arnd Bergmann +Acked-by: Toke Høiland-Jørgensen +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20221215165553.1950307-1-arnd@kernel.org +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/ath/ath9k/htc.h | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h +index 30f0765fb9fd..237f4ec2cffd 100644 +--- a/drivers/net/wireless/ath/ath9k/htc.h ++++ b/drivers/net/wireless/ath/ath9k/htc.h +@@ -327,9 +327,9 @@ static inline struct ath9k_htc_tx_ctl *HTC_SKB_CB(struct sk_buff *skb) + } + + #ifdef CONFIG_ATH9K_HTC_DEBUGFS +-#define __STAT_SAFE(hif_dev, expr) ((hif_dev)->htc_handle->drv_priv ? (expr) : 0) +-#define CAB_STAT_INC(priv) ((priv)->debug.tx_stats.cab_queued++) +-#define TX_QSTAT_INC(priv, q) ((priv)->debug.tx_stats.queue_stats[q]++) ++#define __STAT_SAFE(hif_dev, expr) do { ((hif_dev)->htc_handle->drv_priv ? 
(expr) : 0); } while (0) ++#define CAB_STAT_INC(priv) do { ((priv)->debug.tx_stats.cab_queued++); } while (0) ++#define TX_QSTAT_INC(priv, q) do { ((priv)->debug.tx_stats.queue_stats[q]++); } while (0) + + #define TX_STAT_INC(hif_dev, c) \ + __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.tx_stats.c++) +@@ -378,10 +378,10 @@ void ath9k_htc_get_et_stats(struct ieee80211_hw *hw, + struct ethtool_stats *stats, u64 *data); + #else + +-#define TX_STAT_INC(hif_dev, c) +-#define TX_STAT_ADD(hif_dev, c, a) +-#define RX_STAT_INC(hif_dev, c) +-#define RX_STAT_ADD(hif_dev, c, a) ++#define TX_STAT_INC(hif_dev, c) do { } while (0) ++#define TX_STAT_ADD(hif_dev, c, a) do { } while (0) ++#define RX_STAT_INC(hif_dev, c) do { } while (0) ++#define RX_STAT_ADD(hif_dev, c, a) do { } while (0) + + #define CAB_STAT_INC(priv) + #define TX_QSTAT_INC(priv, c) +-- +2.35.1 +