git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.0
author Sasha Levin <sashal@kernel.org>
Tue, 10 Jan 2023 01:55:35 +0000 (20:55 -0500)
committer Sasha Levin <sashal@kernel.org>
Tue, 10 Jan 2023 01:55:35 +0000 (20:55 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
116 files changed:
queue-6.0/9p-client-fix-data-race-on-req-status.patch [new file with mode: 0644]
queue-6.0/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch [new file with mode: 0644]
queue-6.0/asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch [new file with mode: 0644]
queue-6.0/asoc-sof-mediatek-initialize-panic_info-to-zero.patch [new file with mode: 0644]
queue-6.0/asoc-sof-revert-core-unregister-clients-and-machine-.patch [new file with mode: 0644]
queue-6.0/bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch [new file with mode: 0644]
queue-6.0/bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch [new file with mode: 0644]
queue-6.0/bnxt_en-fix-xdp-rx-path.patch [new file with mode: 0644]
queue-6.0/bnxt_en-simplify-bnxt_xdp_buff_init.patch [new file with mode: 0644]
queue-6.0/bonding-fix-lockdep-splat-in-bond_miimon_commit.patch [new file with mode: 0644]
queue-6.0/bpf-pull-before-calling-skb_postpull_rcsum.patch [new file with mode: 0644]
queue-6.0/btrfs-check-superblock-to-ensure-the-fs-was-not-modi.patch [new file with mode: 0644]
queue-6.0/btrfs-don-t-save-block-group-root-into-super-block.patch [new file with mode: 0644]
queue-6.0/btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch [new file with mode: 0644]
queue-6.0/btrfs-fix-compat_ro-checks-against-remount.patch [new file with mode: 0644]
queue-6.0/btrfs-relax-block-group-tree-feature-dependency-chec.patch [new file with mode: 0644]
queue-6.0/btrfs-separate-block_group_tree-compat-ro-flag-from-.patch [new file with mode: 0644]
queue-6.0/caif-fix-memory-leak-in-cfctrl_linkup_request.patch [new file with mode: 0644]
queue-6.0/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch [new file with mode: 0644]
queue-6.0/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch [new file with mode: 0644]
queue-6.0/drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch [new file with mode: 0644]
queue-6.0/drm-amdkfd-fix-double-release-compute-pasid.patch [new file with mode: 0644]
queue-6.0/drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch [new file with mode: 0644]
queue-6.0/drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch [new file with mode: 0644]
queue-6.0/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch [new file with mode: 0644]
queue-6.0/drm-imx-ipuv3-plane-fix-overlay-plane-width.patch [new file with mode: 0644]
queue-6.0/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch [new file with mode: 0644]
queue-6.0/drm-panfrost-fix-gem-handle-creation-ref-counting.patch [new file with mode: 0644]
queue-6.0/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch [new file with mode: 0644]
queue-6.0/ext4-goto-right-label-failed_mount3a.patch [new file with mode: 0644]
queue-6.0/filelock-new-helper-vfs_inode_has_locks.patch [new file with mode: 0644]
queue-6.0/fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch [new file with mode: 0644]
queue-6.0/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch [new file with mode: 0644]
queue-6.0/hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch [new file with mode: 0644]
queue-6.0/ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch [new file with mode: 0644]
queue-6.0/io_uring-check-for-valid-register-opcode-earlier.patch [new file with mode: 0644]
queue-6.0/mptcp-fix-lockdep-false-positive.patch [new file with mode: 0644]
queue-6.0/net-amd-xgbe-add-missed-tasklet_kill.patch [new file with mode: 0644]
queue-6.0/net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch [new file with mode: 0644]
queue-6.0/net-ena-account-for-the-number-of-processed-bytes-in.patch [new file with mode: 0644]
queue-6.0/net-ena-don-t-register-memory-info-on-xdp-exchange.patch [new file with mode: 0644]
queue-6.0/net-ena-fix-rx_copybreak-value-update.patch [new file with mode: 0644]
queue-6.0/net-ena-fix-toeplitz-initial-hash-value.patch [new file with mode: 0644]
queue-6.0/net-ena-set-default-value-for-rx-interrupt-moderatio.patch [new file with mode: 0644]
queue-6.0/net-ena-update-numa-tph-hint-register-upon-numa-node.patch [new file with mode: 0644]
queue-6.0/net-ena-use-bitmask-to-indicate-packet-redirection.patch [new file with mode: 0644]
queue-6.0/net-hns3-add-interrupts-re-initialization-while-doin.patch [new file with mode: 0644]
queue-6.0/net-hns3-fix-miss-l3e-checking-for-rx-packet.patch [new file with mode: 0644]
queue-6.0/net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch [new file with mode: 0644]
queue-6.0/net-hns3-refactor-function-hclge_mbx_handler.patch [new file with mode: 0644]
queue-6.0/net-hns3-refine-the-handling-for-vf-heartbeat.patch [new file with mode: 0644]
queue-6.0/net-lan966x-fix-configuration-of-the-pcs.patch [new file with mode: 0644]
queue-6.0/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch [new file with mode: 0644]
queue-6.0/net-mlx5-avoid-recovery-in-probe-flows.patch [new file with mode: 0644]
queue-6.0/net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch [new file with mode: 0644]
queue-6.0/net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch [new file with mode: 0644]
queue-6.0/net-mlx5-fix-roce-setting-at-hca-level.patch [new file with mode: 0644]
queue-6.0/net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch [new file with mode: 0644]
queue-6.0/net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch [new file with mode: 0644]
queue-6.0/net-mlx5e-ct-fix-ct-debugfs-folder-name.patch [new file with mode: 0644]
queue-6.0/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch [new file with mode: 0644]
queue-6.0/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch [new file with mode: 0644]
queue-6.0/net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch [new file with mode: 0644]
queue-6.0/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch [new file with mode: 0644]
queue-6.0/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch [new file with mode: 0644]
queue-6.0/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch [new file with mode: 0644]
queue-6.0/net-sched-fix-memory-leak-in-tcindex_set_parms.patch [new file with mode: 0644]
queue-6.0/net-sparx5-fix-reading-of-the-mac-address.patch [new file with mode: 0644]
queue-6.0/net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch [new file with mode: 0644]
queue-6.0/net-vrf-determine-the-dst-using-the-original-ifindex.patch [new file with mode: 0644]
queue-6.0/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch [new file with mode: 0644]
queue-6.0/netfilter-ipset-rework-long-task-execution-when-addi.patch [new file with mode: 0644]
queue-6.0/netfilter-nf_tables-add-function-to-create-set-state.patch [new file with mode: 0644]
queue-6.0/netfilter-nf_tables-consolidate-set-description.patch [new file with mode: 0644]
queue-6.0/netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch [new file with mode: 0644]
queue-6.0/netfilter-nf_tables-perform-type-checking-for-existi.patch [new file with mode: 0644]
queue-6.0/nfc-fix-potential-resource-leaks.patch [new file with mode: 0644]
queue-6.0/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch [new file with mode: 0644]
queue-6.0/nvme-also-return-i-o-command-effects-from-nvme_comma.patch [new file with mode: 0644]
queue-6.0/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch [new file with mode: 0644]
queue-6.0/nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch [new file with mode: 0644]
queue-6.0/octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch [new file with mode: 0644]
queue-6.0/perf-lock-contention-fix-core-dump-related-to-not-fi.patch [new file with mode: 0644]
queue-6.0/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch [new file with mode: 0644]
queue-6.0/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch [new file with mode: 0644]
queue-6.0/perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch [new file with mode: 0644]
queue-6.0/perf-stat-fix-handling-of-unsupported-cgroup-events-.patch [new file with mode: 0644]
queue-6.0/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch [new file with mode: 0644]
queue-6.0/phy-qcom-qmp-combo-fix-broken-power-on.patch [new file with mode: 0644]
queue-6.0/qed-allow-sleep-in-qed_mcp_trace_dump.patch [new file with mode: 0644]
queue-6.0/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch [new file with mode: 0644]
queue-6.0/rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch [new file with mode: 0644]
queue-6.0/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch [new file with mode: 0644]
queue-6.0/selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch [new file with mode: 0644]
queue-6.0/selftests-net-return-non-zero-for-failures-reported-.patch [new file with mode: 0644]
queue-6.0/series
queue-6.0/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch [new file with mode: 0644]
queue-6.0/ublk-honor-io_uring_f_nonblock-for-handling-control-.patch [new file with mode: 0644]
queue-6.0/udf-fix-extension-of-the-last-extent-in-the-file.patch [new file with mode: 0644]
queue-6.0/usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch [new file with mode: 0644]
queue-6.0/usb-rndis_host-secure-rndis_query-check-against-int-.patch [new file with mode: 0644]
queue-6.0/vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch [new file with mode: 0644]
queue-6.0/vdpa-mlx5-fix-wrong-mac-address-deletion.patch [new file with mode: 0644]
queue-6.0/vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch [new file with mode: 0644]
queue-6.0/vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch [new file with mode: 0644]
queue-6.0/vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch [new file with mode: 0644]
queue-6.0/vdpasim-fix-memory-leak-when-freeing-iotlbs.patch [new file with mode: 0644]
queue-6.0/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch [new file with mode: 0644]
queue-6.0/vhost-fix-range-used-in-translate_desc.patch [new file with mode: 0644]
queue-6.0/vhost-vdpa-fix-an-iotlb-memory-leak.patch [new file with mode: 0644]
queue-6.0/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch [new file with mode: 0644]
queue-6.0/virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch [new file with mode: 0644]
queue-6.0/vmxnet3-correctly-report-csum_level-for-encapsulated.patch [new file with mode: 0644]
queue-6.0/vringh-fix-range-used-in-iotlb_translate.patch [new file with mode: 0644]
queue-6.0/vxlan-fix-memory-leaks-in-error-path.patch [new file with mode: 0644]
queue-6.0/wifi-ath9k-use-proper-statements-in-conditionals.patch [new file with mode: 0644]

diff --git a/queue-6.0/9p-client-fix-data-race-on-req-status.patch b/queue-6.0/9p-client-fix-data-race-on-req-status.patch
new file mode 100644 (file)
index 0000000..eafc2a5
--- /dev/null
@@ -0,0 +1,248 @@
+From bb23d52a4c5af1ba276270d1a9bb62e19d505ce4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Dec 2022 21:39:01 +0900
+Subject: 9p/client: fix data race on req->status
+
+From: Dominique Martinet <asmadeus@codewreck.org>
+
+[ Upstream commit 1a4f69ef15ec29b213e2b086b2502644e8ef76ee ]
+
+KCSAN reported a race between writing req->status in p9_client_cb and
+accessing it in p9_client_rpc's wait_event.
+
+Accesses to req itself are protected by the data barrier (writing req
+fields, write barrier, writing status // reading status, read barrier,
+reading other req fields), but status accesses themselves apparently
+also must be annotated properly with WRITE_ONCE/READ_ONCE when we
+access it without locks.
+
+Follows:
+ - error paths writing status in various threads all can notify
+p9_client_rpc, so these all also need WRITE_ONCE
+ - there's a similar read loop in trans_virtio for zc case that also
+needs READ_ONCE
+ - other reads in trans_fd should be protected by the trans_fd lock and
+lists state machine, as corresponding writers all are within trans_fd
+and should be under the same lock. If KCSAN complains on them we likely
+will have something else to fix as well, so it's better to leave them
+unmarked and look again if required.
+
+Link: https://lkml.kernel.org/r/20221205124756.426350-1-asmadeus@codewreck.org
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Suggested-by: Marco Elver <elver@google.com>
+Acked-by: Marco Elver <elver@google.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/9p/client.c       | 15 ++++++++-------
+ net/9p/trans_fd.c     | 12 ++++++------
+ net/9p/trans_rdma.c   |  4 ++--
+ net/9p/trans_virtio.c |  9 +++++----
+ net/9p/trans_xen.c    |  4 ++--
+ 5 files changed, 23 insertions(+), 21 deletions(-)
+
+diff --git a/net/9p/client.c b/net/9p/client.c
+index 8464d95805d0..1f50dce8765d 100644
+--- a/net/9p/client.c
++++ b/net/9p/client.c
+@@ -425,7 +425,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
+        * the status change is visible to another thread
+        */
+       smp_wmb();
+-      req->status = status;
++      WRITE_ONCE(req->status, status);
+       wake_up(&req->wq);
+       p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+@@ -587,7 +587,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
+       /* if we haven't received a response for oldreq,
+        * remove it from the list
+        */
+-      if (oldreq->status == REQ_STATUS_SENT) {
++      if (READ_ONCE(oldreq->status) == REQ_STATUS_SENT) {
+               if (c->trans_mod->cancelled)
+                       c->trans_mod->cancelled(c, oldreq);
+       }
+@@ -672,7 +672,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+       }
+ again:
+       /* Wait for the response */
+-      err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
++      err = wait_event_killable(req->wq,
++                                READ_ONCE(req->status) >= REQ_STATUS_RCVD);
+       /* Make sure our req is coherent with regard to updates in other
+        * threads - echoes to wmb() in the callback
+@@ -686,7 +687,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+               goto again;
+       }
+-      if (req->status == REQ_STATUS_ERROR) {
++      if (READ_ONCE(req->status) == REQ_STATUS_ERROR) {
+               p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+               err = req->t_err;
+       }
+@@ -699,7 +700,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+                       p9_client_flush(c, req);
+               /* if we received the response anyway, don't signal error */
+-              if (req->status == REQ_STATUS_RCVD)
++              if (READ_ONCE(req->status) == REQ_STATUS_RCVD)
+                       err = 0;
+       }
+ recalc_sigpending:
+@@ -768,7 +769,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
+               if (err != -ERESTARTSYS)
+                       goto recalc_sigpending;
+       }
+-      if (req->status == REQ_STATUS_ERROR) {
++      if (READ_ONCE(req->status) == REQ_STATUS_ERROR) {
+               p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+               err = req->t_err;
+       }
+@@ -781,7 +782,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
+                       p9_client_flush(c, req);
+               /* if we received the response anyway, don't signal error */
+-              if (req->status == REQ_STATUS_RCVD)
++              if (READ_ONCE(req->status) == REQ_STATUS_RCVD)
+                       err = 0;
+       }
+ recalc_sigpending:
+diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
+index 080b5de3e1ed..a2eb1363d293 100644
+--- a/net/9p/trans_fd.c
++++ b/net/9p/trans_fd.c
+@@ -202,11 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
+       list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+               list_move(&req->req_list, &cancel_list);
+-              req->status = REQ_STATUS_ERROR;
++              WRITE_ONCE(req->status, REQ_STATUS_ERROR);
+       }
+       list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
+               list_move(&req->req_list, &cancel_list);
+-              req->status = REQ_STATUS_ERROR;
++              WRITE_ONCE(req->status, REQ_STATUS_ERROR);
+       }
+       spin_unlock(&m->req_lock);
+@@ -467,7 +467,7 @@ static void p9_write_work(struct work_struct *work)
+               req = list_entry(m->unsent_req_list.next, struct p9_req_t,
+                              req_list);
+-              req->status = REQ_STATUS_SENT;
++              WRITE_ONCE(req->status, REQ_STATUS_SENT);
+               p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
+               list_move_tail(&req->req_list, &m->req_list);
+@@ -676,7 +676,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
+               return m->err;
+       spin_lock(&m->req_lock);
+-      req->status = REQ_STATUS_UNSENT;
++      WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
+       list_add_tail(&req->req_list, &m->unsent_req_list);
+       spin_unlock(&m->req_lock);
+@@ -703,7 +703,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
+       if (req->status == REQ_STATUS_UNSENT) {
+               list_del(&req->req_list);
+-              req->status = REQ_STATUS_FLSHD;
++              WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
+               p9_req_put(client, req);
+               ret = 0;
+       }
+@@ -732,7 +732,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
+        * remove it from the list.
+        */
+       list_del(&req->req_list);
+-      req->status = REQ_STATUS_FLSHD;
++      WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
+       spin_unlock(&m->req_lock);
+       p9_req_put(client, req);
+diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
+index d817d3745238..d8b0a6f3b15e 100644
+--- a/net/9p/trans_rdma.c
++++ b/net/9p/trans_rdma.c
+@@ -507,7 +507,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
+        * because doing if after could erase the REQ_STATUS_RCVD
+        * status in case of a very fast reply.
+        */
+-      req->status = REQ_STATUS_SENT;
++      WRITE_ONCE(req->status, REQ_STATUS_SENT);
+       err = ib_post_send(rdma->qp, &wr, NULL);
+       if (err)
+               goto send_error;
+@@ -517,7 +517,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
+  /* Handle errors that happened during or while preparing the send: */
+  send_error:
+-      req->status = REQ_STATUS_ERROR;
++      WRITE_ONCE(req->status, REQ_STATUS_ERROR);
+       kfree(c);
+       p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
+index b84d35cf6899..947c038a0470 100644
+--- a/net/9p/trans_virtio.c
++++ b/net/9p/trans_virtio.c
+@@ -263,7 +263,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
+       p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
+-      req->status = REQ_STATUS_SENT;
++      WRITE_ONCE(req->status, REQ_STATUS_SENT);
+ req_retry:
+       spin_lock_irqsave(&chan->lock, flags);
+@@ -469,7 +469,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
+                       inlen = n;
+               }
+       }
+-      req->status = REQ_STATUS_SENT;
++      WRITE_ONCE(req->status, REQ_STATUS_SENT);
+ req_retry_pinned:
+       spin_lock_irqsave(&chan->lock, flags);
+@@ -532,9 +532,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
+       spin_unlock_irqrestore(&chan->lock, flags);
+       kicked = 1;
+       p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
+-      err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
++      err = wait_event_killable(req->wq,
++                                READ_ONCE(req->status) >= REQ_STATUS_RCVD);
+       // RERROR needs reply (== error string) in static data
+-      if (req->status == REQ_STATUS_RCVD &&
++      if (READ_ONCE(req->status) == REQ_STATUS_RCVD &&
+           unlikely(req->rc.sdata[4] == P9_RERROR))
+               handle_rerror(req, in_hdr_len, offs, in_pages);
+diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
+index 0f862d5a5960..a103aed85465 100644
+--- a/net/9p/trans_xen.c
++++ b/net/9p/trans_xen.c
+@@ -157,7 +157,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
+                             &masked_prod, masked_cons,
+                             XEN_9PFS_RING_SIZE(ring));
+-      p9_req->status = REQ_STATUS_SENT;
++      WRITE_ONCE(p9_req->status, REQ_STATUS_SENT);
+       virt_wmb();                     /* write ring before updating pointer */
+       prod += size;
+       ring->intf->out_prod = prod;
+@@ -212,7 +212,7 @@ static void p9_xen_response(struct work_struct *work)
+                       dev_warn(&priv->dev->dev,
+                                "requested packet size too big: %d for tag %d with capacity %zd\n",
+                                h.size, h.tag, req->rc.capacity);
+-                      req->status = REQ_STATUS_ERROR;
++                      WRITE_ONCE(req->status, REQ_STATUS_ERROR);
+                       goto recv_error;
+               }
+-- 
+2.35.1
+
diff --git a/queue-6.0/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch b/queue-6.0/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch
new file mode 100644 (file)
index 0000000..8e71a1f
--- /dev/null
@@ -0,0 +1,59 @@
+From 5be43f36b9b6b753e92e86658a0db259756b4857 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 13:32:46 +0100
+Subject: ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071
+ tablet
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit a1dec9d70b6ad97087b60b81d2492134a84208c6 ]
+
+The Advantech MICA-071 tablet deviates from the defaults for
+a non CR Bay Trail based tablet in several ways:
+
+1. It uses an analog MIC on IN3 rather than using DMIC1
+2. It only has 1 speaker
+3. It needs the OVCD current threshold to be set to 1500uA instead of
+   the default 2000uA to reliably differentiate between headphones vs
+   headsets
+
+Add a quirk with these settings for this tablet.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+Link: https://lore.kernel.org/r/20221213123246.11226-1-hdegoede@redhat.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/intel/boards/bytcr_rt5640.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
+index fb9d9e271845..ddd2625bed90 100644
+--- a/sound/soc/intel/boards/bytcr_rt5640.c
++++ b/sound/soc/intel/boards/bytcr_rt5640.c
+@@ -570,6 +570,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
+                                       BYT_RT5640_SSP0_AIF1 |
+                                       BYT_RT5640_MCLK_EN),
+       },
++      {
++              /* Advantech MICA-071 */
++              .matches = {
++                      DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"),
++                      DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"),
++              },
++              /* OVCD Th = 1500uA to reliable detect head-phones vs -set */
++              .driver_data = (void *)(BYT_RT5640_IN3_MAP |
++                                      BYT_RT5640_JD_SRC_JD2_IN4N |
++                                      BYT_RT5640_OVCD_TH_1500UA |
++                                      BYT_RT5640_OVCD_SF_0P75 |
++                                      BYT_RT5640_MONO_SPEAKER |
++                                      BYT_RT5640_DIFF_MIC |
++                                      BYT_RT5640_MCLK_EN),
++      },
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"),
+-- 
+2.35.1
+
diff --git a/queue-6.0/asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch b/queue-6.0/asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch
new file mode 100644 (file)
index 0000000..34bd7a1
--- /dev/null
@@ -0,0 +1,147 @@
+From f5a4199644e1d92d67c0a66433760bb5c28daf9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Dec 2022 13:45:28 +0200
+Subject: ASoC: SOF: Intel: pci-tgl: unblock S5 entry if DMA stop has failed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kai Vehmanen <kai.vehmanen@linux.intel.com>
+
+[ Upstream commit 2aa2a5ead0ee0a358bf80a2984a641d1bf2adc2a ]
+
+If system shutdown has not been completed cleanly, it is possible the
+DMA stream shutdown has not been done, or was not clean.
+
+If this is the case, Intel TGL/ADL HDA platforms may fail to shutdown
+cleanly due to pending HDA DMA transactions. To avoid this, detect this
+scenario in the shutdown callback, and perform an additional controller
+reset. This has been tested to unblock S5 entry if this condition is
+hit.
+
+Co-developed-by: Archana Patni <archana.patni@intel.com>
+Signed-off-by: Archana Patni <archana.patni@intel.com>
+Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
+Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
+Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
+Link: https://lore.kernel.org/r/20221209114529.3909192-2-kai.vehmanen@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sof/intel/hda-dsp.c | 72 +++++++++++++++++++++++++++++++++++
+ sound/soc/sof/intel/hda.h     |  1 +
+ sound/soc/sof/intel/tgl.c     |  2 +-
+ 3 files changed, 74 insertions(+), 1 deletion(-)
+
+diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
+index eddfd77ad90f..0ab111814f1c 100644
+--- a/sound/soc/sof/intel/hda-dsp.c
++++ b/sound/soc/sof/intel/hda-dsp.c
+@@ -901,6 +901,78 @@ int hda_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state)
+       return snd_sof_dsp_set_power_state(sdev, &target_dsp_state);
+ }
++static unsigned int hda_dsp_check_for_dma_streams(struct snd_sof_dev *sdev)
++{
++      struct hdac_bus *bus = sof_to_bus(sdev);
++      struct hdac_stream *s;
++      unsigned int active_streams = 0;
++      int sd_offset;
++      u32 val;
++
++      list_for_each_entry(s, &bus->stream_list, list) {
++              sd_offset = SOF_STREAM_SD_OFFSET(s);
++              val = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR,
++                                     sd_offset);
++              if (val & SOF_HDA_SD_CTL_DMA_START)
++                      active_streams |= BIT(s->index);
++      }
++
++      return active_streams;
++}
++
++static int hda_dsp_s5_quirk(struct snd_sof_dev *sdev)
++{
++      int ret;
++
++      /*
++       * Do not assume a certain timing between the prior
++       * suspend flow, and running of this quirk function.
++       * This is needed if the controller was just put
++       * to reset before calling this function.
++       */
++      usleep_range(500, 1000);
++
++      /*
++       * Take controller out of reset to flush DMA
++       * transactions.
++       */
++      ret = hda_dsp_ctrl_link_reset(sdev, false);
++      if (ret < 0)
++              return ret;
++
++      usleep_range(500, 1000);
++
++      /* Restore state for shutdown, back to reset */
++      ret = hda_dsp_ctrl_link_reset(sdev, true);
++      if (ret < 0)
++              return ret;
++
++      return ret;
++}
++
++int hda_dsp_shutdown_dma_flush(struct snd_sof_dev *sdev)
++{
++      unsigned int active_streams;
++      int ret, ret2;
++
++      /* check if DMA cleanup has been successful */
++      active_streams = hda_dsp_check_for_dma_streams(sdev);
++
++      sdev->system_suspend_target = SOF_SUSPEND_S3;
++      ret = snd_sof_suspend(sdev->dev);
++
++      if (active_streams) {
++              dev_warn(sdev->dev,
++                       "There were active DSP streams (%#x) at shutdown, trying to recover\n",
++                       active_streams);
++              ret2 = hda_dsp_s5_quirk(sdev);
++              if (ret2 < 0)
++                      dev_err(sdev->dev, "shutdown recovery failed (%d)\n", ret2);
++      }
++
++      return ret;
++}
++
+ int hda_dsp_shutdown(struct snd_sof_dev *sdev)
+ {
+       sdev->system_suspend_target = SOF_SUSPEND_S3;
+diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
+index 5ef3e8775e36..554891e78cca 100644
+--- a/sound/soc/sof/intel/hda.h
++++ b/sound/soc/sof/intel/hda.h
+@@ -578,6 +578,7 @@ int hda_dsp_resume(struct snd_sof_dev *sdev);
+ int hda_dsp_runtime_suspend(struct snd_sof_dev *sdev);
+ int hda_dsp_runtime_resume(struct snd_sof_dev *sdev);
+ int hda_dsp_runtime_idle(struct snd_sof_dev *sdev);
++int hda_dsp_shutdown_dma_flush(struct snd_sof_dev *sdev);
+ int hda_dsp_shutdown(struct snd_sof_dev *sdev);
+ int hda_dsp_set_hw_params_upon_resume(struct snd_sof_dev *sdev);
+ void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags);
+diff --git a/sound/soc/sof/intel/tgl.c b/sound/soc/sof/intel/tgl.c
+index 6dfb4786c782..0173e5b255da 100644
+--- a/sound/soc/sof/intel/tgl.c
++++ b/sound/soc/sof/intel/tgl.c
+@@ -60,7 +60,7 @@ int sof_tgl_ops_init(struct snd_sof_dev *sdev)
+       memcpy(&sof_tgl_ops, &sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops));
+       /* probe/remove/shutdown */
+-      sof_tgl_ops.shutdown    = hda_dsp_shutdown;
++      sof_tgl_ops.shutdown    = hda_dsp_shutdown_dma_flush;
+       if (sdev->pdata->ipc_type == SOF_IPC) {
+               /* doorbell */
+-- 
+2.35.1
+
diff --git a/queue-6.0/asoc-sof-mediatek-initialize-panic_info-to-zero.patch b/queue-6.0/asoc-sof-mediatek-initialize-panic_info-to-zero.patch
new file mode 100644 (file)
index 0000000..77142e2
--- /dev/null
@@ -0,0 +1,39 @@
+From facbbe742ceacfa420528500ac226f2146149c09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 19:56:17 +0800
+Subject: ASoC: SOF: mediatek: initialize panic_info to zero
+
+From: YC Hung <yc.hung@mediatek.com>
+
+[ Upstream commit 7bd220f2ba9014b78f0304178103393554b8c4fe ]
+
+Coverity spotted that panic_info is not initialized to zero in
+mtk_adsp_dump, so the uninitialized value panic_info.linenum is used
+when calling snd_sof_get_status. Fix this Coverity report by
+initializing the panic_info struct to zero.
+
+Signed-off-by: YC Hung <yc.hung@mediatek.com>
+Reviewed-by: Curtis Malainey <cujomalainey@chromium.org>
+Link: https://lore.kernel.org/r/20221213115617.25086-1-yc.hung@mediatek.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sof/mediatek/mtk-adsp-common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/sof/mediatek/mtk-adsp-common.c b/sound/soc/sof/mediatek/mtk-adsp-common.c
+index 1e0769c668a7..de8dbe27cd0d 100644
+--- a/sound/soc/sof/mediatek/mtk-adsp-common.c
++++ b/sound/soc/sof/mediatek/mtk-adsp-common.c
+@@ -60,7 +60,7 @@ void mtk_adsp_dump(struct snd_sof_dev *sdev, u32 flags)
+ {
+       char *level = (flags & SOF_DBG_DUMP_OPTIONAL) ? KERN_DEBUG : KERN_ERR;
+       struct sof_ipc_dsp_oops_xtensa xoops;
+-      struct sof_ipc_panic_info panic_info;
++      struct sof_ipc_panic_info panic_info = {};
+       u32 stack[MTK_ADSP_STACK_DUMP_SIZE];
+       u32 status;
+-- 
+2.35.1
+
diff --git a/queue-6.0/asoc-sof-revert-core-unregister-clients-and-machine-.patch b/queue-6.0/asoc-sof-revert-core-unregister-clients-and-machine-.patch
new file mode 100644 (file)
index 0000000..bfafd98
--- /dev/null
@@ -0,0 +1,71 @@
+From 19b14ae6a32944715bdd322f7be715526d0f626c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Dec 2022 13:45:29 +0200
+Subject: ASoC: SOF: Revert: "core: unregister clients and machine drivers in
+ .shutdown"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kai Vehmanen <kai.vehmanen@linux.intel.com>
+
+[ Upstream commit 44fda61d2bcfb74a942df93959e083a4e8eff75f ]
+
+The unregister machine drivers call is not safe to do when
+kexec is used. Kexec-lite gets blocked with following backtrace:
+
+[   84.943749] Freezing user space processes ... (elapsed 0.111 seconds) done.
+[  246.784446] INFO: task kexec-lite:5123 blocked for more than 122 seconds.
+[  246.819035] Call Trace:
+[  246.821782]  <TASK>
+[  246.824186]  __schedule+0x5f9/0x1263
+[  246.828231]  schedule+0x87/0xc5
+[  246.831779]  snd_card_disconnect_sync+0xb5/0x127
+...
+[  246.889249]  snd_sof_device_shutdown+0xb4/0x150
+[  246.899317]  pci_device_shutdown+0x37/0x61
+[  246.903990]  device_shutdown+0x14c/0x1d6
+[  246.908391]  kernel_kexec+0x45/0xb9
+
+This reverts commit 83bfc7e793b555291785136c3ae86abcdc046887.
+
+Reported-by: Ricardo Ribalda <ribalda@chromium.org>
+Cc: Ricardo Ribalda <ribalda@chromium.org>
+Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
+Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
+Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
+Link: https://lore.kernel.org/r/20221209114529.3909192-3-kai.vehmanen@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sof/core.c | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
+index c99b5e6c026c..694a2d94a222 100644
+--- a/sound/soc/sof/core.c
++++ b/sound/soc/sof/core.c
+@@ -472,19 +472,10 @@ EXPORT_SYMBOL(snd_sof_device_remove);
+ int snd_sof_device_shutdown(struct device *dev)
+ {
+       struct snd_sof_dev *sdev = dev_get_drvdata(dev);
+-      struct snd_sof_pdata *pdata = sdev->pdata;
+       if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
+               cancel_work_sync(&sdev->probe_work);
+-      /*
+-       * make sure clients and machine driver(s) are unregistered to force
+-       * all userspace devices to be closed prior to the DSP shutdown sequence
+-       */
+-      sof_unregister_clients(sdev);
+-
+-      snd_sof_machine_unregister(sdev, pdata);
+-
+       if (sdev->fw_state == SOF_FW_BOOT_COMPLETE)
+               return snd_sof_shutdown(sdev);
+-- 
+2.35.1
+
diff --git a/queue-6.0/bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch b/queue-6.0/bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch
new file mode 100644 (file)
index 0000000..760a149
--- /dev/null
@@ -0,0 +1,129 @@
+From 54876c1d065e21264c5a9c03311e3c95a3d12268 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Dec 2022 22:19:39 -0500
+Subject: bnxt_en: Fix first buffer size calculations for XDP multi-buffer
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 1abeacc1979fa4a756695f5030791d8f0fa934b9 ]
+
+The size of the first buffer is always page size, and the useable
+space is the page size minus the offset and the skb_shared_info size.
+Make sure SKB and XDP buf sizes match so that the skb_shared_info
+is at the same offset seen from the SKB and XDP_BUF.
+
+build_skb() should be passed PAGE_SIZE.  xdp_init_buff() should
+be passed PAGE_SIZE as well.  xdp_get_shared_info_from_buff() will
+automatically deduct the skb_shared_info size if the XDP buffer
+has frags.  There is no need to keep bp->xdp_has_frags.
+
+Change BNXT_PAGE_MODE_BUF_SIZE to BNXT_MAX_PAGE_MODE_MTU_SBUF
+since this constant is really the MTU with ethernet header size
+subtracted.
+
+Also fix the BNXT_MAX_PAGE_MODE_MTU macro with proper parentheses.
+
+Fixes: 32861236190b ("bnxt: change receive ring space parameters")
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  9 +++++----
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h     | 15 +++++++++++----
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c |  7 +------
+ 3 files changed, 17 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 31c427d53b90..be82464e1a77 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -988,8 +988,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
+       dma_addr -= bp->rx_dma_offset;
+       dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
+                            DMA_ATTR_WEAK_ORDERING);
+-      skb = build_skb(page_address(page), BNXT_PAGE_MODE_BUF_SIZE +
+-                                          bp->rx_dma_offset);
++      skb = build_skb(page_address(page), PAGE_SIZE);
+       if (!skb) {
+               __free_page(page);
+               return NULL;
+@@ -3966,8 +3965,10 @@ void bnxt_set_ring_params(struct bnxt *bp)
+               bp->rx_agg_ring_mask = (bp->rx_agg_nr_pages * RX_DESC_CNT) - 1;
+               if (BNXT_RX_PAGE_MODE(bp)) {
+-                      rx_space = BNXT_PAGE_MODE_BUF_SIZE;
+-                      rx_size = BNXT_MAX_PAGE_MODE_MTU;
++                      rx_space = PAGE_SIZE;
++                      rx_size = PAGE_SIZE -
++                                ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) -
++                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+               } else {
+                       rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN);
+                       rx_space = rx_size + NET_SKB_PAD +
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+index d5fa43cfe524..02741d499bf4 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -591,12 +591,20 @@ struct nqe_cn {
+ #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT)
+ #define BNXT_MAX_MTU          9500
+-#define BNXT_PAGE_MODE_BUF_SIZE \
++
++/* First RX buffer page in XDP multi-buf mode
++ *
++ * +-------------------------------------------------------------------------+
++ * | XDP_PACKET_HEADROOM | bp->rx_buf_use_size              | skb_shared_info|
++ * | (bp->rx_dma_offset) |                                  |                |
++ * +-------------------------------------------------------------------------+
++ */
++#define BNXT_MAX_PAGE_MODE_MTU_SBUF \
+       ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN -       \
+        XDP_PACKET_HEADROOM)
+ #define BNXT_MAX_PAGE_MODE_MTU        \
+-      BNXT_PAGE_MODE_BUF_SIZE - \
+-      SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))
++      (BNXT_MAX_PAGE_MODE_MTU_SBUF - \
++       SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)))
+ #define BNXT_MIN_PKT_SIZE     52
+@@ -2131,7 +2139,6 @@ struct bnxt {
+ #define BNXT_DUMP_CRASH               1
+       struct bpf_prog         *xdp_prog;
+-      u8                      xdp_has_frags;
+       struct bnxt_ptp_cfg     *ptp_cfg;
+       u8                      ptp_all_rx_tstamp;
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index 2ceeaa818c1c..36d5202c0aee 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -193,9 +193,6 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+       mapping = rx_buf->mapping - bp->rx_dma_offset;
+       dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir);
+-      if (bp->xdp_has_frags)
+-              buflen = BNXT_PAGE_MODE_BUF_SIZE + offset;
+-
+       xdp_init_buff(xdp, buflen, &rxr->xdp_rxq);
+       xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false);
+ }
+@@ -404,10 +401,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
+               netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
+               return -EOPNOTSUPP;
+       }
+-      if (prog) {
++      if (prog)
+               tx_xdp = bp->rx_nr_rings;
+-              bp->xdp_has_frags = prog->aux->xdp_has_frags;
+-      }
+       tc = netdev_get_num_tc(dev);
+       if (!tc)
+-- 
+2.35.1
+
diff --git a/queue-6.0/bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch b/queue-6.0/bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch
new file mode 100644 (file)
index 0000000..cf3b059
--- /dev/null
@@ -0,0 +1,55 @@
+From 5ad3883e38f38cac0cf30b67042682d559aab5c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Dec 2022 22:19:40 -0500
+Subject: bnxt_en: Fix HDS and jumbo thresholds for RX packets
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit a056ebcc30e2f78451d66f615d2f6bdada3e6438 ]
+
+The recent XDP multi-buffer feature has introduced regressions in the
+setting of HDS and jumbo thresholds.  HDS was accidentally disabled in
+the normal mode without XDP.  This patch restores jumbo HDS placement
+when not in XDP mode.  In XDP multi-buffer mode, HDS should be disabled
+and the jumbo threshold should be set to the usable page size in the
+first page buffer.
+
+Fixes: 32861236190b ("bnxt: change receive ring space parameters")
+Reviewed-by: Mohammad Shuab Siddique <mohammad-shuab.siddique@broadcom.com>
+Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index be82464e1a77..1b38295254e2 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -5371,15 +5371,16 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id)
+       req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT);
+       req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID);
+-      if (BNXT_RX_PAGE_MODE(bp) && !BNXT_RX_JUMBO_MODE(bp)) {
++      if (BNXT_RX_PAGE_MODE(bp)) {
++              req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size);
++      } else {
+               req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
+                                         VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
+               req->enables |=
+                       cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID);
++              req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
++              req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
+       }
+-      /* thresholds not implemented in firmware yet */
+-      req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
+-      req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
+       req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+       return hwrm_req_send(bp, req);
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/bnxt_en-fix-xdp-rx-path.patch b/queue-6.0/bnxt_en-fix-xdp-rx-path.patch
new file mode 100644 (file)
index 0000000..ff7da4b
--- /dev/null
@@ -0,0 +1,84 @@
+From 41e6fb1cb7b99f51b29ca0bb929e351d3fd031f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Dec 2022 22:19:38 -0500
+Subject: bnxt_en: Fix XDP RX path
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 9b3e607871ea5ee90f10f5be3965fc07f2aa3ef7 ]
+
+The XDP program can change the starting address of the RX data buffer and
+this information needs to be passed back from bnxt_rx_xdp() to
+bnxt_rx_pkt() for the XDP_PASS case so that the SKB can point correctly
+to the modified buffer address.  Add back the data_ptr parameter to
+bnxt_rx_xdp() to make this work.
+
+Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff")
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 +++++--
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 4 ++--
+ 3 files changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 2c83bc890839..31c427d53b90 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1937,7 +1937,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+       }
+       if (xdp_active) {
+-              if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &len, event)) {
++              if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &data_ptr, &len, event)) {
+                       rc = 1;
+                       goto next_rx;
+               }
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index 1847f191577d..2ceeaa818c1c 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -222,7 +222,8 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
+  * false   - packet should be passed to the stack.
+  */
+ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
+-               struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event)
++               struct xdp_buff xdp, struct page *page, u8 **data_ptr,
++               unsigned int *len, u8 *event)
+ {
+       struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog);
+       struct bnxt_tx_ring_info *txr;
+@@ -255,8 +256,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
+               *event &= ~BNXT_RX_EVENT;
+       *len = xdp.data_end - xdp.data;
+-      if (orig_data != xdp.data)
++      if (orig_data != xdp.data) {
+               offset = xdp.data - xdp.data_hard_start;
++              *data_ptr = xdp.data_hard_start + offset;
++      }
+       switch (act) {
+       case XDP_PASS:
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+index 2bbdb8e7c506..ea430d6961df 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+@@ -18,8 +18,8 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
+                                  struct xdp_buff *xdp);
+ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
+ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
+-               struct xdp_buff xdp, struct page *page, unsigned int *len,
+-               u8 *event);
++               struct xdp_buff xdp, struct page *page, u8 **data_ptr,
++               unsigned int *len, u8 *event);
+ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp);
+ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
+                 struct xdp_frame **frames, u32 flags);
+-- 
+2.35.1
+
diff --git a/queue-6.0/bnxt_en-simplify-bnxt_xdp_buff_init.patch b/queue-6.0/bnxt_en-simplify-bnxt_xdp_buff_init.patch
new file mode 100644 (file)
index 0000000..900f36c
--- /dev/null
@@ -0,0 +1,83 @@
+From b3f8eadd40597b04eaed2665df6c3acf42d88e7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Dec 2022 22:19:37 -0500
+Subject: bnxt_en: Simplify bnxt_xdp_buff_init()
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit bbfc17e50ba2ed18dfef46b1c433d50a58566bf1 ]
+
+bnxt_xdp_buff_init() does not modify the data_ptr or the len parameters,
+so no need to pass in the addresses of these parameters.
+
+Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff")
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 6 +++---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 2 +-
+ 3 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 57cabe20aa12..2c83bc890839 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1922,7 +1922,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+       dma_addr = rx_buf->mapping;
+       if (bnxt_xdp_attached(bp, rxr)) {
+-              bnxt_xdp_buff_init(bp, rxr, cons, &data_ptr, &len, &xdp);
++              bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp);
+               if (agg_bufs) {
+                       u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp,
+                                                            cp_cons, agg_bufs,
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index c3065ec0a479..1847f191577d 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -177,7 +177,7 @@ bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
+ }
+ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+-                      u16 cons, u8 **data_ptr, unsigned int *len,
++                      u16 cons, u8 *data_ptr, unsigned int len,
+                       struct xdp_buff *xdp)
+ {
+       struct bnxt_sw_rx_bd *rx_buf;
+@@ -191,13 +191,13 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+       offset = bp->rx_offset;
+       mapping = rx_buf->mapping - bp->rx_dma_offset;
+-      dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
++      dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir);
+       if (bp->xdp_has_frags)
+               buflen = BNXT_PAGE_MODE_BUF_SIZE + offset;
+       xdp_init_buff(xdp, buflen, &rxr->xdp_rxq);
+-      xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false);
++      xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false);
+ }
+ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+index 505911ae095d..2bbdb8e7c506 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+@@ -27,7 +27,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
+ bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr);
+ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+-                      u16 cons, u8 **data_ptr, unsigned int *len,
++                      u16 cons, u8 *data_ptr, unsigned int len,
+                       struct xdp_buff *xdp);
+ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
+                             struct xdp_buff *xdp);
+-- 
+2.35.1
+
diff --git a/queue-6.0/bonding-fix-lockdep-splat-in-bond_miimon_commit.patch b/queue-6.0/bonding-fix-lockdep-splat-in-bond_miimon_commit.patch
new file mode 100644 (file)
index 0000000..a7f8b2b
--- /dev/null
@@ -0,0 +1,62 @@
+From eca1f7f4b831191b4a0667b62b064d045e27f01f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 13:08:31 +0000
+Subject: bonding: fix lockdep splat in bond_miimon_commit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 42c7ded0eeacd2ba5db599205c71c279dc715de7 ]
+
+bond_miimon_commit() is run while RTNL is held, not RCU.
+
+WARNING: suspicious RCU usage
+6.1.0-syzkaller-09671-g89529367293c #0 Not tainted
+-----------------------------
+drivers/net/bonding/bond_main.c:2704 suspicious rcu_dereference_check() usage!
+
+Fixes: e95cc44763a4 ("bonding: do failover when high prio link up")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Hangbin Liu <liuhangbin@gmail.com>
+Cc: Jay Vosburgh <j.vosburgh@gmail.com>
+Cc: Veaceslav Falico <vfalico@gmail.com>
+Cc: Andy Gospodarek <andy@greyhouse.net>
+Link: https://lore.kernel.org/r/20221220130831.1480888-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_main.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
+index 771f2a533d3f..7807113e0910 100644
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -2653,10 +2653,12 @@ static void bond_miimon_link_change(struct bonding *bond,
+ static void bond_miimon_commit(struct bonding *bond)
+ {
+-      struct slave *slave, *primary;
++      struct slave *slave, *primary, *active;
+       bool do_failover = false;
+       struct list_head *iter;
++      ASSERT_RTNL();
++
+       bond_for_each_slave(bond, slave, iter) {
+               switch (slave->link_new_state) {
+               case BOND_LINK_NOCHANGE:
+@@ -2699,8 +2701,8 @@ static void bond_miimon_commit(struct bonding *bond)
+                       bond_miimon_link_change(bond, slave, BOND_LINK_UP);
+-                      if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary ||
+-                          slave->prio > rcu_dereference(bond->curr_active_slave)->prio)
++                      active = rtnl_dereference(bond->curr_active_slave);
++                      if (!active || slave == primary || slave->prio > active->prio)
+                               do_failover = true;
+                       continue;
+-- 
+2.35.1
+
diff --git a/queue-6.0/bpf-pull-before-calling-skb_postpull_rcsum.patch b/queue-6.0/bpf-pull-before-calling-skb_postpull_rcsum.patch
new file mode 100644 (file)
index 0000000..6418390
--- /dev/null
@@ -0,0 +1,61 @@
+From f4c86114662e175805b1209a46a36e738faf3b62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 16:47:00 -0800
+Subject: bpf: pull before calling skb_postpull_rcsum()
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 54c3f1a81421f85e60ae2eaae7be3727a09916ee ]
+
+Anand hit a BUG() when pulling off headers on egress to a SW tunnel.
+We get to skb_checksum_help() with an invalid checksum offset
+(commit d7ea0d9df2a6 ("net: remove two BUG() from skb_checksum_help()")
+converted those BUGs to WARN_ONs()).
+He points out oddness in how skb_postpull_rcsum() gets used.
+Indeed looks like we should pull before "postpull", otherwise
+the CHECKSUM_PARTIAL fixup from skb_postpull_rcsum() will not
+be able to do its job:
+
+       if (skb->ip_summed == CHECKSUM_PARTIAL &&
+           skb_checksum_start_offset(skb) < 0)
+               skb->ip_summed = CHECKSUM_NONE;
+
+Reported-by: Anand Parthasarathy <anpartha@meta.com>
+Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/r/20221220004701.402165-1-kuba@kernel.org
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 3aae1885b970..50d685be517d 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3182,15 +3182,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
+ {
++      void *old_data;
++
+       /* skb_ensure_writable() is not needed here, as we're
+        * already working on an uncloned skb.
+        */
+       if (unlikely(!pskb_may_pull(skb, off + len)))
+               return -ENOMEM;
+-      skb_postpull_rcsum(skb, skb->data + off, len);
+-      memmove(skb->data + len, skb->data, off);
++      old_data = skb->data;
+       __skb_pull(skb, len);
++      skb_postpull_rcsum(skb, old_data + off, len);
++      memmove(skb->data, old_data, off);
+       return 0;
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/btrfs-check-superblock-to-ensure-the-fs-was-not-modi.patch b/queue-6.0/btrfs-check-superblock-to-ensure-the-fs-was-not-modi.patch
new file mode 100644 (file)
index 0000000..7baffb0
--- /dev/null
@@ -0,0 +1,255 @@
+From d6dbbc11f0460370dacbeee0b474cf120447eca5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Aug 2022 20:16:22 +0800
+Subject: btrfs: check superblock to ensure the fs was not modified at thaw
+ time
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit a05d3c9153145283ce9c58a1d7a9056fbb85f6a1 ]
+
+[BACKGROUND]
+There is an incident report that, one user hibernated the system, with
+one btrfs on removable device still mounted.
+
+Then by some incident, the btrfs got mounted and modified by another
+system/OS, then back to the hibernated system.
+
+After resuming from the hibernation, new write happened into the victim btrfs.
+
+Now the fs is completely broken, since the underlying btrfs is no longer
+the same one before the hibernation, and the user lost their data due to
+various transid mismatch.
+
+[REPRODUCER]
+We can emulate the situation using the following small script:
+
+  truncate -s 1G $dev
+  mkfs.btrfs -f $dev
+  mount $dev $mnt
+  fsstress -w -d $mnt -n 500
+  sync
+  xfs_freeze -f $mnt
+  cp $dev $dev.backup
+
+  # There is no way to mount the same cloned fs on the same system,
+  # as the conflicting fsid will be rejected by btrfs.
+  # Thus here we have to wipe the fs using a different btrfs.
+  mkfs.btrfs -f $dev.backup
+
+  dd if=$dev.backup of=$dev bs=1M
+  xfs_freeze -u $mnt
+  fsstress -w -d $mnt -n 20
+  umount $mnt
+  btrfs check $dev
+
+The final fsck will fail because some tree blocks have an incorrect fsid.
+
+This is enough to emulate the problem hit by the unfortunate user.
+
+[ENHANCEMENT]
+Although such case should not be that common, it can still happen from
+time to time.
+
+From the view of btrfs, we can detect any unexpected super block change,
+and if there is any unexpected change, we just mark the fs read-only,
+and thaw the fs.
+
+By this we can limit the damage to minimal, and I hope no one would lose
+their data by this anymore.
+
+Suggested-by: Goffredo Baroncelli <kreijack@libero.it>
+Link: https://lore.kernel.org/linux-btrfs/83bf3b4b-7f4c-387a-b286-9251e3991e34@bluemole.com/
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 2ba48b20049b ("btrfs: fix compat_ro checks against remount")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 25 ++++++++++++++-----
+ fs/btrfs/disk-io.h |  4 +++-
+ fs/btrfs/super.c   | 60 ++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/volumes.c |  2 +-
+ 4 files changed, 83 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index aa4bc213d301..c72074a234d2 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2602,8 +2602,8 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
+  *            1, 2    2nd and 3rd backup copy
+  *           -1       skip bytenr check
+  */
+-static int validate_super(struct btrfs_fs_info *fs_info,
+-                          struct btrfs_super_block *sb, int mirror_num)
++int btrfs_validate_super(struct btrfs_fs_info *fs_info,
++                       struct btrfs_super_block *sb, int mirror_num)
+ {
+       u64 nodesize = btrfs_super_nodesize(sb);
+       u64 sectorsize = btrfs_super_sectorsize(sb);
+@@ -2787,7 +2787,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,
+  */
+ static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
+ {
+-      return validate_super(fs_info, fs_info->super_copy, 0);
++      return btrfs_validate_super(fs_info, fs_info->super_copy, 0);
+ }
+ /*
+@@ -2801,7 +2801,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
+ {
+       int ret;
+-      ret = validate_super(fs_info, sb, -1);
++      ret = btrfs_validate_super(fs_info, sb, -1);
+       if (ret < 0)
+               goto out;
+       if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
+@@ -3835,7 +3835,7 @@ static void btrfs_end_super_write(struct bio *bio)
+ }
+ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+-                                                 int copy_num)
++                                                 int copy_num, bool drop_cache)
+ {
+       struct btrfs_super_block *super;
+       struct page *page;
+@@ -3853,6 +3853,19 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+       if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
+               return ERR_PTR(-EINVAL);
++      if (drop_cache) {
++              /* This should only be called with the primary sb. */
++              ASSERT(copy_num == 0);
++
++              /*
++               * Drop the page of the primary superblock, so later read will
++               * always read from the device.
++               */
++              invalidate_inode_pages2_range(mapping,
++                              bytenr >> PAGE_SHIFT,
++                              (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
++      }
++
+       page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
+       if (IS_ERR(page))
+               return ERR_CAST(page);
+@@ -3884,7 +3897,7 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
+        * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+        */
+       for (i = 0; i < 1; i++) {
+-              super = btrfs_read_dev_one_super(bdev, i);
++              super = btrfs_read_dev_one_super(bdev, i, false);
+               if (IS_ERR(super))
+                       continue;
+diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
+index 47ad8e0a2d33..aef981de672c 100644
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -46,10 +46,12 @@ int __cold open_ctree(struct super_block *sb,
+              struct btrfs_fs_devices *fs_devices,
+              char *options);
+ void __cold close_ctree(struct btrfs_fs_info *fs_info);
++int btrfs_validate_super(struct btrfs_fs_info *fs_info,
++                       struct btrfs_super_block *sb, int mirror_num);
+ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
+ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
+ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+-                                                 int copy_num);
++                                                 int copy_num, bool drop_cache);
+ int btrfs_commit_super(struct btrfs_fs_info *fs_info);
+ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+                                       struct btrfs_key *key);
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index ad3ce9700eaf..079855e9c881 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2562,11 +2562,71 @@ static int btrfs_freeze(struct super_block *sb)
+       return btrfs_commit_transaction(trans);
+ }
++static int check_dev_super(struct btrfs_device *dev)
++{
++      struct btrfs_fs_info *fs_info = dev->fs_info;
++      struct btrfs_super_block *sb;
++      int ret = 0;
++
++      /* This should be called with fs still frozen. */
++      ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags));
++
++      /* Missing dev, no need to check. */
++      if (!dev->bdev)
++              return 0;
++
++      /* Only need to check the primary super block. */
++      sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
++      if (IS_ERR(sb))
++              return PTR_ERR(sb);
++
+      /* btrfs_validate_super() includes fsid check against super->fsid. */
++      ret = btrfs_validate_super(fs_info, sb, 0);
++      if (ret < 0)
++              goto out;
++
++      if (btrfs_super_generation(sb) != fs_info->last_trans_committed) {
++              btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
++                      btrfs_super_generation(sb),
++                      fs_info->last_trans_committed);
++              ret = -EUCLEAN;
++              goto out;
++      }
++out:
++      btrfs_release_disk_super(sb);
++      return ret;
++}
++
+ static int btrfs_unfreeze(struct super_block *sb)
+ {
+       struct btrfs_fs_info *fs_info = btrfs_sb(sb);
++      struct btrfs_device *device;
++      int ret = 0;
++      /*
++       * Make sure the fs is not changed by accident (like hibernation then
++       * modified by other OS).
++       * If we found anything wrong, we mark the fs error immediately.
++       *
++       * And since the fs is frozen, no one can modify the fs yet, thus
++       * we don't need to hold device_list_mutex.
++       */
++      list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
++              ret = check_dev_super(device);
++              if (ret < 0) {
++                      btrfs_handle_fs_error(fs_info, ret,
++                              "super block on devid %llu got modified unexpectedly",
++                              device->devid);
++                      break;
++              }
++      }
+       clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
++
++      /*
++       * We still return 0, to allow VFS layer to unfreeze the fs even the
++       * above checks failed. Since the fs is either fine or read-only, we're
++       * safe to continue, without causing further damage.
++       */
+       return 0;
+ }
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 6f006430115a..55c8bf9a5c89 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -2029,7 +2029,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+               struct page *page;
+               int ret;
+-              disk_super = btrfs_read_dev_one_super(bdev, copy_num);
++              disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
+               if (IS_ERR(disk_super))
+                       continue;
+-- 
+2.35.1
+
diff --git a/queue-6.0/btrfs-don-t-save-block-group-root-into-super-block.patch b/queue-6.0/btrfs-don-t-save-block-group-root-into-super-block.patch
new file mode 100644 (file)
index 0000000..e4733f8
--- /dev/null
@@ -0,0 +1,213 @@
+From b95b967c6f31455e9d9ae3d3d45f09c6ff99b353 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Aug 2022 13:02:17 +0800
+Subject: btrfs: don't save block group root into super block
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 14033b08a02916e85ffc5397e4ac15337359f3ae ]
+
+The extent tree v2 needs a new root for storing all block group items,
+the whole feature hasn't been finished yet so we can afford to do some
+changes.
+
+My initial proposal years ago just added a new tree rootid, and load it
+from tree root, just like what we did for quota/free space tree/uuid/extent
+roots.
+
+But the extent tree v2 patches introduced a completely new way to store
+block group tree root into super block which is arguably wasteful.
+
+Currently there are only 3 trees stored in super blocks, and they all
+have their valid reasons:
+
+- Chunk root
+  Needed for bootstrap.
+
+- Tree root
+  Really the entry point for all trees.
+
+- Log root
+  This is special as log root has to be updated out of existing
+  transaction mechanism.
+
+There is not even any reason to put block group root into super blocks,
+the block group tree is updated at the same time as the old extent tree,
+no need for extra bootstrap/out-of-transaction update.
+
+So just move block group root from super block into tree root.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 2ba48b20049b ("btrfs: fix compat_ro checks against remount")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-rsv.c   |  1 +
+ fs/btrfs/ctree.h       | 27 ++-------------------------
+ fs/btrfs/disk-io.c     | 40 ++++++++++++++++++++--------------------
+ fs/btrfs/transaction.c |  8 --------
+ 4 files changed, 23 insertions(+), 53 deletions(-)
+
+diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
+index 06be0644dd37..6ce704d3bdd2 100644
+--- a/fs/btrfs/block-rsv.c
++++ b/fs/btrfs/block-rsv.c
+@@ -424,6 +424,7 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
+       case BTRFS_CSUM_TREE_OBJECTID:
+       case BTRFS_EXTENT_TREE_OBJECTID:
+       case BTRFS_FREE_SPACE_TREE_OBJECTID:
++      case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
+               root->block_rsv = &fs_info->delayed_refs_rsv;
+               break;
+       case BTRFS_ROOT_TREE_OBJECTID:
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index bad06add93d7..6b1a8b295970 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -280,14 +280,9 @@ struct btrfs_super_block {
+       /* the UUID written into btree blocks */
+       u8 metadata_uuid[BTRFS_FSID_SIZE];
+-      /* Extent tree v2 */
+-      __le64 block_group_root;
+-      __le64 block_group_root_generation;
+-      u8 block_group_root_level;
+-
+       /* future expansion */
+-      u8 reserved8[7];
+-      __le64 reserved[25];
++      u8 reserved8[8];
++      __le64 reserved[27];
+       u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+       struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
+@@ -2391,17 +2386,6 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
+ BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
+                  num_devices, 64);
+-/*
+- * For extent tree v2 we overload the extent root with the block group root, as
+- * we will have multiple extent roots.
+- */
+-BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
+-                       extent_root, 64);
+-BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
+-                       extent_root_gen, 64);
+-BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
+-                       struct btrfs_root_backup, extent_root_level, 8);
+-
+ /* struct btrfs_balance_item */
+ BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
+@@ -2534,13 +2518,6 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+ BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
+ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+                        uuid_tree_generation, 64);
+-BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
+-                       block_group_root, 64);
+-BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
+-                       struct btrfs_super_block,
+-                       block_group_root_generation, 64);
+-BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
+-                       block_group_root_level, 8);
+ int btrfs_super_csum_size(const struct btrfs_super_block *s);
+ const char *btrfs_super_csum_name(u16 csum_type);
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index c72074a234d2..a4f78a347a1f 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1524,6 +1524,9 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
+       if (objectid == BTRFS_UUID_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->uuid_root) ?
+                       fs_info->uuid_root : ERR_PTR(-ENOENT);
++      if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID)
++              return btrfs_grab_root(fs_info->block_group_root) ?
++                      fs_info->block_group_root : ERR_PTR(-ENOENT);
+       if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
+               struct btrfs_root *root = btrfs_global_root(fs_info, &key);
+@@ -1980,14 +1983,7 @@ static void backup_super_roots(struct btrfs_fs_info *info)
+       btrfs_set_backup_chunk_root_level(root_backup,
+                              btrfs_header_level(info->chunk_root->node));
+-      if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
+-              btrfs_set_backup_block_group_root(root_backup,
+-                                      info->block_group_root->node->start);
+-              btrfs_set_backup_block_group_root_gen(root_backup,
+-                      btrfs_header_generation(info->block_group_root->node));
+-              btrfs_set_backup_block_group_root_level(root_backup,
+-                      btrfs_header_level(info->block_group_root->node));
+-      } else {
++      if (!btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
+               struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+               struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
+@@ -2529,10 +2525,24 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
+       if (ret)
+               return ret;
+-      location.objectid = BTRFS_DEV_TREE_OBJECTID;
+       location.type = BTRFS_ROOT_ITEM_KEY;
+       location.offset = 0;
++      if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
++              location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID;
++              root = btrfs_read_tree_root(tree_root, &location);
++              if (IS_ERR(root)) {
++                      if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
++                              ret = PTR_ERR(root);
++                              goto out;
++                      }
++              } else {
++                      set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
++                      fs_info->block_group_root = root;
++              }
++      }
++
++      location.objectid = BTRFS_DEV_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root)) {
+               if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+@@ -2862,17 +2872,7 @@ static int load_important_roots(struct btrfs_fs_info *fs_info)
+               btrfs_warn(fs_info, "couldn't read tree root");
+               return ret;
+       }
+-
+-      if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+-              return 0;
+-
+-      bytenr = btrfs_super_block_group_root(sb);
+-      gen = btrfs_super_block_group_root_generation(sb);
+-      level = btrfs_super_block_group_root_level(sb);
+-      ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
+-      if (ret)
+-              btrfs_warn(fs_info, "couldn't read block group root");
+-      return ret;
++      return 0;
+ }
+ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 0bec10740ad3..8fab3b274957 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1912,14 +1912,6 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
+               super->cache_generation = 0;
+       if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
+               super->uuid_tree_generation = root_item->generation;
+-
+-      if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+-              root_item = &fs_info->block_group_root->root_item;
+-
+-              super->block_group_root = root_item->bytenr;
+-              super->block_group_root_generation = root_item->generation;
+-              super->block_group_root_level = root_item->level;
+-      }
+ }
+ int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
+-- 
+2.35.1
+
diff --git a/queue-6.0/btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch b/queue-6.0/btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch
new file mode 100644 (file)
index 0000000..081c822
--- /dev/null
@@ -0,0 +1,45 @@
+From 7832fbe8bfeb93e59f733d34a8eb47add19040d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 8 Jan 2023 08:24:19 -0500
+Subject: btrfs: fix an error handling path in btrfs_defrag_leaves()
+
+[ Upstream commit db0a4a7b8e95f9312a59a67cbd5bc589f090e13d ]
+
+All error handling paths go to 'out', except this memory allocation
+failure.
+
+This is spurious. So branch to the error handling path also in this case.
+It will add a call to:
+
+       memset(&root->defrag_progress, 0,
+              sizeof(root->defrag_progress));
+
+Fixes: 6702ed490ca0 ("Btrfs: Add run time btree defrag, and an ioctl to force btree defrag")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-defrag.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
+index b6cf39f4e7e4..072ab9a1374b 100644
+--- a/fs/btrfs/tree-defrag.c
++++ b/fs/btrfs/tree-defrag.c
+@@ -31,8 +31,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+               goto out;
+       path = btrfs_alloc_path();
+-      if (!path)
+-              return -ENOMEM;
++      if (!path) {
++              ret = -ENOMEM;
++              goto out;
++      }
+       level = btrfs_header_level(root->node);
+-- 
+2.35.1
+
diff --git a/queue-6.0/btrfs-fix-compat_ro-checks-against-remount.patch b/queue-6.0/btrfs-fix-compat_ro-checks-against-remount.patch
new file mode 100644 (file)
index 0000000..47cd728
--- /dev/null
@@ -0,0 +1,154 @@
+From 2d3eb4ad5a3518fbc0ba08521ced91eb0528fe23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 07:59:17 +0800
+Subject: btrfs: fix compat_ro checks against remount
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 2ba48b20049b5a76f34a85f853c9496d1b10533a ]
+
+[BUG]
+Even with commit 81d5d61454c3 ("btrfs: enhance unsupported compat RO
+flags handling"), btrfs can still mount a fs with unsupported compat_ro
+flags read-only, then remount it RW:
+
+  # btrfs ins dump-super /dev/loop0 | grep compat_ro_flags -A 3
+  compat_ro_flags              0x403
+                       ( FREE_SPACE_TREE |
+                         FREE_SPACE_TREE_VALID |
+                         unknown flag: 0x400 )
+
+  # mount /dev/loop0 /mnt/btrfs
+  mount: /mnt/btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.
+         dmesg(1) may have more information after failed mount system call.
+  ^^^ RW mount failed as expected ^^^
+
+  # dmesg -t | tail -n5
+  loop0: detected capacity change from 0 to 1048576
+  BTRFS: device fsid cb5b82f5-0fdd-4d81-9b4b-78533c324afa devid 1 transid 7 /dev/loop0 scanned by mount (1146)
+  BTRFS info (device loop0): using crc32c (crc32c-intel) checksum algorithm
+  BTRFS info (device loop0): using free space tree
+  BTRFS error (device loop0): cannot mount read-write because of unknown compat_ro features (0x403)
+  BTRFS error (device loop0): open_ctree failed
+
+  # mount /dev/loop0 -o ro /mnt/btrfs
+  # mount -o remount,rw /mnt/btrfs
+  ^^^ RW remount succeeded unexpectedly ^^^
+
+[CAUSE]
+Currently we use btrfs_check_features() to check compat_ro flags against
+our current mount flags.
+
+That function gets reused between open_ctree() and btrfs_remount().
+
+But for btrfs_remount(), the super block we passed in still has the old
+mount flags, thus btrfs_check_features() still believes we're mounting
+read-only.
+
+[FIX]
+Replace the existing @sb argument with @is_rw_mount.
+
+Originally we only used @sb to determine if the mount is RW.
+
+Now it's callers' responsibility to determine if the mount is RW, and
+since there are only two callers, the check is pretty simple:
+
+- caller in open_ctree()
+  Just pass !sb_rdonly().
+
+- caller in btrfs_remount()
+  Pass !(*flags & SB_RDONLY), as our check should be against the new
+  flags.
+
+Now we can correctly reject the RW remount:
+
+  # mount /dev/loop0 -o ro /mnt/btrfs
+  # mount -o remount,rw /mnt/btrfs
+  mount: /mnt/btrfs: mount point not mounted or bad option.
+         dmesg(1) may have more information after failed mount system call.
+  # dmesg -t | tail -n 1
+  BTRFS error (device loop0: state M): cannot mount read-write because of unknown compat_ro features (0x403)
+
+Reported-by: Chung-Chiang Cheng <shepjeng@gmail.com>
+Fixes: 81d5d61454c3 ("btrfs: enhance unsupported compat RO flags handling")
+CC: stable@vger.kernel.org # 5.15+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 8 +++++---
+ fs/btrfs/disk-io.h | 2 +-
+ fs/btrfs/super.c   | 2 +-
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 7834dc77c935..d7a7f6288e70 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3286,6 +3286,8 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
+ /*
+  * Do various sanity and dependency checks of different features.
+  *
++ * @is_rw_mount:      If the mount is read-write.
++ *
+  * This is the place for less strict checks (like for subpage or artificial
+  * feature dependencies).
+  *
+@@ -3296,7 +3298,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
+  * (space cache related) can modify on-disk format like free space tree and
+  * screw up certain feature dependencies.
+  */
+-int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
++int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
+ {
+       struct btrfs_super_block *disk_super = fs_info->super_copy;
+       u64 incompat = btrfs_super_incompat_flags(disk_super);
+@@ -3335,7 +3337,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
+       if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
+               incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+-      if (compat_ro_unsupp && !sb_rdonly(sb)) {
++      if (compat_ro_unsupp && is_rw_mount) {
+               btrfs_err(fs_info,
+       "cannot mount read-write because of unknown compat_ro features (0x%llx)",
+                      compat_ro);
+@@ -3538,7 +3540,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+               goto fail_alloc;
+       }
+-      ret = btrfs_check_features(fs_info, sb);
++      ret = btrfs_check_features(fs_info, !sb_rdonly(sb));
+       if (ret < 0) {
+               err = ret;
+               goto fail_alloc;
+diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
+index c67c15d4d20b..6e49979024f0 100644
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -48,7 +48,7 @@ int __cold open_ctree(struct super_block *sb,
+ void __cold close_ctree(struct btrfs_fs_info *fs_info);
+ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
+                        struct btrfs_super_block *sb, int mirror_num);
+-int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb);
++int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount);
+ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
+ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
+ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index b6f3f24ac95d..64bda2f53311 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2012,7 +2012,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
+       if (ret)
+               goto restore;
+-      ret = btrfs_check_features(fs_info, sb);
++      ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY));
+       if (ret < 0)
+               goto restore;
+-- 
+2.35.1
+
diff --git a/queue-6.0/btrfs-relax-block-group-tree-feature-dependency-chec.patch b/queue-6.0/btrfs-relax-block-group-tree-feature-dependency-chec.patch
new file mode 100644 (file)
index 0000000..090bdba
--- /dev/null
@@ -0,0 +1,318 @@
+From 3870aa6cc6c5653f0b93f9daa007ba8e5303a2b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 13:44:37 +0800
+Subject: btrfs: relax block-group-tree feature dependency checks
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit d7f67ac9a928fa158a95573406eac0a887bbc28c ]
+
+[BUG]
+When a user mistakenly attempts to clear the block group tree, which can
+not be done through mount options, by using "-o clear_cache,space_cache=v2",
+it will cause the following error on a fs with block-group-tree feature:
+
+  BTRFS info (device dm-1): force clearing of disk cache
+  BTRFS info (device dm-1): using free space tree
+  BTRFS info (device dm-1): clearing free space tree
+  BTRFS info (device dm-1): clearing compat-ro feature flag for FREE_SPACE_TREE (0x1)
+  BTRFS info (device dm-1): clearing compat-ro feature flag for FREE_SPACE_TREE_VALID (0x2)
+  BTRFS error (device dm-1): block-group-tree feature requires fres-space-tree and no-holes
+  BTRFS error (device dm-1): super block corruption detected before writing it to disk
+  BTRFS: error (device dm-1) in write_all_supers:4318: errno=-117 Filesystem corrupted (unexpected superblock corruption detected)
+  BTRFS warning (device dm-1: state E): Skipping commit of aborted transaction.
+
+[CAUSE]
+Although the dependency for block-group-tree feature is just an
+artificial one (to reduce test matrix), we put the dependency check into
+btrfs_validate_super().
+
+This is too strict, and during space cache clearing, we will have a
+window where free space tree is cleared, and we need to commit the super
+block.
+
+In that window, we had block group tree without v2 cache, and triggered
+the artificial dependency check.
+
+This is not necessary at all, especially for such a soft dependency.
+
+[FIX]
+Introduce a new helper, btrfs_check_features(), to do all the runtime
+limitation checks, including:
+
+- Unsupported incompat flags check
+
+- Unsupported compat RO flags check
+
+- Setting missing incompat flags
+
+- Artificial feature dependency checks
+  Currently only block group tree will rely on this.
+
+- Subpage runtime check for v1 cache
+
+With this helper, we can move quite some checks from
+open_ctree()/btrfs_remount() into it, and just call it after
+btrfs_parse_options().
+
+Now "-o clear_cache,space_cache=v2" will not trigger the above error
+anymore.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ edit messages ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 2ba48b20049b ("btrfs: fix compat_ro checks against remount")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 172 ++++++++++++++++++++++++++++-----------------
+ fs/btrfs/disk-io.h |   1 +
+ fs/btrfs/super.c   |  19 +----
+ 3 files changed, 113 insertions(+), 79 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 386039e13922..7834dc77c935 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3283,6 +3283,112 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
+       return ret;
+ }
++/*
++ * Do various sanity and dependency checks of different features.
++ *
++ * This is the place for less strict checks (like for subpage or artificial
++ * feature dependencies).
++ *
++ * For strict checks or possible corruption detection, see
++ * btrfs_validate_super().
++ *
++ * This should be called after btrfs_parse_options(), as some mount options
++ * (space cache related) can modify on-disk format like free space tree and
++ * screw up certain feature dependencies.
++ */
++int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
++{
++      struct btrfs_super_block *disk_super = fs_info->super_copy;
++      u64 incompat = btrfs_super_incompat_flags(disk_super);
++      const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super);
++      const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP);
++
++      if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
++              btrfs_err(fs_info,
++              "cannot mount because of unknown incompat features (0x%llx)",
++                  incompat);
++              return -EINVAL;
++      }
++
++      /* Runtime limitation for mixed block groups. */
++      if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
++          (fs_info->sectorsize != fs_info->nodesize)) {
++              btrfs_err(fs_info,
++"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
++                      fs_info->nodesize, fs_info->sectorsize);
++              return -EINVAL;
++      }
++
++      /* Mixed backref is an always-enabled feature. */
++      incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
++
++      /* Set compression related flags just in case. */
++      if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
++              incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
++      else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
++              incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
++
++      /*
++       * An ancient flag, which should really be marked deprecated.
++       * Such runtime limitation doesn't really need a incompat flag.
++       */
++      if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
++              incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
++
++      if (compat_ro_unsupp && !sb_rdonly(sb)) {
++              btrfs_err(fs_info,
++      "cannot mount read-write because of unknown compat_ro features (0x%llx)",
++                     compat_ro);
++              return -EINVAL;
++      }
++
++      /*
++       * We have unsupported RO compat features, although RO mounted, we
++       * should not cause any metadata writes, including log replay.
++       * Or we could screw up whatever the new feature requires.
++       */
++      if (compat_ro_unsupp && btrfs_super_log_root(disk_super) &&
++          !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
++              btrfs_err(fs_info,
++"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
++                        compat_ro);
++              return -EINVAL;
++      }
++
++      /*
++       * Artificial limitations for block group tree, to force
++       * block-group-tree to rely on no-holes and free-space-tree.
++       */
++      if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
++          (!btrfs_fs_incompat(fs_info, NO_HOLES) ||
++           !btrfs_test_opt(fs_info, FREE_SPACE_TREE))) {
++              btrfs_err(fs_info,
++"block-group-tree feature requires no-holes and free-space-tree features");
++              return -EINVAL;
++      }
++
++      /*
++       * Subpage runtime limitation on v1 cache.
++       *
++       * V1 space cache still has some hard codeed PAGE_SIZE usage, while
++       * we're already defaulting to v2 cache, no need to bother v1 as it's
++       * going to be deprecated anyway.
++       */
++      if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
++              btrfs_warn(fs_info,
++      "v1 space cache is not supported for page size %lu with sectorsize %u",
++                         PAGE_SIZE, fs_info->sectorsize);
++              return -EINVAL;
++      }
++
++      /* This can be called by remount, we need to protect the super block. */
++      spin_lock(&fs_info->super_lock);
++      btrfs_set_super_incompat_flags(disk_super, incompat);
++      spin_unlock(&fs_info->super_lock);
++
++      return 0;
++}
++
+ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
+                     char *options)
+ {
+@@ -3432,72 +3538,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+               goto fail_alloc;
+       }
+-      features = btrfs_super_incompat_flags(disk_super) &
+-              ~BTRFS_FEATURE_INCOMPAT_SUPP;
+-      if (features) {
+-              btrfs_err(fs_info,
+-                  "cannot mount because of unsupported optional features (0x%llx)",
+-                  features);
+-              err = -EINVAL;
+-              goto fail_alloc;
+-      }
+-
+-      features = btrfs_super_incompat_flags(disk_super);
+-      features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+-      if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
+-              features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+-      else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
+-              features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
+-
+-      /*
+-       * Flag our filesystem as having big metadata blocks if they are bigger
+-       * than the page size.
+-       */
+-      if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
+-              features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+-
+-      /*
+-       * mixed block groups end up with duplicate but slightly offset
+-       * extent buffers for the same range.  It leads to corruptions
+-       */
+-      if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
+-          (sectorsize != nodesize)) {
+-              btrfs_err(fs_info,
+-"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
+-                      nodesize, sectorsize);
+-              goto fail_alloc;
+-      }
+-
+-      /*
+-       * Needn't use the lock because there is no other task which will
+-       * update the flag.
+-       */
+-      btrfs_set_super_incompat_flags(disk_super, features);
+-
+-      features = btrfs_super_compat_ro_flags(disk_super) &
+-              ~BTRFS_FEATURE_COMPAT_RO_SUPP;
+-      if (!sb_rdonly(sb) && features) {
+-              btrfs_err(fs_info,
+-      "cannot mount read-write because of unsupported optional features (0x%llx)",
+-                     features);
+-              err = -EINVAL;
+-              goto fail_alloc;
+-      }
+-      /*
+-       * We have unsupported RO compat features, although RO mounted, we
+-       * should not cause any metadata write, including log replay.
+-       * Or we could screw up whatever the new feature requires.
+-       */
+-      if (unlikely(features && btrfs_super_log_root(disk_super) &&
+-                   !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+-              btrfs_err(fs_info,
+-"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+-                        features);
+-              err = -EINVAL;
++      ret = btrfs_check_features(fs_info, sb);
++      if (ret < 0) {
++              err = ret;
+               goto fail_alloc;
+       }
+-
+       if (sectorsize < PAGE_SIZE) {
+               struct btrfs_subpage_info *subpage_info;
+diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
+index 7e545ec09a10..c67c15d4d20b 100644
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -48,6 +48,7 @@ int __cold open_ctree(struct super_block *sb,
+ void __cold close_ctree(struct btrfs_fs_info *fs_info);
+ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
+                        struct btrfs_super_block *sb, int mirror_num);
++int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb);
+ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
+ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
+ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index 079855e9c881..b6f3f24ac95d 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2012,14 +2012,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
+       if (ret)
+               goto restore;
+-      /* V1 cache is not supported for subpage mount. */
+-      if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
+-              btrfs_warn(fs_info,
+-      "v1 space cache is not supported for page size %lu with sectorsize %u",
+-                         PAGE_SIZE, fs_info->sectorsize);
+-              ret = -EINVAL;
++      ret = btrfs_check_features(fs_info, sb);
++      if (ret < 0)
+               goto restore;
+-      }
++
+       btrfs_remount_begin(fs_info, old_opts, *flags);
+       btrfs_resize_thread_pool(fs_info,
+               fs_info->thread_pool_size, old_thread_pool_size);
+@@ -2115,15 +2111,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
+                       ret = -EINVAL;
+                       goto restore;
+               }
+-              if (btrfs_super_compat_ro_flags(fs_info->super_copy) &
+-                  ~BTRFS_FEATURE_COMPAT_RO_SUPP) {
+-                      btrfs_err(fs_info,
+-              "can not remount read-write due to unsupported optional flags 0x%llx",
+-                              btrfs_super_compat_ro_flags(fs_info->super_copy) &
+-                              ~BTRFS_FEATURE_COMPAT_RO_SUPP);
+-                      ret = -EINVAL;
+-                      goto restore;
+-              }
+               if (fs_info->fs_devices->rw_devices == 0) {
+                       ret = -EACCES;
+                       goto restore;
+-- 
+2.35.1
+
diff --git a/queue-6.0/btrfs-separate-block_group_tree-compat-ro-flag-from-.patch b/queue-6.0/btrfs-separate-block_group_tree-compat-ro-flag-from-.patch
new file mode 100644 (file)
index 0000000..8c63e8e
--- /dev/null
@@ -0,0 +1,167 @@
+From ca64c39f096f979da3d8569e766f9897b4703c22 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Aug 2022 13:02:18 +0800
+Subject: btrfs: separate BLOCK_GROUP_TREE compat RO flag from EXTENT_TREE_V2
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 1c56ab991903dce60e905a08f431c0e6f79b9b9e ]
+
+The problem of long mount time caused by block group item search is
+already known for some time, and the solution of block group tree has
+been proposed.
+
+There is really no need to bind this feature to extent tree v2; just
+introduce a compat RO flag, BLOCK_GROUP_TREE, to correctly solve the
+problem.
+
+All the code handling block group root is already in the upstream
+kernel, thus this patch really only needs to introduce the new compat RO
+flag.
+
+This patch introduces one extra artificial limitation on block group
+tree feature, that free space cache v2 and no-holes feature must be
+enabled to use this new compat RO feature.
+
+This artificial requirement is mostly to reduce the test combinations,
+and can be a guideline for future features, to mostly rely on the latest
+default features.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 2ba48b20049b ("btrfs: fix compat_ro checks against remount")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h           |  3 ++-
+ fs/btrfs/disk-io.c         | 26 ++++++++++++++------------
+ fs/btrfs/disk-io.h         |  2 +-
+ fs/btrfs/sysfs.c           |  2 ++
+ include/uapi/linux/btrfs.h |  6 ++++++
+ 5 files changed, 25 insertions(+), 14 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 6b1a8b295970..8e77acdecd25 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -302,7 +302,8 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
+ #define BTRFS_FEATURE_COMPAT_RO_SUPP                  \
+       (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |      \
+        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
+-       BTRFS_FEATURE_COMPAT_RO_VERITY)
++       BTRFS_FEATURE_COMPAT_RO_VERITY |               \
++       BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)
+ #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET      0ULL
+ #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR    0ULL
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index a4f78a347a1f..386039e13922 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1983,7 +1983,7 @@ static void backup_super_roots(struct btrfs_fs_info *info)
+       btrfs_set_backup_chunk_root_level(root_backup,
+                              btrfs_header_level(info->chunk_root->node));
+-      if (!btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
++      if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
+               struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+               struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
+@@ -2528,7 +2528,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
+       location.type = BTRFS_ROOT_ITEM_KEY;
+       location.offset = 0;
+-      if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
++      if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
+               location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID;
+               root = btrfs_read_tree_root(tree_root, &location);
+               if (IS_ERR(root)) {
+@@ -2715,6 +2715,18 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
+               ret = -EINVAL;
+       }
++      /*
++       * Artificial requirement for block-group-tree to force newer features
++       * (free-space-tree, no-holes) so the test matrix is smaller.
++       */
++      if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
++          (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
++           !btrfs_fs_incompat(fs_info, NO_HOLES))) {
++              btrfs_err(fs_info,
++              "block-group-tree feature requires fres-space-tree and no-holes");
++              ret = -EINVAL;
++      }
++
+       if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
+                  BTRFS_FSID_SIZE) != 0) {
+               btrfs_err(fs_info,
+@@ -2884,16 +2896,6 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
+       int ret = 0;
+       int i;
+-      if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+-              struct btrfs_root *root;
+-
+-              root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
+-                                      GFP_KERNEL);
+-              if (!root)
+-                      return -ENOMEM;
+-              fs_info->block_group_root = root;
+-      }
+-
+       for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
+               if (handle_error) {
+                       if (!IS_ERR(tree_root->node))
+diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
+index aef981de672c..7e545ec09a10 100644
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -105,7 +105,7 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
+ static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
+ {
+-      if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
++      if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
+               return fs_info->block_group_root;
+       return btrfs_extent_root(fs_info, 0);
+ }
+diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
+index 00b97e6eb507..897367ba68d2 100644
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -286,6 +286,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
+ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+ BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
+ BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
++BTRFS_FEAT_ATTR_COMPAT_RO(block_group_tree, BLOCK_GROUP_TREE);
+ BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
+ #ifdef CONFIG_BLK_DEV_ZONED
+ BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
+@@ -317,6 +318,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
+       BTRFS_FEAT_ATTR_PTR(metadata_uuid),
+       BTRFS_FEAT_ATTR_PTR(free_space_tree),
+       BTRFS_FEAT_ATTR_PTR(raid1c34),
++      BTRFS_FEAT_ATTR_PTR(block_group_tree),
+ #ifdef CONFIG_BLK_DEV_ZONED
+       BTRFS_FEAT_ATTR_PTR(zoned),
+ #endif
+diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
+index 7ada84e4a3ed..5655e89b962b 100644
+--- a/include/uapi/linux/btrfs.h
++++ b/include/uapi/linux/btrfs.h
+@@ -290,6 +290,12 @@ struct btrfs_ioctl_fs_info_args {
+ #define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
+ #define BTRFS_FEATURE_COMPAT_RO_VERITY                        (1ULL << 2)
++/*
++ * Put all block group items into a dedicated block group tree, greatly
++ * reducing mount time for large filesystem due to better locality.
++ */
++#define BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE      (1ULL << 3)
++
+ #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF  (1ULL << 0)
+ #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+ #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS   (1ULL << 2)
+-- 
+2.35.1
+
diff --git a/queue-6.0/caif-fix-memory-leak-in-cfctrl_linkup_request.patch b/queue-6.0/caif-fix-memory-leak-in-cfctrl_linkup_request.patch
new file mode 100644 (file)
index 0000000..bdc142c
--- /dev/null
@@ -0,0 +1,47 @@
+From d8900bdbe7ac62801d750f1489fb347585a286c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jan 2023 14:51:46 +0800
+Subject: caif: fix memory leak in cfctrl_linkup_request()
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit fe69230f05897b3de758427b574fc98025dfc907 ]
+
+When linktype is unknown or kzalloc failed in cfctrl_linkup_request(),
+pkt is not released. Add release process to error path.
+
+Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack")
+Fixes: 8d545c8f958f ("caif: Disconnect without waiting for response")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Link: https://lore.kernel.org/r/20230104065146.1153009-1-shaozhengchao@huawei.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/caif/cfctrl.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
+index 2809cbd6b7f7..d8cb4b2a076b 100644
+--- a/net/caif/cfctrl.c
++++ b/net/caif/cfctrl.c
+@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
+       default:
+               pr_warn("Request setup of bad link type = %d\n",
+                       param->linktype);
++              cfpkt_destroy(pkt);
+               return -EINVAL;
+       }
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+-      if (!req)
++      if (!req) {
++              cfpkt_destroy(pkt);
+               return -ENOMEM;
++      }
++
+       req->client_layer = user_layer;
+       req->cmd = CFCTRL_CMD_LINK_SETUP;
+       req->param = *param;
+-- 
+2.35.1
+
diff --git a/queue-6.0/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch b/queue-6.0/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch
new file mode 100644 (file)
index 0000000..a835c1c
--- /dev/null
@@ -0,0 +1,85 @@
+From dd9e657557b486b1f4b471d4f53a68fd0c0fcd4c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 10:43:21 +0800
+Subject: ceph: switch to vfs_inode_has_locks() to fix file lock bug
+
+From: Xiubo Li <xiubli@redhat.com>
+
+[ Upstream commit 461ab10ef7e6ea9b41a0571a7fc6a72af9549a3c ]
+
+POSIX locks all use the same owner, which is the thread id, and
+multiple POSIX locks can be merged into a single one, so checking
+whether the 'file' has locks may fail.
+
+A file where some openers use locking and others don't is a
+really odd usage pattern though. Locks are like stoplights -- they
+only work if everyone pays attention to them.
+
+Just switch ceph_get_caps() to check whether any locks are set on
+the inode. If there are POSIX/OFD/FLOCK locks on the file at the
+time, we should set CHECK_FILELOCK, regardless of what fd was used
+to set the lock.
+
+Fixes: ff5d913dfc71 ("ceph: return -EIO if read/write against filp that lost file locks")
+Signed-off-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/caps.c  | 2 +-
+ fs/ceph/locks.c | 4 ----
+ fs/ceph/super.h | 1 -
+ 3 files changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
+index 02b5c0ac5654..af8dbcf932ab 100644
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -2910,7 +2910,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
+       while (true) {
+               flags &= CEPH_FILE_MODE_MASK;
+-              if (atomic_read(&fi->num_locks))
++              if (vfs_inode_has_locks(inode))
+                       flags |= CHECK_FILELOCK;
+               _got = 0;
+               ret = try_get_cap_refs(inode, need, want, endoff,
+diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
+index 3e2843e86e27..b191426bf880 100644
+--- a/fs/ceph/locks.c
++++ b/fs/ceph/locks.c
+@@ -32,18 +32,14 @@ void __init ceph_flock_init(void)
+ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
+ {
+-      struct ceph_file_info *fi = dst->fl_file->private_data;
+       struct inode *inode = file_inode(dst->fl_file);
+       atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+-      atomic_inc(&fi->num_locks);
+ }
+ static void ceph_fl_release_lock(struct file_lock *fl)
+ {
+-      struct ceph_file_info *fi = fl->fl_file->private_data;
+       struct inode *inode = file_inode(fl->fl_file);
+       struct ceph_inode_info *ci = ceph_inode(inode);
+-      atomic_dec(&fi->num_locks);
+       if (atomic_dec_and_test(&ci->i_filelock_ref)) {
+               /* clear error when all locks are released */
+               spin_lock(&ci->i_ceph_lock);
+diff --git a/fs/ceph/super.h b/fs/ceph/super.h
+index 40630e6f691c..ae4126f63410 100644
+--- a/fs/ceph/super.h
++++ b/fs/ceph/super.h
+@@ -788,7 +788,6 @@ struct ceph_file_info {
+       struct list_head rw_contexts;
+       u32 filp_gen;
+-      atomic_t num_locks;
+ };
+ struct ceph_dir_file_info {
+-- 
+2.35.1
+
diff --git a/queue-6.0/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch b/queue-6.0/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch
new file mode 100644 (file)
index 0000000..afd8fcf
--- /dev/null
@@ -0,0 +1,39 @@
+From 1aaaaf31f205731611f4a8ef22e903d239fd52da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 12:53:35 +0300
+Subject: drivers/net/bonding/bond_3ad: return when there's no aggregator
+
+From: Daniil Tatianin <d-tatianin@yandex-team.ru>
+
+[ Upstream commit 9c807965483f42df1d053b7436eedd6cf28ece6f ]
+
+Otherwise we would dereference a NULL aggregator pointer when calling
+__set_agg_ports_ready on the line below.
+
+Found by Linux Verification Center (linuxtesting.org) with the SVACE
+static analysis tool.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_3ad.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index e58a1e0cadd2..9270977e6c7f 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -1540,6 +1540,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
+                       slave_err(bond->dev, port->slave->dev,
+                                 "Port %d did not find a suitable aggregator\n",
+                                 port->actor_port_number);
++                      return;
+               }
+       }
+       /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch b/queue-6.0/drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch
new file mode 100644 (file)
index 0000000..3d46280
--- /dev/null
@@ -0,0 +1,75 @@
+From a72ba4b570cb8f6709d6e41573e4b3abcff9ec00 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 10 Dec 2022 02:51:19 -0500
+Subject: drm/amdgpu: Fix size validation for non-exclusive domains (v4)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Luben Tuikov <luben.tuikov@amd.com>
+
+[ Upstream commit 7554886daa31eacc8e7fac9e15bbce67d10b8f1f ]
+
+Fix amdgpu_bo_validate_size() to check whether the TTM domain manager for the
+requested memory exists, else we get a kernel oops when dereferencing "man".
+
+v2: Make the patch standalone, i.e. not dependent on local patches.
+v3: Preserve old behaviour and just check that the manager pointer is not
+    NULL.
+v4: Complain if GTT domain requested and it is uninitialized--most likely a
+    bug.
+
+Cc: Alex Deucher <Alexander.Deucher@amd.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: AMD Graphics <amd-gfx@lists.freedesktop.org>
+Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index bfe0fc258fc1..60ab2d952d5c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -446,27 +446,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
+       /*
+        * If GTT is part of requested domains the check must succeed to
+-       * allow fall back to GTT
++       * allow fall back to GTT.
+        */
+       if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+               man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+-              if (size < man->size)
++              if (man && size < man->size)
+                       return true;
+-              else
+-                      goto fail;
+-      }
+-
+-      if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
++              else if (!man)
++                      WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
++              goto fail;
++      } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+               man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+-              if (size < man->size)
++              if (man && size < man->size)
+                       return true;
+-              else
+-                      goto fail;
++              goto fail;
+       }
+-
+       /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
+       return true;
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-amdkfd-fix-double-release-compute-pasid.patch b/queue-6.0/drm-amdkfd-fix-double-release-compute-pasid.patch
new file mode 100644 (file)
index 0000000..11e17dc
--- /dev/null
@@ -0,0 +1,181 @@
+From 57b6f027f238702f280bd087c4e86070c726204d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 00:50:03 -0500
+Subject: drm/amdkfd: Fix double release compute pasid
+
+From: Philip Yang <Philip.Yang@amd.com>
+
+[ Upstream commit 1a799c4c190ea9f0e81028e3eb3037ed0ab17ff5 ]
+
+If kfd_process_device_init_vm returns failure after vm is converted to
+compute vm and vm->pasid set to compute pasid, KFD will not take
+pdd->drm_file reference. As a result, drm close file handler may be
+called to release the compute pasid before KFD process destroy worker
+releases the same pasid and sets vm->pasid to zero, which generates the
+WARNING backtrace and NULL pointer access below.
+
+Add helper amdgpu_amdkfd_gpuvm_set_vm_pasid and call it at the last step
+of kfd_process_device_init_vm, to ensure vm pasid is the original pasid
+if acquiring vm failed or is the compute pasid with pdd->drm_file
+reference taken to avoid double release same pasid.
+
+ amdgpu: Failed to create process VM object
+ ida_free called for id=32770 which is not allocated.
+ WARNING: CPU: 57 PID: 72542 at ../lib/idr.c:522 ida_free+0x96/0x140
+ RIP: 0010:ida_free+0x96/0x140
+ Call Trace:
+  amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu]
+  amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu]
+  drm_file_free.part.13+0x216/0x270 [drm]
+  drm_close_helper.isra.14+0x60/0x70 [drm]
+  drm_release+0x6e/0xf0 [drm]
+  __fput+0xcc/0x280
+  ____fput+0xe/0x20
+  task_work_run+0x96/0xc0
+  do_exit+0x3d0/0xc10
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ RIP: 0010:ida_free+0x76/0x140
+ Call Trace:
+  amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu]
+  amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu]
+  drm_file_free.part.13+0x216/0x270 [drm]
+  drm_close_helper.isra.14+0x60/0x70 [drm]
+  drm_release+0x6e/0xf0 [drm]
+  __fput+0xcc/0x280
+  ____fput+0xe/0x20
+  task_work_run+0x96/0xc0
+  do_exit+0x3d0/0xc10
+
+Signed-off-by: Philip Yang <Philip.Yang@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  4 +-
+ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 39 +++++++++++++------
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 12 ++++--
+ 3 files changed, 40 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+index 647220a8762d..30f145dc8724 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+@@ -265,8 +265,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
+       (&((struct amdgpu_fpriv *)                                      \
+               ((struct drm_file *)(drm_priv))->driver_priv)->vm)
++int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
++                                   struct file *filp, u32 pasid);
+ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+-                                      struct file *filp, u32 pasid,
++                                      struct file *filp,
+                                       void **process_info,
+                                       struct dma_fence **ef);
+ void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index 6659630303a3..ba5a09c2b3ce 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -1471,10 +1471,9 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+       amdgpu_bo_unreserve(bo);
+ }
+-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+-                                         struct file *filp, u32 pasid,
+-                                         void **process_info,
+-                                         struct dma_fence **ef)
++int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
++                                   struct file *filp, u32 pasid)
++
+ {
+       struct amdgpu_fpriv *drv_priv;
+       struct amdgpu_vm *avm;
+@@ -1485,10 +1484,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+               return ret;
+       avm = &drv_priv->vm;
+-      /* Already a compute VM? */
+-      if (avm->process_info)
+-              return -EINVAL;
+-
+       /* Free the original amdgpu allocated pasid,
+        * will be replaced with kfd allocated pasid.
+        */
+@@ -1497,14 +1492,36 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+               amdgpu_vm_set_pasid(adev, avm, 0);
+       }
+-      /* Convert VM into a compute VM */
+-      ret = amdgpu_vm_make_compute(adev, avm);
++      ret = amdgpu_vm_set_pasid(adev, avm, pasid);
+       if (ret)
+               return ret;
+-      ret = amdgpu_vm_set_pasid(adev, avm, pasid);
++      return 0;
++}
++
++int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
++                                         struct file *filp,
++                                         void **process_info,
++                                         struct dma_fence **ef)
++{
++      struct amdgpu_fpriv *drv_priv;
++      struct amdgpu_vm *avm;
++      int ret;
++
++      ret = amdgpu_file_to_fpriv(filp, &drv_priv);
+       if (ret)
+               return ret;
++      avm = &drv_priv->vm;
++
++      /* Already a compute VM? */
++      if (avm->process_info)
++              return -EINVAL;
++
++      /* Convert VM into a compute VM */
++      ret = amdgpu_vm_make_compute(adev, avm);
++      if (ret)
++              return ret;
++
+       /* Initialize KFD part of the VM and process info */
+       ret = init_kfd_vm(avm, process_info, ef);
+       if (ret)
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 04678f9e214b..febf0e9f7af1 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -1581,9 +1581,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+       p = pdd->process;
+       dev = pdd->dev;
+-      ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
+-              dev->adev, drm_file, p->pasid,
+-              &p->kgd_process_info, &p->ef);
++      ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file,
++                                                   &p->kgd_process_info,
++                                                   &p->ef);
+       if (ret) {
+               pr_err("Failed to create process VM object\n");
+               return ret;
+@@ -1598,10 +1598,16 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+       if (ret)
+               goto err_init_cwsr;
++      ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid);
++      if (ret)
++              goto err_set_pasid;
++
+       pdd->drm_file = drm_file;
+       return 0;
++err_set_pasid:
++      kfd_process_device_destroy_cwsr_dgpu(pdd);
+ err_init_cwsr:
+       kfd_process_device_destroy_ib_mem(pdd);
+ err_reserve_ib_mem:
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch b/queue-6.0/drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch
new file mode 100644 (file)
index 0000000..6d1a837
--- /dev/null
@@ -0,0 +1,82 @@
+From e27985b6f4a62bb7e9670f29ad7451dc351d4a64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Dec 2022 10:15:17 -0500
+Subject: drm/amdkfd: Fix kfd_process_device_init_vm error handling
+
+From: Philip Yang <Philip.Yang@amd.com>
+
+[ Upstream commit 29d48b87db64b6697ddad007548e51d032081c59 ]
+
+The error path should only destroy the ib_mem and let the process cleanup
+worker free the outstanding BOs. Reset the pointer in the pdd->qpd structure
+to avoid NULL pointer access in the process destroy worker.
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000010
+ Call Trace:
+  amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel+0x46/0xb0 [amdgpu]
+  kfd_process_device_destroy_cwsr_dgpu+0x40/0x70 [amdgpu]
+  kfd_process_destroy_pdds+0x71/0x190 [amdgpu]
+  kfd_process_wq_release+0x2a2/0x3b0 [amdgpu]
+  process_one_work+0x2a1/0x600
+  worker_thread+0x39/0x3d0
+
+Signed-off-by: Philip Yang <Philip.Yang@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 6c83a519b3a1..04678f9e214b 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -689,13 +689,13 @@ void kfd_process_destroy_wq(void)
+ }
+ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
+-                      struct kfd_process_device *pdd, void *kptr)
++                      struct kfd_process_device *pdd, void **kptr)
+ {
+       struct kfd_dev *dev = pdd->dev;
+-      if (kptr) {
++      if (kptr && *kptr) {
+               amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
+-              kptr = NULL;
++              *kptr = NULL;
+       }
+       amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
+@@ -795,7 +795,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
+       if (!qpd->ib_kaddr || !qpd->ib_base)
+               return;
+-      kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
++      kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
+ }
+ struct kfd_process *kfd_create_process(struct file *filep)
+@@ -1277,7 +1277,7 @@ static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
+       if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
+               return;
+-      kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
++      kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
+ }
+ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
+@@ -1603,8 +1603,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+       return 0;
+ err_init_cwsr:
++      kfd_process_device_destroy_ib_mem(pdd);
+ err_reserve_ib_mem:
+-      kfd_process_device_free_bos(pdd);
+       pdd->drm_priv = NULL;
+       return ret;
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch b/queue-6.0/drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch
new file mode 100644 (file)
index 0000000..3a83224
--- /dev/null
@@ -0,0 +1,67 @@
+From b09de68e7ce03fe6f7cfadefbd2ace1d47475e6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 00:56:41 +0800
+Subject: drm/i915/gvt: fix double free bug in split_2MB_gtt_entry
+
+From: Zheng Wang <zyytlz.wz@163.com>
+
+[ Upstream commit 4a61648af68f5ba4884f0e3b494ee1cabc4b6620 ]
+
+If intel_gvt_dma_map_guest_page failed, it will call
+ppgtt_invalidate_spt, which will finally free the spt.
+But the caller function ppgtt_populate_spt_by_guest_entry
+does not notice that, it will free spt again in its error
+path.
+
+Fix this by canceling the mapping of DMA address and freeing sub_spt.
+Besides, leave the handle of spt destroy to caller function instead
+of callee function when error occurs.
+
+Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support")
+Signed-off-by: Zheng Wang <zyytlz.wz@163.com>
+Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/20221229165641.1192455-1-zyytlz.wz@163.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gvt/gtt.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
+index ce0eb03709c3..80c60754a5c1 100644
+--- a/drivers/gpu/drm/i915/gvt/gtt.c
++++ b/drivers/gpu/drm/i915/gvt/gtt.c
+@@ -1214,10 +1214,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
+       for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
+               ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
+                                                  PAGE_SIZE, &dma_addr);
+-              if (ret) {
+-                      ppgtt_invalidate_spt(spt);
+-                      return ret;
+-              }
++              if (ret)
++                      goto err;
+               sub_se.val64 = se->val64;
+               /* Copy the PAT field from PDE. */
+@@ -1236,6 +1234,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
+       ops->set_pfn(se, sub_spt->shadow_page.mfn);
+       ppgtt_set_shadow_entry(spt, se, index);
+       return 0;
++err:
++      /* Cancel the existing addess mappings of DMA addr. */
++      for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
++              gvt_vdbg_mm("invalidate 4K entry\n");
++              ppgtt_invalidate_pte(sub_spt, &sub_se);
++      }
++      /* Release the new allocated spt. */
++      trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
++              sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
++      ppgtt_free_spt(sub_spt);
++      return ret;
+ }
+ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch b/queue-6.0/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch
new file mode 100644 (file)
index 0000000..7523c6b
--- /dev/null
@@ -0,0 +1,36 @@
+From 09796492f401ef00f61f9b9165340abbb3256085 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Nov 2022 16:15:18 +0300
+Subject: drm/i915: unpin on error in intel_vgpu_shadow_mm_pin()
+
+From: Dan Carpenter <error27@gmail.com>
+
+[ Upstream commit 3792fc508c095abd84b10ceae12bd773e61fdc36 ]
+
+Call intel_vgpu_unpin_mm() on this error path.
+
+Fixes: 418741480809 ("drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/Y3OQ5tgZIVxyQ/WV@kili
+Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gvt/scheduler.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
+index d6fe94cd0fdb..8342d95f56cb 100644
+--- a/drivers/gpu/drm/i915/gvt/scheduler.c
++++ b/drivers/gpu/drm/i915/gvt/scheduler.c
+@@ -696,6 +696,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
+       if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
+           !workload->shadow_mm->ppgtt_mm.shadowed) {
++              intel_vgpu_unpin_mm(workload->shadow_mm);
+               gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
+               return -EINVAL;
+       }
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-imx-ipuv3-plane-fix-overlay-plane-width.patch b/queue-6.0/drm-imx-ipuv3-plane-fix-overlay-plane-width.patch
new file mode 100644 (file)
index 0000000..64da214
--- /dev/null
@@ -0,0 +1,82 @@
+From 46dc2da41e000aca5c60b399bf4de3559ae516d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Nov 2022 15:14:20 +0100
+Subject: drm/imx: ipuv3-plane: Fix overlay plane width
+
+From: Philipp Zabel <p.zabel@pengutronix.de>
+
+[ Upstream commit 92d43bd3bc9728c1fb114d7011d46f5ea9489e28 ]
+
+ipu_src_rect_width() was introduced to support odd screen resolutions
+such as 1366x768 by internally rounding up primary plane width to a
+multiple of 8 and compensating with reduced horizontal blanking.
+This also caused overlay plane width to be rounded up, which was not
+intended. Fix overlay plane width by limiting the rounding up to the
+primary plane.
+
+drm_rect_width(&new_state->src) >> 16 is the same value as
+drm_rect_width(dst) because there is no plane scaling support.
+
+Fixes: 94dfec48fca7 ("drm/imx: Add 8 pixel alignment fix")
+Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
+Link: https://lore.kernel.org/r/20221108141420.176696-1-p.zabel@pengutronix.de
+Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221108141420.176696-1-p.zabel@pengutronix.de
+Tested-by: Ian Ray <ian.ray@ge.com>
+(cherry picked from commit 4333472f8d7befe62359fecb1083cd57a6e07bfc)
+Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/imx/ipuv3-plane.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
+index ea5f594955df..4b05f310071c 100644
+--- a/drivers/gpu/drm/imx/ipuv3-plane.c
++++ b/drivers/gpu/drm/imx/ipuv3-plane.c
+@@ -615,6 +615,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+               break;
+       }
++      if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG)
++              width = ipu_src_rect_width(new_state);
++      else
++              width = drm_rect_width(&new_state->src) >> 16;
++
+       eba = drm_plane_state_to_eba(new_state, 0);
+       /*
+@@ -623,8 +628,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+        */
+       if (ipu_state->use_pre) {
+               axi_id = ipu_chan_assign_axi_id(ipu_plane->dma);
+-              ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id,
+-                                        ipu_src_rect_width(new_state),
++              ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width,
+                                         drm_rect_height(&new_state->src) >> 16,
+                                         fb->pitches[0], fb->format->format,
+                                         fb->modifier, &eba);
+@@ -679,9 +683,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+               break;
+       }
+-      ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8));
++      ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width);
+-      width = ipu_src_rect_width(new_state);
+       height = drm_rect_height(&new_state->src) >> 16;
+       info = drm_format_info(fb->format->format);
+       ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0],
+@@ -745,8 +748,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+               ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16);
+               ipu_cpmem_zero(ipu_plane->alpha_ch);
+-              ipu_cpmem_set_resolution(ipu_plane->alpha_ch,
+-                                       ipu_src_rect_width(new_state),
++              ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width,
+                                        drm_rect_height(&new_state->src) >> 16);
+               ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8);
+               ipu_cpmem_set_high_priority(ipu_plane->alpha_ch);
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch b/queue-6.0/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch
new file mode 100644 (file)
index 0000000..bd2394c
--- /dev/null
@@ -0,0 +1,56 @@
+From 656c1306deede00f5ba14b02fc567535c811e206 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 09:43:05 +0100
+Subject: drm/meson: Reduce the FIFO lines held when AFBC is not used
+
+From: Carlo Caione <ccaione@baylibre.com>
+
+[ Upstream commit 3b754ed6d1cd90017e66e5cc16f3923e4a952ffc ]
+
+Having a bigger number of FIFO lines held after vsync is only useful to
+SoCs using AFBC to give time to the AFBC decoder to be reset, configured
+and enabled again.
+
+For SoCs not using AFBC this, on the contrary, causes issues on some
+displays and a vertical offset of a few pixels in the displayed image.
+
+Conditionally increase the number of lines held after vsync only for
+SoCs using AFBC, leaving the default value for all the others.
+
+Fixes: 24e0d4058eff ("drm/meson: hold 32 lines after vsync to give time for AFBC start")
+Signed-off-by: Carlo Caione <ccaione@baylibre.com>
+Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Acked-by: Neil Armstrong <neil.armstrong@linaro.org>
+[narmstrong: added fixes tag]
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221216-afbc_s905x-v1-0-033bebf780d9@baylibre.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/meson/meson_viu.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
+index d4b907889a21..cd399b0b7181 100644
+--- a/drivers/gpu/drm/meson/meson_viu.c
++++ b/drivers/gpu/drm/meson/meson_viu.c
+@@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv)
+       /* Initialize OSD1 fifo control register */
+       reg = VIU_OSD_DDR_PRIORITY_URGENT |
+-              VIU_OSD_HOLD_FIFO_LINES(31) |
+               VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */
+               VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */
+               VIU_OSD_FIFO_LIMITS(2);      /* fifo_lim: 2*16=32 */
+       if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+-              reg |= VIU_OSD_BURST_LENGTH_32;
++              reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31));
+       else
+-              reg |= VIU_OSD_BURST_LENGTH_64;
++              reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4));
+       writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT));
+       writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT));
+-- 
+2.35.1
+
diff --git a/queue-6.0/drm-panfrost-fix-gem-handle-creation-ref-counting.patch b/queue-6.0/drm-panfrost-fix-gem-handle-creation-ref-counting.patch
new file mode 100644 (file)
index 0000000..d1b6da5
--- /dev/null
@@ -0,0 +1,138 @@
+From 04a6f0c4e6c089353ecf580f0e40aa136d9d7dcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 14:01:30 +0000
+Subject: drm/panfrost: Fix GEM handle creation ref-counting
+
+From: Steven Price <steven.price@arm.com>
+
+[ Upstream commit 4217c6ac817451d5116687f3cc6286220dc43d49 ]
+
+panfrost_gem_create_with_handle() previously returned a BO but with the
+only reference being from the handle, which user space could in theory
+guess and release, causing a use-after-free. Additionally if the call to
+panfrost_gem_mapping_get() in panfrost_ioctl_create_bo() failed then
+a(nother) reference on the BO was dropped.
+
+The _create_with_handle() is a problematic pattern, so ditch it and
+instead create the handle in panfrost_ioctl_create_bo(). If the call to
+panfrost_gem_mapping_get() fails then this means that user space has
+indeed gone behind our back and freed the handle. In which case just
+return an error code.
+
+Reported-by: Rob Clark <robdclark@chromium.org>
+Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
+Signed-off-by: Steven Price <steven.price@arm.com>
+Reviewed-by: Rob Clark <robdclark@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221219140130.410578-1-steven.price@arm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/panfrost/panfrost_drv.c | 27 ++++++++++++++++---------
+ drivers/gpu/drm/panfrost/panfrost_gem.c | 16 +--------------
+ drivers/gpu/drm/panfrost/panfrost_gem.h |  5 +----
+ 3 files changed, 20 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
+index 2fa5afe21288..919e6cc04982 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
+@@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+       struct panfrost_gem_object *bo;
+       struct drm_panfrost_create_bo *args = data;
+       struct panfrost_gem_mapping *mapping;
++      int ret;
+       if (!args->size || args->pad ||
+           (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
+@@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+           !(args->flags & PANFROST_BO_NOEXEC))
+               return -EINVAL;
+-      bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags,
+-                                           &args->handle);
++      bo = panfrost_gem_create(dev, args->size, args->flags);
+       if (IS_ERR(bo))
+               return PTR_ERR(bo);
++      ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
++      if (ret)
++              goto out;
++
+       mapping = panfrost_gem_mapping_get(bo, priv);
+-      if (!mapping) {
+-              drm_gem_object_put(&bo->base.base);
+-              return -EINVAL;
++      if (mapping) {
++              args->offset = mapping->mmnode.start << PAGE_SHIFT;
++              panfrost_gem_mapping_put(mapping);
++      } else {
++              /* This can only happen if the handle from
++               * drm_gem_handle_create() has already been guessed and freed
++               * by user space
++               */
++              ret = -EINVAL;
+       }
+-      args->offset = mapping->mmnode.start << PAGE_SHIFT;
+-      panfrost_gem_mapping_put(mapping);
+-
+-      return 0;
++out:
++      drm_gem_object_put(&bo->base.base);
++      return ret;
+ }
+ /**
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
+index 293e799e2fe8..3c812fbd126f 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
++++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
+@@ -235,12 +235,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
+ }
+ struct panfrost_gem_object *
+-panfrost_gem_create_with_handle(struct drm_file *file_priv,
+-                              struct drm_device *dev, size_t size,
+-                              u32 flags,
+-                              uint32_t *handle)
++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
+ {
+-      int ret;
+       struct drm_gem_shmem_object *shmem;
+       struct panfrost_gem_object *bo;
+@@ -256,16 +252,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
+       bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
+       bo->is_heap = !!(flags & PANFROST_BO_HEAP);
+-      /*
+-       * Allocate an id of idr table where the obj is registered
+-       * and handle has the id what user can see.
+-       */
+-      ret = drm_gem_handle_create(file_priv, &shmem->base, handle);
+-      /* drop reference from allocate - handle holds it now. */
+-      drm_gem_object_put(&shmem->base);
+-      if (ret)
+-              return ERR_PTR(ret);
+-
+       return bo;
+ }
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
+index 8088d5fd8480..ad2877eeeccd 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
++++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
+@@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev,
+                                  struct sg_table *sgt);
+ struct panfrost_gem_object *
+-panfrost_gem_create_with_handle(struct drm_file *file_priv,
+-                              struct drm_device *dev, size_t size,
+-                              u32 flags,
+-                              uint32_t *handle);
++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags);
+ int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv);
+ void panfrost_gem_close(struct drm_gem_object *obj,
+-- 
+2.35.1
+
diff --git a/queue-6.0/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch b/queue-6.0/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch
new file mode 100644 (file)
index 0000000..82b93f4
--- /dev/null
@@ -0,0 +1,55 @@
+From c93b92824096b71de4cc423d54c0c1855465b357 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 15:43:43 +0800
+Subject: ext4: correct inconsistent error msg in nojournal mode
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 89481b5fa8c0640e62ba84c6020cee895f7ac643 ]
+
+When we used the journal_async_commit mounting option in nojournal mode,
+the kernel reported "can't mount with journal_checksum", which was very
+confusing. I find that when we mount with journal_async_commit, both the
+JOURNAL_ASYNC_COMMIT and EXPLICIT_JOURNAL_CHECKSUM flags are set. However,
+in the error branch, CHECKSUM is checked before ASYNC_COMMIT. As a result,
+the above inconsistency occurs, and the ASYNC_COMMIT branch becomes dead
+code that cannot be executed. Therefore, we exchange the positions of the
+two judgments to make the error msg more accurate.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20221109074343.4184862-1-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index fedfe8cb78c6..ac083526c115 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5066,14 +5066,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+               goto failed_mount3a;
+       } else {
+               /* Nojournal mode, all journal mount options are illegal */
+-              if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
++              if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+-                               "journal_checksum, fs mounted w/o journal");
++                               "journal_async_commit, fs mounted w/o journal");
+                       goto failed_mount3a;
+               }
+-              if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
++
++              if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+-                               "journal_async_commit, fs mounted w/o journal");
++                               "journal_checksum, fs mounted w/o journal");
+                       goto failed_mount3a;
+               }
+               if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+-- 
+2.35.1
+
diff --git a/queue-6.0/ext4-goto-right-label-failed_mount3a.patch b/queue-6.0/ext4-goto-right-label-failed_mount3a.patch
new file mode 100644 (file)
index 0000000..c35d314
--- /dev/null
@@ -0,0 +1,69 @@
+From 34230f04dac9e68dce9c0666f37a84673fb23631 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Sep 2022 22:15:12 +0800
+Subject: ext4: goto right label 'failed_mount3a'
+
+From: Jason Yan <yanaijie@huawei.com>
+
+[ Upstream commit 43bd6f1b49b61f43de4d4e33661b8dbe8c911f14 ]
+
+Before these two branches, the code has neither loaded the journal nor
+created the xattr cache, so the right label to goto is 'failed_mount3a'.
+This did not cause any issues because the error handler checks whether
+the pointer is NULL, but it is still confusing when reading the code, so
+it is worth changing the gotos to use the right label.
+
+Signed-off-by: Jason Yan <yanaijie@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220916141527.1012715-2-yanaijie@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 89481b5fa8c0 ("ext4: correct inconsistent error msg in nojournal mode")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 2eae6e038f38..fedfe8cb78c6 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5063,30 +5063,30 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+                  ext4_has_feature_journal_needs_recovery(sb)) {
+               ext4_msg(sb, KERN_ERR, "required journal recovery "
+                      "suppressed and not mounted read-only");
+-              goto failed_mount_wq;
++              goto failed_mount3a;
+       } else {
+               /* Nojournal mode, all journal mount options are illegal */
+               if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "journal_checksum, fs mounted w/o journal");
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "journal_async_commit, fs mounted w/o journal");
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "commit=%lu, fs mounted w/o journal",
+                                sbi->s_commit_interval / HZ);
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               if (EXT4_MOUNT_DATA_FLAGS &
+                   (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "data=, fs mounted w/o journal");
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
+               clear_opt(sb, JOURNAL_CHECKSUM);
+-- 
+2.35.1
+
diff --git a/queue-6.0/filelock-new-helper-vfs_inode_has_locks.patch b/queue-6.0/filelock-new-helper-vfs_inode_has_locks.patch
new file mode 100644 (file)
index 0000000..9040838
--- /dev/null
@@ -0,0 +1,89 @@
+From f2a5ee02f4b65176dad075ad38a092be6de3eea0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 08:33:09 -0500
+Subject: filelock: new helper: vfs_inode_has_locks
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit ab1ddef98a715eddb65309ffa83267e4e84a571e ]
+
+Ceph has a need to know whether a particular inode has any locks set on
+it. It's currently tracking that by a num_locks field in its
+filp->private_data, but that's problematic as it tries to decrement this
+field when releasing locks and that can race with the file being torn
+down.
+
+Add a new vfs_inode_has_locks helper that just returns whether any locks
+are currently held on the inode.
+
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Stable-dep-of: 461ab10ef7e6 ("ceph: switch to vfs_inode_has_locks() to fix file lock bug")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/locks.c         | 23 +++++++++++++++++++++++
+ include/linux/fs.h |  6 ++++++
+ 2 files changed, 29 insertions(+)
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 607f94a0e789..7dc129cc1a26 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -2669,6 +2669,29 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+ }
+ EXPORT_SYMBOL_GPL(vfs_cancel_lock);
++/**
++ * vfs_inode_has_locks - are any file locks held on @inode?
++ * @inode: inode to check for locks
++ *
++ * Return true if there are any FL_POSIX or FL_FLOCK locks currently
++ * set on @inode.
++ */
++bool vfs_inode_has_locks(struct inode *inode)
++{
++      struct file_lock_context *ctx;
++      bool ret;
++
++      ctx = smp_load_acquire(&inode->i_flctx);
++      if (!ctx)
++              return false;
++
++      spin_lock(&ctx->flc_lock);
++      ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
++      spin_unlock(&ctx->flc_lock);
++      return ret;
++}
++EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
++
+ #ifdef CONFIG_PROC_FS
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 8e79a761c56c..17a1a57adbe0 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1170,6 +1170,7 @@ extern int locks_delete_block(struct file_lock *);
+ extern int vfs_test_lock(struct file *, struct file_lock *);
+ extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
+ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
++bool vfs_inode_has_locks(struct inode *inode);
+ extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
+ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
+ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
+@@ -1284,6 +1285,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+       return 0;
+ }
++static inline bool vfs_inode_has_locks(struct inode *inode)
++{
++      return false;
++}
++
+ static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
+ {
+       return -ENOLCK;
+-- 
+2.35.1
+
diff --git a/queue-6.0/fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch b/queue-6.0/fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch
new file mode 100644 (file)
index 0000000..9e13a6e
--- /dev/null
@@ -0,0 +1,51 @@
+From 528377551f17f7fdea681861daac24e0c32acbb9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 23:05:33 +0900
+Subject: fs/ntfs3: don't hold ni_lock when calling truncate_setsize()
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 0226635c304cfd5c9db9b78c259cb713819b057e ]
+
+syzbot is reporting hung task at do_user_addr_fault() [1], for there is
+a silent deadlock between PG_locked bit and ni_lock lock.
+
+Since filemap_update_page() calls filemap_read_folio() after calling
+folio_trylock() which will set PG_locked bit, ntfs_truncate() must not
+call truncate_setsize() which will wait for PG_locked bit to be cleared
+when holding ni_lock lock.
+
+Link: https://lore.kernel.org/all/00000000000060d41f05f139aa44@google.com/
+Link: https://syzkaller.appspot.com/bug?extid=bed15dbf10294aa4f2ae [1]
+Reported-by: syzbot <syzbot+bed15dbf10294aa4f2ae@syzkaller.appspotmail.com>
+Debugged-by: Linus Torvalds <torvalds@linux-foundation.org>
+Co-developed-by: Hillf Danton <hdanton@sina.com>
+Signed-off-by: Hillf Danton <hdanton@sina.com>
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ntfs3/file.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
+index 4f2ffc7ef296..f31c0389a2e7 100644
+--- a/fs/ntfs3/file.c
++++ b/fs/ntfs3/file.c
+@@ -486,10 +486,10 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
+       new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size));
+-      ni_lock(ni);
+-
+       truncate_setsize(inode, new_size);
++      ni_lock(ni);
++
+       down_write(&ni->file.run_lock);
+       err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
+                           &new_valid, ni->mi.sbi->options->prealloc, NULL);
+-- 
+2.35.1
+
diff --git a/queue-6.0/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch b/queue-6.0/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch
new file mode 100644 (file)
index 0000000..7f7a820
--- /dev/null
@@ -0,0 +1,36 @@
+From ddcea69d480c7ede55768d0252fce9ed1828c4ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 12:20:39 +0400
+Subject: gpio: sifive: Fix refcount leak in sifive_gpio_probe
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit 694175cd8a1643cde3acb45c9294bca44a8e08e9 ]
+
+of_irq_find_parent() returns a node pointer with refcount incremented.
+We should use of_node_put() on it when not needed anymore.
+Add missing of_node_put() to avoid refcount leak.
+
+Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-sifive.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
+index 238f3210970c..bc5660f61c57 100644
+--- a/drivers/gpio/gpio-sifive.c
++++ b/drivers/gpio/gpio-sifive.c
+@@ -215,6 +215,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
+               return -ENODEV;
+       }
+       parent = irq_find_host(irq_parent);
++      of_node_put(irq_parent);
+       if (!parent) {
+               dev_err(dev, "no IRQ parent domain\n");
+               return -ENODEV;
+-- 
+2.35.1
+
diff --git a/queue-6.0/hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch b/queue-6.0/hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch
new file mode 100644 (file)
index 0000000..a605fbc
--- /dev/null
@@ -0,0 +1,96 @@
+From 4db7d1ba53556537073e29d02ce2c5ba9e357c9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jan 2023 11:06:28 -0800
+Subject: hfs/hfsplus: avoid WARN_ON() for sanity check, use proper error
+ handling
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit cb7a95af78d29442b8294683eca4897544b8ef46 ]
+
+Commit 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check") fixed
+a build warning by turning a comment into a WARN_ON(), but it turns out
+that syzbot then complains because it can trigger said warning with a
+corrupted hfs image.
+
+The warning actually does warn about a bad situation, but we are much
+better off just handling it as the error it is.  So rather than warn
+about us doing bad things, stop doing the bad things and return -EIO.
+
+While at it, also fix a memory leak that was introduced by an earlier
+fix for a similar syzbot warning situation, and add a check for one case
+that historically wasn't handled at all (ie neither comment nor
+subsequent WARN_ON).
+
+Reported-by: syzbot+7bb7cd3595533513a9e7@syzkaller.appspotmail.com
+Fixes: 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check")
+Fixes: 8d824e69d9f3 ("hfs: fix OOB Read in __hfs_brec_find")
+Link: https://lore.kernel.org/lkml/000000000000dbce4e05f170f289@google.com/
+Tested-by: Michael Schmitz <schmitzmic@gmail.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Viacheslav Dubeyko <slava@dubeyko.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/hfs/inode.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
+index a0746be3c1de..80d17c520d0b 100644
+--- a/fs/hfs/inode.c
++++ b/fs/hfs/inode.c
+@@ -458,15 +458,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+               /* panic? */
+               return -EIO;
++      res = -EIO;
+       if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN)
+-              return -EIO;
++              goto out;
+       fd.search_key->cat = HFS_I(main_inode)->cat_key;
+       if (hfs_brec_find(&fd))
+-              /* panic? */
+               goto out;
+       if (S_ISDIR(main_inode->i_mode)) {
+-              WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir));
++              if (fd.entrylength < sizeof(struct hfs_cat_dir))
++                      goto out;
+               hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
+                          sizeof(struct hfs_cat_dir));
+               if (rec.type != HFS_CDR_DIR ||
+@@ -479,6 +480,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+               hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
+                           sizeof(struct hfs_cat_dir));
+       } else if (HFS_IS_RSRC(inode)) {
++              if (fd.entrylength < sizeof(struct hfs_cat_file))
++                      goto out;
+               hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
+                              sizeof(struct hfs_cat_file));
+               hfs_inode_write_fork(inode, rec.file.RExtRec,
+@@ -486,7 +489,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+               hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
+                               sizeof(struct hfs_cat_file));
+       } else {
+-              WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file));
++              if (fd.entrylength < sizeof(struct hfs_cat_file))
++                      goto out;
+               hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
+                          sizeof(struct hfs_cat_file));
+               if (rec.type != HFS_CDR_FIL ||
+@@ -503,9 +507,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+               hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
+                           sizeof(struct hfs_cat_file));
+       }
++      res = 0;
+ out:
+       hfs_find_exit(&fd);
+-      return 0;
++      return res;
+ }
+ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
+-- 
+2.35.1
+
diff --git a/queue-6.0/ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch b/queue-6.0/ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch
new file mode 100644 (file)
index 0000000..e5a3c00
--- /dev/null
@@ -0,0 +1,53 @@
+From be4e3dfd455effa33dd9329444aae8dea96685b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 09:54:48 -0800
+Subject: ice: xsk: do not use xdp_return_frame() on tx_buf->raw_buf
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 53fc61be273a1e76dd5e356f91805dce00ff2d2c ]
+
+Previously ice XDP xmit routine was changed in a way that it avoids
+xdp_buff->xdp_frame conversion as it is simply not needed for handling
+XDP_TX action and what is more it saves us CPU cycles. This routine is
+re-used on ZC driver to handle XDP_TX action.
+
+Although for XDP_TX on Rx ZC xdp_buff that comes from xsk_buff_pool is
+converted to xdp_frame, xdp_frame itself is not stored inside
+ice_tx_buf, we only store raw data pointer. Casting this pointer to
+xdp_frame and calling against it xdp_return_frame in
+ice_clean_xdp_tx_buf() results in undefined behavior.
+
+To fix this, simply call page_frag_free() on tx_buf->raw_buf.
+Later intention is to remove the buff->frame conversion in order to
+simplify the codebase and improve XDP_TX performance on ZC.
+
+Fixes: 126cdfe1007a ("ice: xsk: Improve AF_XDP ZC Tx and use batching API")
+Reported-and-tested-by: Robin Cowley <robin.cowley@thehutgroup.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Reviewed-by: Piotr Raczynski <piotr.raczynski@intel.com>
+Link: https://lore.kernel.org/r/20221220175448.693999-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
+index 056c904b83cc..79fa65d1cf20 100644
+--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
+@@ -772,7 +772,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
+ static void
+ ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+ {
+-      xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
++      page_frag_free(tx_buf->raw_buf);
+       xdp_ring->xdp_tx_active--;
+       dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+                        dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+-- 
+2.35.1
+
diff --git a/queue-6.0/io_uring-check-for-valid-register-opcode-earlier.patch b/queue-6.0/io_uring-check-for-valid-register-opcode-earlier.patch
new file mode 100644 (file)
index 0000000..a52493c
--- /dev/null
@@ -0,0 +1,45 @@
+From 0d5a95d7778f37f11e8649ac82f541e838c40026 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 06:37:08 -0700
+Subject: io_uring: check for valid register opcode earlier
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 343190841a1f22b96996d9f8cfab902a4d1bfd0e ]
+
+We only check the register opcode value inside the restricted ring
+section, move it into the main io_uring_register() function instead
+and check it up front.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index b8a39be3bcb4..cfcf1d415521 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3725,8 +3725,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
+               return -EEXIST;
+       if (ctx->restricted) {
+-              if (opcode >= IORING_REGISTER_LAST)
+-                      return -EINVAL;
+               opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
+               if (!test_bit(opcode, ctx->restrictions.register_op))
+                       return -EACCES;
+@@ -3882,6 +3880,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
+       long ret = -EBADF;
+       struct fd f;
++      if (opcode >= IORING_REGISTER_LAST)
++              return -EINVAL;
++
+       f = fdget(fd);
+       if (!f.file)
+               return -EBADF;
+-- 
+2.35.1
+
diff --git a/queue-6.0/mptcp-fix-lockdep-false-positive.patch b/queue-6.0/mptcp-fix-lockdep-false-positive.patch
new file mode 100644 (file)
index 0000000..caaa001
--- /dev/null
@@ -0,0 +1,160 @@
+From aa0fddcc3f95856e7ac2e7ccb382a0d3bab37954 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 11:52:15 -0800
+Subject: mptcp: fix lockdep false positive
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit fec3adfd754ccc99a7230e8ab9f105b65fb07bcc ]
+
+MattB reported a lockdep splat in the mptcp listener code cleanup:
+
+ WARNING: possible circular locking dependency detected
+ packetdrill/14278 is trying to acquire lock:
+ ffff888017d868f0 ((work_completion)(&msk->work)){+.+.}-{0:0}, at: __flush_work (kernel/workqueue.c:3069)
+
+ but task is already holding lock:
+ ffff888017d84130 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close (net/mptcp/protocol.c:2973)
+
+ which lock already depends on the new lock.
+
+ the existing dependency chain (in reverse order) is:
+
+ -> #1 (sk_lock-AF_INET){+.+.}-{0:0}:
+        __lock_acquire (kernel/locking/lockdep.c:5055)
+        lock_acquire (kernel/locking/lockdep.c:466)
+        lock_sock_nested (net/core/sock.c:3463)
+        mptcp_worker (net/mptcp/protocol.c:2614)
+        process_one_work (kernel/workqueue.c:2294)
+        worker_thread (include/linux/list.h:292)
+        kthread (kernel/kthread.c:376)
+        ret_from_fork (arch/x86/entry/entry_64.S:312)
+
+ -> #0 ((work_completion)(&msk->work)){+.+.}-{0:0}:
+        check_prev_add (kernel/locking/lockdep.c:3098)
+        validate_chain (kernel/locking/lockdep.c:3217)
+        __lock_acquire (kernel/locking/lockdep.c:5055)
+        lock_acquire (kernel/locking/lockdep.c:466)
+        __flush_work (kernel/workqueue.c:3070)
+        __cancel_work_timer (kernel/workqueue.c:3160)
+        mptcp_cancel_work (net/mptcp/protocol.c:2758)
+        mptcp_subflow_queue_clean (net/mptcp/subflow.c:1817)
+        __mptcp_close_ssk (net/mptcp/protocol.c:2363)
+        mptcp_destroy_common (net/mptcp/protocol.c:3170)
+        mptcp_destroy (include/net/sock.h:1495)
+        __mptcp_destroy_sock (net/mptcp/protocol.c:2886)
+        __mptcp_close (net/mptcp/protocol.c:2959)
+        mptcp_close (net/mptcp/protocol.c:2974)
+        inet_release (net/ipv4/af_inet.c:432)
+        __sock_release (net/socket.c:651)
+        sock_close (net/socket.c:1367)
+        __fput (fs/file_table.c:320)
+        task_work_run (kernel/task_work.c:181 (discriminator 1))
+        exit_to_user_mode_prepare (include/linux/resume_user_mode.h:49)
+        syscall_exit_to_user_mode (kernel/entry/common.c:130)
+        do_syscall_64 (arch/x86/entry/common.c:87)
+        entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+ other info that might help us debug this:
+
+  Possible unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+   lock(sk_lock-AF_INET);
+                                lock((work_completion)(&msk->work));
+                                lock(sk_lock-AF_INET);
+   lock((work_completion)(&msk->work));
+
+  *** DEADLOCK ***
+
+The report is actually a false positive, since the only existing lock
+nesting is the msk socket lock acquired by the mptcp work.
+cancel_work_sync() is invoked without the relevant socket lock being
+held, but under a different (the msk listener) socket lock.
+
+We could silence the splat adding a per workqueue dynamic lockdep key,
+but that looks overkill. Instead just tell lockdep the msk socket lock
+is not held around cancel_work_sync().
+
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/322
+Fixes: 30e51b923e43 ("mptcp: fix unreleased socket in accept queue")
+Reported-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c |  2 +-
+ net/mptcp/protocol.h |  2 +-
+ net/mptcp/subflow.c  | 19 +++++++++++++++++--
+ 3 files changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 42d5e0a7952a..a2cc25cca33e 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2310,7 +2310,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+               /* otherwise tcp will dispose of the ssk and subflow ctx */
+               if (ssk->sk_state == TCP_LISTEN) {
+                       tcp_set_state(ssk, TCP_CLOSE);
+-                      mptcp_subflow_queue_clean(ssk);
++                      mptcp_subflow_queue_clean(sk, ssk);
+                       inet_csk_listen_stop(ssk);
+               }
+               __tcp_close(ssk, 0);
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index c1eaa1685592..df6937c8cf54 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -610,7 +610,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+                    struct mptcp_subflow_context *subflow);
+ void __mptcp_subflow_send_ack(struct sock *ssk);
+ void mptcp_subflow_reset(struct sock *ssk);
+-void mptcp_subflow_queue_clean(struct sock *ssk);
++void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
+ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
+ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+ bool __mptcp_close(struct sock *sk, long timeout);
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 613f515fedf0..9d3701fdb293 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1733,7 +1733,7 @@ static void subflow_state_change(struct sock *sk)
+       }
+ }
+-void mptcp_subflow_queue_clean(struct sock *listener_ssk)
++void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
+ {
+       struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+       struct mptcp_sock *msk, *next, *head = NULL;
+@@ -1782,8 +1782,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+               do_cancel_work = __mptcp_close(sk, 0);
+               release_sock(sk);
+-              if (do_cancel_work)
++              if (do_cancel_work) {
++                      /* lockdep will report a false positive ABBA deadlock
++                       * between cancel_work_sync and the listener socket.
++                       * The involved locks belong to different sockets WRT
++                       * the existing AB chain.
++                       * Using a per socket key is problematic as key
++                       * deregistration requires process context and must be
++                       * performed at socket disposal time, in atomic
++                       * context.
++                       * Just tell lockdep to consider the listener socket
++                       * released here.
++                       */
++                      mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
+                       mptcp_cancel_work(sk);
++                      mutex_acquire(&listener_sk->sk_lock.dep_map,
++                                    SINGLE_DEPTH_NESTING, 0, _RET_IP_);
++              }
+               sock_put(sk);
+       }
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-amd-xgbe-add-missed-tasklet_kill.patch b/queue-6.0/net-amd-xgbe-add-missed-tasklet_kill.patch
new file mode 100644 (file)
index 0000000..1e72633
--- /dev/null
@@ -0,0 +1,71 @@
+From cbe2daef582c7e9a51951b6806b4c2674e614d36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 16:14:47 +0800
+Subject: net: amd-xgbe: add missed tasklet_kill
+
+From: Jiguang Xiao <jiguang.xiao@windriver.com>
+
+[ Upstream commit d530ece70f16f912e1d1bfeea694246ab78b0a4b ]
+
+The driver does not call tasklet_kill in several places.
+Add the calls to fix it.
+
+Fixes: 85b85c853401 ("amd-xgbe: Re-issue interrupt if interrupt status not cleared")
+Signed-off-by: Jiguang Xiao <jiguang.xiao@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 3 +++
+ drivers/net/ethernet/amd/xgbe/xgbe-i2c.c  | 4 +++-
+ drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 +++-
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+index f342bb853189..2ee2cd4a1e35 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
+       devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
++      tasklet_kill(&pdata->tasklet_dev);
++      tasklet_kill(&pdata->tasklet_ecc);
++
+       if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+               devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+index 22d4fc547a0a..a9ccc4258ee5 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata)
+       xgbe_i2c_disable(pdata);
+       xgbe_i2c_clear_all_interrupts(pdata);
+-      if (pdata->dev_irq != pdata->i2c_irq)
++      if (pdata->dev_irq != pdata->i2c_irq) {
+               devm_free_irq(pdata->dev, pdata->i2c_irq, pdata);
++              tasklet_kill(&pdata->tasklet_i2c);
++      }
+ }
+ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+index 4e97b4869522..0c5c1b155683 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+@@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+       /* Disable auto-negotiation */
+       xgbe_an_disable_all(pdata);
+-      if (pdata->dev_irq != pdata->an_irq)
++      if (pdata->dev_irq != pdata->an_irq) {
+               devm_free_irq(pdata->dev, pdata->an_irq, pdata);
++              tasklet_kill(&pdata->tasklet_an);
++      }
+       pdata->phy_if.phy_impl.stop(pdata);
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch b/queue-6.0/net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch
new file mode 100644 (file)
index 0000000..45e8cdf
--- /dev/null
@@ -0,0 +1,55 @@
+From be89c9a760f66dda9731f7a76e63e58f8d9df5bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 22:34:05 +0800
+Subject: net: dsa: mv88e6xxx: depend on PTP conditionally
+
+From: Johnny S. Lee <foss@jsl.io>
+
+[ Upstream commit 30e725537546248bddc12eaac2fe0a258917f190 ]
+
+PTP hardware timestamping related objects are not linked when PTP
+support for MV88E6xxx (NET_DSA_MV88E6XXX_PTP) is disabled, therefore
+NET_DSA_MV88E6XXX should not depend on PTP_1588_CLOCK_OPTIONAL
+regardless of NET_DSA_MV88E6XXX_PTP.
+
+Instead, condition more strictly on how NET_DSA_MV88E6XXX_PTP's
+dependencies are met, making sure that it cannot be enabled when
+NET_DSA_MV88E6XXX=y and PTP_1588_CLOCK=m.
+
+In other words, this commit allows NET_DSA_MV88E6XXX to be built-in
+while PTP_1588_CLOCK is a module, as long as NET_DSA_MV88E6XXX_PTP is
+prevented from being enabled.
+
+Fixes: e5f31552674e ("ethernet: fix PTP_1588_CLOCK dependencies")
+Signed-off-by: Johnny S. Lee <foss@jsl.io>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/mv88e6xxx/Kconfig | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
+index 7a2445a34eb7..e3181d5471df 100644
+--- a/drivers/net/dsa/mv88e6xxx/Kconfig
++++ b/drivers/net/dsa/mv88e6xxx/Kconfig
+@@ -2,7 +2,6 @@
+ config NET_DSA_MV88E6XXX
+       tristate "Marvell 88E6xxx Ethernet switch fabric support"
+       depends on NET_DSA
+-      depends on PTP_1588_CLOCK_OPTIONAL
+       select IRQ_DOMAIN
+       select NET_DSA_TAG_EDSA
+       select NET_DSA_TAG_DSA
+@@ -13,7 +12,8 @@ config NET_DSA_MV88E6XXX
+ config NET_DSA_MV88E6XXX_PTP
+       bool "PTP support for Marvell 88E6xxx"
+       default n
+-      depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK
++      depends on (NET_DSA_MV88E6XXX = y && PTP_1588_CLOCK = y) || \
++                 (NET_DSA_MV88E6XXX = m && PTP_1588_CLOCK)
+       help
+         Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
+         chips that support it.
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ena-account-for-the-number-of-processed-bytes-in.patch b/queue-6.0/net-ena-account-for-the-number-of-processed-bytes-in.patch
new file mode 100644 (file)
index 0000000..6463ca1
--- /dev/null
@@ -0,0 +1,36 @@
+From f5428bafd5d23e6d518bdbc1da1e07cee5f71956 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 07:30:07 +0000
+Subject: net: ena: Account for the number of processed bytes in XDP
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit c7f5e34d906320fdc996afa616676161c029cc02 ]
+
+The size of packets that were forwarded or dropped by XDP wasn't added
+to the total processed bytes statistic.
+
+Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index f4ee8671b738..614f27f18164 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1719,6 +1719,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+                       }
+                       if (xdp_verdict != XDP_PASS) {
+                               xdp_flags |= xdp_verdict;
++                              total_len += ena_rx_ctx.ena_bufs[0].len;
+                               res_budget--;
+                               continue;
+                       }
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ena-don-t-register-memory-info-on-xdp-exchange.patch b/queue-6.0/net-ena-don-t-register-memory-info-on-xdp-exchange.patch
new file mode 100644 (file)
index 0000000..4d69603
--- /dev/null
@@ -0,0 +1,50 @@
+From 5aa2538adf2e4e02ed1b5a63e1bb3a9fcbda4f90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 07:30:06 +0000
+Subject: net: ena: Don't register memory info on XDP exchange
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 9c9e539956fa67efb8a65e32b72a853740b33445 ]
+
+Since the queues aren't destroyed when we only exchange XDP programs,
+there's no need to re-register them again.
+
+Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 41c821348476..f4ee8671b738 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -512,16 +512,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+                                                struct bpf_prog *prog,
+                                                int first, int count)
+ {
++      struct bpf_prog *old_bpf_prog;
+       struct ena_ring *rx_ring;
+       int i = 0;
+       for (i = first; i < count; i++) {
+               rx_ring = &adapter->rx_ring[i];
+-              xchg(&rx_ring->xdp_bpf_prog, prog);
+-              if (prog) {
++              old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
++
++              if (!old_bpf_prog && prog) {
+                       ena_xdp_register_rxq_info(rx_ring);
+                       rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
+-              } else {
++              } else if (old_bpf_prog && !prog) {
+                       ena_xdp_unregister_rxq_info(rx_ring);
+                       rx_ring->rx_headroom = NET_SKB_PAD;
+               }
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ena-fix-rx_copybreak-value-update.patch b/queue-6.0/net-ena-fix-rx_copybreak-value-update.patch
new file mode 100644 (file)
index 0000000..fba3230
--- /dev/null
@@ -0,0 +1,94 @@
+From 472f7286f498a987548ac570b82cdb89707ac4dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 07:30:09 +0000
+Subject: net: ena: Fix rx_copybreak value update
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit c7062aaee099f2f43d6f07a71744b44b94b94b34 ]
+
+Make the upper bound on rx_copybreak tighter, by
+making sure it is smaller than the minimum of mtu and
+ENA_PAGE_SIZE. With the current upper bound of mtu,
+rx_copybreak can be larger than a page. Such large
+rx_copybreak will not bring any performance benefit to
+the user and therefore makes no sense.
+
+In addition, the value update was only reflected in
+the adapter structure, but not applied for each ring,
+causing it to not take effect.
+
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Osama Abboud <osamaabb@amazon.com>
+Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_ethtool.c |  6 +-----
+ drivers/net/ethernet/amazon/ena/ena_netdev.c  | 18 ++++++++++++++++++
+ drivers/net/ethernet/amazon/ena/ena_netdev.h  |  2 ++
+ 3 files changed, 21 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+index 39242c5a1729..108506721bcf 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
++++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+@@ -887,11 +887,7 @@ static int ena_set_tunable(struct net_device *netdev,
+       switch (tuna->id) {
+       case ETHTOOL_RX_COPYBREAK:
+               len = *(u32 *)data;
+-              if (len > adapter->netdev->mtu) {
+-                      ret = -EINVAL;
+-                      break;
+-              }
+-              adapter->rx_copybreak = len;
++              ret = ena_set_rx_copybreak(adapter, len);
+               break;
+       default:
+               ret = -EINVAL;
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index a27a7963df76..083754e0bf23 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -2816,6 +2816,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
+       return dev_was_up ? ena_up(adapter) : 0;
+ }
++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
++{
++      struct ena_ring *rx_ring;
++      int i;
++
++      if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
++              return -EINVAL;
++
++      adapter->rx_copybreak = rx_copybreak;
++
++      for (i = 0; i < adapter->num_io_queues; i++) {
++              rx_ring = &adapter->rx_ring[i];
++              rx_ring->rx_copybreak = rx_copybreak;
++      }
++
++      return 0;
++}
++
+ int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
+ {
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+index 290ae9bf47ee..f9d862b630fa 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+@@ -392,6 +392,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
+ int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak);
++
+ int ena_get_sset_count(struct net_device *netdev, int sset);
+ static inline void ena_reset_device(struct ena_adapter *adapter,
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ena-fix-toeplitz-initial-hash-value.patch b/queue-6.0/net-ena-fix-toeplitz-initial-hash-value.patch
new file mode 100644 (file)
index 0000000..e582515
--- /dev/null
@@ -0,0 +1,72 @@
+From f9a7ae234ff08448072e4e1ce6858af3a7e98131 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 07:30:05 +0000
+Subject: net: ena: Fix toeplitz initial hash value
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 332b49ff637d6c1a75b971022a8b992cf3c57db1 ]
+
+On driver initialization, RSS hash initial value is set to zero,
+instead of the default value. This happens because we pass NULL as
+the RSS key parameter, which caused us to never initialize
+the RSS hash value.
+
+This patch fixes it by making sure the initial value is set, no matter
+what the value of the RSS key is.
+
+Fixes: 91a65b7d3ed8 ("net: ena: fix potential crash when rxfh key is NULL")
+Signed-off-by: Nati Koler <nkoler@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_com.c | 29 +++++++----------------
+ 1 file changed, 9 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
+index 8c8b4c88c7de..451c3a1b6255 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_com.c
++++ b/drivers/net/ethernet/amazon/ena/ena_com.c
+@@ -2400,29 +2400,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+               return -EOPNOTSUPP;
+       }
+-      switch (func) {
+-      case ENA_ADMIN_TOEPLITZ:
+-              if (key) {
+-                      if (key_len != sizeof(hash_key->key)) {
+-                              netdev_err(ena_dev->net_device,
+-                                         "key len (%u) doesn't equal the supported size (%zu)\n",
+-                                         key_len, sizeof(hash_key->key));
+-                              return -EINVAL;
+-                      }
+-                      memcpy(hash_key->key, key, key_len);
+-                      rss->hash_init_val = init_val;
+-                      hash_key->key_parts = key_len / sizeof(hash_key->key[0]);
++      if ((func == ENA_ADMIN_TOEPLITZ) && key) {
++              if (key_len != sizeof(hash_key->key)) {
++                      netdev_err(ena_dev->net_device,
++                                 "key len (%u) doesn't equal the supported size (%zu)\n",
++                                 key_len, sizeof(hash_key->key));
++                      return -EINVAL;
+               }
+-              break;
+-      case ENA_ADMIN_CRC32:
+-              rss->hash_init_val = init_val;
+-              break;
+-      default:
+-              netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n",
+-                         func);
+-              return -EINVAL;
++              memcpy(hash_key->key, key, key_len);
++              hash_key->key_parts = key_len / sizeof(hash_key->key[0]);
+       }
++      rss->hash_init_val = init_val;
+       old_func = rss->hash_func;
+       rss->hash_func = func;
+       rc = ena_com_set_hash_function(ena_dev);
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ena-set-default-value-for-rx-interrupt-moderatio.patch b/queue-6.0/net-ena-set-default-value-for-rx-interrupt-moderatio.patch
new file mode 100644 (file)
index 0000000..10d48a2
--- /dev/null
@@ -0,0 +1,42 @@
+From 7fcb138d1cb9a8c929cf23b5f4bee64e74c256b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 07:30:10 +0000
+Subject: net: ena: Set default value for RX interrupt moderation
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit e712f3e4920b3a1a5e6b536827d118e14862896c ]
+
+RX ring can be NULL in XDP use cases where only TX queues
+are configured. In this scenario, the RX interrupt moderation
+value sent to the device remains in its default value of 0.
+
+This change sets the default value of the RX interrupt
+moderation to be the same as that of the TX.
+
+Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 083754e0bf23..b25a2fcff339 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1823,8 +1823,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
+ static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+                                       struct ena_ring *rx_ring)
+ {
++      u32 rx_interval = tx_ring->smoothed_interval;
+       struct ena_eth_io_intr_reg intr_reg;
+-      u32 rx_interval = 0;
++
+       /* Rx ring can be NULL when for XDP tx queues which don't have an
+        * accompanying rx_ring pair.
+        */
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ena-update-numa-tph-hint-register-upon-numa-node.patch b/queue-6.0/net-ena-update-numa-tph-hint-register-upon-numa-node.patch
new file mode 100644 (file)
index 0000000..85507ac
--- /dev/null
@@ -0,0 +1,155 @@
+From a97d99eee90d94539c0710d8b4740df1985f5cf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 07:30:11 +0000
+Subject: net: ena: Update NUMA TPH hint register upon NUMA node update
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit a8ee104f986e720cea52133885cc822d459398c7 ]
+
+The device supports a PCIe optimization hint, which indicates on
+which NUMA the queue is currently processed. This hint is utilized
+by PCIe in order to reduce its access time by accessing the
+correct NUMA resources and maintaining cache coherence.
+
+The driver calls the register update for the hint (called TPH -
+TLP Processing Hint) during the NAPI loop.
+
+Though the update is expected upon a NUMA change (when a queue
+is moved from one NUMA to the other), the current logic performs
+a register update when the queue is moved to a different CPU,
+but the CPU is not necessarily in a different NUMA.
+
+The changes include:
+1. Performing the TPH update only when the queue has switched
+a NUMA node.
+2. Moving the TPH update call to be triggered only when NAPI was
+scheduled from interrupt context, as opposed to a busy-polling loop.
+This is due to the fact that during busy-polling, the frequency
+of CPU switches for a particular queue is significantly higher,
+thus, the likelihood to switch NUMA is much higher. Therefore,
+providing frequent updates to the device upon a NUMA update
+is unlikely to be beneficial.
+
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 27 +++++++++++++-------
+ drivers/net/ethernet/amazon/ena/ena_netdev.h |  6 +++--
+ 2 files changed, 22 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index b25a2fcff339..413714f373ff 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -680,6 +680,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
+       ring->ena_dev = adapter->ena_dev;
+       ring->per_napi_packets = 0;
+       ring->cpu = 0;
++      ring->numa_node = 0;
+       ring->no_interrupt_event_cnt = 0;
+       u64_stats_init(&ring->syncp);
+ }
+@@ -783,6 +784,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
+       tx_ring->next_to_use = 0;
+       tx_ring->next_to_clean = 0;
+       tx_ring->cpu = ena_irq->cpu;
++      tx_ring->numa_node = node;
+       return 0;
+ err_push_buf_intermediate_buf:
+@@ -915,6 +917,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter,
+       rx_ring->next_to_clean = 0;
+       rx_ring->next_to_use = 0;
+       rx_ring->cpu = ena_irq->cpu;
++      rx_ring->numa_node = node;
+       return 0;
+ }
+@@ -1863,20 +1866,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+       if (likely(tx_ring->cpu == cpu))
+               goto out;
++      tx_ring->cpu = cpu;
++      if (rx_ring)
++              rx_ring->cpu = cpu;
++
+       numa_node = cpu_to_node(cpu);
++
++      if (likely(tx_ring->numa_node == numa_node))
++              goto out;
++
+       put_cpu();
+       if (numa_node != NUMA_NO_NODE) {
+               ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
+-              if (rx_ring)
++              tx_ring->numa_node = numa_node;
++              if (rx_ring) {
++                      rx_ring->numa_node = numa_node;
+                       ena_com_update_numa_node(rx_ring->ena_com_io_cq,
+                                                numa_node);
++              }
+       }
+-      tx_ring->cpu = cpu;
+-      if (rx_ring)
+-              rx_ring->cpu = cpu;
+-
+       return;
+ out:
+       put_cpu();
+@@ -1997,11 +2007,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
+                       if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+                               ena_adjust_adaptive_rx_intr_moderation(ena_napi);
++                      ena_update_ring_numa_node(tx_ring, rx_ring);
+                       ena_unmask_interrupt(tx_ring, rx_ring);
+               }
+-              ena_update_ring_numa_node(tx_ring, rx_ring);
+-
+               ret = rx_work_done;
+       } else {
+               ret = budget;
+@@ -2388,7 +2397,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
+       ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
+       ctx.msix_vector = msix_vector;
+       ctx.queue_size = tx_ring->ring_size;
+-      ctx.numa_node = cpu_to_node(tx_ring->cpu);
++      ctx.numa_node = tx_ring->numa_node;
+       rc = ena_com_create_io_queue(ena_dev, &ctx);
+       if (rc) {
+@@ -2456,7 +2465,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
+       ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+       ctx.msix_vector = msix_vector;
+       ctx.queue_size = rx_ring->ring_size;
+-      ctx.numa_node = cpu_to_node(rx_ring->cpu);
++      ctx.numa_node = rx_ring->numa_node;
+       rc = ena_com_create_io_queue(ena_dev, &ctx);
+       if (rc) {
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+index f9d862b630fa..2cb141079474 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+@@ -262,9 +262,11 @@ struct ena_ring {
+       bool disable_meta_caching;
+       u16 no_interrupt_event_cnt;
+-      /* cpu for TPH */
++      /* cpu and NUMA for TPH */
+       int cpu;
+-       /* number of tx/rx_buffer_info's entries */
++      int numa_node;
++
++      /* number of tx/rx_buffer_info's entries */
+       int ring_size;
+       enum ena_admin_placement_policy_type tx_mem_queue_type;
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ena-use-bitmask-to-indicate-packet-redirection.patch b/queue-6.0/net-ena-use-bitmask-to-indicate-packet-redirection.patch
new file mode 100644 (file)
index 0000000..12effad
--- /dev/null
@@ -0,0 +1,193 @@
+From dff90681352b402a59bc9e40aa6a71976262cee8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 07:30:08 +0000
+Subject: net: ena: Use bitmask to indicate packet redirection
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 59811faa2c54dbcf44d575b5a8f6e7077da88dc2 ]
+
+Redirecting packets with XDP Redirect is done in two phases:
+1. A packet is passed by the driver to the kernel using
+   xdp_do_redirect().
+2. After finishing polling for new packets the driver lets the kernel
+   know that it can now process the redirected packet using
+   xdp_do_flush_map().
+   The packets' redirection is handled in the napi context of the
+   queue that called xdp_do_redirect()
+
+To avoid calling xdp_do_flush_map() each time the driver first checks
+whether any packets were redirected, using
+       xdp_flags |= xdp_verdict;
+and
+       if (xdp_flags & XDP_REDIRECT)
+           xdp_do_flush_map()
+
+essentially treating XDP instructions as a bitmask, which isn't the case:
+    enum xdp_action {
+           XDP_ABORTED = 0,
+           XDP_DROP,
+           XDP_PASS,
+           XDP_TX,
+           XDP_REDIRECT,
+    };
+
+Given the current possible values of xdp_action, the current design
+doesn't have a bug (since XDP_REDIRECT = 100b), but it is still
+flawed.
+
+This patch makes the driver use a bitmask instead, to avoid future
+issues.
+
+Fixes: a318c70ad152 ("net: ena: introduce XDP redirect implementation")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 26 ++++++++++++--------
+ drivers/net/ethernet/amazon/ena/ena_netdev.h |  9 +++++++
+ 2 files changed, 25 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 614f27f18164..a27a7963df76 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -374,9 +374,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n,
+ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+ {
++      u32 verdict = ENA_XDP_PASS;
+       struct bpf_prog *xdp_prog;
+       struct ena_ring *xdp_ring;
+-      u32 verdict = XDP_PASS;
+       struct xdp_frame *xdpf;
+       u64 *xdp_stat;
+@@ -393,7 +393,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+               if (unlikely(!xdpf)) {
+                       trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+                       xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+-                      verdict = XDP_ABORTED;
++                      verdict = ENA_XDP_DROP;
+                       break;
+               }
+@@ -409,29 +409,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+               spin_unlock(&xdp_ring->xdp_tx_lock);
+               xdp_stat = &rx_ring->rx_stats.xdp_tx;
++              verdict = ENA_XDP_TX;
+               break;
+       case XDP_REDIRECT:
+               if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
+                       xdp_stat = &rx_ring->rx_stats.xdp_redirect;
++                      verdict = ENA_XDP_REDIRECT;
+                       break;
+               }
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+               xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+-              verdict = XDP_ABORTED;
++              verdict = ENA_XDP_DROP;
+               break;
+       case XDP_ABORTED:
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+               xdp_stat = &rx_ring->rx_stats.xdp_aborted;
++              verdict = ENA_XDP_DROP;
+               break;
+       case XDP_DROP:
+               xdp_stat = &rx_ring->rx_stats.xdp_drop;
++              verdict = ENA_XDP_DROP;
+               break;
+       case XDP_PASS:
+               xdp_stat = &rx_ring->rx_stats.xdp_pass;
++              verdict = ENA_XDP_PASS;
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
+               xdp_stat = &rx_ring->rx_stats.xdp_invalid;
++              verdict = ENA_XDP_DROP;
+       }
+       ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
+@@ -1621,12 +1627,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+        * we expect, then we simply drop it
+        */
+       if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
+-              return XDP_DROP;
++              return ENA_XDP_DROP;
+       ret = ena_xdp_execute(rx_ring, xdp);
+       /* The xdp program might expand the headers */
+-      if (ret == XDP_PASS) {
++      if (ret == ENA_XDP_PASS) {
+               rx_info->page_offset = xdp->data - xdp->data_hard_start;
+               rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
+       }
+@@ -1665,7 +1671,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+       xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
+       do {
+-              xdp_verdict = XDP_PASS;
++              xdp_verdict = ENA_XDP_PASS;
+               skb = NULL;
+               ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
+               ena_rx_ctx.max_bufs = rx_ring->sgl_size;
+@@ -1693,7 +1699,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+                       xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
+               /* allocate skb and fill it */
+-              if (xdp_verdict == XDP_PASS)
++              if (xdp_verdict == ENA_XDP_PASS)
+                       skb = ena_rx_skb(rx_ring,
+                                        rx_ring->ena_bufs,
+                                        ena_rx_ctx.descs,
+@@ -1711,13 +1717,13 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+                               /* Packets was passed for transmission, unmap it
+                                * from RX side.
+                                */
+-                              if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) {
++                              if (xdp_verdict & ENA_XDP_FORWARDED) {
+                                       ena_unmap_rx_buff(rx_ring,
+                                                         &rx_ring->rx_buffer_info[req_id]);
+                                       rx_ring->rx_buffer_info[req_id].page = NULL;
+                               }
+                       }
+-                      if (xdp_verdict != XDP_PASS) {
++                      if (xdp_verdict != ENA_XDP_PASS) {
+                               xdp_flags |= xdp_verdict;
+                               total_len += ena_rx_ctx.ena_bufs[0].len;
+                               res_budget--;
+@@ -1763,7 +1769,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+               ena_refill_rx_bufs(rx_ring, refill_required);
+       }
+-      if (xdp_flags & XDP_REDIRECT)
++      if (xdp_flags & ENA_XDP_REDIRECT)
+               xdp_do_flush_map();
+       return work_done;
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+index 1bdce99bf688..290ae9bf47ee 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+@@ -409,6 +409,15 @@ enum ena_xdp_errors_t {
+       ENA_XDP_NO_ENOUGH_QUEUES,
+ };
++enum ENA_XDP_ACTIONS {
++      ENA_XDP_PASS            = 0,
++      ENA_XDP_TX              = BIT(0),
++      ENA_XDP_REDIRECT        = BIT(1),
++      ENA_XDP_DROP            = BIT(2)
++};
++
++#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
++
+ static inline bool ena_xdp_present(struct ena_adapter *adapter)
+ {
+       return !!adapter->xdp_bpf_prog;
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-hns3-add-interrupts-re-initialization-while-doin.patch b/queue-6.0/net-hns3-add-interrupts-re-initialization-while-doin.patch
new file mode 100644 (file)
index 0000000..663152b
--- /dev/null
@@ -0,0 +1,43 @@
+From f2995246054cbc95170523f7991b12c70ada85c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:43:41 +0800
+Subject: net: hns3: add interrupts re-initialization while doing VF FLR
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit 09e6b30eeb254f1818a008cace3547159e908dfd ]
+
+Currently keep alive message between PF and VF may be lost and the VF is
+unalive in PF. So the VF will not do reset during PF FLR reset process.
+This would make the allocated interrupt resources of VF invalid and VF
+wouldn't receive or respond to PF any more.
+
+So this patch adds VF interrupts re-initialization during VF FLR for VF
+recovery in above cases.
+
+Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+index 26f87330173e..c551508e6932 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -2767,7 +2767,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev)
+       struct pci_dev *pdev = hdev->pdev;
+       int ret = 0;
+-      if (hdev->reset_type == HNAE3_VF_FULL_RESET &&
++      if ((hdev->reset_type == HNAE3_VF_FULL_RESET ||
++           hdev->reset_type == HNAE3_FLR_RESET) &&
+           test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
+               hclgevf_misc_irq_uninit(hdev);
+               hclgevf_uninit_msi(hdev);
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-hns3-fix-miss-l3e-checking-for-rx-packet.patch b/queue-6.0/net-hns3-fix-miss-l3e-checking-for-rx-packet.patch
new file mode 100644 (file)
index 0000000..a10ef42
--- /dev/null
@@ -0,0 +1,69 @@
+From 5288c62c0e0d5b080b8a07550d59b9c4bee8e1f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:43:42 +0800
+Subject: net: hns3: fix miss L3E checking for rx packet
+
+From: Jian Shen <shenjian15@huawei.com>
+
+[ Upstream commit 7d89b53cea1a702f97117fb4361523519bb1e52c ]
+
+For devices that support RXD advanced layout, the driver will
+return directly if the hardware finishes the checksum
+calculation. This causes the L3E check to be missed for IP packets.
+Fix it.
+
+Fixes: 1ddc028ac849 ("net: hns3: refactor out RX completion checksum")
+Signed-off-by: Jian Shen <shenjian15@huawei.com>
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+index 44d4265f109a..d5d7fae354e7 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -3813,18 +3813,16 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
+       return 0;
+ }
+-static bool hns3_checksum_complete(struct hns3_enet_ring *ring,
++static void hns3_checksum_complete(struct hns3_enet_ring *ring,
+                                  struct sk_buff *skb, u32 ptype, u16 csum)
+ {
+       if (ptype == HNS3_INVALID_PTYPE ||
+           hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE)
+-              return false;
++              return;
+       hns3_ring_stats_update(ring, csum_complete);
+       skb->ip_summed = CHECKSUM_COMPLETE;
+       skb->csum = csum_unfold((__force __sum16)csum);
+-
+-      return true;
+ }
+ static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info,
+@@ -3884,8 +3882,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
+               ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M,
+                                       HNS3_RXD_PTYPE_S);
+-      if (hns3_checksum_complete(ring, skb, ptype, csum))
+-              return;
++      hns3_checksum_complete(ring, skb, ptype, csum);
+       /* check if hardware has done checksum */
+       if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
+@@ -3894,6 +3891,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
+       if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
+                                BIT(HNS3_RXD_OL3E_B) |
+                                BIT(HNS3_RXD_OL4E_B)))) {
++              skb->ip_summed = CHECKSUM_NONE;
+               hns3_ring_stats_update(ring, l3l4_csum_err);
+               return;
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch b/queue-6.0/net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch
new file mode 100644 (file)
index 0000000..be9de7e
--- /dev/null
@@ -0,0 +1,134 @@
+From 382d1e8825fdc62e72ca8813d6135b3c13f5fb3d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:43:43 +0800
+Subject: net: hns3: fix VF promisc mode not update when mac table full
+
+From: Jian Shen <shenjian15@huawei.com>
+
+[ Upstream commit 8ee57c7b8406c7aa8ca31e014440c87c6383f429 ]
+
+Currently, the HCLGE_VPORT_STATE_PROMISC_CHANGE flag is not
+set for a VF when vport->overflow_promisc_flags changes.
+So the VF won't check whether to update promisc mode in
+this case. So add it.
+
+Fixes: 1e6e76101fd9 ("net: hns3: configure promisc mode for VF asynchronously")
+Signed-off-by: Jian Shen <shenjian15@huawei.com>
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../hisilicon/hns3/hns3pf/hclge_main.c        | 75 +++++++++++--------
+ 1 file changed, 43 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index 7e8a60f2401c..2a1765eed4c8 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -12536,60 +12536,71 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable)
+       return ret;
+ }
+-static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
++static int hclge_sync_vport_promisc_mode(struct hclge_vport *vport)
+ {
+-      struct hclge_vport *vport = &hdev->vport[0];
+       struct hnae3_handle *handle = &vport->nic;
++      struct hclge_dev *hdev = vport->back;
++      bool uc_en = false;
++      bool mc_en = false;
+       u8 tmp_flags;
++      bool bc_en;
+       int ret;
+-      u16 i;
+       if (vport->last_promisc_flags != vport->overflow_promisc_flags) {
+               set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
+               vport->last_promisc_flags = vport->overflow_promisc_flags;
+       }
+-      if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) {
++      if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
++                              &vport->state))
++              return 0;
++
++      /* for PF */
++      if (!vport->vport_id) {
+               tmp_flags = handle->netdev_flags | vport->last_promisc_flags;
+               ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE,
+                                            tmp_flags & HNAE3_MPE);
+-              if (!ret) {
+-                      clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+-                                &vport->state);
++              if (!ret)
+                       set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
+                               &vport->state);
+-              }
++              else
++                      set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
++                              &vport->state);
++              return ret;
+       }
+-      for (i = 1; i < hdev->num_alloc_vport; i++) {
+-              bool uc_en = false;
+-              bool mc_en = false;
+-              bool bc_en;
++      /* for VF */
++      if (vport->vf_info.trusted) {
++              uc_en = vport->vf_info.request_uc_en > 0 ||
++                      vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE;
++              mc_en = vport->vf_info.request_mc_en > 0 ||
++                      vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE;
++      }
++      bc_en = vport->vf_info.request_bc_en > 0;
+-              vport = &hdev->vport[i];
++      ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en,
++                                       mc_en, bc_en);
++      if (ret) {
++              set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
++              return ret;
++      }
++      hclge_set_vport_vlan_fltr_change(vport);
+-              if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+-                                      &vport->state))
+-                      continue;
++      return 0;
++}
+-              if (vport->vf_info.trusted) {
+-                      uc_en = vport->vf_info.request_uc_en > 0 ||
+-                              vport->overflow_promisc_flags &
+-                              HNAE3_OVERFLOW_UPE;
+-                      mc_en = vport->vf_info.request_mc_en > 0 ||
+-                              vport->overflow_promisc_flags &
+-                              HNAE3_OVERFLOW_MPE;
+-              }
+-              bc_en = vport->vf_info.request_bc_en > 0;
++static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
++{
++      struct hclge_vport *vport;
++      int ret;
++      u16 i;
+-              ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en,
+-                                               mc_en, bc_en);
+-              if (ret) {
+-                      set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+-                              &vport->state);
++      for (i = 0; i < hdev->num_alloc_vport; i++) {
++              vport = &hdev->vport[i];
++
++              ret = hclge_sync_vport_promisc_mode(vport);
++              if (ret)
+                       return;
+-              }
+-              hclge_set_vport_vlan_fltr_change(vport);
+       }
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-hns3-refactor-function-hclge_mbx_handler.patch b/queue-6.0/net-hns3-refactor-function-hclge_mbx_handler.patch
new file mode 100644 (file)
index 0000000..74ae4f8
--- /dev/null
@@ -0,0 +1,496 @@
+From 2d403cf57944bd26da7a418591f46de691ac23af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Sep 2022 10:38:02 +0800
+Subject: net: hns3: refactor function hclge_mbx_handler()
+
+From: Hao Lan <lanhao@huawei.com>
+
+[ Upstream commit 09431ed8de874881e2d5d430042d718ae074d371 ]
+
+Currently, the function hclge_mbx_handler() has too many switch-case
+statements, which makes this function too long. To improve code readability,
+refactor this function and use a lookup table instead.
+
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: fec7352117fa ("net: hns3: refine the handling for VF heartbeat")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/hisilicon/hns3/hclge_mbx.h   |  11 +
+ .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 415 ++++++++++++------
+ 2 files changed, 284 insertions(+), 142 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+index 7d4ae467f3ad..abcd7877f7d2 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+@@ -233,6 +233,17 @@ struct hclgevf_mbx_arq_ring {
+       __le16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
+ };
++struct hclge_dev;
++
++#define HCLGE_MBX_OPCODE_MAX 256
++struct hclge_mbx_ops_param {
++      struct hclge_vport *vport;
++      struct hclge_mbx_vf_to_pf_cmd *req;
++      struct hclge_respond_to_vf_msg *resp_msg;
++};
++
++typedef int (*hclge_mbx_ops_fn)(struct hclge_mbx_ops_param *param);
++
+ #define hclge_mbx_ring_ptr_move_crq(crq) \
+       (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+ #define hclge_mbx_tail_ptr_move_arq(arq) \
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+index e1012f7f9b73..a7b06c63143c 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+@@ -779,17 +779,284 @@ static void hclge_handle_vf_tbl(struct hclge_vport *vport,
+       }
+ }
++static int
++hclge_mbx_map_ring_to_vector_handler(struct hclge_mbx_ops_param *param)
++{
++      return hclge_map_unmap_ring_to_vf_vector(param->vport, true,
++                                               param->req);
++}
++
++static int
++hclge_mbx_unmap_ring_to_vector_handler(struct hclge_mbx_ops_param *param)
++{
++      return hclge_map_unmap_ring_to_vf_vector(param->vport, false,
++                                               param->req);
++}
++
++static int
++hclge_mbx_get_ring_vector_map_handler(struct hclge_mbx_ops_param *param)
++{
++      int ret;
++
++      ret = hclge_get_vf_ring_vector_map(param->vport, param->req,
++                                         param->resp_msg);
++      if (ret)
++              dev_err(&param->vport->back->pdev->dev,
++                      "PF fail(%d) to get VF ring vector map\n",
++                      ret);
++      return ret;
++}
++
++static int hclge_mbx_set_promisc_mode_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_set_vf_promisc_mode(param->vport, param->req);
++      return 0;
++}
++
++static int hclge_mbx_set_unicast_handler(struct hclge_mbx_ops_param *param)
++{
++      int ret;
++
++      ret = hclge_set_vf_uc_mac_addr(param->vport, param->req);
++      if (ret)
++              dev_err(&param->vport->back->pdev->dev,
++                      "PF fail(%d) to set VF UC MAC Addr\n",
++                      ret);
++      return ret;
++}
++
++static int hclge_mbx_set_multicast_handler(struct hclge_mbx_ops_param *param)
++{
++      int ret;
++
++      ret = hclge_set_vf_mc_mac_addr(param->vport, param->req);
++      if (ret)
++              dev_err(&param->vport->back->pdev->dev,
++                      "PF fail(%d) to set VF MC MAC Addr\n",
++                      ret);
++      return ret;
++}
++
++static int hclge_mbx_set_vlan_handler(struct hclge_mbx_ops_param *param)
++{
++      int ret;
++
++      ret = hclge_set_vf_vlan_cfg(param->vport, param->req, param->resp_msg);
++      if (ret)
++              dev_err(&param->vport->back->pdev->dev,
++                      "PF failed(%d) to config VF's VLAN\n",
++                      ret);
++      return ret;
++}
++
++static int hclge_mbx_set_alive_handler(struct hclge_mbx_ops_param *param)
++{
++      int ret;
++
++      ret = hclge_set_vf_alive(param->vport, param->req);
++      if (ret)
++              dev_err(&param->vport->back->pdev->dev,
++                      "PF failed(%d) to set VF's ALIVE\n",
++                      ret);
++      return ret;
++}
++
++static int hclge_mbx_get_qinfo_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_get_vf_queue_info(param->vport, param->resp_msg);
++      return 0;
++}
++
++static int hclge_mbx_get_qdepth_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_get_vf_queue_depth(param->vport, param->resp_msg);
++      return 0;
++}
++
++static int hclge_mbx_get_basic_info_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_get_basic_info(param->vport, param->resp_msg);
++      return 0;
++}
++
++static int hclge_mbx_get_link_status_handler(struct hclge_mbx_ops_param *param)
++{
++      int ret;
++
++      ret = hclge_push_vf_link_status(param->vport);
++      if (ret)
++              dev_err(&param->vport->back->pdev->dev,
++                      "failed to inform link stat to VF, ret = %d\n",
++                      ret);
++      return ret;
++}
++
++static int hclge_mbx_queue_reset_handler(struct hclge_mbx_ops_param *param)
++{
++      return hclge_mbx_reset_vf_queue(param->vport, param->req,
++                                      param->resp_msg);
++}
++
++static int hclge_mbx_reset_handler(struct hclge_mbx_ops_param *param)
++{
++      return hclge_reset_vf(param->vport);
++}
++
++static int hclge_mbx_keep_alive_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_vf_keep_alive(param->vport);
++      return 0;
++}
++
++static int hclge_mbx_set_mtu_handler(struct hclge_mbx_ops_param *param)
++{
++      int ret;
++
++      ret = hclge_set_vf_mtu(param->vport, param->req);
++      if (ret)
++              dev_err(&param->vport->back->pdev->dev,
++                      "VF fail(%d) to set mtu\n", ret);
++      return ret;
++}
++
++static int hclge_mbx_get_qid_in_pf_handler(struct hclge_mbx_ops_param *param)
++{
++      return hclge_get_queue_id_in_pf(param->vport, param->req,
++                                      param->resp_msg);
++}
++
++static int hclge_mbx_get_rss_key_handler(struct hclge_mbx_ops_param *param)
++{
++      return hclge_get_rss_key(param->vport, param->req, param->resp_msg);
++}
++
++static int hclge_mbx_get_link_mode_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_get_link_mode(param->vport, param->req);
++      return 0;
++}
++
++static int
++hclge_mbx_get_vf_flr_status_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_rm_vport_all_mac_table(param->vport, false,
++                                   HCLGE_MAC_ADDR_UC);
++      hclge_rm_vport_all_mac_table(param->vport, false,
++                                   HCLGE_MAC_ADDR_MC);
++      hclge_rm_vport_all_vlan_table(param->vport, false);
++      return 0;
++}
++
++static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_rm_vport_all_mac_table(param->vport, true,
++                                   HCLGE_MAC_ADDR_UC);
++      hclge_rm_vport_all_mac_table(param->vport, true,
++                                   HCLGE_MAC_ADDR_MC);
++      hclge_rm_vport_all_vlan_table(param->vport, true);
++      return 0;
++}
++
++static int hclge_mbx_get_media_type_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_get_vf_media_type(param->vport, param->resp_msg);
++      return 0;
++}
++
++static int hclge_mbx_push_link_status_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_handle_link_change_event(param->vport->back, param->req);
++      return 0;
++}
++
++static int hclge_mbx_get_mac_addr_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_get_vf_mac_addr(param->vport, param->resp_msg);
++      return 0;
++}
++
++static int hclge_mbx_ncsi_error_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_handle_ncsi_error(param->vport->back);
++      return 0;
++}
++
++static int hclge_mbx_handle_vf_tbl_handler(struct hclge_mbx_ops_param *param)
++{
++      hclge_handle_vf_tbl(param->vport, param->req);
++      return 0;
++}
++
++static const hclge_mbx_ops_fn hclge_mbx_ops_list[HCLGE_MBX_OPCODE_MAX] = {
++      [HCLGE_MBX_RESET]   = hclge_mbx_reset_handler,
++      [HCLGE_MBX_SET_UNICAST] = hclge_mbx_set_unicast_handler,
++      [HCLGE_MBX_SET_MULTICAST] = hclge_mbx_set_multicast_handler,
++      [HCLGE_MBX_SET_VLAN] = hclge_mbx_set_vlan_handler,
++      [HCLGE_MBX_MAP_RING_TO_VECTOR] = hclge_mbx_map_ring_to_vector_handler,
++      [HCLGE_MBX_UNMAP_RING_TO_VECTOR] = hclge_mbx_unmap_ring_to_vector_handler,
++      [HCLGE_MBX_SET_PROMISC_MODE] = hclge_mbx_set_promisc_mode_handler,
++      [HCLGE_MBX_GET_QINFO] = hclge_mbx_get_qinfo_handler,
++      [HCLGE_MBX_GET_QDEPTH] = hclge_mbx_get_qdepth_handler,
++      [HCLGE_MBX_GET_BASIC_INFO] = hclge_mbx_get_basic_info_handler,
++      [HCLGE_MBX_GET_RSS_KEY] = hclge_mbx_get_rss_key_handler,
++      [HCLGE_MBX_GET_MAC_ADDR] = hclge_mbx_get_mac_addr_handler,
++      [HCLGE_MBX_GET_LINK_STATUS] = hclge_mbx_get_link_status_handler,
++      [HCLGE_MBX_QUEUE_RESET] = hclge_mbx_queue_reset_handler,
++      [HCLGE_MBX_KEEP_ALIVE] = hclge_mbx_keep_alive_handler,
++      [HCLGE_MBX_SET_ALIVE] = hclge_mbx_set_alive_handler,
++      [HCLGE_MBX_SET_MTU] = hclge_mbx_set_mtu_handler,
++      [HCLGE_MBX_GET_QID_IN_PF] = hclge_mbx_get_qid_in_pf_handler,
++      [HCLGE_MBX_GET_LINK_MODE] = hclge_mbx_get_link_mode_handler,
++      [HCLGE_MBX_GET_MEDIA_TYPE] = hclge_mbx_get_media_type_handler,
++      [HCLGE_MBX_VF_UNINIT] = hclge_mbx_vf_uninit_handler,
++      [HCLGE_MBX_HANDLE_VF_TBL] = hclge_mbx_handle_vf_tbl_handler,
++      [HCLGE_MBX_GET_RING_VECTOR_MAP] = hclge_mbx_get_ring_vector_map_handler,
++      [HCLGE_MBX_GET_VF_FLR_STATUS] = hclge_mbx_get_vf_flr_status_handler,
++      [HCLGE_MBX_PUSH_LINK_STATUS] = hclge_mbx_push_link_status_handler,
++      [HCLGE_MBX_NCSI_ERROR] = hclge_mbx_ncsi_error_handler,
++};
++
++static void hclge_mbx_request_handling(struct hclge_mbx_ops_param *param)
++{
++      hclge_mbx_ops_fn cmd_func = NULL;
++      struct hclge_dev *hdev;
++      int ret = 0;
++
++      hdev = param->vport->back;
++      cmd_func = hclge_mbx_ops_list[param->req->msg.code];
++      if (cmd_func)
++              ret = cmd_func(param);
++      else
++              dev_err(&hdev->pdev->dev,
++                      "un-supported mailbox message, code = %u\n",
++                      param->req->msg.code);
++
++      /* PF driver should not reply IMP */
++      if (hnae3_get_bit(param->req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B) &&
++          param->req->msg.code < HCLGE_MBX_GET_VF_FLR_STATUS) {
++              param->resp_msg->status = ret;
++              if (time_is_before_jiffies(hdev->last_mbx_scheduled +
++                                         HCLGE_MBX_SCHED_TIMEOUT))
++                      dev_warn(&hdev->pdev->dev,
++                               "resp vport%u mbx(%u,%u) late\n",
++                               param->req->mbx_src_vfid,
++                               param->req->msg.code,
++                               param->req->msg.subcode);
++
++              hclge_gen_resp_to_vf(param->vport, param->req, param->resp_msg);
++      }
++}
++
+ void hclge_mbx_handler(struct hclge_dev *hdev)
+ {
+       struct hclge_comm_cmq_ring *crq = &hdev->hw.hw.cmq.crq;
+       struct hclge_respond_to_vf_msg resp_msg;
+       struct hclge_mbx_vf_to_pf_cmd *req;
+-      struct hclge_vport *vport;
++      struct hclge_mbx_ops_param param;
+       struct hclge_desc *desc;
+-      bool is_del = false;
+       unsigned int flag;
+-      int ret = 0;
++      param.resp_msg = &resp_msg;
+       /* handle all the mailbox requests in the queue */
+       while (!hclge_cmd_crq_empty(&hdev->hw)) {
+               if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE,
+@@ -814,152 +1081,16 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
+                       continue;
+               }
+-              vport = &hdev->vport[req->mbx_src_vfid];
+-
+               trace_hclge_pf_mbx_get(hdev, req);
+               /* clear the resp_msg before processing every mailbox message */
+               memset(&resp_msg, 0, sizeof(resp_msg));
+-
+-              switch (req->msg.code) {
+-              case HCLGE_MBX_MAP_RING_TO_VECTOR:
+-                      ret = hclge_map_unmap_ring_to_vf_vector(vport, true,
+-                                                              req);
+-                      break;
+-              case HCLGE_MBX_UNMAP_RING_TO_VECTOR:
+-                      ret = hclge_map_unmap_ring_to_vf_vector(vport, false,
+-                                                              req);
+-                      break;
+-              case HCLGE_MBX_GET_RING_VECTOR_MAP:
+-                      ret = hclge_get_vf_ring_vector_map(vport, req,
+-                                                         &resp_msg);
+-                      if (ret)
+-                              dev_err(&hdev->pdev->dev,
+-                                      "PF fail(%d) to get VF ring vector map\n",
+-                                      ret);
+-                      break;
+-              case HCLGE_MBX_SET_PROMISC_MODE:
+-                      hclge_set_vf_promisc_mode(vport, req);
+-                      break;
+-              case HCLGE_MBX_SET_UNICAST:
+-                      ret = hclge_set_vf_uc_mac_addr(vport, req);
+-                      if (ret)
+-                              dev_err(&hdev->pdev->dev,
+-                                      "PF fail(%d) to set VF UC MAC Addr\n",
+-                                      ret);
+-                      break;
+-              case HCLGE_MBX_SET_MULTICAST:
+-                      ret = hclge_set_vf_mc_mac_addr(vport, req);
+-                      if (ret)
+-                              dev_err(&hdev->pdev->dev,
+-                                      "PF fail(%d) to set VF MC MAC Addr\n",
+-                                      ret);
+-                      break;
+-              case HCLGE_MBX_SET_VLAN:
+-                      ret = hclge_set_vf_vlan_cfg(vport, req, &resp_msg);
+-                      if (ret)
+-                              dev_err(&hdev->pdev->dev,
+-                                      "PF failed(%d) to config VF's VLAN\n",
+-                                      ret);
+-                      break;
+-              case HCLGE_MBX_SET_ALIVE:
+-                      ret = hclge_set_vf_alive(vport, req);
+-                      if (ret)
+-                              dev_err(&hdev->pdev->dev,
+-                                      "PF failed(%d) to set VF's ALIVE\n",
+-                                      ret);
+-                      break;
+-              case HCLGE_MBX_GET_QINFO:
+-                      hclge_get_vf_queue_info(vport, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_GET_QDEPTH:
+-                      hclge_get_vf_queue_depth(vport, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_GET_BASIC_INFO:
+-                      hclge_get_basic_info(vport, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_GET_LINK_STATUS:
+-                      ret = hclge_push_vf_link_status(vport);
+-                      if (ret)
+-                              dev_err(&hdev->pdev->dev,
+-                                      "failed to inform link stat to VF, ret = %d\n",
+-                                      ret);
+-                      break;
+-              case HCLGE_MBX_QUEUE_RESET:
+-                      ret = hclge_mbx_reset_vf_queue(vport, req, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_RESET:
+-                      ret = hclge_reset_vf(vport);
+-                      break;
+-              case HCLGE_MBX_KEEP_ALIVE:
+-                      hclge_vf_keep_alive(vport);
+-                      break;
+-              case HCLGE_MBX_SET_MTU:
+-                      ret = hclge_set_vf_mtu(vport, req);
+-                      if (ret)
+-                              dev_err(&hdev->pdev->dev,
+-                                      "VF fail(%d) to set mtu\n", ret);
+-                      break;
+-              case HCLGE_MBX_GET_QID_IN_PF:
+-                      ret = hclge_get_queue_id_in_pf(vport, req, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_GET_RSS_KEY:
+-                      ret = hclge_get_rss_key(vport, req, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_GET_LINK_MODE:
+-                      hclge_get_link_mode(vport, req);
+-                      break;
+-              case HCLGE_MBX_GET_VF_FLR_STATUS:
+-              case HCLGE_MBX_VF_UNINIT:
+-                      is_del = req->msg.code == HCLGE_MBX_VF_UNINIT;
+-                      hclge_rm_vport_all_mac_table(vport, is_del,
+-                                                   HCLGE_MAC_ADDR_UC);
+-                      hclge_rm_vport_all_mac_table(vport, is_del,
+-                                                   HCLGE_MAC_ADDR_MC);
+-                      hclge_rm_vport_all_vlan_table(vport, is_del);
+-                      break;
+-              case HCLGE_MBX_GET_MEDIA_TYPE:
+-                      hclge_get_vf_media_type(vport, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_PUSH_LINK_STATUS:
+-                      hclge_handle_link_change_event(hdev, req);
+-                      break;
+-              case HCLGE_MBX_GET_MAC_ADDR:
+-                      hclge_get_vf_mac_addr(vport, &resp_msg);
+-                      break;
+-              case HCLGE_MBX_NCSI_ERROR:
+-                      hclge_handle_ncsi_error(hdev);
+-                      break;
+-              case HCLGE_MBX_HANDLE_VF_TBL:
+-                      hclge_handle_vf_tbl(vport, req);
+-                      break;
+-              default:
+-                      dev_err(&hdev->pdev->dev,
+-                              "un-supported mailbox message, code = %u\n",
+-                              req->msg.code);
+-                      break;
+-              }
+-
+-              /* PF driver should not reply IMP */
+-              if (hnae3_get_bit(req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B) &&
+-                  req->msg.code < HCLGE_MBX_GET_VF_FLR_STATUS) {
+-                      resp_msg.status = ret;
+-                      if (time_is_before_jiffies(hdev->last_mbx_scheduled +
+-                                                 HCLGE_MBX_SCHED_TIMEOUT))
+-                              dev_warn(&hdev->pdev->dev,
+-                                       "resp vport%u mbx(%u,%u) late\n",
+-                                       req->mbx_src_vfid,
+-                                       req->msg.code,
+-                                       req->msg.subcode);
+-
+-                      hclge_gen_resp_to_vf(vport, req, &resp_msg);
+-              }
++              param.vport = &hdev->vport[req->mbx_src_vfid];
++              param.req = req;
++              hclge_mbx_request_handling(&param);
+               crq->desc[crq->next_to_use].flag = 0;
+               hclge_mbx_ring_ptr_move_crq(crq);
+-
+-              /* reinitialize ret after complete the mbx message processing */
+-              ret = 0;
+       }
+       /* Write back CMDQ_RQ header pointer, M7 need this pointer */
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-hns3-refine-the-handling-for-vf-heartbeat.patch b/queue-6.0/net-hns3-refine-the-handling-for-vf-heartbeat.patch
new file mode 100644 (file)
index 0000000..e2ad9f9
--- /dev/null
@@ -0,0 +1,311 @@
+From 5b10470fcedb53856ff29deec68a4660f8b1a720 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 14:27:49 +0800
+Subject: net: hns3: refine the handling for VF heartbeat
+
+From: Jian Shen <shenjian15@huawei.com>
+
+[ Upstream commit fec7352117fa301bfbc31bacc14bb9a579376b36 ]
+
+Currently, the PF checks whether a VF is alive by the KEEP_ALIVE
+mailbox from the VF. The VF keeps sending the mailbox every 2
+seconds. Once the PF has lost the mailbox for more than 8
+seconds, it regards the VF as abnormal, and stops notifying
+the VF of state changes, including link state, VF MAC and reset,
+even if it receives the KEEP_ALIVE mailbox again. This is
+unreasonable.
+
+This patch fixes it. The PF records the state changes that the VF
+needs to be notified of while the VF's KEEP_ALIVE mailbox is lost,
+and notifies the VF when it receives the mailbox again. Introduce a
+new flag, HCLGE_VPORT_STATE_INITED, to distinguish whether the VF
+driver is loaded or not: the VF queries these states when
+initializing, so it is unnecessary to notify it before the VF
+driver is loaded.
+
+Fixes: aa5c4f175be6 ("net: hns3: add reset handling for VF when doing PF reset")
+Signed-off-by: Jian Shen <shenjian15@huawei.com>
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../hisilicon/hns3/hns3pf/hclge_main.c        | 57 +++++++++++----
+ .../hisilicon/hns3/hns3pf/hclge_main.h        |  7 ++
+ .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 71 ++++++++++++++++---
+ 3 files changed, 112 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index 2a1765eed4c8..d2dde3f1fb88 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -3713,9 +3713,17 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
+                       return ret;
+               }
+-              if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
++              if (!reset ||
++                  !test_bit(HCLGE_VPORT_STATE_INITED, &vport->state))
+                       continue;
++              if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) &&
++                  hdev->reset_type == HNAE3_FUNC_RESET) {
++                      set_bit(HCLGE_VPORT_NEED_NOTIFY_RESET,
++                              &vport->need_notify);
++                      continue;
++              }
++
+               /* Inform VF to process the reset.
+                * hclge_inform_reset_assert_to_vf may fail if VF
+                * driver is not loaded.
+@@ -4412,18 +4420,25 @@ static void hclge_reset_service_task(struct hclge_dev *hdev)
+ static void hclge_update_vport_alive(struct hclge_dev *hdev)
+ {
++#define HCLGE_ALIVE_SECONDS_NORMAL            8
++
++      unsigned long alive_time = HCLGE_ALIVE_SECONDS_NORMAL * HZ;
+       int i;
+       /* start from vport 1 for PF is always alive */
+       for (i = 1; i < hdev->num_alloc_vport; i++) {
+               struct hclge_vport *vport = &hdev->vport[i];
+-              if (time_after(jiffies, vport->last_active_jiffies + 8 * HZ))
++              if (!test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) ||
++                  !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
++                      continue;
++              if (time_after(jiffies, vport->last_active_jiffies +
++                             alive_time)) {
+                       clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+-
+-              /* If vf is not alive, set to default value */
+-              if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+-                      vport->mps = HCLGE_MAC_DEFAULT_FRAME;
++                      dev_warn(&hdev->pdev->dev,
++                               "VF %u heartbeat timeout\n",
++                               i - HCLGE_VF_VPORT_START_NUM);
++              }
+       }
+ }
+@@ -7853,9 +7868,11 @@ int hclge_vport_start(struct hclge_vport *vport)
+ {
+       struct hclge_dev *hdev = vport->back;
++      set_bit(HCLGE_VPORT_STATE_INITED, &vport->state);
+       set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+       set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
+       vport->last_active_jiffies = jiffies;
++      vport->need_notify = 0;
+       if (test_bit(vport->vport_id, hdev->vport_config_block)) {
+               if (vport->vport_id) {
+@@ -7873,7 +7890,9 @@ int hclge_vport_start(struct hclge_vport *vport)
+ void hclge_vport_stop(struct hclge_vport *vport)
+ {
++      clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state);
+       clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
++      vport->need_notify = 0;
+ }
+ static int hclge_client_start(struct hnae3_handle *handle)
+@@ -8997,7 +9016,8 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf,
+               return 0;
+       }
+-      dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n",
++      dev_info(&hdev->pdev->dev,
++               "MAC of VF %d has been set to %s, will be active after VF reset\n",
+                vf, format_mac_addr);
+       return 0;
+ }
+@@ -10254,12 +10274,16 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
+        * for DEVICE_VERSION_V3, vf doesn't need to know about the port based
+        * VLAN state.
+        */
+-      if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 &&
+-          test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+-              (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
+-                                                      vport->vport_id,
+-                                                      state, &vlan_info);
+-
++      if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) {
++              if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
++                      (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
++                                                              vport->vport_id,
++                                                              state,
++                                                              &vlan_info);
++              else
++                      set_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN,
++                              &vport->need_notify);
++      }
+       return 0;
+ }
+@@ -11723,7 +11747,7 @@ static void hclge_reset_vport_state(struct hclge_dev *hdev)
+       int i;
+       for (i = 0; i < hdev->num_alloc_vport; i++) {
+-              hclge_vport_stop(vport);
++              clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+               vport++;
+       }
+ }
+@@ -12737,6 +12761,11 @@ static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid)
+       struct hclge_vlan_info vlan_info;
+       int ret;
++      clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state);
++      clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
++      vport->need_notify = 0;
++      vport->mps = 0;
++
+       /* after disable sriov, clean VF rate configured by PF */
+       ret = hclge_tm_qs_shaper_cfg(vport, 0);
+       if (ret)
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+index 18caddd541f8..14473e29fe03 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+@@ -972,9 +972,15 @@ enum HCLGE_VPORT_STATE {
+       HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
+       HCLGE_VPORT_STATE_PROMISC_CHANGE,
+       HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
++      HCLGE_VPORT_STATE_INITED,
+       HCLGE_VPORT_STATE_MAX
+ };
++enum HCLGE_VPORT_NEED_NOTIFY {
++      HCLGE_VPORT_NEED_NOTIFY_RESET,
++      HCLGE_VPORT_NEED_NOTIFY_VF_VLAN,
++};
++
+ struct hclge_vlan_info {
+       u16 vlan_proto; /* so far support 802.1Q only */
+       u16 qos;
+@@ -1021,6 +1027,7 @@ struct hclge_vport {
+       struct hnae3_handle roce;
+       unsigned long state;
++      unsigned long need_notify;
+       unsigned long last_active_jiffies;
+       u32 mps; /* Max packet size */
+       struct hclge_vf_info vf_info;
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+index a7b06c63143c..04ff9bf12185 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+@@ -124,17 +124,26 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
+       return status;
+ }
++static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type)
++{
++      __le16 msg_data;
++      u8 dest_vfid;
++
++      dest_vfid = (u8)vport->vport_id;
++      msg_data = cpu_to_le16(reset_type);
++
++      /* send this requested info to VF */
++      return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data),
++                                HCLGE_MBX_ASSERTING_RESET, dest_vfid);
++}
++
+ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+ {
+       struct hclge_dev *hdev = vport->back;
+-      __le16 msg_data;
+       u16 reset_type;
+-      u8 dest_vfid;
+       BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX);
+-      dest_vfid = (u8)vport->vport_id;
+-
+       if (hdev->reset_type == HNAE3_FUNC_RESET)
+               reset_type = HNAE3_VF_PF_FUNC_RESET;
+       else if (hdev->reset_type == HNAE3_FLR_RESET)
+@@ -142,11 +151,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+       else
+               reset_type = HNAE3_VF_FUNC_RESET;
+-      msg_data = cpu_to_le16(reset_type);
+-
+-      /* send this requested info to VF */
+-      return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data),
+-                                HCLGE_MBX_ASSERTING_RESET, dest_vfid);
++      return hclge_inform_vf_reset(vport, reset_type);
+ }
+ static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
+@@ -652,9 +657,56 @@ static int hclge_reset_vf(struct hclge_vport *vport)
+       return hclge_func_reset_cmd(hdev, vport->vport_id);
+ }
++static void hclge_notify_vf_config(struct hclge_vport *vport)
++{
++      struct hclge_dev *hdev = vport->back;
++      struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
++      struct hclge_port_base_vlan_config *vlan_cfg;
++      int ret;
++
++      hclge_push_vf_link_status(vport);
++      if (test_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, &vport->need_notify)) {
++              ret = hclge_inform_vf_reset(vport, HNAE3_VF_PF_FUNC_RESET);
++              if (ret) {
++                      dev_err(&hdev->pdev->dev,
++                              "failed to inform VF %u reset!",
++                              vport->vport_id - HCLGE_VF_VPORT_START_NUM);
++                      return;
++              }
++              vport->need_notify = 0;
++              return;
++      }
++
++      if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 &&
++          test_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify)) {
++              vlan_cfg = &vport->port_base_vlan_cfg;
++              ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
++                                                      vport->vport_id,
++                                                      vlan_cfg->state,
++                                                      &vlan_cfg->vlan_info);
++              if (ret) {
++                      dev_err(&hdev->pdev->dev,
++                              "failed to inform VF %u port base vlan!",
++                              vport->vport_id - HCLGE_VF_VPORT_START_NUM);
++                      return;
++              }
++              clear_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify);
++      }
++}
++
+ static void hclge_vf_keep_alive(struct hclge_vport *vport)
+ {
++      struct hclge_dev *hdev = vport->back;
++
+       vport->last_active_jiffies = jiffies;
++
++      if (test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) &&
++          !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) {
++              set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
++              dev_info(&hdev->pdev->dev, "VF %u is alive!",
++                       vport->vport_id - HCLGE_VF_VPORT_START_NUM);
++              hclge_notify_vf_config(vport);
++      }
+ }
+ static int hclge_set_vf_mtu(struct hclge_vport *vport,
+@@ -954,6 +1006,7 @@ static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param)
+       hclge_rm_vport_all_mac_table(param->vport, true,
+                                    HCLGE_MAC_ADDR_MC);
+       hclge_rm_vport_all_vlan_table(param->vport, true);
++      param->vport->mps = 0;
+       return 0;
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-lan966x-fix-configuration-of-the-pcs.patch b/queue-6.0/net-lan966x-fix-configuration-of-the-pcs.patch
new file mode 100644 (file)
index 0000000..2eedd08
--- /dev/null
@@ -0,0 +1,49 @@
+From 947cf1fafe5e7f397d8544e2137610e6ae9833c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Dec 2022 10:33:15 +0100
+Subject: net: lan966x: Fix configuration of the PCS
+
+From: Horatiu Vultur <horatiu.vultur@microchip.com>
+
+[ Upstream commit d717f9474e3fb7e6bd3e43ca16e131f04320ed6f ]
+
+When the PCS was taken out of reset, we were also changing the speed
+to 100 Mbit by mistake. If the link then went down, the link-up
+routine set the link speed correctly. But if the link did not go
+down, the speed was forced to 100 Mbit even if the configured
+speed was something else.
+On lan966x, to set the link speed to 1G or 2.5G a value of 1 needs to be
+written in DEV_CLOCK_CFG_LINK_SPEED. This is similar to the procedure in
+lan966x_port_init.
+
+The issue was reproduced using 1000base-x sfp module using the commands:
+ip link set dev eth2 up
+ip link addr add 10.97.10.2/24 dev eth2
+ethtool -s eth2 speed 1000 autoneg off
+
+Fixes: d28d6d2e37d1 ("net: lan966x: add port module support")
+Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
+Reviewed-by: Piotr Raczynski <piotr.raczynski@intel.com>
+Link: https://lore.kernel.org/r/20221221093315.939133-1-horatiu.vultur@microchip.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
+index f141644e4372..26c5cdf373c4 100644
+--- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
++++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
+@@ -369,7 +369,7 @@ int lan966x_port_pcs_set(struct lan966x_port *port,
+       }
+       /* Take PCS out of reset */
+-      lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(2) |
++      lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(LAN966X_SPEED_1000) |
+               DEV_CLOCK_CFG_PCS_RX_RST_SET(0) |
+               DEV_CLOCK_CFG_PCS_TX_RST_SET(0),
+               DEV_CLOCK_CFG_LINK_SPEED |
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch b/queue-6.0/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch
new file mode 100644 (file)
index 0000000..6a8d734
--- /dev/null
@@ -0,0 +1,39 @@
+From 07fa766665f5fc66df094c5b7c03efb1c4de10f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 12:51:52 +0200
+Subject: net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error
+ path
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 2a35b2c2e6a252eda2134aae6a756861d9299531 ]
+
+Two cleanup calls are missing from the mlx5_init_once() error path.
+Add them so that the error path follows the same flow as
+mlx5_cleanup_once().
+
+Fixes: 52ec462eca9b ("net/mlx5: Add reserved-gids support")
+Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section")
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index ac178796e484..dcc1e1b404e3 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1017,6 +1017,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
+ err_tables_cleanup:
+       mlx5_geneve_destroy(dev->geneve);
+       mlx5_vxlan_destroy(dev->vxlan);
++      mlx5_cleanup_clock(dev);
++      mlx5_cleanup_reserved_gids(dev);
+       mlx5_cq_debugfs_cleanup(dev);
+       mlx5_fw_reset_cleanup(dev);
+ err_events_cleanup:
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5-avoid-recovery-in-probe-flows.patch b/queue-6.0/net-mlx5-avoid-recovery-in-probe-flows.patch
new file mode 100644 (file)
index 0000000..41765a4
--- /dev/null
@@ -0,0 +1,53 @@
+From 39103fe19090f80461b0dfee0b2095335428cd29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 13:34:12 +0200
+Subject: net/mlx5: Avoid recovery in probe flows
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 9078e843efec530f279a155f262793c58b0746bd ]
+
+Currently, recovery is done without considering whether the device is
+still in probe flow.
+This may lead to recovery before device have finished probed
+successfully. e.g.: while mlx5_init_one() is running. Recovery flow is
+using functionality that is loaded only by mlx5_init_one(), and there
+is no point in running recovery without mlx5_init_one() finished
+successfully.
+
+Fix it by waiting for probe flow to finish and checking whether the
+device is probed before trying to perform recovery.
+
+[ Deviates from the upstream commit: intf_state_mutex is also released on
+  the early-return path, which would otherwise leave it locked. ]
+
+Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/health.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+index 2cf2c9948446..0ed239eadf39 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -674,6 +674,13 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+       dev = container_of(priv, struct mlx5_core_dev, priv);
+       devlink = priv_to_devlink(dev);
++      mutex_lock(&dev->intf_state_mutex);
++      if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
++              mlx5_core_err(dev, "health works are not permitted at this stage\n");
++              mutex_unlock(&dev->intf_state_mutex);
++              return;
++      }
++      mutex_unlock(&dev->intf_state_mutex);
+       enter_error_state(dev, false);
+       if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+               devl_lock(devlink);
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch b/queue-6.0/net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch
new file mode 100644 (file)
index 0000000..4575a1f
--- /dev/null
@@ -0,0 +1,261 @@
+From 22a055be372f39e9659eb1b98a6ab78be6426f9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Dec 2022 10:42:15 +0200
+Subject: net/mlx5: E-Switch, properly handle ingress tagged packets on VST
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit 1f0ae22ab470946143485a02cc1cd7e05c0f9120 ]
+
+Fix SRIOV VST mode behavior to insert cvlan when a guest tag is already
+present in the frame. Previous VST mode behavior was to drop packets or
+override existing tag, depending on the device version.
+
+In this patch we fix this behavior by correctly building the HW steering
+rule with a push vlan action, or for older devices we ask the FW to stack
+the vlan when a vlan is already present.
+
+Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes")
+Fixes: dfcb1ed3c331 ("net/mlx5: E-Switch, Vport ingress/egress ACLs rules for VST mode")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/esw/acl/egress_lgcy.c  |  7 +++-
+ .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 33 ++++++++++++++++---
+ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 30 ++++++++++++-----
+ .../net/ethernet/mellanox/mlx5/core/eswitch.h |  6 ++++
+ include/linux/mlx5/device.h                   |  5 +++
+ include/linux/mlx5/mlx5_ifc.h                 |  3 +-
+ 6 files changed, 68 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+index 60a73990017c..6b4c9ffad95b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+@@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
+ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+                             struct mlx5_vport *vport)
+ {
++      bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+       struct mlx5_flow_destination drop_ctr_dst = {};
+       struct mlx5_flow_destination *dst = NULL;
+       struct mlx5_fc *drop_counter = NULL;
+@@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+        */
+       int table_size = 2;
+       int dest_num = 0;
++      int actions_flag;
+       int err = 0;
+       if (vport->egress.legacy.drop_counter) {
+@@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+                 vport->vport, vport->info.vlan, vport->info.qos);
+       /* Allowed vlan rule */
++      actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
++      if (vst_mode_steering)
++              actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+       err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
+-                                       MLX5_FLOW_CONTEXT_ACTION_ALLOW);
++                                       actions_flag);
+       if (err)
+               goto out;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+index b1a5199260f6..093ed86a0acd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+@@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
+ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+                              struct mlx5_vport *vport)
+ {
++      bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+       struct mlx5_flow_destination drop_ctr_dst = {};
+       struct mlx5_flow_destination *dst = NULL;
+       struct mlx5_flow_act flow_act = {};
+       struct mlx5_flow_spec *spec = NULL;
+       struct mlx5_fc *counter = NULL;
++      bool vst_check_cvlan = false;
++      bool vst_push_cvlan = false;
+       /* The ingress acl table contains 4 groups
+        * (2 active rules at the same time -
+        *      1 allow rule from one of the first 3 groups.
+@@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+               goto out;
+       }
+-      if (vport->info.vlan || vport->info.qos)
++      if ((vport->info.vlan || vport->info.qos)) {
++              if (vst_mode_steering)
++                      vst_push_cvlan = true;
++              else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always))
++                      vst_check_cvlan = true;
++      }
++
++      if (vst_check_cvlan || vport->info.spoofchk)
++              spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
++
++      /* Create ingress allow rule */
++      flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
++      if (vst_push_cvlan) {
++              flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
++              flow_act.vlan[0].prio = vport->info.qos;
++              flow_act.vlan[0].vid = vport->info.vlan;
++              flow_act.vlan[0].ethtype = ETH_P_8021Q;
++      }
++
++      if (vst_check_cvlan)
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+                                outer_headers.cvlan_tag);
+@@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+               ether_addr_copy(smac_v, vport->info.mac);
+       }
+-      /* Create ingress allow rule */
+-      spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+-      flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+       vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+                                                       &flow_act, NULL, 0);
+       if (IS_ERR(vport->ingress.allow_rule)) {
+@@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+               goto out;
+       }
++      if (!vst_check_cvlan && !vport->info.spoofchk)
++              goto out;
++
+       memset(&flow_act, 0, sizeof(flow_act));
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+       /* Attach drop flow counter */
+@@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+       return 0;
+ out:
+-      esw_acl_ingress_lgcy_cleanup(esw, vport);
++      if (err)
++              esw_acl_ingress_lgcy_cleanup(esw, vport);
+       kvfree(spec);
+       return err;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 59cffa49e4b5..940e893f3f09 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
+                        esw_vport_context.vport_cvlan_strip, 1);
+       if (set_flags & SET_VLAN_INSERT) {
+-              /* insert only if no vlan in packet */
+-              MLX5_SET(modify_esw_vport_context_in, in,
+-                       esw_vport_context.vport_cvlan_insert, 1);
+-
++              if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) {
++                      /* insert either if vlan exist in packet or not */
++                      MLX5_SET(modify_esw_vport_context_in, in,
++                               esw_vport_context.vport_cvlan_insert,
++                               MLX5_VPORT_CVLAN_INSERT_ALWAYS);
++              } else {
++                      /* insert only if no vlan in packet */
++                      MLX5_SET(modify_esw_vport_context_in, in,
++                               esw_vport_context.vport_cvlan_insert,
++                               MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN);
++              }
+               MLX5_SET(modify_esw_vport_context_in, in,
+                        esw_vport_context.cvlan_pcp, qos);
+               MLX5_SET(modify_esw_vport_context_in, in,
+@@ -774,6 +781,7 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
+ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+ {
++      bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+       u16 vport_num = vport->vport;
+       int flags;
+       int err;
+@@ -800,8 +808,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+       flags = (vport->info.vlan || vport->info.qos) ?
+               SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
+-      modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
+-                             vport->info.qos, flags);
++      if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering)
++              modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
++                                     vport->info.qos, flags);
+       return 0;
+ }
+@@ -1806,6 +1815,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+                                 u16 vport, u16 vlan, u8 qos, u8 set_flags)
+ {
+       struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
++      bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+       int err = 0;
+       if (IS_ERR(evport))
+@@ -1813,9 +1823,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+       if (vlan > 4095 || qos > 7)
+               return -EINVAL;
+-      err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+-      if (err)
+-              return err;
++      if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) {
++              err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
++              if (err)
++                      return err;
++      }
+       evport->info.vlan = vlan;
+       evport->info.qos = qos;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+index 5ceed4e6c658..03080e8161cb 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -513,6 +513,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+                                 u16 vport, u16 vlan, u8 qos, u8 set_flags);
++static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw)
++{
++      return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) &&
++              MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan));
++}
++
+ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
+                                                      u8 vlan_depth)
+ {
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index b5f58fd37a0f..4b00221f57e2 100644
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -1088,6 +1088,11 @@ enum {
+       MLX5_VPORT_ADMIN_STATE_AUTO  = 0x2,
+ };
++enum {
++      MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN  = 0x1,
++      MLX5_VPORT_CVLAN_INSERT_ALWAYS         = 0x3,
++};
++
+ enum {
+       MLX5_L3_PROT_TYPE_IPV4          = 0,
+       MLX5_L3_PROT_TYPE_IPV6          = 1,
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index 4acd5610e96b..e640c27b1b6d 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -877,7 +877,8 @@ struct mlx5_ifc_e_switch_cap_bits {
+       u8         vport_svlan_insert[0x1];
+       u8         vport_cvlan_insert_if_not_exist[0x1];
+       u8         vport_cvlan_insert_overwrite[0x1];
+-      u8         reserved_at_5[0x2];
++      u8         reserved_at_5[0x1];
++      u8         vport_cvlan_insert_always[0x1];
+       u8         esw_shared_ingress_acl[0x1];
+       u8         esw_uplink_ingress_acl[0x1];
+       u8         root_ft_on_other_esw[0x1];
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch b/queue-6.0/net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch
new file mode 100644 (file)
index 0000000..add08b4
--- /dev/null
@@ -0,0 +1,42 @@
+From 70959495c683e8f537459f652731063084ada9da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 18 Dec 2022 12:42:14 +0200
+Subject: net/mlx5: Fix io_eq_size and event_eq_size params validation
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 44aee8ea15ac205490a41b00cbafcccbf9f7f82b ]
+
+io_eq_size and event_eq_size params are of param type
+DEVLINK_PARAM_TYPE_U32. But, the validation callback is addressing them
+as DEVLINK_PARAM_TYPE_U16.
+
+This causes a validation mismatch on big-endian systems, in which
+values in range were rejected while 268500991 was accepted.
+Fix it by checking the U32 value in the validation callback.
+
+Fixes: 0844fa5f7b89 ("net/mlx5: Let user configure io_eq_size param")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+index 66c6a7017695..9e4e8d551884 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+@@ -563,7 +563,7 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
+                                         union devlink_param_value val,
+                                         struct netlink_ext_ack *extack)
+ {
+-      return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL;
++      return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL;
+ }
+ static const struct devlink_param mlx5_devlink_params[] = {
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5-fix-roce-setting-at-hca-level.patch b/queue-6.0/net-mlx5-fix-roce-setting-at-hca-level.patch
new file mode 100644 (file)
index 0000000..9614186
--- /dev/null
@@ -0,0 +1,55 @@
+From 7e15d14743c6ce850a75d1e6a3a21981159f857d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 14:42:59 +0200
+Subject: net/mlx5: Fix RoCE setting at HCA level
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit c4ad5f2bdad56265b23d3635494ecdb205431807 ]
+
+mlx5 PF can disable RoCE for its VFs and SFs. In such case RoCE is
+marked as unsupported on those VFs/SFs.
+The cited patch added an option for disable (and enable) RoCE at HCA
+level. However, that commit didn't check whether RoCE is supported on
+the HCA and enabled user to try and set RoCE to on.
+Fix it by checking whether the HCA supports RoCE.
+
+Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/main.c    | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+index 9e4e8d551884..97e9ec44a759 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+@@ -468,7 +468,7 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
+       bool new_state = val.vbool;
+       if (new_state && !MLX5_CAP_GEN(dev, roce) &&
+-          !MLX5_CAP_GEN(dev, roce_rw_supported)) {
++          !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) {
+               NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
+               return -EOPNOTSUPP;
+       }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index dcc1e1b404e3..0a2f23a7082a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -614,7 +614,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
+               MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
+                        MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
+-      if (MLX5_CAP_GEN(dev, roce_rw_supported))
++      if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))
+               MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
+                        mlx5_is_roce_on(dev));
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch b/queue-6.0/net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch
new file mode 100644 (file)
index 0000000..14419b0
--- /dev/null
@@ -0,0 +1,66 @@
+From b6bf04f5f19e2b28026132587cf7f2bac627c352 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Dec 2022 14:28:34 +0200
+Subject: net/mlx5: Lag, fix failure to cancel delayed bond work
+
+From: Eli Cohen <elic@nvidia.com>
+
+[ Upstream commit 4d1c1379d71777ddeda3e54f8fc26e9ecbfd1009 ]
+
+Commit 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings")
+accidentally removed the call that cancels the delayed bond work, so
+the queued delayed work may expire and run on an already destroyed
+workqueue.
+
+Fix by restoring the call cancel_delayed_work_sync() before
+destroying the workqueue.
+
+This prevents call trace such as this:
+
+[  329.230417] BUG: kernel NULL pointer dereference, address: 0000000000000000
+ [  329.231444] #PF: supervisor write access in kernel mode
+ [  329.232233] #PF: error_code(0x0002) - not-present page
+ [  329.233007] PGD 0 P4D 0
+ [  329.233476] Oops: 0002 [#1] SMP
+ [  329.234012] CPU: 5 PID: 145 Comm: kworker/u20:4 Tainted: G OE      6.0.0-rc5_mlnx #1
+ [  329.235282] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ [  329.236868] Workqueue: mlx5_cmd_0000:08:00.1 cmd_work_handler [mlx5_core]
+ [  329.237886] RIP: 0010:_raw_spin_lock+0xc/0x20
+ [  329.238585] Code: f0 0f b1 17 75 02 f3 c3 89 c6 e9 6f 3c 5f ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 31 c0 ba 01 00 00 00 <f0> 0f b1 17 75 02 f3 c3 89 c6 e9 45 3c 5f ff 0f 1f 44 00 00 0f 1f
+ [  329.241156] RSP: 0018:ffffc900001b0e98 EFLAGS: 00010046
+ [  329.241940] RAX: 0000000000000000 RBX: ffffffff82374ae0 RCX: 0000000000000000
+ [  329.242954] RDX: 0000000000000001 RSI: 0000000000000014 RDI: 0000000000000000
+ [  329.243974] RBP: ffff888106ccf000 R08: ffff8881004000c8 R09: ffff888100400000
+ [  329.244990] R10: 0000000000000000 R11: ffffffff826669f8 R12: 0000000000002000
+ [  329.246009] R13: 0000000000000005 R14: ffff888100aa7ce0 R15: ffff88852ca80000
+ [  329.247030] FS:  0000000000000000(0000) GS:ffff88852ca80000(0000) knlGS:0000000000000000
+ [  329.248260] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [  329.249111] CR2: 0000000000000000 CR3: 000000016d675001 CR4: 0000000000770ee0
+ [  329.250133] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [  329.251152] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [  329.252176] PKRU: 55555554
+
+Fixes: 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings")
+Signed-off-by: Eli Cohen <elic@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+index bbe810f3b373..c142011d2097 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+@@ -201,6 +201,7 @@ static void mlx5_ldev_free(struct kref *ref)
+       if (ldev->nb.notifier_call)
+               unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+       mlx5_lag_mp_cleanup(ldev);
++      cancel_delayed_work_sync(&ldev->bond_work);
+       destroy_workqueue(ldev->wq);
+       mlx5_lag_mpesw_cleanup(ldev);
+       mutex_destroy(&ldev->lock);
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch b/queue-6.0/net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch
new file mode 100644 (file)
index 0000000..5a86d37
--- /dev/null
@@ -0,0 +1,56 @@
+From 8c36e13723778b26b9467deae758b1fe949ebb6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Dec 2022 09:22:50 +0800
+Subject: net/mlx5e: Always clear dest encap in neigh-update-del
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit 2951b2e142ecf6e0115df785ba91e91b6da74602 ]
+
+The cited commit introduced a bug for multiple encapsulations flow.
+If one dest encap becomes invalid, the flow is set slow path flag.
+But when other dests encap become invalid, they are not cleared due
+to slow path flag of the flow. When neigh-update-add is running, it
+will use invalid encap.
+
+Fix it by checking slow path flag after clearing dest encap.
+
+Fixes: 9a5f9cc794e1 ("net/mlx5e: Fix possible use-after-free deleting fdb rule")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c    | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+index ff73d25bc6eb..2aaf8ab857b8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -222,7 +222,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+       int err;
+       list_for_each_entry(flow, flow_list, tmp_list) {
+-              if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
++              if (!mlx5e_is_offloaded_flow(flow))
+                       continue;
+               attr = mlx5e_tc_get_encap_attr(flow);
+@@ -231,6 +231,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+               esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+               esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
++              /* Clear pkt_reformat before checking slow path flag. Because
++               * in next iteration, the same flow is already set slow path
++               * flag, but still need to clear the pkt_reformat.
++               */
++              if (flow_flag_test(flow, SLOW))
++                      continue;
++
+               /* update from encap rule to slow path rule */
+               spec = &flow->attr->parse_attr->spec;
+               rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5e-ct-fix-ct-debugfs-folder-name.patch b/queue-6.0/net-mlx5e-ct-fix-ct-debugfs-folder-name.patch
new file mode 100644 (file)
index 0000000..608292f
--- /dev/null
@@ -0,0 +1,46 @@
+From 455f7acc9e03a5db0e6872f881b77902da2dd8a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Nov 2022 13:54:29 +0800
+Subject: net/mlx5e: CT: Fix ct debugfs folder name
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit 849190e3e4ccf452fbe2240eace30a9ca83fb8d2 ]
+
+Need to use sprintf to build a string instead of sscanf. Otherwise
+dirname is null and both "ct_nic" and "ct_fdb" won't be created.
+But it's redundant anyway, as the driver could be in switchdev mode but
+still add nic rules. So use "ct" as folder name.
+
+Fixes: 77422a8f6f61 ("net/mlx5e: CT: Add ct driver counters")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+index 864ce0c393e6..f01f7dfdbcf8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+@@ -2080,14 +2080,9 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
+ static void
+ mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+ {
+-      bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
+       struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+-      char dirname[16] = {};
+-      if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0)
+-              return;
+-
+-      ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
++      ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
+       debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+                               &ct_dbgfs->stats.offloaded);
+       debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch b/queue-6.0/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch
new file mode 100644 (file)
index 0000000..dc8ae73
--- /dev/null
@@ -0,0 +1,48 @@
+From bed12a13b389a4a9bfc3de36c168d1bbf96c793c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Dec 2022 16:02:57 +0200
+Subject: net/mlx5e: Fix hw mtu initializing at XDP SQ allocation
+
+From: Adham Faris <afaris@nvidia.com>
+
+[ Upstream commit 1e267ab88dc44c48f556218f7b7f14c76f7aa066 ]
+
+Current xdp xmit functions logic (mlx5e_xmit_xdp_frame_mpwqe or
+mlx5e_xmit_xdp_frame), validates xdp packet length by comparing it to
+hw mtu (configured at xdp sq allocation) before xmiting it. This check
+does not account for ethernet fcs length (calculated and filled by the
+nic). Hence, when we try sending packets with length > (hw-mtu -
+ethernet-fcs-size), the device port drops it and tx_errors_phy is
+incremented. Desired behavior is to catch these packets and drop them
+by the driver.
+
+Fix this behavior in XDP SQ allocation function (mlx5e_alloc_xdpsq) by
+subtracting ethernet FCS header size (4 Bytes) from current hw mtu
+value, since ethernet FCS is calculated and written to ethernet frames
+by the nic.
+
+Fixes: d8bec2b29a82 ("net/mlx5e: Support bpf_xdp_adjust_head()")
+Signed-off-by: Adham Faris <afaris@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 6cf6a81775a8..5c16efb8be81 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -1146,7 +1146,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
+       sq->channel   = c;
+       sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
+       sq->min_inline_mode = params->tx_min_inline_mode;
+-      sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
++      sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
+       sq->xsk_pool  = xsk_pool;
+       sq->stats = sq->xsk_pool ?
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch b/queue-6.0/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch
new file mode 100644 (file)
index 0000000..cbc2c33
--- /dev/null
@@ -0,0 +1,45 @@
+From f0fddb2d6b3144473a957519a58b5befcb47c676 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Nov 2022 15:24:21 +0200
+Subject: net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by
+ default
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit b12d581e83e3ae1080c32ab83f123005bd89a840 ]
+
+mlx5e_build_nic_params will turn CQE compression on if the hardware
+capability is enabled and the slow_pci_heuristic condition is detected.
+As IPoIB doesn't support CQE compression, make sure to disable the
+feature in the IPoIB profile init.
+
+Please note that the feature is not exposed to the user for IPoIB
+interfaces, so it can't be subsequently turned on.
+
+Fixes: b797a684b0dd ("net/mlx5e: Enable CQE compression when PCI is slower than link")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+index c02b7b08fb4c..2032d5c0ad86 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+@@ -70,6 +70,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
+       params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+       params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+       params->tunneled_offload_en = false;
++
++      /* CQE compression is not supported for IPoIB */
++      params->rx_cqe_compress_def = false;
++      MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+ }
+ /* Called directly after IPoIB netdevice was created to initialize SW structs */
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch b/queue-6.0/net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch
new file mode 100644 (file)
index 0000000..b4d13c1
--- /dev/null
@@ -0,0 +1,50 @@
+From b037cdd339795982d38b2a6e7b3d0fdb17a9c5c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Aug 2021 14:45:17 +0300
+Subject: net/mlx5e: Set geneve_tlv_option_0_exist when matching on geneve
+ option
+
+From: Maor Dickman <maord@nvidia.com>
+
+[ Upstream commit e54638a8380bd9c146a883035fffd0a821813682 ]
+
+The cited patch added support of matching on geneve option by setting
+geneve_tlv_option_0_data mask and key but didn't set geneve_tlv_option_0_exist
+bit, which is required on some HWs when matching the geneve_tlv_option_0_data parameter.
+In some cases this may cause packets to wrongly match on rules with a different
+geneve option.
+
+Example of such case is packet with geneve_tlv_object class=789 and data=456
+will wrongly match on rule with match geneve_tlv_object class=123 and data=456.
+
+Fix it by setting geneve_tlv_option_0_exist bit when supported by the HW when matching
+on geneve_tlv_option_0_data parameter.
+
+Fixes: 9272e3df3023 ("net/mlx5e: Geneve, Add support for encap/decap flows offload")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
+index f5b26f5a7de4..054d80c4e65c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
+@@ -273,6 +273,11 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
+                geneve_tlv_option_0_data, be32_to_cpu(opt_data_key));
+       MLX5_SET(fte_match_set_misc3, misc_3_c,
+                geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask));
++      if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
++                                     ft_field_support.geneve_tlv_option_0_exist)) {
++              MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist);
++              MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist);
++      }
+       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch b/queue-6.0/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch
new file mode 100644 (file)
index 0000000..5a22e1c
--- /dev/null
@@ -0,0 +1,35 @@
+From 2f75965da923178fbc94185d6d28421b28265742 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 10:29:25 +0400
+Subject: net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit d039535850ee47079d59527e96be18d8e0daa84b ]
+
+of_phy_find_device() returns a device node with its refcount incremented.
+Call put_device() to release it when it is no longer needed.
+
+Fixes: ab4e6ee578e8 ("net: phy: xgmiitorgmii: Check phy_driver ready before accessing")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/xilinx_gmii2rgmii.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
+index 8dcb49ed1f3d..7fd9fe6a602b 100644
+--- a/drivers/net/phy/xilinx_gmii2rgmii.c
++++ b/drivers/net/phy/xilinx_gmii2rgmii.c
+@@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+       if (!priv->phy_dev->drv) {
+               dev_info(dev, "Attached phy not ready\n");
++              put_device(&priv->phy_dev->mdio.dev);
+               return -EPROBE_DEFER;
+       }
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch b/queue-6.0/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch
new file mode 100644 (file)
index 0000000..f6e20f2
--- /dev/null
@@ -0,0 +1,42 @@
+From 0bc9eaea6486a96c5e520e34fc57efcdb7d74aee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Jan 2023 16:57:43 -0500
+Subject: net: sched: atm: dont intepret cls results when asked to drop
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit a2965c7be0522eaa18808684b7b82b248515511b ]
+
+If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume
+res.class contains a valid pointer
+Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent")
+
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_atm.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
+index 816fd0d7ba38..28e1897e0da7 100644
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -397,10 +397,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+                               result = tcf_classify(skb, NULL, fl, &res, true);
+                               if (result < 0)
+                                       continue;
++                              if (result == TC_ACT_SHOT)
++                                      goto done;
++
+                               flow = (struct atm_flow_data *)res.class;
+                               if (!flow)
+                                       flow = lookup_flow(sch, res.classid);
+-                              goto done;
++                              goto drop;
+                       }
+               }
+               flow = NULL;
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch b/queue-6.0/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch
new file mode 100644 (file)
index 0000000..63aff58
--- /dev/null
@@ -0,0 +1,147 @@
+From 117c25935425099229492fb88669ea68cd4a8254 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Jan 2023 16:57:44 -0500
+Subject: net: sched: cbq: dont intepret cls results when asked to drop
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 ]
+
+If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that
+res.class contains a valid pointer
+
+Sample splat reported by Kyle Zeng
+
+[    5.405624] 0: reclassify loop, rule prio 0, protocol 800
+[    5.406326] ==================================================================
+[    5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0
+[    5.407987] Read of size 1 at addr ffff88800e3122aa by task poc/299
+[    5.408731]
+[    5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15
+[    5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
+BIOS 1.15.0-1 04/01/2014
+[    5.410439] Call Trace:
+[    5.410764]  dump_stack+0x87/0xcd
+[    5.411153]  print_address_description+0x7a/0x6b0
+[    5.411687]  ? vprintk_func+0xb9/0xc0
+[    5.411905]  ? printk+0x76/0x96
+[    5.412110]  ? cbq_enqueue+0x54b/0xea0
+[    5.412323]  kasan_report+0x17d/0x220
+[    5.412591]  ? cbq_enqueue+0x54b/0xea0
+[    5.412803]  __asan_report_load1_noabort+0x10/0x20
+[    5.413119]  cbq_enqueue+0x54b/0xea0
+[    5.413400]  ? __kasan_check_write+0x10/0x20
+[    5.413679]  __dev_queue_xmit+0x9c0/0x1db0
+[    5.413922]  dev_queue_xmit+0xc/0x10
+[    5.414136]  ip_finish_output2+0x8bc/0xcd0
+[    5.414436]  __ip_finish_output+0x472/0x7a0
+[    5.414692]  ip_finish_output+0x5c/0x190
+[    5.414940]  ip_output+0x2d8/0x3c0
+[    5.415150]  ? ip_mc_finish_output+0x320/0x320
+[    5.415429]  __ip_queue_xmit+0x753/0x1760
+[    5.415664]  ip_queue_xmit+0x47/0x60
+[    5.415874]  __tcp_transmit_skb+0x1ef9/0x34c0
+[    5.416129]  tcp_connect+0x1f5e/0x4cb0
+[    5.416347]  tcp_v4_connect+0xc8d/0x18c0
+[    5.416577]  __inet_stream_connect+0x1ae/0xb40
+[    5.416836]  ? local_bh_enable+0x11/0x20
+[    5.417066]  ? lock_sock_nested+0x175/0x1d0
+[    5.417309]  inet_stream_connect+0x5d/0x90
+[    5.417548]  ? __inet_stream_connect+0xb40/0xb40
+[    5.417817]  __sys_connect+0x260/0x2b0
+[    5.418037]  __x64_sys_connect+0x76/0x80
+[    5.418267]  do_syscall_64+0x31/0x50
+[    5.418477]  entry_SYSCALL_64_after_hwframe+0x61/0xc6
+[    5.418770] RIP: 0033:0x473bb7
+[    5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00
+00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00
+00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34
+24 89
+[    5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX:
+000000000000002a
+[    5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7
+[    5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007
+[    5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004
+[    5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
+[    5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002
+[    5.422471]
+[    5.422562] Allocated by task 299:
+[    5.422782]  __kasan_kmalloc+0x12d/0x160
+[    5.423007]  kasan_kmalloc+0x5/0x10
+[    5.423208]  kmem_cache_alloc_trace+0x201/0x2e0
+[    5.423492]  tcf_proto_create+0x65/0x290
+[    5.423721]  tc_new_tfilter+0x137e/0x1830
+[    5.423957]  rtnetlink_rcv_msg+0x730/0x9f0
+[    5.424197]  netlink_rcv_skb+0x166/0x300
+[    5.424428]  rtnetlink_rcv+0x11/0x20
+[    5.424639]  netlink_unicast+0x673/0x860
+[    5.424870]  netlink_sendmsg+0x6af/0x9f0
+[    5.425100]  __sys_sendto+0x58d/0x5a0
+[    5.425315]  __x64_sys_sendto+0xda/0xf0
+[    5.425539]  do_syscall_64+0x31/0x50
+[    5.425764]  entry_SYSCALL_64_after_hwframe+0x61/0xc6
+[    5.426065]
+[    5.426157] The buggy address belongs to the object at ffff88800e312200
+[    5.426157]  which belongs to the cache kmalloc-128 of size 128
+[    5.426955] The buggy address is located 42 bytes to the right of
+[    5.426955]  128-byte region [ffff88800e312200, ffff88800e312280)
+[    5.427688] The buggy address belongs to the page:
+[    5.427992] page:000000009875fabc refcount:1 mapcount:0
+mapping:0000000000000000 index:0x0 pfn:0xe312
+[    5.428562] flags: 0x100000000000200(slab)
+[    5.428812] raw: 0100000000000200 dead000000000100 dead000000000122
+ffff888007843680
+[    5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff
+ffff88800e312401
+[    5.429875] page dumped because: kasan: bad access detected
+[    5.430214] page->mem_cgroup:ffff88800e312401
+[    5.430471]
+[    5.430564] Memory state around the buggy address:
+[    5.430846]  ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[    5.431267]  ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00
+00 00 00 fc
+[    5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[    5.432123]                                   ^
+[    5.432391]  ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00
+00 00 00 fc
+[    5.432810]  ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[    5.433229] ==================================================================
+[    5.433648] Disabling lock debugging due to kernel taint
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Kyle Zeng <zengyhkyle@gmail.com>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_cbq.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
+index ba99ce05cd52..5d9f4f259d69 100644
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -230,6 +230,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+               result = tcf_classify(skb, NULL, fl, &res, true);
+               if (!fl || result < 0)
+                       goto fallback;
++              if (result == TC_ACT_SHOT)
++                      return NULL;
+               cl = (void *)res.class;
+               if (!cl) {
+@@ -250,8 +252,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+               case TC_ACT_TRAP:
+                       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+                       fallthrough;
+-              case TC_ACT_SHOT:
+-                      return NULL;
+               case TC_ACT_RECLASSIFY:
+                       return cbq_reclassify(skb, cl);
+               }
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-sched-fix-memory-leak-in-tcindex_set_parms.patch b/queue-6.0/net-sched-fix-memory-leak-in-tcindex_set_parms.patch
new file mode 100644 (file)
index 0000000..a436113
--- /dev/null
@@ -0,0 +1,150 @@
+From 392f27700bd1e77975b60c3a6d2b1211a3f01383 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 11:51:19 +0800
+Subject: net: sched: fix memory leak in tcindex_set_parms
+
+From: Hawkins Jiawei <yin31149@gmail.com>
+
+[ Upstream commit 399ab7fe0fa0d846881685fd4e57e9a8ef7559f7 ]
+
+Syzkaller reports a memory leak as follows:
+====================================
+BUG: memory leak
+unreferenced object 0xffff88810c287f00 (size 256):
+  comm "syz-executor105", pid 3600, jiffies 4294943292 (age 12.990s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<ffffffff814cf9f0>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
+    [<ffffffff839c9e07>] kmalloc include/linux/slab.h:576 [inline]
+    [<ffffffff839c9e07>] kmalloc_array include/linux/slab.h:627 [inline]
+    [<ffffffff839c9e07>] kcalloc include/linux/slab.h:659 [inline]
+    [<ffffffff839c9e07>] tcf_exts_init include/net/pkt_cls.h:250 [inline]
+    [<ffffffff839c9e07>] tcindex_set_parms+0xa7/0xbe0 net/sched/cls_tcindex.c:342
+    [<ffffffff839caa1f>] tcindex_change+0xdf/0x120 net/sched/cls_tcindex.c:553
+    [<ffffffff8394db62>] tc_new_tfilter+0x4f2/0x1100 net/sched/cls_api.c:2147
+    [<ffffffff8389e91c>] rtnetlink_rcv_msg+0x4dc/0x5d0 net/core/rtnetlink.c:6082
+    [<ffffffff839eba67>] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2540
+    [<ffffffff839eab87>] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+    [<ffffffff839eab87>] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345
+    [<ffffffff839eb046>] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921
+    [<ffffffff8383e796>] sock_sendmsg_nosec net/socket.c:714 [inline]
+    [<ffffffff8383e796>] sock_sendmsg+0x56/0x80 net/socket.c:734
+    [<ffffffff8383eb08>] ____sys_sendmsg+0x178/0x410 net/socket.c:2482
+    [<ffffffff83843678>] ___sys_sendmsg+0xa8/0x110 net/socket.c:2536
+    [<ffffffff838439c5>] __sys_sendmmsg+0x105/0x330 net/socket.c:2622
+    [<ffffffff83843c14>] __do_sys_sendmmsg net/socket.c:2651 [inline]
+    [<ffffffff83843c14>] __se_sys_sendmmsg net/socket.c:2648 [inline]
+    [<ffffffff83843c14>] __x64_sys_sendmmsg+0x24/0x30 net/socket.c:2648
+    [<ffffffff84605fd5>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+    [<ffffffff84605fd5>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+    [<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+====================================
+
+The kernel uses tcindex_change() to change an existing
+filter's properties.
+
+Yet the problem is that, during the process of changing,
+if `old_r` is retrieved from `p->perfect`, then
+kernel uses tcindex_alloc_perfect_hash() to newly
+allocate filter results, uses tcindex_filter_result_init()
+to clear the old filter result, without destroying
+its tcf_exts structure, which triggers the above memory leak.
+
+To be more specific, there are only two sources for `old_r`,
+according to tcindex_lookup(): `old_r` is retrieved either from
+`p->perfect` or from `p->h`.
+
+  * If `old_r` is retrieved from `p->perfect`, kernel uses
+tcindex_alloc_perfect_hash() to newly allocate the
+filter results. Then `r` is assigned with `cp->perfect + handle`,
+which is newly allocated. So condition `old_r && old_r != r` is
+true in this situation, and kernel uses tcindex_filter_result_init()
+to clear the old filter result, without destroying
+its tcf_exts structure
+
+  * If `old_r` is retrieved from `p->h`, then `p->perfect` is NULL
+according to the tcindex_lookup(). Considering that `cp->h`
+is directly copied from `p->h` and `p->perfect` is NULL,
+`r` is assigned with `tcindex_lookup(cp, handle)`, whose value
+should be the same as `old_r`, so condition `old_r && old_r != r`
+is false in this situation, kernel ignores using
+tcindex_filter_result_init() to clear the old filter result.
+
+So only when `old_r` is retrieved from `p->perfect` does kernel use
+tcindex_filter_result_init() to clear the old filter result, which
+triggers the above memory leak.
+
+Considering that there already exists a tc_filter_wq workqueue
+to destroy the old tcindex_data by tcindex_partial_destroy_work()
+at the end of tcindex_set_parms(), this patch solves
+this memory leak bug by removing this old filter result
+clearing part and delegating it to the tc_filter_wq workqueue.
+
+Note that this patch doesn't introduce any other issues. If
+`old_r` is retrieved from `p->perfect`, this patch just
+delegates old filter result clearing part to the
+tc_filter_wq workqueue; If `old_r` is retrieved from `p->h`,
+kernel doesn't reach the old filter result clearing part, so
+removing this part has no effect.
+
+[Thanks to the suggestion from Jakub Kicinski, Cong Wang, Paolo Abeni
+and Dmitry Vyukov]
+
+Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()")
+Link: https://lore.kernel.org/all/0000000000001de5c505ebc9ec59@google.com/
+Reported-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com
+Tested-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com
+Cc: Cong Wang <cong.wang@bytedance.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_tcindex.c | 12 ++----------
+ 1 file changed, 2 insertions(+), 10 deletions(-)
+
+diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
+index 742c7d49a958..8d1ef858db87 100644
+--- a/net/sched/cls_tcindex.c
++++ b/net/sched/cls_tcindex.c
+@@ -332,7 +332,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+                 struct tcindex_filter_result *r, struct nlattr **tb,
+                 struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
+ {
+-      struct tcindex_filter_result new_filter_result, *old_r = r;
++      struct tcindex_filter_result new_filter_result;
+       struct tcindex_data *cp = NULL, *oldp;
+       struct tcindex_filter *f = NULL; /* make gcc behave */
+       struct tcf_result cr = {};
+@@ -401,7 +401,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+       err = tcindex_filter_result_init(&new_filter_result, cp, net);
+       if (err < 0)
+               goto errout_alloc;
+-      if (old_r)
++      if (r)
+               cr = r->res;
+       err = -EBUSY;
+@@ -478,14 +478,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+               tcf_bind_filter(tp, &cr, base);
+       }
+-      if (old_r && old_r != r) {
+-              err = tcindex_filter_result_init(old_r, cp, net);
+-              if (err < 0) {
+-                      kfree(f);
+-                      goto errout_alloc;
+-              }
+-      }
+-
+       oldp = p;
+       r->res = cr;
+       tcf_exts_change(&r->exts, &e);
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-sparx5-fix-reading-of-the-mac-address.patch b/queue-6.0/net-sparx5-fix-reading-of-the-mac-address.patch
new file mode 100644 (file)
index 0000000..e8fd213
--- /dev/null
@@ -0,0 +1,40 @@
+From 9e02031a2820181c0b5334244b725d931f7558ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 13:12:15 +0100
+Subject: net: sparx5: Fix reading of the MAC address
+
+From: Horatiu Vultur <horatiu.vultur@microchip.com>
+
+[ Upstream commit 588ab2dc25f60efeb516b4abedb6c551949cc185 ]
+
+There is an issue with the checking of the return value of
+'of_get_mac_address', which returns 0 on success and a negative value on
+failure. The driver interpreted the result the opposite way. Therefore,
+if there was a MAC address defined in the DT, the driver generated
+a random MAC address; otherwise it used address 0.
+Fix this by correctly checking the return value of 'of_get_mac_address'.
+
+Fixes: b74ef9f9cb91 ("net: sparx5: Do not use mac_addr uninitialized in mchp_sparx5_probe()")
+Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+index e58de119186a..a2d0631f7ac7 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+@@ -819,7 +819,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
+       if (err)
+               goto cleanup_config;
+-      if (!of_get_mac_address(np, sparx5->base_mac)) {
++      if (of_get_mac_address(np, sparx5->base_mac)) {
+               dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n");
+               eth_random_addr(sparx5->base_mac);
+               sparx5->base_mac[5] = 0;
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch b/queue-6.0/net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch
new file mode 100644 (file)
index 0000000..572abf1
--- /dev/null
@@ -0,0 +1,84 @@
+From f8fe8d7f6ff95585691e842effc095f5eb30bb65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 12:19:17 +0100
+Subject: net/ulp: prevent ULP without clone op from entering the LISTEN status
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 2c02d41d71f90a5168391b6a5f2954112ba2307c ]
+
+When an ULP-enabled socket enters the LISTEN status, the listener ULP data
+pointer is copied inside the child/accepted sockets by sk_clone_lock().
+
+The relevant ULP can take care of de-duplicating the context pointer via
+the clone() operation, but only MPTCP and SMC implement such op.
+
+Other ULPs may end-up with a double-free at socket disposal time.
+
+We can't simply clear the ULP data at clone time, as TLS replaces the
+socket ops with custom ones assuming a valid TLS ULP context is
+available.
+
+Instead completely prevent clone-less ULP sockets from entering the
+LISTEN status.
+
+Fixes: 734942cc4ea6 ("tcp: ULP infrastructure")
+Reported-by: slipper <slipper.alive@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://lore.kernel.org/r/4b80c3d1dbe3d0ab072f80450c202d9bc88b4b03.1672740602.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/inet_connection_sock.c | 14 ++++++++++++++
+ net/ipv4/tcp_ulp.c              |  4 ++++
+ 2 files changed, 18 insertions(+)
+
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 971969cc7e17..fb01211a255b 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1037,12 +1037,26 @@ void inet_csk_prepare_forced_close(struct sock *sk)
+ }
+ EXPORT_SYMBOL(inet_csk_prepare_forced_close);
++static int inet_ulp_can_listen(const struct sock *sk)
++{
++      const struct inet_connection_sock *icsk = inet_csk(sk);
++
++      if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
++              return -EINVAL;
++
++      return 0;
++}
++
+ int inet_csk_listen_start(struct sock *sk)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_sock *inet = inet_sk(sk);
+       int err;
++      err = inet_ulp_can_listen(sk);
++      if (unlikely(err))
++              return err;
++
+       reqsk_queue_alloc(&icsk->icsk_accept_queue);
+       sk->sk_ack_backlog = 0;
+diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
+index 9ae50b1bd844..05b6077b9f2c 100644
+--- a/net/ipv4/tcp_ulp.c
++++ b/net/ipv4/tcp_ulp.c
+@@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
+       if (sk->sk_socket)
+               clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
++      err = -EINVAL;
++      if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN)
++              goto out_err;
++
+       err = ulp_ops->init(sk);
+       if (err)
+               goto out_err;
+-- 
+2.35.1
+
diff --git a/queue-6.0/net-vrf-determine-the-dst-using-the-original-ifindex.patch b/queue-6.0/net-vrf-determine-the-dst-using-the-original-ifindex.patch
new file mode 100644 (file)
index 0000000..ee7b4ca
--- /dev/null
@@ -0,0 +1,66 @@
+From af0fc9f37f7445b5ea529dcf59bed4b429975ece Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 18:18:25 +0100
+Subject: net: vrf: determine the dst using the original ifindex for multicast
+
+From: Antoine Tenart <atenart@kernel.org>
+
+[ Upstream commit f2575c8f404911da83f25b688e12afcf4273e640 ]
+
+Multicast packets received on an interface bound to a VRF are marked as
+belonging to the VRF and the skb device is updated to point to the VRF
+device itself. This was fine even when a route was associated to a
+device as when performing a fib table lookup 'oif' in fib6_table_lookup
+(coming from 'skb->dev->ifindex' in ip6_route_input) was set to 0 when
+FLOWI_FLAG_SKIP_NH_OIF was set.
+
+With commit 40867d74c374 ("net: Add l3mdev index to flow struct and
+avoid oif reset for port devices") this is no longer true and multicast
+traffic is not received on the original interface.
+
+Instead of adding back a similar check in fib6_table_lookup determine
+the dst using the original ifindex for multicast VRF traffic. To make
+things consistent across the function do the above for all strict
+packets, which was the logic before commit 6f12fa775530 ("vrf: mark skb
+for multicast or link-local as enslaved to VRF"). Note that reverting to
+this behavior should be fine as the change was about marking packets
+belonging to the VRF, not about their dst.
+
+Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Antoine Tenart <atenart@kernel.org>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20221220171825.1172237-1-atenart@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vrf.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index 5df7a0abc39d..f7f40e3fe9cc 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -1385,8 +1385,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+       /* loopback, multicast & non-ND link-local traffic; do not push through
+        * packet taps again. Reset pkt_type for upper layers to process skb.
+-       * For strict packets with a source LLA, determine the dst using the
+-       * original ifindex.
++       * For non-loopback strict packets, determine the dst using the original
++       * ifindex.
+        */
+       if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
+               skb->dev = vrf_dev;
+@@ -1395,7 +1395,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+               if (skb->pkt_type == PACKET_LOOPBACK)
+                       skb->pkt_type = PACKET_HOST;
+-              else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)
++              else
+                       vrf_ip6_input_dst(skb, vrf_dev, orig_iif);
+               goto out;
+-- 
+2.35.1
+
diff --git a/queue-6.0/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch b/queue-6.0/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch
new file mode 100644 (file)
index 0000000..1127669
--- /dev/null
@@ -0,0 +1,109 @@
+From ce44ec2154de16e7c25c726033fc7fc72ddfe59f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 13:24:37 +0100
+Subject: netfilter: ipset: fix hash:net,port,net hang with /0 subnet
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+[ Upstream commit a31d47be64b9b74f8cfedffe03e0a8a1f9e51f23 ]
+
+The hash:net,port,net set type supports /0 subnets. However, the patch
+commit 5f7b51bf09baca8e titled "netfilter: ipset: Limit the maximal range
+of consecutive elements to add/delete" did not take this into account, which
+resulted in an endless loop. The bug is actually older, but the patch
+5f7b51bf09baca8e brings it out earlier.
+
+Handle /0 subnets properly in hash:net,port,net set types.
+
+Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete")
+Reported-by: Марк Коренберг <socketpair@gmail.com>
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/ipset/ip_set_hash_netportnet.c | 40 ++++++++++----------
+ 1 file changed, 21 insertions(+), 19 deletions(-)
+
+diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
+index 19bcdb3141f6..005a7ce87217 100644
+--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
+@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+       return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+ }
++static u32
++hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
++{
++      if (from == 0 && to == UINT_MAX) {
++              *cidr = 0;
++              return to;
++      }
++      return ip_set_range_to_cidr(from, to, cidr);
++}
++
+ static int
+ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_netportnet4 *h = set->data;
++      struct hash_netportnet4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netportnet4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+-      u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+-      u64 n = 0, m = 0;
++      u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+       bool with_ports = false;
+       int ret;
+@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+-              n++;
+-      } while (ipn++ < ip_to);
+-      ipn = ip2_from;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+-              m++;
+-      } while (ipn++ < ip2_to);
+-
+-      if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried) {
+               ip = ntohl(h->next.ip[0]);
+@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       do {
+               e.ip[0] = htonl(ip);
+-              ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
++              ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
+               for (; p <= port_to; p++) {
+                       e.port = htons(p);
+                       do {
++                              i++;
+                               e.ip[1] = htonl(ip2);
+-                              ip2 = ip_set_range_to_cidr(ip2, ip2_to,
+-                                                         &e.cidr[1]);
++                              if (i > IPSET_MAX_RANGE) {
++                                      hash_netportnet4_data_next(&h->next,
++                                                                 &e);
++                                      return -ERANGE;
++                              }
++                              ip2 = hash_netportnet4_range_to_cidr(ip2,
++                                                      ip2_to, &e.cidr[1]);
+                               ret = adtfn(set, &e, &ext, &ext, flags);
+                               if (ret && !ip_set_eexist(ret, flags))
+                                       return ret;
+-- 
+2.35.1
+
diff --git a/queue-6.0/netfilter-ipset-rework-long-task-execution-when-addi.patch b/queue-6.0/netfilter-ipset-rework-long-task-execution-when-addi.patch
new file mode 100644 (file)
index 0000000..91a17bc
--- /dev/null
@@ -0,0 +1,462 @@
+From fe0cba2b07f75321797fb09dc213e3b135c6d031 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 13:24:38 +0100
+Subject: netfilter: ipset: Rework long task execution when adding/deleting
+ entries
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+[ Upstream commit 5e29dc36bd5e2166b834ceb19990d9e68a734d7d ]
+
+When adding/deleting a large number of elements in one step in ipset, it
+can take a considerable amount of time and can result in soft lockup errors.
+The patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of
+consecutive elements to add/delete") tried to fix it by limiting the maximum
+number of elements to process at all. However, that was not enough: it is
+still possible to get hung tasks. Lowering the limit is not reasonable, so
+the approach in this patch is as follows: rely on the method used at resizing
+sets and save the state when we reach a smaller internal batch limit,
+unlock/lock and proceed from the saved state. Thus we can avoid long
+continuous tasks and at the same time remove the limit on adding/deleting a
+large number of elements in one step.
+
+The nfnl mutex is held during the whole operation, which prevents other
+ipset commands from being issued in parallel.
+
+Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete")
+Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netfilter/ipset/ip_set.h      |  2 +-
+ net/netfilter/ipset/ip_set_core.c           |  7 ++++---
+ net/netfilter/ipset/ip_set_hash_ip.c        | 14 ++++++-------
+ net/netfilter/ipset/ip_set_hash_ipmark.c    | 13 ++++++------
+ net/netfilter/ipset/ip_set_hash_ipport.c    | 13 ++++++------
+ net/netfilter/ipset/ip_set_hash_ipportip.c  | 13 ++++++------
+ net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 +++++++-----
+ net/netfilter/ipset/ip_set_hash_net.c       | 17 +++++++--------
+ net/netfilter/ipset/ip_set_hash_netiface.c  | 15 ++++++--------
+ net/netfilter/ipset/ip_set_hash_netnet.c    | 23 +++++++--------------
+ net/netfilter/ipset/ip_set_hash_netport.c   | 19 +++++++----------
+ 11 files changed, 68 insertions(+), 81 deletions(-)
+
+diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
+index ada1296c87d5..72f5ebc5c97a 100644
+--- a/include/linux/netfilter/ipset/ip_set.h
++++ b/include/linux/netfilter/ipset/ip_set.h
+@@ -197,7 +197,7 @@ struct ip_set_region {
+ };
+ /* Max range where every element is added/deleted in one step */
+-#define IPSET_MAX_RANGE               (1<<20)
++#define IPSET_MAX_RANGE               (1<<14)
+ /* The max revision number supported by any set type + 1 */
+ #define IPSET_REVISION_MAX    9
+diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
+index 6b31746f9be3..751ac89b07a5 100644
+--- a/net/netfilter/ipset/ip_set_core.c
++++ b/net/netfilter/ipset/ip_set_core.c
+@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+               ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
+               ip_set_unlock(set);
+               retried = true;
+-      } while (ret == -EAGAIN &&
+-               set->variant->resize &&
+-               (ret = set->variant->resize(set, retried)) == 0);
++      } while (ret == -ERANGE ||
++               (ret == -EAGAIN &&
++                set->variant->resize &&
++                (ret = set->variant->resize(set, retried)) == 0));
+       if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
+               return 0;
+diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
+index 75d556d71652..24adcdd7a0b1 100644
+--- a/net/netfilter/ipset/ip_set_hash_ip.c
++++ b/net/netfilter/ipset/ip_set_hash_ip.c
+@@ -98,11 +98,11 @@ static int
+ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+             enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ip4 *h = set->data;
++      struct hash_ip4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ip4_elem e = { 0 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, hosts;
++      u32 ip = 0, ip_to = 0, hosts, i = 0;
+       int ret = 0;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -147,14 +147,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+       hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+-      /* 64bit division is not allowed on 32bit */
+-      if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+-      for (; ip <= ip_to;) {
++      for (; ip <= ip_to; i++) {
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_ip4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+                       return ret;
+diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
+index 153de3457423..a22ec1a6f6ec 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
++++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
+@@ -97,11 +97,11 @@ static int
+ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+                 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipmark4 *h = set->data;
++      struct hash_ipmark4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipmark4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip, ip_to = 0;
++      u32 ip, ip_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+               ip_set_mask_from_to(ip, ip_to, cidr);
+       }
+-      if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+-      for (; ip <= ip_to; ip++) {
++      for (; ip <= ip_to; ip++, i++) {
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_ipmark4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
+index 7303138e46be..10481760a9b2 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipport.c
++++ b/net/netfilter/ipset/ip_set_hash_ipport.c
+@@ -105,11 +105,11 @@ static int
+ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+                 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipport4 *h = set->data;
++      struct hash_ipport4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipport4_elem e = { .ip = 0 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip, ip_to = 0, p = 0, port, port_to;
++      u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+       bool with_ports = false;
+       int ret;
+@@ -173,17 +173,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+                       swap(port, port_to);
+       }
+-      if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+       for (; ip <= ip_to; ip++) {
+               p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+                                                      : port;
+-              for (; p <= port_to; p++) {
++              for (; p <= port_to; p++, i++) {
+                       e.ip = htonl(ip);
+                       e.port = htons(p);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_ipport4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
+index 334fb1ad0e86..39a01934b153 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
++++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
+@@ -108,11 +108,11 @@ static int
+ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+                   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipportip4 *h = set->data;
++      struct hash_ipportip4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipportip4_elem e = { .ip = 0 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip, ip_to = 0, p = 0, port, port_to;
++      u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+       bool with_ports = false;
+       int ret;
+@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+                       swap(port, port_to);
+       }
+-      if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+       for (; ip <= ip_to; ip++) {
+               p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+                                                      : port;
+-              for (; p <= port_to; p++) {
++              for (; p <= port_to; p++, i++) {
+                       e.ip = htonl(ip);
+                       e.port = htons(p);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_ipportip4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+index 7df94f437f60..5c6de605a9fb 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+@@ -160,12 +160,12 @@ static int
+ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipportnet4 *h = set->data;
++      struct hash_ipportnet4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+-      u32 ip2_from = 0, ip2_to = 0, ip2;
++      u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+       bool with_ports = false;
+       u8 cidr;
+       int ret;
+@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                       swap(port, port_to);
+       }
+-      if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       ip2_to = ip2_from;
+       if (tb[IPSET_ATTR_IP2_TO]) {
+               ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+               for (; p <= port_to; p++) {
+                       e.port = htons(p);
+                       do {
++                              i++;
+                               e.ip2 = htonl(ip2);
+                               ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
+                               e.cidr = cidr - 1;
++                              if (i > IPSET_MAX_RANGE) {
++                                      hash_ipportnet4_data_next(&h->next,
++                                                                &e);
++                                      return -ERANGE;
++                              }
+                               ret = adtfn(set, &e, &ext, &ext, flags);
+                               if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
+index 1422739d9aa2..ce0a9ce5a91f 100644
+--- a/net/netfilter/ipset/ip_set_hash_net.c
++++ b/net/netfilter/ipset/ip_set_hash_net.c
+@@ -136,11 +136,11 @@ static int
+ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+              enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_net4 *h = set->data;
++      struct hash_net4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_net4_elem e = { .cidr = HOST_MASK };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, ipn, n = 0;
++      u32 ip = 0, ip_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+               if (ip + UINT_MAX == ip_to)
+                       return -IPSET_ERR_HASH_RANGE;
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+-              n++;
+-      } while (ipn++ < ip_to);
+-
+-      if (n > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried)
+               ip = ntohl(h->next.ip);
+       do {
++              i++;
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_net4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
+index 9810f5bf63f5..031073286236 100644
+--- a/net/netfilter/ipset/ip_set_hash_netiface.c
++++ b/net/netfilter/ipset/ip_set_hash_netiface.c
+@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, ipn, n = 0;
++      u32 ip = 0, ip_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip, ip_to, e.cidr);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+-              n++;
+-      } while (ipn++ < ip_to);
+-
+-      if (n > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried)
+               ip = ntohl(h->next.ip);
+       do {
++              i++;
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_netiface4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+               ret = adtfn(set, &e, &ext, &ext, flags);
+diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
+index 3d09eefe998a..c07b70bf32db 100644
+--- a/net/netfilter/ipset/ip_set_hash_netnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netnet.c
+@@ -163,13 +163,12 @@ static int
+ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_netnet4 *h = set->data;
++      struct hash_netnet4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netnet4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       u32 ip = 0, ip_to = 0;
+-      u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+-      u64 n = 0, m = 0;
++      u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -245,19 +244,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+-              n++;
+-      } while (ipn++ < ip_to);
+-      ipn = ip2_from;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+-              m++;
+-      } while (ipn++ < ip2_to);
+-
+-      if (n*m > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried) {
+               ip = ntohl(h->next.ip[0]);
+@@ -270,7 +256,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+               e.ip[0] = htonl(ip);
+               ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+               do {
++                      i++;
+                       e.ip[1] = htonl(ip2);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_netnet4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
+index 09cf72eb37f8..d1a0628df4ef 100644
+--- a/net/netfilter/ipset/ip_set_hash_netport.c
++++ b/net/netfilter/ipset/ip_set_hash_netport.c
+@@ -154,12 +154,11 @@ static int
+ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+                  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_netport4 *h = set->data;
++      struct hash_netport4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+-      u64 n = 0;
++      u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
+       bool with_ports = false;
+       u8 cidr;
+       int ret;
+@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+-              n++;
+-      } while (ipn++ < ip_to);
+-
+-      if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried) {
+               ip = ntohl(h->next.ip);
+@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+               e.ip = htonl(ip);
+               ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
+               e.cidr = cidr - 1;
+-              for (; p <= port_to; p++) {
++              for (; p <= port_to; p++, i++) {
+                       e.port = htons(p);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_netport4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+                               return ret;
+-- 
+2.35.1
+
diff --git a/queue-6.0/netfilter-nf_tables-add-function-to-create-set-state.patch b/queue-6.0/netfilter-nf_tables-add-function-to-create-set-state.patch
new file mode 100644 (file)
index 0000000..d6e43f0
--- /dev/null
@@ -0,0 +1,185 @@
+From 62593de6f5902b12265a02862804061d5e8366e3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 18:00:10 +0100
+Subject: netfilter: nf_tables: add function to create set stateful expressions
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit a8fe4154fa5a1bae590b243ed60f871e5a5e1378 ]
+
+Add a helper function to allocate and initialize the stateful expressions
+that are defined in a set.
+
+This patch allows this code to be reused from the set update path, to check
+that the type of the update matches the existing set in the kernel.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: f6594c372afd ("netfilter: nf_tables: perform type checking for existing sets")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 106 ++++++++++++++++++++++------------
+ 1 file changed, 68 insertions(+), 38 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index eb6a2e09ec7b..1659b2575c05 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4340,6 +4340,59 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
+       return err;
+ }
++static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
++                            const struct nlattr * const *nla,
++                            struct nft_expr **exprs, int *num_exprs,
++                            u32 flags)
++{
++      struct nft_expr *expr;
++      int err, i;
++
++      if (nla[NFTA_SET_EXPR]) {
++              expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
++              if (IS_ERR(expr)) {
++                      err = PTR_ERR(expr);
++                      goto err_set_expr_alloc;
++              }
++              exprs[0] = expr;
++              (*num_exprs)++;
++      } else if (nla[NFTA_SET_EXPRESSIONS]) {
++              struct nlattr *tmp;
++              int left;
++
++              if (!(flags & NFT_SET_EXPR)) {
++                      err = -EINVAL;
++                      goto err_set_expr_alloc;
++              }
++              i = 0;
++              nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
++                      if (i == NFT_SET_EXPR_MAX) {
++                              err = -E2BIG;
++                              goto err_set_expr_alloc;
++                      }
++                      if (nla_type(tmp) != NFTA_LIST_ELEM) {
++                              err = -EINVAL;
++                              goto err_set_expr_alloc;
++                      }
++                      expr = nft_set_elem_expr_alloc(ctx, set, tmp);
++                      if (IS_ERR(expr)) {
++                              err = PTR_ERR(expr);
++                              goto err_set_expr_alloc;
++                      }
++                      exprs[i++] = expr;
++                      (*num_exprs)++;
++              }
++      }
++
++      return 0;
++
++err_set_expr_alloc:
++      for (i = 0; i < *num_exprs; i++)
++              nft_expr_destroy(ctx, exprs[i]);
++
++      return err;
++}
++
+ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+                           const struct nlattr * const nla[])
+ {
+@@ -4347,7 +4400,6 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+       u8 genmask = nft_genmask_next(info->net);
+       u8 family = info->nfmsg->nfgen_family;
+       const struct nft_set_ops *ops;
+-      struct nft_expr *expr = NULL;
+       struct net *net = info->net;
+       struct nft_set_desc desc;
+       struct nft_table *table;
+@@ -4355,6 +4407,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+       struct nft_set *set;
+       struct nft_ctx ctx;
+       size_t alloc_size;
++      int num_exprs = 0;
+       char *name;
+       int err, i;
+       u16 udlen;
+@@ -4481,6 +4534,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+                       return PTR_ERR(set);
+               }
+       } else {
++              struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
++
+               if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+                       return -EEXIST;
+@@ -4488,6 +4543,13 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+               if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+                       return -EOPNOTSUPP;
++              err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
++              if (err < 0)
++                      return err;
++
++              for (i = 0; i < num_exprs; i++)
++                      nft_expr_destroy(&ctx, exprs[i]);
++
+               return 0;
+       }
+@@ -4555,43 +4617,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+       if (err < 0)
+               goto err_set_init;
+-      if (nla[NFTA_SET_EXPR]) {
+-              expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
+-              if (IS_ERR(expr)) {
+-                      err = PTR_ERR(expr);
+-                      goto err_set_expr_alloc;
+-              }
+-              set->exprs[0] = expr;
+-              set->num_exprs++;
+-      } else if (nla[NFTA_SET_EXPRESSIONS]) {
+-              struct nft_expr *expr;
+-              struct nlattr *tmp;
+-              int left;
+-
+-              if (!(flags & NFT_SET_EXPR)) {
+-                      err = -EINVAL;
+-                      goto err_set_expr_alloc;
+-              }
+-              i = 0;
+-              nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+-                      if (i == NFT_SET_EXPR_MAX) {
+-                              err = -E2BIG;
+-                              goto err_set_expr_alloc;
+-                      }
+-                      if (nla_type(tmp) != NFTA_LIST_ELEM) {
+-                              err = -EINVAL;
+-                              goto err_set_expr_alloc;
+-                      }
+-                      expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
+-                      if (IS_ERR(expr)) {
+-                              err = PTR_ERR(expr);
+-                              goto err_set_expr_alloc;
+-                      }
+-                      set->exprs[i++] = expr;
+-                      set->num_exprs++;
+-              }
+-      }
++      err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
++      if (err < 0)
++              goto err_set_destroy;
++      set->num_exprs = num_exprs;
+       set->handle = nf_tables_alloc_handle(table);
+       err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
+@@ -4605,7 +4635,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ err_set_expr_alloc:
+       for (i = 0; i < set->num_exprs; i++)
+               nft_expr_destroy(&ctx, set->exprs[i]);
+-
++err_set_destroy:
+       ops->destroy(set);
+ err_set_init:
+       kfree(set->name);
+-- 
+2.35.1
+
diff --git a/queue-6.0/netfilter-nf_tables-consolidate-set-description.patch b/queue-6.0/netfilter-nf_tables-consolidate-set-description.patch
new file mode 100644 (file)
index 0000000..ed1cb2a
--- /dev/null
@@ -0,0 +1,225 @@
+From cc4bf2caa0f1a2bb28ff0d7f29862b7b5e8f3dca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 20:07:52 +0100
+Subject: netfilter: nf_tables: consolidate set description
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit bed4a63ea4ae77cfe5aae004ef87379f0655260a ]
+
+Add the following fields to the set description:
+
+- key type
+- data type
+- object type
+- policy
+- gc_int: garbage collection interval
+- timeout: element timeout
+
+This prepares for stricter set type checks on updates in a follow up
+patch.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: f6594c372afd ("netfilter: nf_tables: perform type checking for existing sets")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 12 +++++++
+ net/netfilter/nf_tables_api.c     | 58 +++++++++++++++----------------
+ 2 files changed, 40 insertions(+), 30 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index cdb7db9b0e25..ddcdde230747 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -311,17 +311,29 @@ struct nft_set_iter {
+ /**
+  *    struct nft_set_desc - description of set elements
+  *
++ *    @ktype: key type
+  *    @klen: key length
++ *    @dtype: data type
+  *    @dlen: data length
++ *    @objtype: object type
+ *    @size: number of set elements
++ *    @policy: set policy
++ *    @gc_int: garbage collector interval
++ *    @timeout: element timeout
+  *    @field_len: length of each field in concatenation, bytes
+  *    @field_count: number of concatenated fields in element
+  *    @expr: set must support for expressions
+  */
+ struct nft_set_desc {
++      u32                     ktype;
+       unsigned int            klen;
++      u32                     dtype;
+       unsigned int            dlen;
++      u32                     objtype;
+       unsigned int            size;
++      u32                     policy;
++      u32                     gc_int;
++      u64                     timeout;
+       u8                      field_len[NFT_REG32_COUNT];
+       u8                      field_count;
+       bool                    expr;
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 7977f0422ecf..eb6a2e09ec7b 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3732,8 +3732,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
+ static const struct nft_set_ops *
+ nft_select_set_ops(const struct nft_ctx *ctx,
+                  const struct nlattr * const nla[],
+-                 const struct nft_set_desc *desc,
+-                 enum nft_set_policies policy)
++                 const struct nft_set_desc *desc)
+ {
+       struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+       const struct nft_set_ops *ops, *bops;
+@@ -3762,7 +3761,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
+               if (!ops->estimate(desc, flags, &est))
+                       continue;
+-              switch (policy) {
++              switch (desc->policy) {
+               case NFT_SET_POL_PERFORMANCE:
+                       if (est.lookup < best.lookup)
+                               break;
+@@ -4344,7 +4343,6 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
+ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+                           const struct nlattr * const nla[])
+ {
+-      u32 ktype, dtype, flags, policy, gc_int, objtype;
+       struct netlink_ext_ack *extack = info->extack;
+       u8 genmask = nft_genmask_next(info->net);
+       u8 family = info->nfmsg->nfgen_family;
+@@ -4357,10 +4355,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+       struct nft_set *set;
+       struct nft_ctx ctx;
+       size_t alloc_size;
+-      u64 timeout;
+       char *name;
+       int err, i;
+       u16 udlen;
++      u32 flags;
+       u64 size;
+       if (nla[NFTA_SET_TABLE] == NULL ||
+@@ -4371,10 +4369,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+       memset(&desc, 0, sizeof(desc));
+-      ktype = NFT_DATA_VALUE;
++      desc.ktype = NFT_DATA_VALUE;
+       if (nla[NFTA_SET_KEY_TYPE] != NULL) {
+-              ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+-              if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
++              desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
++              if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+                       return -EINVAL;
+       }
+@@ -4399,17 +4397,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+                       return -EOPNOTSUPP;
+       }
+-      dtype = 0;
++      desc.dtype = 0;
+       if (nla[NFTA_SET_DATA_TYPE] != NULL) {
+               if (!(flags & NFT_SET_MAP))
+                       return -EINVAL;
+-              dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+-              if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+-                  dtype != NFT_DATA_VERDICT)
++              desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
++              if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
++                  desc.dtype != NFT_DATA_VERDICT)
+                       return -EINVAL;
+-              if (dtype != NFT_DATA_VERDICT) {
++              if (desc.dtype != NFT_DATA_VERDICT) {
+                       if (nla[NFTA_SET_DATA_LEN] == NULL)
+                               return -EINVAL;
+                       desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+@@ -4424,34 +4422,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+               if (!(flags & NFT_SET_OBJECT))
+                       return -EINVAL;
+-              objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+-              if (objtype == NFT_OBJECT_UNSPEC ||
+-                  objtype > NFT_OBJECT_MAX)
++              desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
++              if (desc.objtype == NFT_OBJECT_UNSPEC ||
++                  desc.objtype > NFT_OBJECT_MAX)
+                       return -EOPNOTSUPP;
+       } else if (flags & NFT_SET_OBJECT)
+               return -EINVAL;
+       else
+-              objtype = NFT_OBJECT_UNSPEC;
++              desc.objtype = NFT_OBJECT_UNSPEC;
+-      timeout = 0;
++      desc.timeout = 0;
+       if (nla[NFTA_SET_TIMEOUT] != NULL) {
+               if (!(flags & NFT_SET_TIMEOUT))
+                       return -EINVAL;
+-              err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
++              err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
+               if (err)
+                       return err;
+       }
+-      gc_int = 0;
++      desc.gc_int = 0;
+       if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+               if (!(flags & NFT_SET_TIMEOUT))
+                       return -EINVAL;
+-              gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
++              desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+       }
+-      policy = NFT_SET_POL_PERFORMANCE;
++      desc.policy = NFT_SET_POL_PERFORMANCE;
+       if (nla[NFTA_SET_POLICY] != NULL)
+-              policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
++              desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+       if (nla[NFTA_SET_DESC] != NULL) {
+               err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
+@@ -4496,7 +4494,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+       if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+               return -ENOENT;
+-      ops = nft_select_set_ops(&ctx, nla, &desc, policy);
++      ops = nft_select_set_ops(&ctx, nla, &desc);
+       if (IS_ERR(ops))
+               return PTR_ERR(ops);
+@@ -4536,18 +4534,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+       set->table = table;
+       write_pnet(&set->net, net);
+       set->ops = ops;
+-      set->ktype = ktype;
++      set->ktype = desc.ktype;
+       set->klen = desc.klen;
+-      set->dtype = dtype;
+-      set->objtype = objtype;
++      set->dtype = desc.dtype;
++      set->objtype = desc.objtype;
+       set->dlen = desc.dlen;
+       set->flags = flags;
+       set->size = desc.size;
+-      set->policy = policy;
++      set->policy = desc.policy;
+       set->udlen = udlen;
+       set->udata = udata;
+-      set->timeout = timeout;
+-      set->gc_int = gc_int;
++      set->timeout = desc.timeout;
++      set->gc_int = desc.gc_int;
+       set->field_count = desc.field_count;
+       for (i = 0; i < desc.field_count; i++)
+-- 
+2.35.1
+
diff --git a/queue-6.0/netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch b/queue-6.0/netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch
new file mode 100644 (file)
index 0000000..26540da
--- /dev/null
@@ -0,0 +1,209 @@
+From f903b3ad4008aa50ce9c16bdcba3969692c82ce6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 20:10:12 +0100
+Subject: netfilter: nf_tables: honor set timeout and garbage collection
+ updates
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 123b99619cca94bdca0bf7bde9abe28f0a0dfe06 ]
+
+Set timeout and garbage collection interval updates are ignored on
+updates. Add transaction to update global set element timeout and
+garbage collection interval.
+
+Fixes: 96518518cc41 ("netfilter: add nftables")
+Suggested-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 13 ++++++-
+ net/netfilter/nf_tables_api.c     | 63 ++++++++++++++++++++++---------
+ 2 files changed, 57 insertions(+), 19 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index ddcdde230747..1daededfa75e 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -592,7 +592,9 @@ void *nft_set_catchall_gc(const struct nft_set *set);
+ static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
+ {
+-      return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
++      u32 gc_int = READ_ONCE(set->gc_int);
++
++      return gc_int ? msecs_to_jiffies(gc_int) : HZ;
+ }
+ /**
+@@ -1563,6 +1565,9 @@ struct nft_trans_rule {
+ struct nft_trans_set {
+       struct nft_set                  *set;
+       u32                             set_id;
++      u32                             gc_int;
++      u64                             timeout;
++      bool                            update;
+       bool                            bound;
+ };
+@@ -1572,6 +1577,12 @@ struct nft_trans_set {
+       (((struct nft_trans_set *)trans->data)->set_id)
+ #define nft_trans_set_bound(trans)    \
+       (((struct nft_trans_set *)trans->data)->bound)
++#define nft_trans_set_update(trans)   \
++      (((struct nft_trans_set *)trans->data)->update)
++#define nft_trans_set_timeout(trans)  \
++      (((struct nft_trans_set *)trans->data)->timeout)
++#define nft_trans_set_gc_int(trans)   \
++      (((struct nft_trans_set *)trans->data)->gc_int)
+ struct nft_trans_chain {
+       bool                            update;
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 9fa155f2632c..e0c156bb0b17 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
+       return 0;
+ }
+-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+-                           struct nft_set *set)
++static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
++                             struct nft_set *set,
++                             const struct nft_set_desc *desc)
+ {
+       struct nft_trans *trans;
+@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+       if (trans == NULL)
+               return -ENOMEM;
+-      if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
++      if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
+               nft_trans_set_id(trans) =
+                       ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+               nft_activate_next(ctx->net, set);
+       }
+       nft_trans_set(trans) = set;
++      if (desc) {
++              nft_trans_set_update(trans) = true;
++              nft_trans_set_gc_int(trans) = desc->gc_int;
++              nft_trans_set_timeout(trans) = desc->timeout;
++      }
+       nft_trans_commit_list_add_tail(ctx->net, trans);
+       return 0;
+ }
++static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
++                           struct nft_set *set)
++{
++      return __nft_trans_set_add(ctx, msg_type, set, NULL);
++}
++
+ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+       int err;
+@@ -3996,8 +4008,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
+ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+                             const struct nft_set *set, u16 event, u16 flags)
+ {
+-      struct nlmsghdr *nlh;
++      u64 timeout = READ_ONCE(set->timeout);
++      u32 gc_int = READ_ONCE(set->gc_int);
+       u32 portid = ctx->portid;
++      struct nlmsghdr *nlh;
+       struct nlattr *nest;
+       u32 seq = ctx->seq;
+       int i;
+@@ -4033,13 +4047,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+           nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
+               goto nla_put_failure;
+-      if (set->timeout &&
++      if (timeout &&
+           nla_put_be64(skb, NFTA_SET_TIMEOUT,
+-                       nf_jiffies64_to_msecs(set->timeout),
++                       nf_jiffies64_to_msecs(timeout),
+                        NFTA_SET_PAD))
+               goto nla_put_failure;
+-      if (set->gc_int &&
+-          nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
++      if (gc_int &&
++          nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
+               goto nla_put_failure;
+       if (set->policy != NFT_SET_POL_PERFORMANCE) {
+@@ -4584,7 +4598,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+               for (i = 0; i < num_exprs; i++)
+                       nft_expr_destroy(&ctx, exprs[i]);
+-              return err;
++              if (err < 0)
++                      return err;
++
++              return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
+       }
+       if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+@@ -6022,7 +6039,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+                       return err;
+       } else if (set->flags & NFT_SET_TIMEOUT &&
+                  !(flags & NFT_SET_ELEM_INTERVAL_END)) {
+-              timeout = set->timeout;
++              timeout = READ_ONCE(set->timeout);
+       }
+       expiration = 0;
+@@ -6123,7 +6140,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+               if (err < 0)
+                       goto err_parse_key_end;
+-              if (timeout != set->timeout) {
++              if (timeout != READ_ONCE(set->timeout)) {
+                       err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+                       if (err < 0)
+                               goto err_parse_key_end;
+@@ -9039,14 +9056,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+                               nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+                       break;
+               case NFT_MSG_NEWSET:
+-                      nft_clear(net, nft_trans_set(trans));
+-                      /* This avoids hitting -EBUSY when deleting the table
+-                       * from the transaction.
+-                       */
+-                      if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+-                          !list_empty(&nft_trans_set(trans)->bindings))
+-                              trans->ctx.table->use--;
++                      if (nft_trans_set_update(trans)) {
++                              struct nft_set *set = nft_trans_set(trans);
++                              WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
++                              WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
++                      } else {
++                              nft_clear(net, nft_trans_set(trans));
++                              /* This avoids hitting -EBUSY when deleting the table
++                               * from the transaction.
++                               */
++                              if (nft_set_is_anonymous(nft_trans_set(trans)) &&
++                                  !list_empty(&nft_trans_set(trans)->bindings))
++                                      trans->ctx.table->use--;
++                      }
+                       nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+                                            NFT_MSG_NEWSET, GFP_KERNEL);
+                       nft_trans_destroy(trans);
+@@ -9268,6 +9291,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+                       nft_trans_destroy(trans);
+                       break;
+               case NFT_MSG_NEWSET:
++                      if (nft_trans_set_update(trans)) {
++                              nft_trans_destroy(trans);
++                              break;
++                      }
+                       trans->ctx.table->use--;
+                       if (nft_trans_set_bound(trans)) {
+                               nft_trans_destroy(trans);
+-- 
+2.35.1
+
diff --git a/queue-6.0/netfilter-nf_tables-perform-type-checking-for-existi.patch b/queue-6.0/netfilter-nf_tables-perform-type-checking-for-existi.patch
new file mode 100644 (file)
index 0000000..bb3e6c7
--- /dev/null
@@ -0,0 +1,89 @@
+From 35a090d058455650bd7722f765bfb80ad2c3da2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 20:09:00 +0100
+Subject: netfilter: nf_tables: perform type checking for existing sets
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit f6594c372afd5cec8b1e9ee9ea8f8819d59c6fb1 ]
+
+If a ruleset declares a set name that matches an existing set in the
+kernel, then validate that this declaration really refers to the same
+set, otherwise bail out with EEXIST.
+
+Currently, the kernel reports success when adding a set that already
+exists in the kernel. This usually results in EINVAL errors at a later
+stage, when the user adds elements to the set, if the set declaration
+mismatches the existing set representation in the kernel.
+
+Add a new function to check that the set declaration really refers to
+the same existing set in the kernel.
+
+Fixes: 96518518cc41 ("netfilter: add nftables")
+Reported-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 36 ++++++++++++++++++++++++++++++++++-
+ 1 file changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 1659b2575c05..9fa155f2632c 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4393,6 +4393,34 @@ static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
+       return err;
+ }
++static bool nft_set_is_same(const struct nft_set *set,
++                          const struct nft_set_desc *desc,
++                          struct nft_expr *exprs[], u32 num_exprs, u32 flags)
++{
++      int i;
++
++      if (set->ktype != desc->ktype ||
++          set->dtype != desc->dtype ||
++          set->flags != flags ||
++          set->klen != desc->klen ||
++          set->dlen != desc->dlen ||
++          set->field_count != desc->field_count ||
++          set->num_exprs != num_exprs)
++              return false;
++
++      for (i = 0; i < desc->field_count; i++) {
++              if (set->field_len[i] != desc->field_len[i])
++                      return false;
++      }
++
++      for (i = 0; i < num_exprs; i++) {
++              if (set->exprs[i]->ops != exprs[i]->ops)
++                      return false;
++      }
++
++      return true;
++}
++
+ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+                           const struct nlattr * const nla[])
+ {
+@@ -4547,10 +4575,16 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+               if (err < 0)
+                       return err;
++              err = 0;
++              if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
++                      NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
++                      err = -EEXIST;
++              }
++
+               for (i = 0; i < num_exprs; i++)
+                       nft_expr_destroy(&ctx, exprs[i]);
+-              return 0;
++              return err;
+       }
+       if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+-- 
+2.35.1
+
diff --git a/queue-6.0/nfc-fix-potential-resource-leaks.patch b/queue-6.0/nfc-fix-potential-resource-leaks.patch
new file mode 100644 (file)
index 0000000..22009c4
--- /dev/null
@@ -0,0 +1,127 @@
+From df1f8ede419b074b464323d72f2682793dc0e418 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 11:37:18 +0400
+Subject: nfc: Fix potential resource leaks
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit df49908f3c52d211aea5e2a14a93bbe67a2cb3af ]
+
+nfc_get_device() takes a reference to the device; add the missing
+nfc_put_device() calls to release it when it is no longer needed.
+Also fix the style warning by using the error code EOPNOTSUPP instead
+of ENOTSUPP.
+
+Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation")
+Fixes: 29e76924cf08 ("nfc: netlink: Add capability to reply to vendor_cmd with data")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/nfc/netlink.c | 52 ++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 38 insertions(+), 14 deletions(-)
+
+diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
+index 7c62417ccfd7..32a08ae9ad11 100644
+--- a/net/nfc/netlink.c
++++ b/net/nfc/netlink.c
+@@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+       u32 dev_idx, se_idx;
+       u8 *apdu;
+       size_t apdu_len;
++      int rc;
+       if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+           !info->attrs[NFC_ATTR_SE_INDEX] ||
+@@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+       if (!dev)
+               return -ENODEV;
+-      if (!dev->ops || !dev->ops->se_io)
+-              return -ENOTSUPP;
++      if (!dev->ops || !dev->ops->se_io) {
++              rc = -EOPNOTSUPP;
++              goto put_dev;
++      }
+       apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
+-      if (apdu_len == 0)
+-              return -EINVAL;
++      if (apdu_len == 0) {
++              rc = -EINVAL;
++              goto put_dev;
++      }
+       apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
+-      if (!apdu)
+-              return -EINVAL;
++      if (!apdu) {
++              rc = -EINVAL;
++              goto put_dev;
++      }
+       ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
+-      if (!ctx)
+-              return -ENOMEM;
++      if (!ctx) {
++              rc = -ENOMEM;
++              goto put_dev;
++      }
+       ctx->dev_idx = dev_idx;
+       ctx->se_idx = se_idx;
+-      return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++      rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++
++put_dev:
++      nfc_put_device(dev);
++      return rc;
+ }
+ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+@@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+       subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
+       dev = nfc_get_device(dev_idx);
+-      if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
++      if (!dev)
+               return -ENODEV;
++      if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
++              err = -ENODEV;
++              goto put_dev;
++      }
++
+       if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
+               data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
+               data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
+-              if (data_len == 0)
+-                      return -EINVAL;
++              if (data_len == 0) {
++                      err = -EINVAL;
++                      goto put_dev;
++              }
+       } else {
+               data = NULL;
+               data_len = 0;
+@@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+               dev->cur_cmd_info = info;
+               err = cmd->doit(dev, data, data_len);
+               dev->cur_cmd_info = NULL;
+-              return err;
++              goto put_dev;
+       }
+-      return -EOPNOTSUPP;
++      err = -EOPNOTSUPP;
++
++put_dev:
++      nfc_put_device(dev);
++      return err;
+ }
+ /* message building helper */
+-- 
+2.35.1
+
diff --git a/queue-6.0/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch b/queue-6.0/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch
new file mode 100644 (file)
index 0000000..724b8b8
--- /dev/null
@@ -0,0 +1,42 @@
+From fd16975990e74be48027fada0fcd0ffc5e762cdb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 09:51:30 -0500
+Subject: nfsd: shut down the NFSv4 state objects before the filecache
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 789e1e10f214c00ca18fc6610824c5b9876ba5f2 ]
+
+Currently, we shut down the filecache before trying to clean up the
+stateids that depend on it. This leads to the kernel trying to free an
+nfsd_file twice, and a refcount overput on the nf_mark.
+
+Change the shutdown procedure to tear down all of the stateids prior
+to shutting down the filecache.
+
+Reported-and-tested-by: Wang Yugui <wangyugui@e16-tech.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Fixes: 5e113224c17e ("nfsd: nfsd_file cache entries should be per net namespace")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfssvc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 4bb5baa17040..011c556caa1e 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -447,8 +447,8 @@ static void nfsd_shutdown_net(struct net *net)
+ {
+       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+-      nfsd_file_cache_shutdown_net(net);
+       nfs4_state_shutdown_net(net);
++      nfsd_file_cache_shutdown_net(net);
+       if (nn->lockd_up) {
+               lockd_down(net);
+               nn->lockd_up = false;
+-- 
+2.35.1
+
diff --git a/queue-6.0/nvme-also-return-i-o-command-effects-from-nvme_comma.patch b/queue-6.0/nvme-also-return-i-o-command-effects-from-nvme_comma.patch
new file mode 100644 (file)
index 0000000..fb5dcbd
--- /dev/null
@@ -0,0 +1,81 @@
+From 991e47e6a6dbc999ed85f5815e933f0f3d20647c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Dec 2022 10:12:17 +0100
+Subject: nvme: also return I/O command effects from nvme_command_effects
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 831ed60c2aca2d7c517b2da22897a90224a97d27 ]
+
+To be able to use the Commands Supported and Effects Log for allowing
+unprivileged passthrough, it needs to be correctly reported for I/O
+commands as well.  Return the I/O command effects from
+nvme_command_effects, and also add a default list of effects for the
+NVM command set.  For other command sets, the Commands Supported and
+Effects log is required to be present already.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Keith Busch <kbusch@kernel.org>
+Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c | 32 ++++++++++++++++++++++++++------
+ 1 file changed, 26 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 3582a28a1dce..f06bae83e8ee 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -1069,6 +1069,18 @@ static u32 nvme_known_admin_effects(u8 opcode)
+       return 0;
+ }
++static u32 nvme_known_nvm_effects(u8 opcode)
++{
++      switch (opcode) {
++      case nvme_cmd_write:
++      case nvme_cmd_write_zeroes:
++      case nvme_cmd_write_uncor:
++               return NVME_CMD_EFFECTS_LBCC;
++      default:
++              return 0;
++      }
++}
++
+ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+ {
+       u32 effects = 0;
+@@ -1076,16 +1088,24 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+       if (ns) {
+               if (ns->head->effects)
+                       effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
++              if (ns->head->ids.csi == NVME_CAP_CSS_NVM)
++                      effects |= nvme_known_nvm_effects(opcode);
+               if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
+                       dev_warn_once(ctrl->device,
+-                              "IO command:%02x has unhandled effects:%08x\n",
++                              "IO command:%02x has unusual effects:%08x\n",
+                               opcode, effects);
+-              return 0;
+-      }
+-      if (ctrl->effects)
+-              effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+-      effects |= nvme_known_admin_effects(opcode);
++              /*
++               * NVME_CMD_EFFECTS_CSE_MASK causes a freeze all I/O queues,
++               * which would deadlock when done on an I/O command.  Note that
++               * We already warn about an unusual effect above.
++               */
++              effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
++      } else {
++              if (ctrl->effects)
++                      effects = le32_to_cpu(ctrl->effects->acs[opcode]);
++              effects |= nvme_known_admin_effects(opcode);
++      }
+       return effects;
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch b/queue-6.0/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch
new file mode 100644 (file)
index 0000000..c5ff753
--- /dev/null
@@ -0,0 +1,81 @@
+From 0d48c961f76ed21432853c3000edb9e84facc61b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 09:57:21 +0800
+Subject: nvme: fix multipath crash caused by flush request when blktrace is
+ enabled
+
+From: Yanjun Zhang <zhangyanjun@cestc.cn>
+
+[ Upstream commit 3659fb5ac29a5e6102bebe494ac789fd47fb78f4 ]
+
+The flush request initialized by blk_kick_flush() has a NULL bio,
+and it may be handled by nvme_end_req() during I/O completion.
+When blktrace is enabled and multipath is active,
+nvme_trace_bio_complete() tries to access the flush request's NULL bio,
+which results in the following crash:
+
+[ 2517.831677] BUG: kernel NULL pointer dereference, address: 000000000000001a
+[ 2517.835213] #PF: supervisor read access in kernel mode
+[ 2517.838724] #PF: error_code(0x0000) - not-present page
+[ 2517.842222] PGD 7b2d51067 P4D 0
+[ 2517.845684] Oops: 0000 [#1] SMP NOPTI
+[ 2517.849125] CPU: 2 PID: 732 Comm: kworker/2:1H Kdump: loaded Tainted: G S                5.15.67-0.cl9.x86_64 #1
+[ 2517.852723] Hardware name: XFUSION 2288H V6/BC13MBSBC, BIOS 1.13 07/27/2022
+[ 2517.856358] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp]
+[ 2517.859993] RIP: 0010:blk_add_trace_bio_complete+0x6/0x30
+[ 2517.863628] Code: 1f 44 00 00 48 8b 46 08 31 c9 ba 04 00 10 00 48 8b 80 50 03 00 00 48 8b 78 50 e9 e5 fe ff ff 0f 1f 44 00 00 41 54 49 89 f4 55 <0f> b6 7a 1a 48 89 d5 e8 3e 1c 2b 00 48 89 ee 4c 89 e7 5d 89 c1 ba
+[ 2517.871269] RSP: 0018:ff7f6a008d9dbcd0 EFLAGS: 00010286
+[ 2517.875081] RAX: ff3d5b4be00b1d50 RBX: 0000000002040002 RCX: ff3d5b0a270f2000
+[ 2517.878966] RDX: 0000000000000000 RSI: ff3d5b0b021fb9f8 RDI: 0000000000000000
+[ 2517.882849] RBP: ff3d5b0b96a6fa00 R08: 0000000000000001 R09: 0000000000000000
+[ 2517.886718] R10: 000000000000000c R11: 000000000000000c R12: ff3d5b0b021fb9f8
+[ 2517.890575] R13: 0000000002000000 R14: ff3d5b0b021fb1b0 R15: 0000000000000018
+[ 2517.894434] FS:  0000000000000000(0000) GS:ff3d5b42bfc80000(0000) knlGS:0000000000000000
+[ 2517.898299] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 2517.902157] CR2: 000000000000001a CR3: 00000004f023e005 CR4: 0000000000771ee0
+[ 2517.906053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 2517.909930] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 2517.913761] PKRU: 55555554
+[ 2517.917558] Call Trace:
+[ 2517.921294]  <TASK>
+[ 2517.924982]  nvme_complete_rq+0x1c3/0x1e0 [nvme_core]
+[ 2517.928715]  nvme_tcp_recv_pdu+0x4d7/0x540 [nvme_tcp]
+[ 2517.932442]  nvme_tcp_recv_skb+0x4f/0x240 [nvme_tcp]
+[ 2517.936137]  ? nvme_tcp_recv_pdu+0x540/0x540 [nvme_tcp]
+[ 2517.939830]  tcp_read_sock+0x9c/0x260
+[ 2517.943486]  nvme_tcp_try_recv+0x65/0xa0 [nvme_tcp]
+[ 2517.947173]  nvme_tcp_io_work+0x64/0x90 [nvme_tcp]
+[ 2517.950834]  process_one_work+0x1e8/0x390
+[ 2517.954473]  worker_thread+0x53/0x3c0
+[ 2517.958069]  ? process_one_work+0x390/0x390
+[ 2517.961655]  kthread+0x10c/0x130
+[ 2517.965211]  ? set_kthread_struct+0x40/0x40
+[ 2517.968760]  ret_from_fork+0x1f/0x30
+[ 2517.972285]  </TASK>
+
+To avoid this situation, add a NULL check for req->bio before
+calling trace_block_bio_complete.
+
+Signed-off-by: Yanjun Zhang <zhangyanjun@cestc.cn>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/nvme.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index 70555022cb44..35352206b5de 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -872,7 +872,7 @@ static inline void nvme_trace_bio_complete(struct request *req)
+ {
+       struct nvme_ns *ns = req->q->queuedata;
+-      if (req->cmd_flags & REQ_NVME_MPATH)
++      if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio)
+               trace_block_bio_complete(ns->head->disk->queue, req->bio);
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch b/queue-6.0/nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch
new file mode 100644 (file)
index 0000000..cfe1a30
--- /dev/null
@@ -0,0 +1,75 @@
+From af96175b3175890f6e507976dc448451c3fd2202 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Dec 2022 15:20:04 +0100
+Subject: nvmet: use NVME_CMD_EFFECTS_CSUPP instead of open coding it
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 61f37154c599cf9f2f84dcbd9be842f8645a7099 ]
+
+Use NVME_CMD_EFFECTS_CSUPP instead of open coding it and assign a
+single value to multiple array entries instead of repeated assignments.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Keith Busch <kbusch@kernel.org>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/admin-cmd.c | 35 ++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
+index fc8a957fad0a..4aaa27cc8d2b 100644
+--- a/drivers/nvme/target/admin-cmd.c
++++ b/drivers/nvme/target/admin-cmd.c
+@@ -164,26 +164,29 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
+ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
+ {
+-      log->acs[nvme_admin_get_log_page]       = cpu_to_le32(1 << 0);
+-      log->acs[nvme_admin_identify]           = cpu_to_le32(1 << 0);
+-      log->acs[nvme_admin_abort_cmd]          = cpu_to_le32(1 << 0);
+-      log->acs[nvme_admin_set_features]       = cpu_to_le32(1 << 0);
+-      log->acs[nvme_admin_get_features]       = cpu_to_le32(1 << 0);
+-      log->acs[nvme_admin_async_event]        = cpu_to_le32(1 << 0);
+-      log->acs[nvme_admin_keep_alive]         = cpu_to_le32(1 << 0);
+-
+-      log->iocs[nvme_cmd_read]                = cpu_to_le32(1 << 0);
+-      log->iocs[nvme_cmd_write]               = cpu_to_le32(1 << 0);
+-      log->iocs[nvme_cmd_flush]               = cpu_to_le32(1 << 0);
+-      log->iocs[nvme_cmd_dsm]                 = cpu_to_le32(1 << 0);
+-      log->iocs[nvme_cmd_write_zeroes]        = cpu_to_le32(1 << 0);
++      log->acs[nvme_admin_get_log_page] =
++      log->acs[nvme_admin_identify] =
++      log->acs[nvme_admin_abort_cmd] =
++      log->acs[nvme_admin_set_features] =
++      log->acs[nvme_admin_get_features] =
++      log->acs[nvme_admin_async_event] =
++      log->acs[nvme_admin_keep_alive] =
++              cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
++
++      log->iocs[nvme_cmd_read] =
++      log->iocs[nvme_cmd_write] =
++      log->iocs[nvme_cmd_flush] =
++      log->iocs[nvme_cmd_dsm] =
++      log->iocs[nvme_cmd_write_zeroes] =
++              cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+ }
+ static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log)
+ {
+-      log->iocs[nvme_cmd_zone_append]         = cpu_to_le32(1 << 0);
+-      log->iocs[nvme_cmd_zone_mgmt_send]      = cpu_to_le32(1 << 0);
+-      log->iocs[nvme_cmd_zone_mgmt_recv]      = cpu_to_le32(1 << 0);
++      log->iocs[nvme_cmd_zone_append] =
++      log->iocs[nvme_cmd_zone_mgmt_send] =
++      log->iocs[nvme_cmd_zone_mgmt_recv] =
++              cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+ }
+ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
+-- 
+2.35.1
+
diff --git a/queue-6.0/octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch b/queue-6.0/octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch
new file mode 100644 (file)
index 0000000..7aee0a7
--- /dev/null
@@ -0,0 +1,111 @@
+From 368de4a4fb151ce61956dffe1e676572319f1040 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 09:20:12 +0530
+Subject: octeontx2-pf: Fix lmtst ID used in aura free
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 4af1b64f80fbe1275fb02c5f1c0cef099a4a231f ]
+
+Current code uses per_cpu pointer to get the lmtst_id mapped to
+the core on which aura_free() is executed. Using the per_cpu pointer
+without disabling preemption causes a mismatch between lmtst_id and
+the core on which the pointer gets freed. This patch fixes the issue by
+disabling preemption around aura_free.
+
+Fixes: ef6c8da71eaf ("octeontx2-pf: cn10K: Reserve LMTST lines per core")
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../marvell/octeontx2/nic/otx2_common.c       | 30 +++++++++++++------
+ 1 file changed, 21 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+index 9c2baa437c23..2926d754ade8 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+@@ -977,6 +977,7 @@ static void otx2_pool_refill_task(struct work_struct *work)
+       rbpool = cq->rbpool;
+       free_ptrs = cq->pool_ptrs;
++      get_cpu();
+       while (cq->pool_ptrs) {
+               if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) {
+                       /* Schedule a WQ if we fails to free atleast half of the
+@@ -996,6 +997,7 @@ static void otx2_pool_refill_task(struct work_struct *work)
+               pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
+               cq->pool_ptrs--;
+       }
++      put_cpu();
+       cq->refill_task_sched = false;
+ }
+@@ -1333,6 +1335,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
+       if (err)
+               goto fail;
++      get_cpu();
+       /* Allocate pointers and free them to aura/pool */
+       for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) {
+               pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
+@@ -1341,18 +1344,24 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
+               sq = &qset->sq[qidx];
+               sq->sqb_count = 0;
+               sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL);
+-              if (!sq->sqb_ptrs)
+-                      return -ENOMEM;
++              if (!sq->sqb_ptrs) {
++                      err = -ENOMEM;
++                      goto err_mem;
++              }
+               for (ptr = 0; ptr < num_sqbs; ptr++) {
+-                      if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
+-                              return -ENOMEM;
++                      err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
++                      if (err)
++                              goto err_mem;
+                       pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
+                       sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
+               }
+       }
+-      return 0;
++err_mem:
++      put_cpu();
++      return err ? -ENOMEM : 0;
++
+ fail:
+       otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+       otx2_aura_pool_free(pfvf);
+@@ -1391,18 +1400,21 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
+       if (err)
+               goto fail;
++      get_cpu();
+       /* Allocate pointers and free them to aura/pool */
+       for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
+               pool = &pfvf->qset.pool[pool_id];
+               for (ptr = 0; ptr < num_ptrs; ptr++) {
+-                      if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
+-                              return -ENOMEM;
++                      err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
++                      if (err)
++                              goto err_mem;
+                       pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
+                                                  bufptr + OTX2_HEAD_ROOM);
+               }
+       }
+-
+-      return 0;
++err_mem:
++      put_cpu();
++      return err ? -ENOMEM : 0;
+ fail:
+       otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+       otx2_aura_pool_free(pfvf);
+-- 
+2.35.1
+
diff --git a/queue-6.0/perf-lock-contention-fix-core-dump-related-to-not-fi.patch b/queue-6.0/perf-lock-contention-fix-core-dump-related-to-not-fi.patch
new file mode 100644 (file)
index 0000000..e8a86db
--- /dev/null
@@ -0,0 +1,143 @@
+From 46664f9fbfce7d33045ab6998bb5b70e5231aad0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 11:26:27 +0100
+Subject: perf lock contention: Fix core dump related to not finding the
+ "__sched_text_end" symbol on s/390
+
+From: Thomas Richter <tmricht@linux.ibm.com>
+
+[ Upstream commit d8d85ce86dc82de4f88b821a78f533b9d5b22a45 ]
+
+The test case perf lock contention dumps core on s390. Run the following
+commands:
+
+  # ./perf lock record -- ./perf bench sched messaging
+  # Running 'sched/messaging' benchmark:
+  # 20 sender and receiver processes per group
+  # 10 groups == 400 processes run
+
+      Total time: 2.799 [sec]
+  [ perf record: Woken up 1 times to write data ]
+  [ perf record: Captured and wrote 0.073 MB perf.data (100 samples) ]
+  #
+  # ./perf lock contention
+  Segmentation fault (core dumped)
+  #
+
+The function call stack is lengthy, here are the top 5 functions:
+
+  # gdb ./perf core.24048
+  GNU gdb (GDB) Fedora Linux 12.1-6.fc37
+  Core was generated by `./perf lock contention'.
+  Program terminated with signal SIGSEGV, Segmentation fault.
+  #0  0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356
+         3356 machine->sched.text_end = kmap->unmap_ip(kmap, sym->start);
+
+ (gdb) where
+  #0  0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356
+  #1  0x000000000109f244 in callchain_id (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:957
+  #2  0x000000000109e094 in get_key_by_aggr_mode (key=0x3ffea4f7290, addr=27758136, evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:586
+  #3  0x000000000109f4d0 in report_lock_contention_begin_event (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1004
+  #4  0x00000000010a00ae in evsel__process_contention_begin (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1254
+  #5  0x00000000010a0e14 in process_sample_event (tool=0x3ffea4f8480, event=0x3ff85601ef8, sample=0x3ffea4f77d0, evsel=0x30313e0, machine=0x3029e28) at builtin-lock.c:1464
+  .....
+
+The issue is in function machine__is_lock_function() in file
+./util/machine.c lines 3355:
+
+   /* should not fail from here */
+   sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap);
+   machine->sched.text_end = kmap->unmap_ip(kmap, sym->start)
+
+On s390 the symbol __sched_text_end is *NOT* in the symbol list and the
+resulting pointer sym is set to NULL. The sym->start is then a NULL pointer
+access and generates the core dump.
+
+The reason why __sched_text_end is not in the symbol list on s390 is
+simple:
+
+When the symbol list is created at perf start up with function calls
+
+  dso__load
+  +--> dso__load_vmlinux_path
+       +--> dso__load_vmlinux
+            +--> dso__load_sym
+                +--> dso__load_sym_internal (reads kernel symbols)
+                +--> symbols__fixup_end
+                +--> symbols__fixup_duplicate
+
+The issue is in function symbols__fixup_duplicate(). It deletes all
+symbols which have the same address. On s390:
+
+  # nm -g  ~/linux/vmlinux| fgrep c68390
+  0000000000c68390 T __cpuidle_text_start
+  0000000000c68390 T __sched_text_end
+  #
+
+two symbols have identical addresses and __sched_text_end is considered
+duplicate (in ascending sort order) and removed from the symbol list.
+Therefore it is missing and an invalid pointer reference occurs.  The
+code checks for symbol __sched_text_start and when it exists assumes
+symbol __sched_text_end is also in the symbol table. However this is not
+the case on s390.
+
+Same situation exists for symbol __lock_text_start:
+
+0000000000c68770 T __cpuidle_text_end
+0000000000c68770 T __lock_text_start
+
+This symbol is also removed from the symbol table but used in function
+machine__is_lock_function().
+
+To fix this and keep duplicate symbols in the symbol table, set
+symbol_conf.allow_aliases to true. This prevents the removal of
+duplicate symbols in function symbols__fixup_duplicate().
+
+Output After:
+
+ # ./perf lock contention
+ contended total wait  max wait  avg wait    type   caller
+
+        48   124.39 ms 123.99 ms   2.59 ms rwsem:W unlink_anon_vmas+0x24a
+        47    83.68 ms  83.26 ms   1.78 ms rwsem:W free_pgtables+0x132
+         5    41.22 us  10.55 us   8.24 us rwsem:W free_pgtables+0x140
+         4    40.12 us  20.55 us  10.03 us rwsem:W copy_process+0x1ac8
+ #
+
+Fixes: 0d2997f750d1de39 ("perf lock: Look up callchain for the contended locks")
+Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
+Cc: Sven Schnelle <svens@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Link: https://lore.kernel.org/r/20221230102627.2410847-1-tmricht@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-lock.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
+index ea40ae52cd2c..2bc9231d86b6 100644
+--- a/tools/perf/builtin-lock.c
++++ b/tools/perf/builtin-lock.c
+@@ -1539,6 +1539,7 @@ static int __cmd_report(bool display_info)
+       /* for lock function check */
+       symbol_conf.sort_by_name = true;
++      symbol_conf.allow_aliases = true;
+       symbol__init(&session->header.env);
+       if (!perf_session__has_traces(session, "lock record"))
+@@ -1613,6 +1614,7 @@ static int __cmd_contention(int argc, const char **argv)
+       /* for lock function check */
+       symbol_conf.sort_by_name = true;
++      symbol_conf.allow_aliases = true;
+       symbol__init(&session->header.env);
+       if (use_bpf) {
+-- 
+2.35.1
+
diff --git a/queue-6.0/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch b/queue-6.0/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch
new file mode 100644 (file)
index 0000000..466ed11
--- /dev/null
@@ -0,0 +1,92 @@
+From 6fd4a2e4f386607ceee3c3f2343c195eb76f3d70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 5 Nov 2022 12:01:14 +0900
+Subject: perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as
+ unsinged data
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit a9dfc46c67b52ad43b8e335e28f4cf8002c67793 ]
+
+DWARF version 5 standard Sec 2.14 says that
+
+  Any debugging information entry representing the declaration of an object,
+  module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and
+  DW_AT_decl_column attributes, each of whose value is an unsigned integer
+  constant.
+
+So it should be an unsigned integer data. Also, even though the standard
+doesn't clearly say the DW_AT_call_file is signed or unsigned, the
+elfutils (eu-readelf) interprets it as unsigned integer data and it is
+natural to handle it as unsigned integer data, the same as DW_AT_decl_file.
+This changes DW_AT_call_file to unsigned integer data too.
+
+Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/dwarf-aux.c | 21 ++++-----------------
+ 1 file changed, 4 insertions(+), 17 deletions(-)
+
+diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
+index a07efbadb775..623527edeac1 100644
+--- a/tools/perf/util/dwarf-aux.c
++++ b/tools/perf/util/dwarf-aux.c
+@@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+       return 0;
+ }
+-/* Get attribute and translate it as a sdata */
+-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+-                            Dwarf_Sword *result)
+-{
+-      Dwarf_Attribute attr;
+-
+-      if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+-          dwarf_formsdata(&attr, result) != 0)
+-              return -ENOENT;
+-
+-      return 0;
+-}
+-
+ /**
+  * die_is_signed_type - Check whether a type DIE is signed or not
+  * @tp_die: a DIE of a type
+@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+ /* Get the call file index number in CU DIE */
+ static int die_get_call_fileno(Dwarf_Die *in_die)
+ {
+-      Dwarf_Sword idx;
++      Dwarf_Word idx;
+-      if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
++      if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
+               return (int)idx;
+       else
+               return -ENOENT;
+@@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die)
+ /* Get the declared file index number in CU DIE */
+ static int die_get_decl_fileno(Dwarf_Die *pdie)
+ {
+-      Dwarf_Sword idx;
++      Dwarf_Word idx;
+-      if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
++      if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
+               return (int)idx;
+       else
+               return -ENOENT;
+-- 
+2.35.1
+
diff --git a/queue-6.0/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch b/queue-6.0/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch
new file mode 100644 (file)
index 0000000..2021d6f
--- /dev/null
@@ -0,0 +1,54 @@
+From 97da9a7d892e96283d451815a3f308cdd0a0b67b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Nov 2022 22:48:39 +0900
+Subject: perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit f828929ab7f0dc3353e4a617f94f297fa8f3dec3 ]
+
+Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute
+accessor functions, so that it can find the specified attribute from
+abstract origin DIE etc.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: a9dfc46c67b5 ("perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/dwarf-aux.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
+index 609ca1671501..a07efbadb775 100644
+--- a/tools/perf/util/dwarf-aux.c
++++ b/tools/perf/util/dwarf-aux.c
+@@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ {
+       Dwarf_Attribute attr;
+-      if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
++      if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+           dwarf_formudata(&attr, result) != 0)
+               return -ENOENT;
+@@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+ {
+       Dwarf_Attribute attr;
+-      if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
++      if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+           dwarf_formsdata(&attr, result) != 0)
+               return -ENOENT;
+-- 
+2.35.1
+
diff --git a/queue-6.0/perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch b/queue-6.0/perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch
new file mode 100644 (file)
index 0000000..6091e66
--- /dev/null
@@ -0,0 +1,146 @@
+From 946f59c55fe27187d320b4940cda569fa4be5d22 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 22:44:02 -0800
+Subject: perf stat: Fix handling of --for-each-cgroup with --bpf-counters to
+ match non BPF mode
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+[ Upstream commit 54b353a20c7e8be98414754f5aff98c8a68fcc1f ]
+
+The --for-each-cgroup can have the same cgroup multiple times, but this
+confuses BPF counters (since they have the same cgroup id), so that only
+the last cgroup's events are counted.
+
+Let's check the cgroup name before adding a new entry to the cgroups
+list.
+
+Before:
+
+  $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1
+
+   Performance counter stats for 'system wide':
+
+       <not counted> msec cpu-clock                        /
+       <not counted>      context-switches                 /
+       <not counted>      cpu-migrations                   /
+       <not counted>      page-faults                      /
+       <not counted>      cycles                           /
+       <not counted>      instructions                     /
+       <not counted>      branches                         /
+       <not counted>      branch-misses                    /
+            8,016.04 msec cpu-clock                        /                #    7.998 CPUs utilized
+               6,152      context-switches                 /                #  767.461 /sec
+                 250      cpu-migrations                   /                #   31.187 /sec
+                 442      page-faults                      /                #   55.139 /sec
+         613,111,487      cycles                           /                #    0.076 GHz
+         280,599,604      instructions                     /                #    0.46  insn per cycle
+          57,692,724      branches                         /                #    7.197 M/sec
+           3,385,168      branch-misses                    /                #    5.87% of all branches
+
+         1.002220125 seconds time elapsed
+
+After it becomes similar to the non-BPF mode:
+
+  $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/  sleep 1
+
+   Performance counter stats for 'system wide':
+
+            8,013.38 msec cpu-clock                        /                #    7.998 CPUs utilized
+               6,859      context-switches                 /                #  855.944 /sec
+                 334      cpu-migrations                   /                #   41.680 /sec
+                 345      page-faults                      /                #   43.053 /sec
+         782,326,119      cycles                           /                #    0.098 GHz
+         471,645,724      instructions                     /                #    0.60  insn per cycle
+          94,963,430      branches                         /                #   11.851 M/sec
+           3,685,511      branch-misses                    /                #    3.88% of all branches
+
+         1.001864539 seconds time elapsed
+
+Committer notes:
+
+As a reminder, to test with BPF counters one has to use BUILD_BPF_SKEL=1
+in the make command line and have clang/llvm installed when building
+perf, otherwise the --bpf-counters option will not be available:
+
+  # perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1
+  Error: unknown option `bpf-counters'
+
+   Usage: perf stat [<options>] [<command>]
+
+      -a, --all-cpus        system-wide collection from all CPUs
+  <SNIP>
+  #
+
+Fixes: bb1c15b60b981d10 ("perf stat: Support regex pattern in --for-each-cgroup")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: bpf@vger.kernel.org
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Song Liu <songliubraving@fb.com>
+Link: https://lore.kernel.org/r/20230104064402.1551516-5-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/cgroup.c | 23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
+index e99b41f9be45..cd978c240e0d 100644
+--- a/tools/perf/util/cgroup.c
++++ b/tools/perf/util/cgroup.c
+@@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus
+       return 0;
+ }
++static int check_and_add_cgroup_name(const char *fpath)
++{
++      struct cgroup_name *cn;
++
++      list_for_each_entry(cn, &cgroup_list, list) {
++              if (!strcmp(cn->name, fpath))
++                      return 0;
++      }
++
++      /* pretend if it's added by ftw() */
++      return add_cgroup_name(fpath, NULL, FTW_D, NULL);
++}
++
+ static void release_cgroup_list(void)
+ {
+       struct cgroup_name *cn;
+@@ -242,7 +255,7 @@ static int list_cgroups(const char *str)
+       struct cgroup_name *cn;
+       char *s;
+-      /* use given name as is - for testing purpose */
++      /* use given name as is when no regex is given */
+       for (;;) {
+               p = strchr(str, ',');
+               e = p ? p : eos;
+@@ -253,13 +266,13 @@ static int list_cgroups(const char *str)
+                       s = strndup(str, e - str);
+                       if (!s)
+                               return -1;
+-                      /* pretend if it's added by ftw() */
+-                      ret = add_cgroup_name(s, NULL, FTW_D, NULL);
++
++                      ret = check_and_add_cgroup_name(s);
+                       free(s);
+-                      if (ret)
++                      if (ret < 0)
+                               return -1;
+               } else {
+-                      if (add_cgroup_name("", NULL, FTW_D, NULL) < 0)
++                      if (check_and_add_cgroup_name("/") < 0)
+                               return -1;
+               }
+-- 
+2.35.1
+
diff --git a/queue-6.0/perf-stat-fix-handling-of-unsupported-cgroup-events-.patch b/queue-6.0/perf-stat-fix-handling-of-unsupported-cgroup-events-.patch
new file mode 100644 (file)
index 0000000..87da42b
--- /dev/null
@@ -0,0 +1,89 @@
+From 51dc0a18f4eaf1195df1c0a4fd0ce03250e84458 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 22:44:01 -0800
+Subject: perf stat: Fix handling of unsupported cgroup events when using BPF
+ counters
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+[ Upstream commit 2d656b0f81b22101db0447f890e39fdd736b745e ]
+
+When the --for-each-cgroup option is used, it fails when any of the events
+is not supported and exits immediately.  This is not how 'perf stat'
+handles unsupported events.
+
+Let's ignore the failure and proceed with others so that the output is
+similar to when BPF counters are not used:
+
+Before:
+
+  $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1
+  Failed to open first cgroup events
+  $
+
+After it shows output similar to when --bpf-counters isn't specified:
+
+  $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1
+
+   Performance counter stats for 'system wide':
+
+     <not supported>      L1-icache-loads                  system.slice
+          29,892,418      L1-dcache-loads                  system.slice
+     <not supported>      L1-icache-loads                  user.slice
+          52,497,220      L1-dcache-loads                  user.slice
+  $
+
+Fixes: 944138f048f7d759 ("perf stat: Enable BPF counter with --for-each-cgroup")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Song Liu <songliubraving@fb.com>
+Link: https://lore.kernel.org/r/20230104064402.1551516-4-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/bpf_counter_cgroup.c | 14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
+diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
+index 3c2df7522f6f..1c82377ed78b 100644
+--- a/tools/perf/util/bpf_counter_cgroup.c
++++ b/tools/perf/util/bpf_counter_cgroup.c
+@@ -116,27 +116,19 @@ static int bperf_load_program(struct evlist *evlist)
+                       /* open single copy of the events w/o cgroup */
+                       err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
+-                      if (err) {
+-                              pr_err("Failed to open first cgroup events\n");
+-                              goto out;
+-                      }
++                      if (err == 0)
++                              evsel->supported = true;
+                       map_fd = bpf_map__fd(skel->maps.events);
+                       perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
+                               int fd = FD(evsel, j);
+                               __u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
+-                              err = bpf_map_update_elem(map_fd, &idx, &fd,
+-                                                        BPF_ANY);
+-                              if (err < 0) {
+-                                      pr_err("Failed to update perf_event fd\n");
+-                                      goto out;
+-                              }
++                              bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY);
+                       }
+                       evsel->cgrp = leader_cgrp;
+               }
+-              evsel->supported = true;
+               if (evsel->cgrp == cgrp)
+                       continue;
+-- 
+2.35.1
+
diff --git a/queue-6.0/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch b/queue-6.0/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch
new file mode 100644 (file)
index 0000000..cd8048c
--- /dev/null
@@ -0,0 +1,52 @@
+From a8ad7c83ecc8cd9027032dcd24b5b05c386676c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 13:09:00 +0400
+Subject: perf tools: Fix resources leak in perf_data__open_dir()
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit 0a6564ebd953c4590663c9a3c99a3ea9920ade6f ]
+
+In perf_data__open_dir(), opendir() opens the directory stream.  Add
+missing closedir() to release it after use.
+
+Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function")
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/data.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
+index a7f68c309545..fc16299c915f 100644
+--- a/tools/perf/util/data.c
++++ b/tools/perf/util/data.c
+@@ -132,6 +132,7 @@ int perf_data__open_dir(struct perf_data *data)
+               file->size = st.st_size;
+       }
++      closedir(dir);
+       if (!files)
+               return -EINVAL;
+@@ -140,6 +141,7 @@ int perf_data__open_dir(struct perf_data *data)
+       return 0;
+ out_err:
++      closedir(dir);
+       close_dir(files, nr);
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/phy-qcom-qmp-combo-fix-broken-power-on.patch b/queue-6.0/phy-qcom-qmp-combo-fix-broken-power-on.patch
new file mode 100644 (file)
index 0000000..3953e7a
--- /dev/null
@@ -0,0 +1,96 @@
+From 6725c8b6687b6f84640ec5fa868cd48c94e7e564 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 09:13:44 +0100
+Subject: phy: qcom-qmp-combo: fix broken power on
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 7a7d86d14d073dfa3429c550667a8e78b99edbd4 ]
+
+The PHY is powered on during phy-init by setting the SW_PWRDN bit in the
+COM_POWER_DOWN_CTRL register and then setting the same bit in the
+PCS_POWER_DOWN_CONTROL register that belongs to the USB part of the
+PHY.
+
+Currently, whether power on succeeds depends on probe order and having
+the USB part of the PHY be initialised first. In case the DP part of the
+PHY is instead initialised first, the intended power on of the USB block
+results in a corrupted DP_PHY register (e.g. DP_PHY_AUX_CFG8).
+
+Add a pointer to the USB part of the PHY to the driver data and use that
+to power on the PHY also if the DP part of the PHY is initialised first.
+
+Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy")
+Cc: stable@vger.kernel.org     # 5.10
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/20221114081346.5116-5-johan+linaro@kernel.org
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+index 0feda8eb93b5..5e011520650d 100644
+--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
++++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+@@ -767,6 +767,7 @@ struct qcom_qmp {
+       struct regulator_bulk_data *vregs;
+       struct qmp_phy **phys;
++      struct qmp_phy *usb_phy;
+       struct mutex phy_mutex;
+       int init_count;
+@@ -1607,7 +1608,7 @@ static int qcom_qmp_phy_combo_com_init(struct qmp_phy *qphy)
+ {
+       struct qcom_qmp *qmp = qphy->qmp;
+       const struct qmp_phy_cfg *cfg = qphy->cfg;
+-      void __iomem *pcs = qphy->pcs;
++      struct qmp_phy *usb_phy = qmp->usb_phy;
+       void __iomem *dp_com = qmp->dp_com;
+       int ret;
+@@ -1663,13 +1664,13 @@ static int qcom_qmp_phy_combo_com_init(struct qmp_phy *qphy)
+               qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
+       }
+-      if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
+-              qphy_setbits(pcs,
+-                              cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+-                              cfg->pwrdn_ctrl);
++      if (usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
++              qphy_setbits(usb_phy->pcs,
++                              usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
++                              usb_phy->cfg->pwrdn_ctrl);
+       else
+-              qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
+-                              cfg->pwrdn_ctrl);
++              qphy_setbits(usb_phy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
++                              usb_phy->cfg->pwrdn_ctrl);
+       mutex_unlock(&qmp->phy_mutex);
+@@ -2576,6 +2577,8 @@ static int qcom_qmp_phy_combo_probe(struct platform_device *pdev)
+                               goto err_node_put;
+                       }
++                      qmp->usb_phy = qmp->phys[id];
++
+                       /*
+                        * Register the pipe clock provided by phy.
+                        * See function description to see details of this pipe clock.
+@@ -2591,6 +2594,9 @@ static int qcom_qmp_phy_combo_probe(struct platform_device *pdev)
+               id++;
+       }
++      if (!qmp->usb_phy)
++              return -EINVAL;
++
+       phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+       if (!IS_ERR(phy_provider))
+               dev_info(dev, "Registered Qcom-QMP phy\n");
+-- 
+2.35.1
+
diff --git a/queue-6.0/qed-allow-sleep-in-qed_mcp_trace_dump.patch b/queue-6.0/qed-allow-sleep-in-qed_mcp_trace_dump.patch
new file mode 100644 (file)
index 0000000..a2c514e
--- /dev/null
@@ -0,0 +1,168 @@
+From 860c536ed10ffe87130cc8ddd39f3a2979af87ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 16:30:21 -0700
+Subject: qed: allow sleep in qed_mcp_trace_dump()
+
+From: Caleb Sander <csander@purestorage.com>
+
+[ Upstream commit 5401c3e0992860b11fb4b25796e4c4f1921740df ]
+
+By default, qed_mcp_cmd_and_union() delays 10us at a time in a loop
+that can run 500K times, so calls to qed_mcp_nvm_rd_cmd()
+may block the current thread for over 5s.
+We observed thread scheduling delays over 700ms in production,
+with stacktraces pointing to this code as the culprit.
+
+qed_mcp_trace_dump() is called from ethtool, so sleeping is permitted.
+It already can sleep in qed_mcp_halt(), which calls qed_mcp_cmd().
+Add a "can sleep" parameter to qed_find_nvram_image() and
+qed_nvram_read() so they can sleep during qed_mcp_trace_dump().
+qed_mcp_trace_get_meta_info() and qed_mcp_trace_read_meta(),
+called only by qed_mcp_trace_dump(), allow these functions to sleep.
+I can't tell if the other caller (qed_grc_dump_mcp_hw_dump()) can sleep,
+so keep b_can_sleep set to false when it calls these functions.
+
+An example stacktrace from a custom warning we added to the kernel
+showing a thread that has not scheduled despite long needing resched:
+[ 2745.362925,17] ------------[ cut here ]------------
+[ 2745.362941,17] WARNING: CPU: 23 PID: 5640 at arch/x86/kernel/irq.c:233 do_IRQ+0x15e/0x1a0()
+[ 2745.362946,17] Thread not rescheduled for 744 ms after irq 99
+[ 2745.362956,17] Modules linked in: ...
+[ 2745.363339,17] CPU: 23 PID: 5640 Comm: lldpd Tainted: P           O    4.4.182+ #202104120910+6d1da174272d.61x
+[ 2745.363343,17] Hardware name: FOXCONN MercuryB/Quicksilver Controller, BIOS H11P1N09 07/08/2020
+[ 2745.363346,17]  0000000000000000 ffff885ec07c3ed8 ffffffff8131eb2f ffff885ec07c3f20
+[ 2745.363358,17]  ffffffff81d14f64 ffff885ec07c3f10 ffffffff81072ac2 ffff88be98ed0000
+[ 2745.363369,17]  0000000000000063 0000000000000174 0000000000000074 0000000000000000
+[ 2745.363379,17] Call Trace:
+[ 2745.363382,17]  <IRQ>  [<ffffffff8131eb2f>] dump_stack+0x8e/0xcf
+[ 2745.363393,17]  [<ffffffff81072ac2>] warn_slowpath_common+0x82/0xc0
+[ 2745.363398,17]  [<ffffffff81072b4c>] warn_slowpath_fmt+0x4c/0x50
+[ 2745.363404,17]  [<ffffffff810d5a8e>] ? rcu_irq_exit+0xae/0xc0
+[ 2745.363408,17]  [<ffffffff817c99fe>] do_IRQ+0x15e/0x1a0
+[ 2745.363413,17]  [<ffffffff817c7ac9>] common_interrupt+0x89/0x89
+[ 2745.363416,17]  <EOI>  [<ffffffff8132aa74>] ? delay_tsc+0x24/0x50
+[ 2745.363425,17]  [<ffffffff8132aa04>] __udelay+0x34/0x40
+[ 2745.363457,17]  [<ffffffffa04d45ff>] qed_mcp_cmd_and_union+0x36f/0x7d0 [qed]
+[ 2745.363473,17]  [<ffffffffa04d5ced>] qed_mcp_nvm_rd_cmd+0x4d/0x90 [qed]
+[ 2745.363490,17]  [<ffffffffa04e1dc7>] qed_mcp_trace_dump+0x4a7/0x630 [qed]
+[ 2745.363504,17]  [<ffffffffa04e2556>] ? qed_fw_asserts_dump+0x1d6/0x1f0 [qed]
+[ 2745.363520,17]  [<ffffffffa04e4ea7>] qed_dbg_mcp_trace_get_dump_buf_size+0x37/0x80 [qed]
+[ 2745.363536,17]  [<ffffffffa04ea881>] qed_dbg_feature_size+0x61/0xa0 [qed]
+[ 2745.363551,17]  [<ffffffffa04eb427>] qed_dbg_all_data_size+0x247/0x260 [qed]
+[ 2745.363560,17]  [<ffffffffa0482c10>] qede_get_regs_len+0x30/0x40 [qede]
+[ 2745.363566,17]  [<ffffffff816c9783>] ethtool_get_drvinfo+0xe3/0x190
+[ 2745.363570,17]  [<ffffffff816cc152>] dev_ethtool+0x1362/0x2140
+[ 2745.363575,17]  [<ffffffff8109bcc6>] ? finish_task_switch+0x76/0x260
+[ 2745.363580,17]  [<ffffffff817c2116>] ? __schedule+0x3c6/0x9d0
+[ 2745.363585,17]  [<ffffffff810dbd50>] ? hrtimer_start_range_ns+0x1d0/0x370
+[ 2745.363589,17]  [<ffffffff816c1e5b>] ? dev_get_by_name_rcu+0x6b/0x90
+[ 2745.363594,17]  [<ffffffff816de6a8>] dev_ioctl+0xe8/0x710
+[ 2745.363599,17]  [<ffffffff816a58a8>] sock_do_ioctl+0x48/0x60
+[ 2745.363603,17]  [<ffffffff816a5d87>] sock_ioctl+0x1c7/0x280
+[ 2745.363608,17]  [<ffffffff8111f393>] ? seccomp_phase1+0x83/0x220
+[ 2745.363612,17]  [<ffffffff811e3503>] do_vfs_ioctl+0x2b3/0x4e0
+[ 2745.363616,17]  [<ffffffff811e3771>] SyS_ioctl+0x41/0x70
+[ 2745.363619,17]  [<ffffffff817c6ffe>] entry_SYSCALL_64_fastpath+0x1e/0x79
+[ 2745.363622,17] ---[ end trace f6954aa440266421 ]---
+
+Fixes: c965db4446291 ("qed: Add support for debug data collection")
+Signed-off-by: Caleb Sander <csander@purestorage.com>
+Acked-by: Alok Prasad <palok@marvell.com>
+Link: https://lore.kernel.org/r/20230103233021.1457646-1-csander@purestorage.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_debug.c | 28 +++++++++++++++------
+ 1 file changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
+index 86ecb080b153..cdcead614e9f 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
+@@ -1832,7 +1832,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
+                                           struct qed_ptt *p_ptt,
+                                           u32 image_type,
+                                           u32 *nvram_offset_bytes,
+-                                          u32 *nvram_size_bytes)
++                                          u32 *nvram_size_bytes,
++                                          bool b_can_sleep)
+ {
+       u32 ret_mcp_resp, ret_mcp_param, ret_txn_size;
+       struct mcp_file_att file_att;
+@@ -1846,7 +1847,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
+                                       &ret_mcp_resp,
+                                       &ret_mcp_param,
+                                       &ret_txn_size,
+-                                      (u32 *)&file_att, false);
++                                      (u32 *)&file_att,
++                                      b_can_sleep);
+       /* Check response */
+       if (nvm_result || (ret_mcp_resp & FW_MSG_CODE_MASK) !=
+@@ -1873,7 +1875,9 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
+ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 nvram_offset_bytes,
+-                                    u32 nvram_size_bytes, u32 *ret_buf)
++                                    u32 nvram_size_bytes,
++                                    u32 *ret_buf,
++                                    bool b_can_sleep)
+ {
+       u32 ret_mcp_resp, ret_mcp_param, ret_read_size, bytes_to_copy;
+       s32 bytes_left = nvram_size_bytes;
+@@ -1899,7 +1903,7 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
+                                      &ret_mcp_resp,
+                                      &ret_mcp_param, &ret_read_size,
+                                      (u32 *)((u8 *)ret_buf + read_offset),
+-                                     false))
++                                     b_can_sleep))
+                       return DBG_STATUS_NVRAM_READ_FAILED;
+               /* Check response */
+@@ -3380,7 +3384,8 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn,
+                                     p_ptt,
+                                     NVM_TYPE_HW_DUMP_OUT,
+                                     &hw_dump_offset_bytes,
+-                                    &hw_dump_size_bytes);
++                                    &hw_dump_size_bytes,
++                                    false);
+       if (status != DBG_STATUS_OK)
+               return 0;
+@@ -3397,7 +3402,9 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn,
+               status = qed_nvram_read(p_hwfn,
+                                       p_ptt,
+                                       hw_dump_offset_bytes,
+-                                      hw_dump_size_bytes, dump_buf + offset);
++                                      hw_dump_size_bytes,
++                                      dump_buf + offset,
++                                      false);
+               if (status != DBG_STATUS_OK) {
+                       DP_NOTICE(p_hwfn,
+                                 "Failed to read MCP HW Dump image from NVRAM\n");
+@@ -4123,7 +4130,9 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
+       return qed_find_nvram_image(p_hwfn,
+                                   p_ptt,
+                                   nvram_image_type,
+-                                  trace_meta_offset, trace_meta_size);
++                                  trace_meta_offset,
++                                  trace_meta_size,
++                                  true);
+ }
+ /* Reads the MCP Trace meta data from NVRAM into the specified buffer */
+@@ -4139,7 +4148,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn,
+       /* Read meta data from NVRAM */
+       status = qed_nvram_read(p_hwfn,
+                               p_ptt,
+-                              nvram_offset_in_bytes, size_in_bytes, buf);
++                              nvram_offset_in_bytes,
++                              size_in_bytes,
++                              buf,
++                              true);
+       if (status != DBG_STATUS_OK)
+               return status;
+-- 
+2.35.1
+
diff --git a/queue-6.0/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch b/queue-6.0/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch
new file mode 100644 (file)
index 0000000..00ff8ea
--- /dev/null
@@ -0,0 +1,103 @@
+From 212f93eae86673033f9d3c0b810441860793a4f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:52:28 +0300
+Subject: qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure
+
+From: Daniil Tatianin <d-tatianin@yandex-team.ru>
+
+[ Upstream commit 13a7c8964afcd8ca43c0b6001ebb0127baa95362 ]
+
+adapter->dcb would get silently freed inside qlcnic_dcb_enable() in
+case qlcnic_dcb_attach() would return an error, which always happens
+under OOM conditions. This would lead to use-after-free because both
+of the existing callers invoke qlcnic_dcb_get_info() on the obtained
+pointer, which is potentially freed at that point.
+
+Propagate errors from qlcnic_dcb_enable(), and instead free the dcb
+pointer at callsite using qlcnic_dcb_free(). This also removes the now
+unused qlcnic_clear_dcb_ops() helper, which was a simple wrapper around
+kfree() also causing memory leaks for partially initialized dcb.
+
+Found by Linux Verification Center (linuxtesting.org) with the SVACE
+static analysis tool.
+
+Fixes: 3c44bba1d270 ("qlcnic: Disable DCB operations from SR-IOV VFs")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c |  8 +++++++-
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h       | 10 ++--------
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c      |  8 +++++++-
+ 3 files changed, 16 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+index dbb800769cb6..c95d56e56c59 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+@@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter)
+               goto disable_mbx_intr;
+       qlcnic_83xx_clear_function_resources(adapter);
+-      qlcnic_dcb_enable(adapter->dcb);
++
++      err = qlcnic_dcb_enable(adapter->dcb);
++      if (err) {
++              qlcnic_dcb_free(adapter->dcb);
++              goto disable_mbx_intr;
++      }
++
+       qlcnic_83xx_initialize_nic(adapter, 1);
+       qlcnic_dcb_get_info(adapter->dcb);
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+index 7519773eaca6..22afa2be85fd 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+@@ -41,11 +41,6 @@ struct qlcnic_dcb {
+       unsigned long                   state;
+ };
+-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb)
+-{
+-      kfree(dcb);
+-}
+-
+ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb)
+ {
+       if (dcb && dcb->ops->get_hw_capability)
+@@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb)
+               dcb->ops->init_dcbnl_ops(dcb);
+ }
+-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
++static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
+ {
+-      if (dcb && qlcnic_dcb_attach(dcb))
+-              qlcnic_clear_dcb_ops(dcb);
++      return dcb ? qlcnic_dcb_attach(dcb) : 0;
+ }
+ #endif
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+index 28476b982bab..44dac3c0908e 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -2599,7 +2599,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+                        "Device does not support MSI interrupts\n");
+       if (qlcnic_82xx_check(adapter)) {
+-              qlcnic_dcb_enable(adapter->dcb);
++              err = qlcnic_dcb_enable(adapter->dcb);
++              if (err) {
++                      qlcnic_dcb_free(adapter->dcb);
++                      dev_err(&pdev->dev, "Failed to enable DCB\n");
++                      goto err_out_free_hw;
++              }
++
+               qlcnic_dcb_get_info(adapter->dcb);
+               err = qlcnic_setup_intr(adapter);
+-- 
+2.35.1
+
diff --git a/queue-6.0/rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch b/queue-6.0/rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch
new file mode 100644 (file)
index 0000000..6b2714f
--- /dev/null
@@ -0,0 +1,112 @@
+From b2a35a405be949f4d19176367b1728ce07cee40d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 14:56:09 +0200
+Subject: RDMA/mlx5: Fix mlx5_ib_get_hw_stats when used for device
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 38b50aa44495d5eb4218f0b82fc2da76505cec53 ]
+
+Currently, when mlx5_ib_get_hw_stats() is used for a device (port_num = 0),
+there is special handling in order to use the correct counters, but
+port_num is passed down the stack without any change.  Also, some
+functions assume that port_num >= 1. As a result, the following oops can
+occur.
+
+ BUG: unable to handle page fault for address: ffff89510294f1a8
+ #PF: supervisor write access in kernel mode
+ #PF: error_code(0x0002) - not-present page
+ PGD 0 P4D 0
+ Oops: 0002 [#1] SMP
+ CPU: 8 PID: 1382 Comm: devlink Tainted: G W          6.1.0-rc4_for_upstream_base_2022_11_10_16_12 #1
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ RIP: 0010:_raw_spin_lock+0xc/0x20
+ Call Trace:
+  <TASK>
+  mlx5_ib_get_native_port_mdev+0x73/0xe0 [mlx5_ib]
+  do_get_hw_stats.constprop.0+0x109/0x160 [mlx5_ib]
+  mlx5_ib_get_hw_stats+0xad/0x180 [mlx5_ib]
+  ib_setup_device_attrs+0xf0/0x290 [ib_core]
+  ib_register_device+0x3bb/0x510 [ib_core]
+  ? atomic_notifier_chain_register+0x67/0x80
+  __mlx5_ib_add+0x2b/0x80 [mlx5_ib]
+  mlx5r_probe+0xb8/0x150 [mlx5_ib]
+  ? auxiliary_match_id+0x6a/0x90
+  auxiliary_bus_probe+0x3c/0x70
+  ? driver_sysfs_add+0x6b/0x90
+  really_probe+0xcd/0x380
+  __driver_probe_device+0x80/0x170
+  driver_probe_device+0x1e/0x90
+  __device_attach_driver+0x7d/0x100
+  ? driver_allows_async_probing+0x60/0x60
+  ? driver_allows_async_probing+0x60/0x60
+  bus_for_each_drv+0x7b/0xc0
+  __device_attach+0xbc/0x200
+  bus_probe_device+0x87/0xa0
+  device_add+0x404/0x940
+  ? dev_set_name+0x53/0x70
+  __auxiliary_device_add+0x43/0x60
+  add_adev+0x99/0xe0 [mlx5_core]
+  mlx5_attach_device+0xc8/0x120 [mlx5_core]
+  mlx5_load_one_devl_locked+0xb2/0xe0 [mlx5_core]
+  devlink_reload+0x133/0x250
+  devlink_nl_cmd_reload+0x480/0x570
+  ? devlink_nl_pre_doit+0x44/0x2b0
+  genl_family_rcv_msg_doit.isra.0+0xc2/0x110
+  genl_rcv_msg+0x180/0x2b0
+  ? devlink_nl_cmd_region_read_dumpit+0x540/0x540
+  ? devlink_reload+0x250/0x250
+  ? devlink_put+0x50/0x50
+  ? genl_family_rcv_msg_doit.isra.0+0x110/0x110
+  netlink_rcv_skb+0x54/0x100
+  genl_rcv+0x24/0x40
+  netlink_unicast+0x1f6/0x2c0
+  netlink_sendmsg+0x237/0x490
+  sock_sendmsg+0x33/0x40
+  __sys_sendto+0x103/0x160
+  ? handle_mm_fault+0x10e/0x290
+  ? do_user_addr_fault+0x1c0/0x5f0
+  __x64_sys_sendto+0x25/0x30
+  do_syscall_64+0x3d/0x90
+  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Fix it by setting port_num to 1 in order to get device status and remove
+unused variable.
+
+Fixes: aac4492ef23a ("IB/mlx5: Update counter implementation for dual port RoCE")
+Link: https://lore.kernel.org/r/98b82994c3cd3fa593b8a75ed3f3901e208beb0f.1672231736.git.leonro@nvidia.com
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Patrisious Haddad <phaddad@nvidia.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/counters.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
+index 945758f39523..3e1272695d99 100644
+--- a/drivers/infiniband/hw/mlx5/counters.c
++++ b/drivers/infiniband/hw/mlx5/counters.c
+@@ -278,7 +278,6 @@ static int do_get_hw_stats(struct ib_device *ibdev,
+       const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
+       struct mlx5_core_dev *mdev;
+       int ret, num_counters;
+-      u32 mdev_port_num;
+       if (!stats)
+               return -EINVAL;
+@@ -299,8 +298,9 @@ static int do_get_hw_stats(struct ib_device *ibdev,
+       }
+       if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+-              mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
+-                                                  &mdev_port_num);
++              if (!port_num)
++                      port_num = 1;
++              mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
+               if (!mdev) {
+                       /* If port is not affiliated yet, its in down state
+                        * which doesn't have any counters yet, so it would be
+-- 
+2.35.1
+
diff --git a/queue-6.0/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch b/queue-6.0/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch
new file mode 100644 (file)
index 0000000..07bba53
--- /dev/null
@@ -0,0 +1,95 @@
+From a6e20aa2e700762b5729211f535a7b659e18f4d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 14:56:10 +0200
+Subject: RDMA/mlx5: Fix validation of max_rd_atomic caps for DC
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit 8de8482fe5732fbef4f5af82bc0c0362c804cd1f ]
+
+Currently, when modifying a DC QP, we validate the max_rd_atomic user
+attribute against the RC cap; validate it against the DC cap instead. RC
+and DC QP types have different device limitations.
+
+This can cause userspace created DC QPs to malfunction.
+
+Fixes: c32a4f296e1d ("IB/mlx5: Add support for DC Initiator QP")
+Link: https://lore.kernel.org/r/0c5aee72cea188c3bb770f4207cce7abc9b6fc74.1672231736.git.leonro@nvidia.com
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++----------
+ 1 file changed, 35 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index 40d9410ec303..cf953d23d18d 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -4502,6 +4502,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
+       return false;
+ }
++static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr,
++                            int attr_mask, enum ib_qp_type qp_type)
++{
++      int log_max_ra_res;
++      int log_max_ra_req;
++
++      if (qp_type == MLX5_IB_QPT_DCI) {
++              log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_res_dc);
++              log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_req_dc);
++      } else {
++              log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_res_qp);
++              log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_req_qp);
++      }
++
++      if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
++          attr->max_rd_atomic > log_max_ra_res) {
++              mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
++                          attr->max_rd_atomic);
++              return false;
++      }
++
++      if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
++          attr->max_dest_rd_atomic > log_max_ra_req) {
++              mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
++                          attr->max_dest_rd_atomic);
++              return false;
++      }
++      return true;
++}
++
+ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+ {
+@@ -4589,21 +4623,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+               goto out;
+       }
+-      if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+-          attr->max_rd_atomic >
+-          (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
+-              mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+-                          attr->max_rd_atomic);
+-              goto out;
+-      }
+-
+-      if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+-          attr->max_dest_rd_atomic >
+-          (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
+-              mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+-                          attr->max_dest_rd_atomic);
++      if (!validate_rd_atomic(dev, attr, attr_mask, qp_type))
+               goto out;
+-      }
+       if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+               err = 0;
+-- 
+2.35.1
+
diff --git a/queue-6.0/selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch b/queue-6.0/selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch
new file mode 100644 (file)
index 0000000..371aa01
--- /dev/null
@@ -0,0 +1,62 @@
+From feeadbcb8b6a8180f2506aa1b963a6c9f7c38352 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 17:18:28 +0800
+Subject: selftests: net: fix cleanup_v6() for arp_ndisc_evict_nocarrier
+
+From: Po-Hsu Lin <po-hsu.lin@canonical.com>
+
+[ Upstream commit 9c4d7f45d60745a1cea0e841fa5e3444c398d2f1 ]
+
+cleanup_v6() causes the arp_ndisc_evict_nocarrier script to exit
+with 255 (No such file or directory), even though the tests pass:
+
+ # selftests: net: arp_ndisc_evict_nocarrier.sh
+ # run arp_evict_nocarrier=1 test
+ # RTNETLINK answers: File exists
+ # ok
+ # run arp_evict_nocarrier=0 test
+ # RTNETLINK answers: File exists
+ # ok
+ # run all.arp_evict_nocarrier=0 test
+ # RTNETLINK answers: File exists
+ # ok
+ # run ndisc_evict_nocarrier=1 test
+ # ok
+ # run ndisc_evict_nocarrier=0 test
+ # ok
+ # run all.ndisc_evict_nocarrier=0 test
+ # ok
+ not ok 1 selftests: net: arp_ndisc_evict_nocarrier.sh # exit=255
+
+This is because it's trying to modify the parameter for ipv4 instead.
+
+Also, the tests for ipv6 (run_ndisc_evict_nocarrier_enabled() and
+run_ndisc_evict_nocarrier_disabled()) work on veth1; reflect
+this fact in cleanup_v6().
+
+Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier")
+Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+index b5af08af8559..b4ec1eeee6c9 100755
+--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
++++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+@@ -24,8 +24,8 @@ cleanup_v6()
+     ip netns del me
+     ip netns del peer
+-    sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+-    sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
++    sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1
++    sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+ }
+ create_ns()
+-- 
+2.35.1
+
diff --git a/queue-6.0/selftests-net-return-non-zero-for-failures-reported-.patch b/queue-6.0/selftests-net-return-non-zero-for-failures-reported-.patch
new file mode 100644 (file)
index 0000000..1b40f3b
--- /dev/null
@@ -0,0 +1,100 @@
+From 8c55180fdb60dd1406f80354e6145c9ab43657f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 17:18:29 +0800
+Subject: selftests: net: return non-zero for failures reported in
+ arp_ndisc_evict_nocarrier
+
+From: Po-Hsu Lin <po-hsu.lin@canonical.com>
+
+[ Upstream commit 1856628baa17032531916984808d1bdfd62700d4 ]
+
+Return non-zero return value if there is any failure reported in this
+script during the test. Otherwise it can only reflect the status of
+the last command.
+
+Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier")
+Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../selftests/net/arp_ndisc_evict_nocarrier.sh        | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+index b4ec1eeee6c9..4a110bb01e53 100755
+--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
++++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+@@ -18,6 +18,7 @@ readonly V4_ADDR1=10.0.10.2
+ readonly V6_ADDR0=2001:db8:91::1
+ readonly V6_ADDR1=2001:db8:91::2
+ nsid=100
++ret=0
+ cleanup_v6()
+ {
+@@ -61,7 +62,7 @@ setup_v6() {
+     if [ $? -ne 0 ]; then
+         cleanup_v6
+         echo "failed"
+-        exit
++        exit 1
+     fi
+     # Set veth2 down, which will put veth1 in NOCARRIER state
+@@ -88,7 +89,7 @@ setup_v4() {
+     if [ $? -ne 0 ]; then
+         cleanup_v4
+         echo "failed"
+-        exit
++        exit 1
+     fi
+     # Set veth1 down, which will put veth0 in NOCARRIER state
+@@ -115,6 +116,7 @@ run_arp_evict_nocarrier_enabled() {
+     if [ $? -eq 0 ];then
+         echo "failed"
++        ret=1
+     else
+         echo "ok"
+     fi
+@@ -134,6 +136,7 @@ run_arp_evict_nocarrier_disabled() {
+         echo "ok"
+     else
+         echo "failed"
++        ret=1
+     fi
+     cleanup_v4
+@@ -164,6 +167,7 @@ run_ndisc_evict_nocarrier_enabled() {
+     if [ $? -eq 0 ];then
+         echo "failed"
++        ret=1
+     else
+         echo "ok"
+     fi
+@@ -182,6 +186,7 @@ run_ndisc_evict_nocarrier_disabled() {
+         echo "ok"
+     else
+         echo "failed"
++        ret=1
+     fi
+     cleanup_v6
+@@ -198,6 +203,7 @@ run_ndisc_evict_nocarrier_disabled_all() {
+         echo "ok"
+     else
+         echo "failed"
++        ret=1
+     fi
+     cleanup_v6
+@@ -218,3 +224,4 @@ if [ "$(id -u)" -ne 0 ];then
+ fi
+ run_all_tests
++exit $ret
+-- 
+2.35.1
+
index c673aa408d8304e6c607bed8b9bb4f1d47c925e6..688495ae966071a8d0d87de388707e8ced335cdb 100644 (file)
@@ -3,3 +3,118 @@ btrfs-replace-strncpy-with-strscpy.patch
 cifs-fix-interface-count-calculation-during-refresh.patch
 cifs-refcount-only-the-selected-iface-during-interface-update.patch
 usb-dwc3-gadget-ignore-end-transfer-delay-on-teardown.patch
+perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch
+perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch
+phy-qcom-qmp-combo-fix-broken-power-on.patch
+ext4-goto-right-label-failed_mount3a.patch
+ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch
+sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch
+btrfs-fix-an-error-handling-path-in-btrfs_defrag_lea.patch
+wifi-ath9k-use-proper-statements-in-conditionals.patch
+bpf-pull-before-calling-skb_postpull_rcsum.patch
+drm-panfrost-fix-gem-handle-creation-ref-counting.patch
+netfilter-nf_tables-consolidate-set-description.patch
+netfilter-nf_tables-add-function-to-create-set-state.patch
+netfilter-nf_tables-perform-type-checking-for-existi.patch
+ice-xsk-do-not-use-xdp_return_frame-on-tx_buf-raw_bu.patch
+net-vrf-determine-the-dst-using-the-original-ifindex.patch
+vmxnet3-correctly-report-csum_level-for-encapsulated.patch
+mptcp-fix-lockdep-false-positive.patch
+netfilter-nf_tables-honor-set-timeout-and-garbage-co.patch
+bonding-fix-lockdep-splat-in-bond_miimon_commit.patch
+net-lan966x-fix-configuration-of-the-pcs.patch
+veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch
+nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch
+net-hns3-add-interrupts-re-initialization-while-doin.patch
+net-hns3-fix-miss-l3e-checking-for-rx-packet.patch
+net-hns3-fix-vf-promisc-mode-not-update-when-mac-tab.patch
+net-sched-fix-memory-leak-in-tcindex_set_parms.patch
+qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch
+net-dsa-mv88e6xxx-depend-on-ptp-conditionally.patch
+nfc-fix-potential-resource-leaks.patch
+bnxt_en-simplify-bnxt_xdp_buff_init.patch
+bnxt_en-fix-xdp-rx-path.patch
+bnxt_en-fix-first-buffer-size-calculations-for-xdp-m.patch
+bnxt_en-fix-hds-and-jumbo-thresholds-for-rx-packets.patch
+vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch
+vdpa-mlx5-fix-wrong-mac-address-deletion.patch
+vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch
+vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch
+vringh-fix-range-used-in-iotlb_translate.patch
+vhost-fix-range-used-in-translate_desc.patch
+vhost-vdpa-fix-an-iotlb-memory-leak.patch
+vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch
+virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch
+vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch
+vdpasim-fix-memory-leak-when-freeing-iotlbs.patch
+net-mlx5-e-switch-properly-handle-ingress-tagged-pac.patch
+net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch
+net-mlx5-fix-io_eq_size-and-event_eq_size-params-val.patch
+net-mlx5-avoid-recovery-in-probe-flows.patch
+net-mlx5-fix-roce-setting-at-hca-level.patch
+net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch
+net-mlx5e-ct-fix-ct-debugfs-folder-name.patch
+net-mlx5e-always-clear-dest-encap-in-neigh-update-de.patch
+net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch
+net-mlx5e-set-geneve_tlv_option_0_exist-when-matchin.patch
+net-mlx5-lag-fix-failure-to-cancel-delayed-bond-work.patch
+net-hns3-refactor-function-hclge_mbx_handler.patch
+net-hns3-refine-the-handling-for-vf-heartbeat.patch
+net-amd-xgbe-add-missed-tasklet_kill.patch
+net-ena-fix-toeplitz-initial-hash-value.patch
+net-ena-don-t-register-memory-info-on-xdp-exchange.patch
+net-ena-account-for-the-number-of-processed-bytes-in.patch
+net-ena-use-bitmask-to-indicate-packet-redirection.patch
+net-ena-fix-rx_copybreak-value-update.patch
+net-ena-set-default-value-for-rx-interrupt-moderatio.patch
+net-ena-update-numa-tph-hint-register-upon-numa-node.patch
+net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch
+rdma-mlx5-fix-mlx5_ib_get_hw_stats-when-used-for-dev.patch
+rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch
+selftests-net-fix-cleanup_v6-for-arp_ndisc_evict_noc.patch
+selftests-net-return-non-zero-for-failures-reported-.patch
+drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch
+filelock-new-helper-vfs_inode_has_locks.patch
+ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch
+gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch
+net-sched-atm-dont-intepret-cls-results-when-asked-t.patch
+net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch
+vxlan-fix-memory-leaks-in-error-path.patch
+net-sparx5-fix-reading-of-the-mac-address.patch
+netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch
+netfilter-ipset-rework-long-task-execution-when-addi.patch
+perf-tools-fix-resources-leak-in-perf_data__open_dir.patch
+drm-imx-ipuv3-plane-fix-overlay-plane-width.patch
+fs-ntfs3-don-t-hold-ni_lock-when-calling-truncate_se.patch
+drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch
+octeontx2-pf-fix-lmtst-id-used-in-aura-free.patch
+usb-rndis_host-secure-rndis_query-check-against-int-.patch
+perf-lock-contention-fix-core-dump-related-to-not-fi.patch
+perf-stat-fix-handling-of-unsupported-cgroup-events-.patch
+perf-stat-fix-handling-of-for-each-cgroup-with-bpf-c.patch
+drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch
+drm-i915-gvt-fix-double-free-bug-in-split_2mb_gtt_en.patch
+ublk-honor-io_uring_f_nonblock-for-handling-control-.patch
+qed-allow-sleep-in-qed_mcp_trace_dump.patch
+net-ulp-prevent-ulp-without-clone-op-from-entering-t.patch
+caif-fix-memory-leak-in-cfctrl_linkup_request.patch
+udf-fix-extension-of-the-last-extent-in-the-file.patch
+usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch
+hfs-hfsplus-avoid-warn_on-for-sanity-check-use-prope.patch
+asoc-sof-revert-core-unregister-clients-and-machine-.patch
+9p-client-fix-data-race-on-req-status.patch
+asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch
+asoc-sof-mediatek-initialize-panic_info-to-zero.patch
+drm-amdgpu-fix-size-validation-for-non-exclusive-dom.patch
+drm-amdkfd-fix-kfd_process_device_init_vm-error-hand.patch
+drm-amdkfd-fix-double-release-compute-pasid.patch
+nvme-fix-multipath-crash-caused-by-flush-request-whe.patch
+io_uring-check-for-valid-register-opcode-earlier.patch
+nvmet-use-nvme_cmd_effects_csupp-instead-of-open-cod.patch
+nvme-also-return-i-o-command-effects-from-nvme_comma.patch
+asoc-sof-intel-pci-tgl-unblock-s5-entry-if-dma-stop-.patch
+btrfs-check-superblock-to-ensure-the-fs-was-not-modi.patch
+btrfs-don-t-save-block-group-root-into-super-block.patch
+btrfs-separate-block_group_tree-compat-ro-flag-from-.patch
+btrfs-relax-block-group-tree-feature-dependency-chec.patch
+btrfs-fix-compat_ro-checks-against-remount.patch
diff --git a/queue-6.0/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch b/queue-6.0/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch
new file mode 100644 (file)
index 0000000..001fa8e
--- /dev/null
@@ -0,0 +1,133 @@
+From c0c06b39e216f7138b2cf5d5f217442e825c1c72 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 13:14:31 +0900
+Subject: SUNRPC: ensure the matching upcall is in-flight upon downcall
+
+From: minoura makoto <minoura@valinux.co.jp>
+
+[ Upstream commit b18cba09e374637a0a3759d856a6bca94c133952 ]
+
+Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid
+but different gss service") introduced `auth` argument to
+__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL
+since it (and auth->service) was not (yet) determined.
+
+When multiple upcalls with the same uid and different service are
+ongoing, it could happen that __gss_find_upcall(), which returns the
+first match found in the pipe->in_downcall list, could not find the
+correct gss_msg corresponding to the downcall we are looking for.
+Moreover, it might return a msg which is not sent to rpc.gssd yet.
+
+We could see a mount.nfs process hung in D state when multiple mount.nfs
+are executed in parallel.  The call trace below is of CentOS 7.9
+kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/
+elrepo kernel-ml-6.0.7-1.el7.
+
+PID: 71258  TASK: ffff91ebd4be0000  CPU: 36  COMMAND: "mount.nfs"
+ #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f
+ #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9
+ #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss]
+ #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc
+[sunrpc]
+ #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss]
+ #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc]
+ #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc]
+ #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc]
+ #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc]
+ #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc]
+
+The scenario is like this. Let's say there are two upcalls for
+services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe.
+
+When rpc.gssd reads pipe to get the upcall msg corresponding to
+service B from pipe->pipe and then writes the response, in
+gss_pipe_downcall the msg corresponding to service A will be picked
+because only uid is used to find the msg and it is before the one for
+B in pipe->in_downcall.  And the process waiting for the msg
+corresponding to service A will be woken up.
+
+Actual scheduling of that process might be after rpc.gssd processes the
+next msg.  In rpc_pipe_generic_upcall it clears msg->errno (for A).
+The process is scheduled to see gss_msg->ctx == NULL and
+gss_msg->msg.errno == 0, therefore it cannot break the loop in
+gss_create_upcall and is never woken up after that.
+
+This patch adds a simple check to ensure that a msg which is not
+sent to rpc.gssd yet is not chosen as the matching upcall upon
+receiving a downcall.
+
+Signed-off-by: minoura makoto <minoura@valinux.co.jp>
+Signed-off-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
+Tested-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
+Cc: Trond Myklebust <trondmy@hammerspace.com>
+Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sunrpc/rpc_pipe_fs.h |  5 +++++
+ net/sunrpc/auth_gss/auth_gss.c     | 19 +++++++++++++++++--
+ 2 files changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
+index cd188a527d16..3b35b6f6533a 100644
+--- a/include/linux/sunrpc/rpc_pipe_fs.h
++++ b/include/linux/sunrpc/rpc_pipe_fs.h
+@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
+                                      char __user *, size_t);
+ extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
++/* returns true if the msg is in-flight, i.e., already eaten by the peer */
++static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
++      return (msg->copied != 0 && list_empty(&msg->list));
++}
++
+ struct rpc_clnt;
+ extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
+ extern int rpc_remove_client_dir(struct rpc_clnt *);
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index 7bb247c51e2f..2d7b1e03110a 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
+       list_for_each_entry(pos, &pipe->in_downcall, list) {
+               if (!uid_eq(pos->uid, uid))
+                       continue;
+-              if (auth && pos->auth->service != auth->service)
++              if (pos->auth->service != auth->service)
+                       continue;
+               refcount_inc(&pos->count);
+               return pos;
+@@ -686,6 +686,21 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+       return err;
+ }
++static struct gss_upcall_msg *
++gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
++{
++      struct gss_upcall_msg *pos;
++      list_for_each_entry(pos, &pipe->in_downcall, list) {
++              if (!uid_eq(pos->uid, uid))
++                      continue;
++              if (!rpc_msg_is_inflight(&pos->msg))
++                      continue;
++              refcount_inc(&pos->count);
++              return pos;
++      }
++      return NULL;
++}
++
+ #define MSG_BUF_MAXSIZE 1024
+ static ssize_t
+@@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+       err = -ENOENT;
+       /* Find a matching upcall */
+       spin_lock(&pipe->lock);
+-      gss_msg = __gss_find_upcall(pipe, uid, NULL);
++      gss_msg = gss_find_downcall(pipe, uid);
+       if (gss_msg == NULL) {
+               spin_unlock(&pipe->lock);
+               goto err_put_ctx;
+-- 
+2.35.1
+
diff --git a/queue-6.0/ublk-honor-io_uring_f_nonblock-for-handling-control-.patch b/queue-6.0/ublk-honor-io_uring_f_nonblock-for-handling-control-.patch
new file mode 100644 (file)
index 0000000..7367df1
--- /dev/null
@@ -0,0 +1,39 @@
+From 2b0f09ea84c6ea9f8e071508e8f9c4dc5a4d746b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jan 2023 21:32:35 +0800
+Subject: ublk: honor IO_URING_F_NONBLOCK for handling control command
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit fa8e442e832a3647cdd90f3e606c473a51bc1b26 ]
+
+Most of control command handlers may sleep, so return -EAGAIN in case
+of IO_URING_F_NONBLOCK to defer the handling into io wq context.
+
+Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver")
+Reported-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20230104133235.836536-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index 31a8715d3a4d..ebb5c846d826 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1718,6 +1718,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
+       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       int ret = -EINVAL;
++      if (issue_flags & IO_URING_F_NONBLOCK)
++              return -EAGAIN;
++
+       ublk_ctrl_cmd_dump(cmd);
+       if (!(issue_flags & IO_URING_F_SQE128))
+-- 
+2.35.1
+
diff --git a/queue-6.0/udf-fix-extension-of-the-last-extent-in-the-file.patch b/queue-6.0/udf-fix-extension-of-the-last-extent-in-the-file.patch
new file mode 100644 (file)
index 0000000..3e2a0e0
--- /dev/null
@@ -0,0 +1,37 @@
+From b907e918c515de745cc7e68f3db380f27e1fcf5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Dec 2022 17:45:51 +0100
+Subject: udf: Fix extension of the last extent in the file
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 83c7423d1eb6806d13c521d1002cc1a012111719 ]
+
+When extending the last extent in the file within the last block, we
+wrongly computed the length of the last extent. This is mostly a
+cosmetic problem since the extent does not contain any data and the
+length will be fixed up by following operations but still.
+
+Fixes: 1f3868f06855 ("udf: Fix extending file within last block")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index b9a83820e1ad..3c380140515d 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -600,7 +600,7 @@ static void udf_do_extend_final_block(struct inode *inode,
+        */
+       if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK))
+               return;
+-      added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen;
++      added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+       last_ext->extLength += added_bytes;
+       UDF_I(inode)->i_lenExtents += added_bytes;
+-- 
+2.35.1
+
diff --git a/queue-6.0/usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch b/queue-6.0/usb-dwc3-xilinx-include-linux-gpio-consumer.h.patch
new file mode 100644 (file)
index 0000000..69da905
--- /dev/null
@@ -0,0 +1,42 @@
+From 600570bd97c2f7856dcda4778fc50717d2ee42c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 13:17:46 +0100
+Subject: usb: dwc3: xilinx: include linux/gpio/consumer.h
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit e498a04443240c15c3c857165f7b652b87f4fd96 ]
+
+The newly added gpio consumer calls cause a build failure in configurations
+that fail to include the right header implicitly:
+
+drivers/usb/dwc3/dwc3-xilinx.c: In function 'dwc3_xlnx_init_zynqmp':
+drivers/usb/dwc3/dwc3-xilinx.c:207:22: error: implicit declaration of function 'devm_gpiod_get_optional'; did you mean 'devm_clk_get_optional'? [-Werror=implicit-function-declaration]
+  207 |         reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+      |                      ^~~~~~~~~~~~~~~~~~~~~~~
+      |                      devm_clk_get_optional
+
+Fixes: ca05b38252d7 ("usb: dwc3: xilinx: Add gpio-reset support")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Link: https://lore.kernel.org/r/20230103121755.956027-1-arnd@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-xilinx.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c
+index 67b237c7a76a..550ae64350df 100644
+--- a/drivers/usb/dwc3/dwc3-xilinx.c
++++ b/drivers/usb/dwc3/dwc3-xilinx.c
+@@ -13,6 +13,7 @@
+ #include <linux/of.h>
+ #include <linux/platform_device.h>
+ #include <linux/dma-mapping.h>
++#include <linux/gpio/consumer.h>
+ #include <linux/of_gpio.h>
+ #include <linux/of_platform.h>
+ #include <linux/pm_runtime.h>
+-- 
+2.35.1
+
diff --git a/queue-6.0/usb-rndis_host-secure-rndis_query-check-against-int-.patch b/queue-6.0/usb-rndis_host-secure-rndis_query-check-against-int-.patch
new file mode 100644 (file)
index 0000000..fa6662f
--- /dev/null
@@ -0,0 +1,43 @@
+From dc8824c6b657b8ca2e69bc9e3c733ab17637bc7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 10:17:09 +0100
+Subject: usb: rndis_host: Secure rndis_query check against int overflow
+
+From: Szymon Heidrich <szymon.heidrich@gmail.com>
+
+[ Upstream commit c7dd13805f8b8fc1ce3b6d40f6aff47e66b72ad2 ]
+
+Variables off and len typed as uint32 in rndis_query function
+are controlled by incoming RNDIS response message thus their
+value may be manipulated. Setting off to an unexpectedly large
+value will cause the sum with len and 8 to overflow and pass
+the implemented validation step. Consequently the response
+pointer will be referring to a location past the expected
+buffer boundaries allowing information leakage e.g. via
+RNDIS_OID_802_3_PERMANENT_ADDRESS OID.
+
+Fixes: ddda08624013 ("USB: rndis_host, various cleanups")
+Signed-off-by: Szymon Heidrich <szymon.heidrich@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/rndis_host.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
+index f79333fe1783..7b3739b29c8f 100644
+--- a/drivers/net/usb/rndis_host.c
++++ b/drivers/net/usb/rndis_host.c
+@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf,
+       off = le32_to_cpu(u.get_c->offset);
+       len = le32_to_cpu(u.get_c->len);
+-      if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE))
++      if (unlikely((off > CONTROL_BUFFER_SIZE - 8) ||
++                   (len > CONTROL_BUFFER_SIZE - 8 - off)))
+               goto response_error;
+       if (*reply_len != -1 && len != *reply_len)
+-- 
+2.35.1
+
diff --git a/queue-6.0/vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch b/queue-6.0/vdpa-mlx5-fix-rule-forwarding-vlan-to-tir.patch
new file mode 100644 (file)
index 0000000..7450f52
--- /dev/null
@@ -0,0 +1,49 @@
+From 6d1505a7c32eda663f9627b8d6798af391c980eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 15:17:52 +0200
+Subject: vdpa/mlx5: Fix rule forwarding VLAN to TIR
+
+From: Eli Cohen <elic@nvidia.com>
+
+[ Upstream commit a6ce72c0fb6041f9871f880b2d02b294f7f49cb4 ]
+
+Set the VLAN id to the header values field instead of overwriting the
+headers criteria field.
+
+Before this fix, VLAN filtering would not really work and tagged packets
+would be forwarded unfiltered to the TIR.
+
+Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support")
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Eli Cohen <elic@nvidia.com>
+Message-Id: <20221114131759.57883-2-elic@nvidia.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+index 90913365def4..3fb06dcee943 100644
+--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+@@ -1468,11 +1468,13 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
+       dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
+       eth_broadcast_addr(dmac_c);
+       ether_addr_copy(dmac_v, mac);
+-      MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
++      if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
++              MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
++              MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
++      }
+       if (tagged) {
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
+-              MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
+-              MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid);
++              MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
+       }
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+       dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+-- 
+2.35.1
+
diff --git a/queue-6.0/vdpa-mlx5-fix-wrong-mac-address-deletion.patch b/queue-6.0/vdpa-mlx5-fix-wrong-mac-address-deletion.patch
new file mode 100644 (file)
index 0000000..2148e9f
--- /dev/null
@@ -0,0 +1,38 @@
+From c49e6edd359ffdf60ea218687b57edd6e13df90c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 15:17:54 +0200
+Subject: vdpa/mlx5: Fix wrong mac address deletion
+
+From: Eli Cohen <elic@nvidia.com>
+
+[ Upstream commit 1ab53760d322c82fb4cb5e81b5817065801e3ec4 ]
+
+Delete the old MAC from the table and not the new one which is not there
+yet.
+
+Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support")
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Eli Cohen <elic@nvidia.com>
+Message-Id: <20221114131759.57883-4-elic@nvidia.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+index 3fb06dcee943..444d6572b2d0 100644
+--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+@@ -1686,7 +1686,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
+               /* Need recreate the flow table entry, so that the packet could forward back
+                */
+-              mac_vlan_del(ndev, ndev->config.mac, 0, false);
++              mac_vlan_del(ndev, mac_back, 0, false);
+               if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
+                       mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
+-- 
+2.35.1
+
diff --git a/queue-6.0/vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch b/queue-6.0/vdpa-vp_vdpa-fix-kfree-a-wrong-pointer-in-vp_vdpa_re.patch
new file mode 100644 (file)
index 0000000..57b246a
--- /dev/null
@@ -0,0 +1,58 @@
+From 2f35a045afdaf41571e6c1da0d7aef4e16a7e056 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Dec 2022 20:08:13 +0800
+Subject: vdpa/vp_vdpa: fix kfree a wrong pointer in vp_vdpa_remove
+
+From: Rong Wang <wangrong68@huawei.com>
+
+[ Upstream commit ed843d6ed7310a27cf7c8ee0a82a482eed0cb4a6 ]
+
+In vp_vdpa_remove(), the code kfree(&vp_vdpa_mgtdev->mgtdev.id_table) uses
+a reference of pointer as the argument of kfree, which is the wrong pointer
+and then may hit crash like this:
+
+Unable to handle kernel paging request at virtual address 00ffff003363e30c
+Internal error: Oops: 96000004 [#1] SMP
+Call trace:
+ rb_next+0x20/0x5c
+ ext4_readdir+0x494/0x5c4 [ext4]
+ iterate_dir+0x168/0x1b4
+ __se_sys_getdents64+0x68/0x170
+ __arm64_sys_getdents64+0x24/0x30
+ el0_svc_common.constprop.0+0x7c/0x1bc
+ do_el0_svc+0x2c/0x94
+ el0_svc+0x20/0x30
+ el0_sync_handler+0xb0/0xb4
+ el0_sync+0x160/0x180
+Code: 54000220 f9400441 b4000161 aa0103e0 (f9400821)
+SMP: stopping secondary CPUs
+Starting crashdump kernel...
+
+Fixes: ffbda8e9df10 ("vdpa/vp_vdpa : add vdpa tool support in vp_vdpa")
+Signed-off-by: Rong Wang <wangrong68@huawei.com>
+Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
+Message-Id: <20221207120813.2837529-1-sunnanyong@huawei.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Cindy Lu <lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
+index 04522077735b..f4e375b1d903 100644
+--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
++++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
+@@ -629,7 +629,7 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
+       mdev = vp_vdpa_mgtdev->mdev;
+       vp_modern_remove(mdev);
+       vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev);
+-      kfree(&vp_vdpa_mgtdev->mgtdev.id_table);
++      kfree(vp_vdpa_mgtdev->mgtdev.id_table);
+       kfree(mdev);
+       kfree(vp_vdpa_mgtdev);
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch b/queue-6.0/vdpa_sim-fix-possible-memory-leak-in-vdpasim_net_ini.patch
new file mode 100644 (file)
index 0000000..be8d39d
--- /dev/null
@@ -0,0 +1,103 @@
+From 04718cb154c12216999ea85aa29a73d4a9fddbfc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Nov 2022 16:23:48 +0800
+Subject: vdpa_sim: fix possible memory leak in vdpasim_net_init() and
+ vdpasim_blk_init()
+
+From: ruanjinjie <ruanjinjie@huawei.com>
+
+[ Upstream commit aeca7ff254843d49a8739f07f7dab1341450111d ]
+
+Inject fault while probing module, if device_register() fails in
+vdpasim_net_init() or vdpasim_blk_init(), but the refcount of kobject is
+not decreased to 0, the name allocated in dev_set_name() is leaked.
+Fix this by calling put_device(), so that name can be freed in
+callback function kobject_cleanup().
+
+(vdpa_sim_net)
+unreferenced object 0xffff88807eebc370 (size 16):
+  comm "modprobe", pid 3848, jiffies 4362982860 (age 18.153s)
+  hex dump (first 16 bytes):
+    76 64 70 61 73 69 6d 5f 6e 65 74 00 6b 6b 6b a5  vdpasim_net.kkk.
+  backtrace:
+    [<ffffffff8174f19e>] __kmalloc_node_track_caller+0x4e/0x150
+    [<ffffffff81731d53>] kstrdup+0x33/0x60
+    [<ffffffff83a5d421>] kobject_set_name_vargs+0x41/0x110
+    [<ffffffff82d87aab>] dev_set_name+0xab/0xe0
+    [<ffffffff82d91a23>] device_add+0xe3/0x1a80
+    [<ffffffffa0270013>] 0xffffffffa0270013
+    [<ffffffff81001c27>] do_one_initcall+0x87/0x2e0
+    [<ffffffff813739cb>] do_init_module+0x1ab/0x640
+    [<ffffffff81379d20>] load_module+0x5d00/0x77f0
+    [<ffffffff8137bc40>] __do_sys_finit_module+0x110/0x1b0
+    [<ffffffff83c4d505>] do_syscall_64+0x35/0x80
+    [<ffffffff83e0006a>] entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+(vdpa_sim_blk)
+unreferenced object 0xffff8881070c1250 (size 16):
+  comm "modprobe", pid 6844, jiffies 4364069319 (age 17.572s)
+  hex dump (first 16 bytes):
+    76 64 70 61 73 69 6d 5f 62 6c 6b 00 6b 6b 6b a5  vdpasim_blk.kkk.
+  backtrace:
+    [<ffffffff8174f19e>] __kmalloc_node_track_caller+0x4e/0x150
+    [<ffffffff81731d53>] kstrdup+0x33/0x60
+    [<ffffffff83a5d421>] kobject_set_name_vargs+0x41/0x110
+    [<ffffffff82d87aab>] dev_set_name+0xab/0xe0
+    [<ffffffff82d91a23>] device_add+0xe3/0x1a80
+    [<ffffffffa0220013>] 0xffffffffa0220013
+    [<ffffffff81001c27>] do_one_initcall+0x87/0x2e0
+    [<ffffffff813739cb>] do_init_module+0x1ab/0x640
+    [<ffffffff81379d20>] load_module+0x5d00/0x77f0
+    [<ffffffff8137bc40>] __do_sys_finit_module+0x110/0x1b0
+    [<ffffffff83c4d505>] do_syscall_64+0x35/0x80
+    [<ffffffff83e0006a>] entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Fixes: 899c4d187f6a ("vdpa_sim_blk: add support for vdpa management tool")
+Fixes: a3c06ae158dd ("vdpa_sim_net: Add support for user supported devices")
+
+Signed-off-by: ruanjinjie <ruanjinjie@huawei.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221110082348.4105476-1-ruanjinjie@huawei.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 4 +++-
+ drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 4 +++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+index c8bfea3b7db2..cc0534f8ae93 100644
+--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+@@ -427,8 +427,10 @@ static int __init vdpasim_blk_init(void)
+       int ret;
+       ret = device_register(&vdpasim_blk_mgmtdev);
+-      if (ret)
++      if (ret) {
++              put_device(&vdpasim_blk_mgmtdev);
+               return ret;
++      }
+       ret = vdpa_mgmtdev_register(&mgmt_dev);
+       if (ret)
+diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+index 886449e88502..c2e19dd06419 100644
+--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+@@ -304,8 +304,10 @@ static int __init vdpasim_net_init(void)
+       int ret;
+       ret = device_register(&vdpasim_net_mgmtdev);
+-      if (ret)
++      if (ret) {
++              put_device(&vdpasim_net_mgmtdev);
+               return ret;
++      }
+       ret = vdpa_mgmtdev_register(&mgmt_dev);
+       if (ret)
+-- 
+2.35.1
+
diff --git a/queue-6.0/vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch b/queue-6.0/vdpa_sim-fix-vringh-initialization-in-vdpasim_queue_.patch
new file mode 100644 (file)
index 0000000..1e97849
--- /dev/null
@@ -0,0 +1,52 @@
+From 6c45c7323466017958a9da4b541b87e1e2b51030 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Nov 2022 15:13:35 +0100
+Subject: vdpa_sim: fix vringh initialization in vdpasim_queue_ready()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 794ec498c9fa79e6bfd71b931410d5897a9c00d4 ]
+
+When we initialize vringh, we should pass the features and the
+number of elements in the virtqueue negotiated with the driver,
+otherwise operations with vringh may fail.
+
+This was discovered in a case where the driver sets a number of
+elements in the virtqueue different from the value returned by
+.get_vq_num_max().
+
+In vdpasim_vq_reset() it is safe to initialize the vringh with
+default values, since the virtqueue will not be used until
+vdpasim_queue_ready() is called again.
+
+Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator")
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221110141335.62171-1-sgarzare@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Eugenio Pérez <eperezma@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/vdpa_sim/vdpa_sim.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
+index 225b7f5d8be3..1701e0623408 100644
+--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
++++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
+@@ -66,8 +66,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
+ {
+       struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+-      vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
+-                        VDPASIM_QUEUE_MAX, false,
++      vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
+                         (struct vring_desc *)(uintptr_t)vq->desc_addr,
+                         (struct vring_avail *)
+                         (uintptr_t)vq->driver_addr,
+-- 
+2.35.1
+
diff --git a/queue-6.0/vdpasim-fix-memory-leak-when-freeing-iotlbs.patch b/queue-6.0/vdpasim-fix-memory-leak-when-freeing-iotlbs.patch
new file mode 100644 (file)
index 0000000..ded0345
--- /dev/null
@@ -0,0 +1,43 @@
+From 401340f32955edd3700fb714169fc5d7f2d20ffd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 17:07:17 +0800
+Subject: vdpasim: fix memory leak when freeing IOTLBs
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 0b7a04a30eef20e6b24926a45c0ce7906ae85bd6 ]
+
+After commit bda324fd037a ("vdpasim: control virtqueue support"),
+vdpasim->iommu became an array of IOTLBs, so we should clean the
+mappings of each IOTLB one by one instead of just deleting the ranges
+in the first IOTLB, which may leak maps.
+
+Fixes: bda324fd037a ("vdpasim: control virtqueue support")
+Cc: Gautam Dawar <gautam.dawar@xilinx.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <20221213090717.61529-1-jasowang@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Gautam Dawar <gautam.dawar@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
+index 1701e0623408..6489f44bca1a 100644
+--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
++++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
+@@ -679,7 +679,9 @@ static void vdpasim_free(struct vdpa_device *vdpa)
+       }
+       kvfree(vdpasim->buffer);
+-      vhost_iotlb_free(vdpasim->iommu);
++      for (i = 0; i < vdpasim->dev_attr.nas; i++)
++              vhost_iotlb_reset(&vdpasim->iommu[i]);
++      kfree(vdpasim->iommu);
+       kfree(vdpasim->vqs);
+       kfree(vdpasim->config);
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch b/queue-6.0/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch
new file mode 100644 (file)
index 0000000..f91ac49
--- /dev/null
@@ -0,0 +1,88 @@
+From 83a4dff4d53b5e43bfbca5345ad9effa7fc4289d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 12:59:03 -0600
+Subject: veth: Fix race with AF_XDP exposing old or uninitialized descriptors
+
+From: Shawn Bohrer <sbohrer@cloudflare.com>
+
+[ Upstream commit fa349e396e4886d742fd6501c599ec627ef1353b ]
+
+When AF_XDP is used on a veth interface the RX ring is updated in two
+steps.  veth_xdp_rcv() removes packet descriptors from the FILL ring,
+fills them, and places them in the RX ring, updating the cached_prod
+pointer.  Later xdp_do_flush() syncs the RX ring prod pointer with the
+cached_prod pointer allowing user-space to see the recently filled in
+descriptors.  The rings are intended to be SPSC, however the existing
+order in veth_poll allows the xdp_do_flush() to run concurrently with
+another CPU creating a race condition that allows user-space to see old
+or uninitialized descriptors in the RX ring.  This bug has been observed
+in production systems.
+
+To summarize, we are expecting this ordering:
+
+CPU 0 __xsk_rcv_zc()
+CPU 0 __xsk_map_flush()
+CPU 2 __xsk_rcv_zc()
+CPU 2 __xsk_map_flush()
+
+But we are seeing this order:
+
+CPU 0 __xsk_rcv_zc()
+CPU 2 __xsk_rcv_zc()
+CPU 0 __xsk_map_flush()
+CPU 2 __xsk_map_flush()
+
+This occurs because we rely on NAPI to ensure that only one napi_poll
+handler is running at a time for the given veth receive queue.
+napi_schedule_prep() will prevent multiple instances from getting
+scheduled. However calling napi_complete_done() signals that this
+napi_poll is complete and allows subsequent calls to
+napi_schedule_prep() and __napi_schedule() to succeed in scheduling a
+concurrent napi_poll before the xdp_do_flush() has been called.  For the
+veth driver a concurrent call to napi_schedule_prep() and
+__napi_schedule() can occur on a different CPU because the veth xmit
+path can additionally schedule a napi_poll creating the race.
+
+The fix as suggested by Magnus Karlsson, is to simply move the
+xdp_do_flush() call before napi_complete_done().  This syncs the
+producer ring pointers before another instance of napi_poll can be
+scheduled on another CPU.  It will also slightly improve performance by
+moving the flush closer to when the descriptors were placed in the
+RX ring.
+
+Fixes: d1396004dd86 ("veth: Add XDP TX and REDIRECT")
+Suggested-by: Magnus Karlsson <magnus.karlsson@gmail.com>
+Signed-off-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Link: https://lore.kernel.org/r/20221220185903.1105011-1-sbohrer@cloudflare.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index 466da01ba2e3..909427d99a59 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -974,6 +974,9 @@ static int veth_poll(struct napi_struct *napi, int budget)
+       xdp_set_return_frame_no_direct();
+       done = veth_xdp_rcv(rq, budget, &bq, &stats);
++      if (stats.xdp_redirect > 0)
++              xdp_do_flush();
++
+       if (done < budget && napi_complete_done(napi, done)) {
+               /* Write rx_notify_masked before reading ptr_ring */
+               smp_store_mb(rq->rx_notify_masked, false);
+@@ -987,8 +990,6 @@ static int veth_poll(struct napi_struct *napi, int budget)
+       if (stats.xdp_tx > 0)
+               veth_xdp_flush(rq, &bq);
+-      if (stats.xdp_redirect > 0)
+-              xdp_do_flush();
+       xdp_clear_return_frame_no_direct();
+       return done;
+-- 
+2.35.1
+
diff --git a/queue-6.0/vhost-fix-range-used-in-translate_desc.patch b/queue-6.0/vhost-fix-range-used-in-translate_desc.patch
new file mode 100644 (file)
index 0000000..739c323
--- /dev/null
@@ -0,0 +1,55 @@
+From c83d3b8f045dfe3e96dc3f251b5f7a1a765d607c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 11:25:03 +0100
+Subject: vhost: fix range used in translate_desc()
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 98047313cdb46828093894d0ac8b1183b8b317f9 ]
+
+vhost_iotlb_itree_first() requires `start` and `last` parameters
+to search for a mapping that overlaps the range.
+
+In translate_desc() we cyclically call vhost_iotlb_itree_first(),
+incrementing `addr` by the amount already translated, so rightly
+we move the `start` parameter passed to vhost_iotlb_itree_first(),
+but we should hold the `last` parameter constant.
+
+Let's fix it by saving the `last` parameter value before incrementing
+`addr` in the loop.
+
+Fixes: a9709d6874d5 ("vhost: convert pre sorted vhost memory array to interval tree")
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221109102503.18816-3-sgarzare@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vhost.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
+index 40097826cff0..3c2359570df9 100644
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -2053,7 +2053,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+       struct vhost_dev *dev = vq->dev;
+       struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
+       struct iovec *_iov;
+-      u64 s = 0;
++      u64 s = 0, last = addr + len - 1;
+       int ret = 0;
+       while ((u64)len > s) {
+@@ -2063,7 +2063,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+                       break;
+               }
+-              map = vhost_iotlb_itree_first(umem, addr, addr + len - 1);
++              map = vhost_iotlb_itree_first(umem, addr, last);
+               if (map == NULL || map->start > addr) {
+                       if (umem != dev->iotlb) {
+                               ret = -EFAULT;
+-- 
+2.35.1
+
diff --git a/queue-6.0/vhost-vdpa-fix-an-iotlb-memory-leak.patch b/queue-6.0/vhost-vdpa-fix-an-iotlb-memory-leak.patch
new file mode 100644 (file)
index 0000000..39172ed
--- /dev/null
@@ -0,0 +1,102 @@
+From d42d2a2dd6be1330a9565f33729f3578332b923c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 16:42:13 +0100
+Subject: vhost-vdpa: fix an iotlb memory leak
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit c070c1912a83432530cbb4271d5b9b11fa36b67a ]
+
+Before commit 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB")
+we called vhost_vdpa_iotlb_unmap(v, iotlb, 0ULL, 0ULL - 1) during
+release to free all the resources allocated when processing user IOTLB
+messages through vhost_vdpa_process_iotlb_update().
+That commit changed the handling of IOTLB a bit, and we accidentally
+removed some code called during the release.
+
+We partially fixed this with commit 037d4305569a ("vhost-vdpa: call
+vhost_vdpa_cleanup during the release") but a potential memory leak is
+still there as shown by kmemleak if the application does not send
+VHOST_IOTLB_INVALIDATE or crashes:
+
+  unreferenced object 0xffff888007fbaa30 (size 16):
+    comm "blkio-bench", pid 914, jiffies 4294993521 (age 885.500s)
+    hex dump (first 16 bytes):
+      40 73 41 07 80 88 ff ff 00 00 00 00 00 00 00 00  @sA.............
+    backtrace:
+      [<0000000087736d2a>] kmem_cache_alloc_trace+0x142/0x1c0
+      [<0000000060740f50>] vhost_vdpa_process_iotlb_msg+0x68c/0x901 [vhost_vdpa]
+      [<0000000083e8e205>] vhost_chr_write_iter+0xc0/0x4a0 [vhost]
+      [<000000008f2f414a>] vhost_vdpa_chr_write_iter+0x18/0x20 [vhost_vdpa]
+      [<00000000de1cd4a0>] vfs_write+0x216/0x4b0
+      [<00000000a2850200>] ksys_write+0x71/0xf0
+      [<00000000de8e720b>] __x64_sys_write+0x19/0x20
+      [<0000000018b12cbb>] do_syscall_64+0x3f/0x90
+      [<00000000986ec465>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Let's fix this calling vhost_vdpa_iotlb_unmap() on the whole range in
+vhost_vdpa_remove_as(). We move that call before vhost_dev_cleanup()
+since we need a valid v->vdev.mm in vhost_vdpa_pa_unmap().
+vhost_iotlb_reset() call can be removed, since vhost_vdpa_iotlb_unmap()
+on the whole range removes all the entries.
+
+The kmemleak log reported was observed with a vDPA device that has `use_va`
+set to true (e.g. VDUSE). This patch has been tested with both types of
+devices.
+
+Fixes: 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release")
+Fixes: 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB")
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221109154213.146789-1-sgarzare@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vdpa.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
+index 166044642fd5..b08e07fc7d1f 100644
+--- a/drivers/vhost/vdpa.c
++++ b/drivers/vhost/vdpa.c
+@@ -65,6 +65,10 @@ static DEFINE_IDA(vhost_vdpa_ida);
+ static dev_t vhost_vdpa_major;
++static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
++                                 struct vhost_iotlb *iotlb,
++                                 u64 start, u64 last);
++
+ static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
+ {
+       struct vhost_vdpa_as *as = container_of(iotlb, struct
+@@ -135,7 +139,7 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
+               return -EINVAL;
+       hlist_del(&as->hash_link);
+-      vhost_iotlb_reset(&as->iotlb);
++      vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1);
+       kfree(as);
+       return 0;
+@@ -1162,14 +1166,14 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
+       struct vhost_vdpa_as *as;
+       u32 asid;
+-      vhost_dev_cleanup(&v->vdev);
+-      kfree(v->vdev.vqs);
+-
+       for (asid = 0; asid < v->vdpa->nas; asid++) {
+               as = asid_to_as(v, asid);
+               if (as)
+                       vhost_vdpa_remove_as(v, asid);
+       }
++
++      vhost_dev_cleanup(&v->vdev);
++      kfree(v->vdev.vqs);
+ }
+ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
+-- 
+2.35.1
+
diff --git a/queue-6.0/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch b/queue-6.0/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch
new file mode 100644 (file)
index 0000000..f299fa3
--- /dev/null
@@ -0,0 +1,64 @@
+From 278cbdd65b74d771d365061c773642b75f35a620 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Nov 2022 10:17:05 +0000
+Subject: vhost/vsock: Fix error handling in vhost_vsock_init()
+
+From: Yuan Can <yuancan@huawei.com>
+
+[ Upstream commit 7a4efe182ca61fb3e5307e69b261c57cbf434cd4 ]
+
+Loading vhost_vsock with modprobe can fail with the following
+error:
+
+modprobe: ERROR: could not insert 'vhost_vsock': Device or resource busy
+
+The reason is that vhost_vsock_init() returns misc_register() directly
+without checking its return value. If misc_register() fails, it returns
+without calling vsock_core_unregister() on vhost_transport, so
+vhost_vsock can never be loaded afterwards.
+A simple call graph is shown as below:
+
+ vhost_vsock_init()
+   vsock_core_register() # register vhost_transport
+   misc_register()
+     device_create_with_groups()
+       device_create_groups_vargs()
+         dev = kzalloc(...) # OOM happened
+   # return without unregister vhost_transport
+
+Fix by calling vsock_core_unregister() when misc_register() returns error.
+
+Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
+Signed-off-by: Yuan Can <yuancan@huawei.com>
+Message-Id: <20221108101705.45981-1-yuancan@huawei.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vsock.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
+index 5703775af129..10a7d23731fe 100644
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -959,7 +959,14 @@ static int __init vhost_vsock_init(void)
+                                 VSOCK_TRANSPORT_F_H2G);
+       if (ret < 0)
+               return ret;
+-      return misc_register(&vhost_vsock_misc);
++
++      ret = misc_register(&vhost_vsock_misc);
++      if (ret) {
++              vsock_core_unregister(&vhost_transport.transport);
++              return ret;
++      }
++
++      return 0;
+ };
+ static void __exit vhost_vsock_exit(void)
+-- 
+2.35.1
+
diff --git a/queue-6.0/virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch b/queue-6.0/virtio-crypto-fix-memory-leak-in-virtio_crypto_alg_s.patch
new file mode 100644 (file)
index 0000000..9a666ac
--- /dev/null
@@ -0,0 +1,43 @@
+From 682dcd35edd27e1c46cf837eb821fbff1b2bcb30 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 11:07:40 +0000
+Subject: virtio-crypto: fix memory leak in
+ virtio_crypto_alg_skcipher_close_session()
+
+From: Wei Yongjun <weiyongjun1@huawei.com>
+
+[ Upstream commit b1d65f717cd6305a396a8738e022c6f7c65cfbe8 ]
+
+'vc_ctrl_req' is allocated in virtio_crypto_alg_skcipher_close_session(),
+and should be freed in the invalid ctrl_status->status error handling
+case. Otherwise there is a memory leak.
+
+Fixes: 0756ad15b1fe ("virtio-crypto: use private buffer for control request")
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Message-Id: <20221114110740.537276-1-weiyongjun@huaweicloud.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Gonglei <arei.gonglei@huawei.com>
+Acked-by: zhenwei pi<pizhenwei@bytedance.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/crypto/virtio/virtio_crypto_skcipher_algs.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
+index e553ccadbcbc..e5876286828b 100644
+--- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
++++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
+@@ -239,7 +239,8 @@ static int virtio_crypto_alg_skcipher_close_session(
+               pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n",
+                       ctrl_status->status, destroy_session->session_id);
+-              return -EINVAL;
++              err = -EINVAL;
++              goto out;
+       }
+       err = 0;
+-- 
+2.35.1
+
diff --git a/queue-6.0/vmxnet3-correctly-report-csum_level-for-encapsulated.patch b/queue-6.0/vmxnet3-correctly-report-csum_level-for-encapsulated.patch
new file mode 100644 (file)
index 0000000..06b697d
--- /dev/null
@@ -0,0 +1,55 @@
+From 2540f020e9cb2f80d18494054ad395c98e4d2f2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 12:25:55 -0800
+Subject: vmxnet3: correctly report csum_level for encapsulated packet
+
+From: Ronak Doshi <doshir@vmware.com>
+
+[ Upstream commit 3d8f2c4269d08f8793e946279dbdf5e972cc4911 ]
+
+Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload
+support") added support for encapsulation offload. However, the
+patch did not correctly report the csum_level for encapsulated packets.
+
+This patch fixes this issue by reporting correct csum level for the
+encapsulated packet.
+
+Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support")
+Signed-off-by: Ronak Doshi <doshir@vmware.com>
+Acked-by: Peng Li <lpeng@vmware.com>
+Link: https://lore.kernel.org/r/20221220202556.24421-1-doshir@vmware.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
+index c28c4a654615..c312d16f580c 100644
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -1288,6 +1288,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
+                   (le32_to_cpu(gdesc->dword[3]) &
+                    VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
++                      if ((le32_to_cpu(gdesc->dword[0]) &
++                                   (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
++                              skb->csum_level = 1;
++                      }
+                       WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+                                    !(le32_to_cpu(gdesc->dword[0]) &
+                                    (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+@@ -1297,6 +1301,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
+               } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
+                                            (1 << VMXNET3_RCD_TUC_SHIFT))) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
++                      if ((le32_to_cpu(gdesc->dword[0]) &
++                                   (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
++                              skb->csum_level = 1;
++                      }
+                       WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+                                    !(le32_to_cpu(gdesc->dword[0]) &
+                                    (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+-- 
+2.35.1
+
diff --git a/queue-6.0/vringh-fix-range-used-in-iotlb_translate.patch b/queue-6.0/vringh-fix-range-used-in-iotlb_translate.patch
new file mode 100644 (file)
index 0000000..84fed1b
--- /dev/null
@@ -0,0 +1,56 @@
+From e50b6231f570d76b41a0cbce76c870fc5000427a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 11:25:02 +0100
+Subject: vringh: fix range used in iotlb_translate()
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit f85efa9b0f5381874f727bd98f56787840313f0b ]
+
+vhost_iotlb_itree_first() requires `start` and `last` parameters
+to search for a mapping that overlaps the range.
+
+In iotlb_translate() we cyclically call vhost_iotlb_itree_first(),
+incrementing `addr` by the amount already translated, so rightly
+we move the `start` parameter passed to vhost_iotlb_itree_first(),
+but we should hold the `last` parameter constant.
+
+Let's fix it by saving the `last` parameter value before incrementing
+`addr` in the loop.
+
+Fixes: 9ad9c49cfe97 ("vringh: IOTLB support")
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221109102503.18816-2-sgarzare@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vringh.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
+index 11f59dd06a74..828c29306565 100644
+--- a/drivers/vhost/vringh.c
++++ b/drivers/vhost/vringh.c
+@@ -1102,7 +1102,7 @@ static int iotlb_translate(const struct vringh *vrh,
+       struct vhost_iotlb_map *map;
+       struct vhost_iotlb *iotlb = vrh->iotlb;
+       int ret = 0;
+-      u64 s = 0;
++      u64 s = 0, last = addr + len - 1;
+       spin_lock(vrh->iotlb_lock);
+@@ -1114,8 +1114,7 @@ static int iotlb_translate(const struct vringh *vrh,
+                       break;
+               }
+-              map = vhost_iotlb_itree_first(iotlb, addr,
+-                                            addr + len - 1);
++              map = vhost_iotlb_itree_first(iotlb, addr, last);
+               if (!map || map->start > addr) {
+                       ret = -EINVAL;
+                       break;
+-- 
+2.35.1
+
diff --git a/queue-6.0/vxlan-fix-memory-leaks-in-error-path.patch b/queue-6.0/vxlan-fix-memory-leaks-in-error-path.patch
new file mode 100644 (file)
index 0000000..35db058
--- /dev/null
@@ -0,0 +1,115 @@
+From c85fa7e059c9dac5017bb94d12d43ffa47c4af62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 08:55:56 +0200
+Subject: vxlan: Fix memory leaks in error path
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 06bf62944144a92d83dd14fd1378d2a288259561 ]
+
+The memory allocated by vxlan_vnigroup_init() is not freed in the error
+path, leading to memory leaks [1]. Fix by calling
+vxlan_vnigroup_uninit() in the error path.
+
+The leaks can be reproduced by annotating gro_cells_init() with
+ALLOW_ERROR_INJECTION() and then running:
+
+ # echo "100" > /sys/kernel/debug/fail_function/probability
+ # echo "1" > /sys/kernel/debug/fail_function/times
+ # echo "gro_cells_init" > /sys/kernel/debug/fail_function/inject
+ # printf %#x -12 > /sys/kernel/debug/fail_function/gro_cells_init/retval
+ # ip link add name vxlan0 type vxlan dstport 4789 external vnifilter
+ RTNETLINK answers: Cannot allocate memory
+
+[1]
+unreferenced object 0xffff88810db84a00 (size 512):
+  comm "ip", pid 330, jiffies 4295010045 (age 66.016s)
+  hex dump (first 32 bytes):
+    f8 d5 76 0e 81 88 ff ff 01 00 00 00 00 00 00 02  ..v.............
+    03 00 04 00 48 00 00 00 00 00 00 01 04 00 01 00  ....H...........
+  backtrace:
+    [<ffffffff81a3097a>] kmalloc_trace+0x2a/0x60
+    [<ffffffff82f049fc>] vxlan_vnigroup_init+0x4c/0x160
+    [<ffffffff82ecd69e>] vxlan_init+0x1ae/0x280
+    [<ffffffff836858ca>] register_netdevice+0x57a/0x16d0
+    [<ffffffff82ef67b7>] __vxlan_dev_create+0x7c7/0xa50
+    [<ffffffff82ef6ce6>] vxlan_newlink+0xd6/0x130
+    [<ffffffff836d02ab>] __rtnl_newlink+0x112b/0x18a0
+    [<ffffffff836d0a8c>] rtnl_newlink+0x6c/0xa0
+    [<ffffffff836c0ddf>] rtnetlink_rcv_msg+0x43f/0xd40
+    [<ffffffff83908ce0>] netlink_rcv_skb+0x170/0x440
+    [<ffffffff839066af>] netlink_unicast+0x53f/0x810
+    [<ffffffff839072d8>] netlink_sendmsg+0x958/0xe70
+    [<ffffffff835c319f>] ____sys_sendmsg+0x78f/0xa90
+    [<ffffffff835cd6da>] ___sys_sendmsg+0x13a/0x1e0
+    [<ffffffff835cd94c>] __sys_sendmsg+0x11c/0x1f0
+    [<ffffffff8424da78>] do_syscall_64+0x38/0x80
+unreferenced object 0xffff88810e76d5f8 (size 192):
+  comm "ip", pid 330, jiffies 4295010045 (age 66.016s)
+  hex dump (first 32 bytes):
+    04 00 00 00 00 00 00 00 db e1 4f e7 00 00 00 00  ..........O.....
+    08 d6 76 0e 81 88 ff ff 08 d6 76 0e 81 88 ff ff  ..v.......v.....
+  backtrace:
+    [<ffffffff81a3162e>] __kmalloc_node+0x4e/0x90
+    [<ffffffff81a0e166>] kvmalloc_node+0xa6/0x1f0
+    [<ffffffff8276e1a3>] bucket_table_alloc.isra.0+0x83/0x460
+    [<ffffffff8276f18b>] rhashtable_init+0x43b/0x7c0
+    [<ffffffff82f04a1c>] vxlan_vnigroup_init+0x6c/0x160
+    [<ffffffff82ecd69e>] vxlan_init+0x1ae/0x280
+    [<ffffffff836858ca>] register_netdevice+0x57a/0x16d0
+    [<ffffffff82ef67b7>] __vxlan_dev_create+0x7c7/0xa50
+    [<ffffffff82ef6ce6>] vxlan_newlink+0xd6/0x130
+    [<ffffffff836d02ab>] __rtnl_newlink+0x112b/0x18a0
+    [<ffffffff836d0a8c>] rtnl_newlink+0x6c/0xa0
+    [<ffffffff836c0ddf>] rtnetlink_rcv_msg+0x43f/0xd40
+    [<ffffffff83908ce0>] netlink_rcv_skb+0x170/0x440
+    [<ffffffff839066af>] netlink_unicast+0x53f/0x810
+    [<ffffffff839072d8>] netlink_sendmsg+0x958/0xe70
+    [<ffffffff835c319f>] ____sys_sendmsg+0x78f/0xa90
+
+Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vxlan/vxlan_core.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
+index c3285242f74f..a03752ef544f 100644
+--- a/drivers/net/vxlan/vxlan_core.c
++++ b/drivers/net/vxlan/vxlan_core.c
+@@ -2920,16 +2920,23 @@ static int vxlan_init(struct net_device *dev)
+               vxlan_vnigroup_init(vxlan);
+       dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+-      if (!dev->tstats)
+-              return -ENOMEM;
++      if (!dev->tstats) {
++              err = -ENOMEM;
++              goto err_vnigroup_uninit;
++      }
+       err = gro_cells_init(&vxlan->gro_cells, dev);
+-      if (err) {
+-              free_percpu(dev->tstats);
+-              return err;
+-      }
++      if (err)
++              goto err_free_percpu;
+       return 0;
++
++err_free_percpu:
++      free_percpu(dev->tstats);
++err_vnigroup_uninit:
++      if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
++              vxlan_vnigroup_uninit(vxlan);
++      return err;
+ }
+ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
+-- 
+2.35.1
+
diff --git a/queue-6.0/wifi-ath9k-use-proper-statements-in-conditionals.patch b/queue-6.0/wifi-ath9k-use-proper-statements-in-conditionals.patch
new file mode 100644 (file)
index 0000000..6e9c1e1
--- /dev/null
@@ -0,0 +1,68 @@
+From 45799974af90bd561636685e3cf517537e77686c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Dec 2022 17:55:42 +0100
+Subject: wifi: ath9k: use proper statements in conditionals
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit b7dc753fe33a707379e2254317794a4dad6c0fe2 ]
+
+A previous cleanup patch accidentally broke some conditional
+expressions by replacing the safe "do {} while (0)" constructs
+with empty macros. gcc points this out when extra warnings
+are enabled:
+
+drivers/net/wireless/ath/ath9k/hif_usb.c: In function 'ath9k_skb_queue_complete':
+drivers/net/wireless/ath/ath9k/hif_usb.c:251:57: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body]
+  251 |                         TX_STAT_INC(hif_dev, skb_failed);
+
+Make both sets of macros proper expressions again.
+
+Fixes: d7fc76039b74 ("ath9k: htc: clean up statistics macros")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
+Signed-off-by: Kalle Valo <kvalo@kernel.org>
+Link: https://lore.kernel.org/r/20221215165553.1950307-1-arnd@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc.h | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h
+index 30f0765fb9fd..237f4ec2cffd 100644
+--- a/drivers/net/wireless/ath/ath9k/htc.h
++++ b/drivers/net/wireless/ath/ath9k/htc.h
+@@ -327,9 +327,9 @@ static inline struct ath9k_htc_tx_ctl *HTC_SKB_CB(struct sk_buff *skb)
+ }
+ #ifdef CONFIG_ATH9K_HTC_DEBUGFS
+-#define __STAT_SAFE(hif_dev, expr)    ((hif_dev)->htc_handle->drv_priv ? (expr) : 0)
+-#define CAB_STAT_INC(priv)            ((priv)->debug.tx_stats.cab_queued++)
+-#define TX_QSTAT_INC(priv, q)         ((priv)->debug.tx_stats.queue_stats[q]++)
++#define __STAT_SAFE(hif_dev, expr)    do { ((hif_dev)->htc_handle->drv_priv ? (expr) : 0); } while (0)
++#define CAB_STAT_INC(priv)            do { ((priv)->debug.tx_stats.cab_queued++); } while (0)
++#define TX_QSTAT_INC(priv, q)         do { ((priv)->debug.tx_stats.queue_stats[q]++); } while (0)
+ #define TX_STAT_INC(hif_dev, c) \
+               __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.tx_stats.c++)
+@@ -378,10 +378,10 @@ void ath9k_htc_get_et_stats(struct ieee80211_hw *hw,
+                           struct ethtool_stats *stats, u64 *data);
+ #else
+-#define TX_STAT_INC(hif_dev, c)
+-#define TX_STAT_ADD(hif_dev, c, a)
+-#define RX_STAT_INC(hif_dev, c)
+-#define RX_STAT_ADD(hif_dev, c, a)
++#define TX_STAT_INC(hif_dev, c)               do { } while (0)
++#define TX_STAT_ADD(hif_dev, c, a)    do { } while (0)
++#define RX_STAT_INC(hif_dev, c)               do { } while (0)
++#define RX_STAT_ADD(hif_dev, c, a)    do { } while (0)
+ #define CAB_STAT_INC(priv)
+ #define TX_QSTAT_INC(priv, c)
+-- 
+2.35.1
+