From f478f0d42742e06ff40fa4a3f31a3d9296143115 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 16 Jul 2023 21:01:17 -0400 Subject: [PATCH] Fixes for 6.4 Signed-off-by: Sasha Levin --- ...ynamic-lock-class-for-blk_crypto_pro.patch | 110 +++ ...x-memory-leak-in-cpu_map_update_elem.patch | 138 ++++ ...tack-depth-check-for-async-callbacks.patch | 59 ++ ...w_hdmi-fix-connector-access-for-scdc.patch | 81 ++ ...sn65dsi86-fix-auxiliary-bus-lifetime.patch | 112 +++ ...x-documented-default-preferred_bpp-v.patch | 42 + ...reserve-dpll_hw_state-for-slave-crtc.patch | 41 + ...ix-one-wrong-caching-mode-enum-usage.patch | 45 ++ ...abort-loading-acr-if-no-firmware-was.patch | 38 + ...g-back-blit-subchannel-for-pre-nv50-.patch | 93 +++ .../drm-nouveau-disp-fix-hdmi-on-gt215.patch | 37 + .../drm-nouveau-disp-g94-enable-hdmi.patch | 36 + ...-add-connector_type-for-innolux_at04.patch | 39 + ...-add-powertip-ph800480t013-drm_displ.patch | 38 + ...nite-loop-in-z_erofs_do_read_page-wh.patch | 54 ++ ...ess-loops-in-z_erofs_pcluster_readmo.patch | 46 ++ ...unavailability-for-chunk-based-regul.patch | 42 + ...k-to-match-a-succeeded-ftrace_test_r.patch | 50 ++ ...default-duplex-configuration-to-full.patch | 43 + ...-avoid-struct-memcpy-overrun-warning.patch | 65 ++ ...-check-while-configuring-tx-rate-lim.patch | 86 ++ ...e-rate-limit-when-tcs-are-configured.patch | 123 +++ ...tr-deref-of-ip6_null_entry-rt6i_idev.patch | 145 ++++ ...n-for-qbv_config_change_errors-count.patch | 66 ++ ..._buff-wrapper-for-xdp_buff-in-driver.patch | 104 +++ ...igc-add-xdp-hints-kfuncs-for-rx-hash.patch | 145 ++++ ...e-taprio-offload-for-invalid-argumen.patch | 109 +++ ...erting-of-empty-frame-for-launchtime.patch | 128 +++ ...fix-launchtime-before-start-of-cycle.patch | 46 ++ ...x-hang-issue-when-qbv-gate-is-closed.patch | 316 ++++++++ ...dy-enabled-taprio-offload-for-baseti.patch | 62 ++ ...tart-time-programming-for-past-time-.patch | 109 +++ ...length-type-field-and-vlan-tag-in-qu.patch | 68 ++ 
...-mode-in-pure-launchtime-cbs-offload.patch | 153 ++++ ...e-delay-during-tx-ring-configuration.patch | 46 ++ ...-qbv_enable-to-taprio_offload_enable.patch | 87 ++ ...n-supported-and-advertising-fields-o.patch | 39 + ...ove-warn_on-to-prevent-panic_on_warn.patch | 42 + ...x-a-potential-refcount-underflow-for.patch | 53 ++ ...-cleanup-logic-of-enable_trace_eprob.patch | 71 ++ ...ne-turning-irqs-off-to-avoid-soc-han.patch | 55 ++ ...ke-vsc9959_tas_guard_bands_update-vi.patch | 100 +++ ...net-dsa-qca8k-add-check-for-skb_copy.patch | 38 + ...unneeded-of_node_put-in-felix_parse_.patch | 38 + ...-the-size-of-tx-ring-and-update-tx_w.patch | 83 ++ ...cle-pages-for-transmitted-xdp-frames.patch | 297 +++++++ ...ast_bdp-from-fec_enet_txq_xmit_frame.patch | 64 ++ ...ec-remove-useless-fec_enet_reset_skb.patch | 66 ++ ...-query-hca_cap_2-only-when-supported.patch | 42 + ...ter-a-unique-thermal-zone-per-device.patch | 84 ++ ...for-not_ready-flag-state-after-locki.patch | 133 ++++ ...uble-free-in-mlx5e_destroy_flow_tabl.patch | 38 + ...mory-leak-in-mlx5e_fs_tt_redirect_an.patch | 51 ++ ...5e-fix-memory-leak-in-mlx5e_ptp_open.patch | 44 + ...-flush-and-close-release-flow-of-reg.patch | 52 ++ ...-page_pool-page-fragment-tracking-fo.patch | 124 +++ ...x5e-tc-ct-offload-ct-clear-only-once.patch | 92 +++ ...fix-oversize-frame-dropping-for-pree.patch | 133 ++++ ...-fix-txq_map-in-case-of-txq_number-1.patch | 48 ++ ...corruption-on-frag-list-segmentation.patch | 102 +++ ...-fix-improper-refcount-update-leads-.patch | 62 ++ ...-ensure-both-minimum-and-maximum-por.patch | 82 ++ ...sched-make-psched_mtu-rtnl-less-safe.patch | 49 ++ ...q-account-for-stab-overhead-in-qfq_e.patch | 96 +++ ...q-reintroduce-lmax-bound-check-for-m.patch | 52 ++ ...-replace-tc_taprio_qopt_offload-enab.patch | 390 +++++++++ ...t-txgbe-fix-eeprom-calculation-error.patch | 40 + ...initialized-data-in-nsim_dev_trap_fa.patch | 55 ++ ...ack-don-t-fold-port-numbers-into-add.patch | 94 +++ 
..._tables-report-use-refcount-overflow.patch | 752 ++++++++++++++++++ ...r-handling-in-amd_ntb_pci_driver_ini.patch | 64 ++ ...rror-handling-in-idt_pci_driver_init.patch | 66 ++ ...ror-handling-in-intel_ntb_pci_driver.patch | 65 ++ ...-ntb_tool-add-check-for-devm_kcalloc.patch | 39 + ...t-fix-possible-memory-leak-while-dev.patch | 42 + ...e-nvme_id_ns_nvm_sts_mask-definition.patch | 36 + ...-direction-of-unmapping-integrity-da.patch | 40 + ...e-validation-of-ptp-pointer-before-i.patch | 110 +++ ...-promisc-enable-disable-through-mbox.patch | 118 +++ ...-add-additional-check-for-mcam-rules.patch | 71 ++ ...pcsr-and-oldmask-in-sigcontext-to-un.patch | 75 ++ ...-break-possible-infinite-loop-when-p.patch | 84 ++ ...ix-inconsistent-jit-image-generation.patch | 137 ++++ ...cv-mm-fix-truncation-warning-on-rv32.patch | 46 ++ ...unregister-clients-with-registered-d.patch | 74 ++ ...-simplify-add-remove-callback-handli.patch | 242 ++++++ ...king-for-forwarding-of-irqs-and-even.patch | 209 +++++ ...x-fix-error-code-in-qla2x00_start_sp.patch | 38 + ...iatek-add-dependency-for-reset_contr.patch | 55 ++ queue-6.4/series | 98 +++ ...et-the-number-of-areas-before-alloca.patch | 85 ++ ...he-number-of-areas-to-match-actual-m.patch | 114 +++ ...ng-call-into-udp_fail_queue_rcv_skb-.patch | 57 ++ queue-6.4/udp6-fix-udp6_ehashfn-typo.patch | 40 + ...uninitialized-warning-in-airo_get_ra.patch | 47 ++ ...-fix-error-code-in-rtw89_debug_priv_.patch | 51 ++ queue-6.4/x86-fineibt-poison-endbr-at-0.patch | 89 +++ ...rusted-arguments-in-xdp-hints-kfuncs.patch | 53 ++ ...ull-deref-when-a-bridge-of-pci-root-.patch | 90 +++ 99 files changed, 8898 insertions(+) create mode 100644 queue-6.4/blk-crypto-use-dynamic-lock-class-for-blk_crypto_pro.patch create mode 100644 queue-6.4/bpf-cpumap-fix-memory-leak-in-cpu_map_update_elem.patch create mode 100644 queue-6.4/bpf-fix-max-stack-depth-check-for-async-callbacks.patch create mode 100644 queue-6.4/drm-bridge-dw_hdmi-fix-connector-access-for-scdc.patch 
create mode 100644 queue-6.4/drm-bridge-ti-sn65dsi86-fix-auxiliary-bus-lifetime.patch create mode 100644 queue-6.4/drm-fbdev-dma-fix-documented-default-preferred_bpp-v.patch create mode 100644 queue-6.4/drm-i915-don-t-preserve-dpll_hw_state-for-slave-crtc.patch create mode 100644 queue-6.4/drm-i915-fix-one-wrong-caching-mode-enum-usage.patch create mode 100644 queue-6.4/drm-nouveau-acr-abort-loading-acr-if-no-firmware-was.patch create mode 100644 queue-6.4/drm-nouveau-bring-back-blit-subchannel-for-pre-nv50-.patch create mode 100644 queue-6.4/drm-nouveau-disp-fix-hdmi-on-gt215.patch create mode 100644 queue-6.4/drm-nouveau-disp-g94-enable-hdmi.patch create mode 100644 queue-6.4/drm-panel-simple-add-connector_type-for-innolux_at04.patch create mode 100644 queue-6.4/drm-panel-simple-add-powertip-ph800480t013-drm_displ.patch create mode 100644 queue-6.4/erofs-avoid-infinite-loop-in-z_erofs_do_read_page-wh.patch create mode 100644 queue-6.4/erofs-avoid-useless-loops-in-z_erofs_pcluster_readmo.patch create mode 100644 queue-6.4/erofs-fix-fsdax-unavailability-for-chunk-based-regul.patch create mode 100644 queue-6.4/fprobe-add-unlock-to-match-a-succeeded-ftrace_test_r.patch create mode 100644 queue-6.4/gve-set-default-duplex-configuration-to-full.patch create mode 100644 queue-6.4/hid-hyperv-avoid-struct-memcpy-overrun-warning.patch create mode 100644 queue-6.4/ice-fix-max_rate-check-while-configuring-tx-rate-lim.patch create mode 100644 queue-6.4/ice-fix-tx-queue-rate-limit-when-tcs-are-configured.patch create mode 100644 queue-6.4/icmp6-fix-null-ptr-deref-of-ip6_null_entry-rt6i_idev.patch create mode 100644 queue-6.4/igc-add-condition-for-qbv_config_change_errors-count.patch create mode 100644 queue-6.4/igc-add-igc_xdp_buff-wrapper-for-xdp_buff-in-driver.patch create mode 100644 queue-6.4/igc-add-xdp-hints-kfuncs-for-rx-hash.patch create mode 100644 queue-6.4/igc-do-not-enable-taprio-offload-for-invalid-argumen.patch create mode 100644 
queue-6.4/igc-fix-inserting-of-empty-frame-for-launchtime.patch create mode 100644 queue-6.4/igc-fix-launchtime-before-start-of-cycle.patch create mode 100644 queue-6.4/igc-fix-tx-hang-issue-when-qbv-gate-is-closed.patch create mode 100644 queue-6.4/igc-handle-already-enabled-taprio-offload-for-baseti.patch create mode 100644 queue-6.4/igc-handle-pps-start-time-programming-for-past-time-.patch create mode 100644 queue-6.4/igc-include-the-length-type-field-and-vlan-tag-in-qu.patch create mode 100644 queue-6.4/igc-no-strict-mode-in-pure-launchtime-cbs-offload.patch create mode 100644 queue-6.4/igc-remove-delay-during-tx-ring-configuration.patch create mode 100644 queue-6.4/igc-rename-qbv_enable-to-taprio_offload_enable.patch create mode 100644 queue-6.4/igc-set-tp-bit-in-supported-and-advertising-fields-o.patch create mode 100644 queue-6.4/ionic-remove-warn_on-to-prevent-panic_on_warn.patch create mode 100644 queue-6.4/ipv6-addrconf-fix-a-potential-refcount-underflow-for.patch create mode 100644 queue-6.4/kernel-trace-fix-cleanup-logic-of-enable_trace_eprob.patch create mode 100644 queue-6.4/net-bgmac-postpone-turning-irqs-off-to-avoid-soc-han.patch create mode 100644 queue-6.4/net-dsa-felix-make-vsc9959_tas_guard_bands_update-vi.patch create mode 100644 queue-6.4/net-dsa-qca8k-add-check-for-skb_copy.patch create mode 100644 queue-6.4/net-dsa-removed-unneeded-of_node_put-in-felix_parse_.patch create mode 100644 queue-6.4/net-fec-increase-the-size-of-tx-ring-and-update-tx_w.patch create mode 100644 queue-6.4/net-fec-recycle-pages-for-transmitted-xdp-frames.patch create mode 100644 queue-6.4/net-fec-remove-last_bdp-from-fec_enet_txq_xmit_frame.patch create mode 100644 queue-6.4/net-fec-remove-useless-fec_enet_reset_skb.patch create mode 100644 queue-6.4/net-mlx5-query-hca_cap_2-only-when-supported.patch create mode 100644 queue-6.4/net-mlx5-register-a-unique-thermal-zone-per-device.patch create mode 100644 
queue-6.4/net-mlx5e-check-for-not_ready-flag-state-after-locki.patch create mode 100644 queue-6.4/net-mlx5e-fix-double-free-in-mlx5e_destroy_flow_tabl.patch create mode 100644 queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_fs_tt_redirect_an.patch create mode 100644 queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_ptp_open.patch create mode 100644 queue-6.4/net-mlx5e-rx-fix-flush-and-close-release-flow-of-reg.patch create mode 100644 queue-6.4/net-mlx5e-rx-fix-page_pool-page-fragment-tracking-fo.patch create mode 100644 queue-6.4/net-mlx5e-tc-ct-offload-ct-clear-only-once.patch create mode 100644 queue-6.4/net-mscc-ocelot-fix-oversize-frame-dropping-for-pree.patch create mode 100644 queue-6.4/net-mvneta-fix-txq_map-in-case-of-txq_number-1.patch create mode 100644 queue-6.4/net-prevent-skb-corruption-on-frag-list-segmentation.patch create mode 100644 queue-6.4/net-sched-cls_fw-fix-improper-refcount-update-leads-.patch create mode 100644 queue-6.4/net-sched-flower-ensure-both-minimum-and-maximum-por.patch create mode 100644 queue-6.4/net-sched-make-psched_mtu-rtnl-less-safe.patch create mode 100644 queue-6.4/net-sched-sch_qfq-account-for-stab-overhead-in-qfq_e.patch create mode 100644 queue-6.4/net-sched-sch_qfq-reintroduce-lmax-bound-check-for-m.patch create mode 100644 queue-6.4/net-sched-taprio-replace-tc_taprio_qopt_offload-enab.patch create mode 100644 queue-6.4/net-txgbe-fix-eeprom-calculation-error.patch create mode 100644 queue-6.4/netdevsim-fix-uninitialized-data-in-nsim_dev_trap_fa.patch create mode 100644 queue-6.4/netfilter-conntrack-don-t-fold-port-numbers-into-add.patch create mode 100644 queue-6.4/netfilter-nf_tables-report-use-refcount-overflow.patch create mode 100644 queue-6.4/ntb-amd-fix-error-handling-in-amd_ntb_pci_driver_ini.patch create mode 100644 queue-6.4/ntb-idt-fix-error-handling-in-idt_pci_driver_init.patch create mode 100644 queue-6.4/ntb-intel-fix-error-handling-in-intel_ntb_pci_driver.patch create mode 100644 
queue-6.4/ntb-ntb_tool-add-check-for-devm_kcalloc.patch create mode 100644 queue-6.4/ntb-ntb_transport-fix-possible-memory-leak-while-dev.patch create mode 100644 queue-6.4/nvme-fix-the-nvme_id_ns_nvm_sts_mask-definition.patch create mode 100644 queue-6.4/nvme-pci-fix-dma-direction-of-unmapping-integrity-da.patch create mode 100644 queue-6.4/octeontx2-af-move-validation-of-ptp-pointer-before-i.patch create mode 100644 queue-6.4/octeontx2-af-promisc-enable-disable-through-mbox.patch create mode 100644 queue-6.4/octeontx2-pf-add-additional-check-for-mcam-rules.patch create mode 100644 queue-6.4/openrisc-union-fpcsr-and-oldmask-in-sigcontext-to-un.patch create mode 100644 queue-6.4/platform-x86-wmi-break-possible-infinite-loop-when-p.patch create mode 100644 queue-6.4/riscv-bpf-fix-inconsistent-jit-image-generation.patch create mode 100644 queue-6.4/riscv-mm-fix-truncation-warning-on-rv32.patch create mode 100644 queue-6.4/s390-ism-do-not-unregister-clients-with-registered-d.patch create mode 100644 queue-6.4/s390-ism-fix-and-simplify-add-remove-callback-handli.patch create mode 100644 queue-6.4/s390-ism-fix-locking-for-forwarding-of-irqs-and-even.patch create mode 100644 queue-6.4/scsi-qla2xxx-fix-error-code-in-qla2x00_start_sp.patch create mode 100644 queue-6.4/scsi-ufs-ufs-mediatek-add-dependency-for-reset_contr.patch create mode 100644 queue-6.4/series create mode 100644 queue-6.4/swiotlb-always-set-the-number-of-areas-before-alloca.patch create mode 100644 queue-6.4/swiotlb-reduce-the-number-of-areas-to-match-actual-m.patch create mode 100644 queue-6.4/udp6-add-a-missing-call-into-udp_fail_queue_rcv_skb-.patch create mode 100644 queue-6.4/udp6-fix-udp6_ehashfn-typo.patch create mode 100644 queue-6.4/wifi-airo-avoid-uninitialized-warning-in-airo_get_ra.patch create mode 100644 queue-6.4/wifi-rtw89-debug-fix-error-code-in-rtw89_debug_priv_.patch create mode 100644 queue-6.4/x86-fineibt-poison-endbr-at-0.patch create mode 100644 
queue-6.4/xdp-use-trusted-arguments-in-xdp-hints-kfuncs.patch create mode 100644 queue-6.4/xen-virtio-fix-null-deref-when-a-bridge-of-pci-root-.patch diff --git a/queue-6.4/blk-crypto-use-dynamic-lock-class-for-blk_crypto_pro.patch b/queue-6.4/blk-crypto-use-dynamic-lock-class-for-blk_crypto_pro.patch new file mode 100644 index 00000000000..aeea9893971 --- /dev/null +++ b/queue-6.4/blk-crypto-use-dynamic-lock-class-for-blk_crypto_pro.patch @@ -0,0 +1,110 @@ +From 049354d92774b515eee4d0942a55012adfdfd0ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Jun 2023 23:11:39 -0700 +Subject: blk-crypto: use dynamic lock class for blk_crypto_profile::lock + +From: Eric Biggers + +[ Upstream commit 2fb48d88e77f29bf9d278f25bcfe82cf59a0e09b ] + +When a device-mapper device is passing through the inline encryption +support of an underlying device, calls to blk_crypto_evict_key() take +the blk_crypto_profile::lock of the device-mapper device, then take the +blk_crypto_profile::lock of the underlying device (nested). This isn't +a real deadlock, but it causes a lockdep report because there is only +one lock class for all instances of this lock. + +Lockdep subclasses don't really work here because the hierarchy of block +devices is dynamic and could have more than 2 levels. + +Instead, register a dynamic lock class for each blk_crypto_profile, and +associate that with the lock. 
+ +This avoids false-positive lockdep reports like the following: + + ============================================ + WARNING: possible recursive locking detected + 6.4.0-rc5 #2 Not tainted + -------------------------------------------- + fscryptctl/1421 is trying to acquire lock: + ffffff80829ca418 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0x44/0x1c0 + + but task is already holding lock: + ffffff8086b68ca8 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0xc8/0x1c0 + + other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&profile->lock); + lock(&profile->lock); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + +Fixes: 1b2628397058 ("block: Keyslot Manager for Inline Encryption") +Reported-by: Bart Van Assche +Signed-off-by: Eric Biggers +Reviewed-by: Bart Van Assche +Link: https://lore.kernel.org/r/20230610061139.212085-1-ebiggers@kernel.org +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-crypto-profile.c | 12 ++++++++++-- + include/linux/blk-crypto-profile.h | 1 + + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c +index 2a67d3fb63e5c..7fabc883e39f1 100644 +--- a/block/blk-crypto-profile.c ++++ b/block/blk-crypto-profile.c +@@ -79,7 +79,14 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile, + unsigned int slot_hashtable_size; + + memset(profile, 0, sizeof(*profile)); +- init_rwsem(&profile->lock); ++ ++ /* ++ * profile->lock of an underlying device can nest inside profile->lock ++ * of a device-mapper device, so use a dynamic lock class to avoid ++ * false-positive lockdep reports. 
++ */ ++ lockdep_register_key(&profile->lockdep_key); ++ __init_rwsem(&profile->lock, "&profile->lock", &profile->lockdep_key); + + if (num_slots == 0) + return 0; +@@ -89,7 +96,7 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile, + profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]), + GFP_KERNEL); + if (!profile->slots) +- return -ENOMEM; ++ goto err_destroy; + + profile->num_slots = num_slots; + +@@ -435,6 +442,7 @@ void blk_crypto_profile_destroy(struct blk_crypto_profile *profile) + { + if (!profile) + return; ++ lockdep_unregister_key(&profile->lockdep_key); + kvfree(profile->slot_hashtable); + kvfree_sensitive(profile->slots, + sizeof(profile->slots[0]) * profile->num_slots); +diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h +index e6802b69cdd64..90ab33cb5d0ef 100644 +--- a/include/linux/blk-crypto-profile.h ++++ b/include/linux/blk-crypto-profile.h +@@ -111,6 +111,7 @@ struct blk_crypto_profile { + * keyslots while ensuring that they can't be changed concurrently. 
+ */ + struct rw_semaphore lock; ++ struct lock_class_key lockdep_key; + + /* List of idle slots, with least recently used slot at front */ + wait_queue_head_t idle_slots_wait_queue; +-- +2.39.2 + diff --git a/queue-6.4/bpf-cpumap-fix-memory-leak-in-cpu_map_update_elem.patch b/queue-6.4/bpf-cpumap-fix-memory-leak-in-cpu_map_update_elem.patch new file mode 100644 index 00000000000..1458f7e9aea --- /dev/null +++ b/queue-6.4/bpf-cpumap-fix-memory-leak-in-cpu_map_update_elem.patch @@ -0,0 +1,138 @@ +From a4e1b6b58194575ab350ac438efcc0521fe963dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 19:58:48 +0800 +Subject: bpf: cpumap: Fix memory leak in cpu_map_update_elem + +From: Pu Lehui + +[ Upstream commit 4369016497319a9635702da010d02af1ebb1849d ] + +Syzkaller reported a memory leak as follows: + +BUG: memory leak +unreferenced object 0xff110001198ef748 (size 192): + comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s) + hex dump (first 32 bytes): + 00 00 00 00 4a 19 00 00 80 ad e3 e4 fe ff c0 00 ....J........... + 00 b2 d3 0c 01 00 11 ff 28 f5 8e 19 01 00 11 ff ........(....... + backtrace: + [] __cpu_map_entry_alloc+0xf7/0xb00 + [] cpu_map_update_elem+0x2fe/0x3d0 + [] bpf_map_update_value.isra.0+0x2bd/0x520 + [] map_update_elem+0x4cb/0x720 + [] __se_sys_bpf+0x8c3/0xb90 + [] do_syscall_64+0x30/0x40 + [] entry_SYSCALL_64_after_hwframe+0x61/0xc6 + +BUG: memory leak +unreferenced object 0xff110001198ef528 (size 192): + comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [] __cpu_map_entry_alloc+0x260/0xb00 + [] cpu_map_update_elem+0x2fe/0x3d0 + [] bpf_map_update_value.isra.0+0x2bd/0x520 + [] map_update_elem+0x4cb/0x720 + [] __se_sys_bpf+0x8c3/0xb90 + [] do_syscall_64+0x30/0x40 + [] entry_SYSCALL_64_after_hwframe+0x61/0xc6 + +BUG: memory leak +unreferenced object 0xff1100010fd93d68 (size 8): + comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s) + hex dump (first 8 bytes): + 00 00 00 00 00 00 00 00 ........ + backtrace: + [] kvmalloc_node+0x11e/0x170 + [] __cpu_map_entry_alloc+0x2f0/0xb00 + [] cpu_map_update_elem+0x2fe/0x3d0 + [] bpf_map_update_value.isra.0+0x2bd/0x520 + [] map_update_elem+0x4cb/0x720 + [] __se_sys_bpf+0x8c3/0xb90 + [] do_syscall_64+0x30/0x40 + [] entry_SYSCALL_64_after_hwframe+0x61/0xc6 + +In the cpu_map_update_elem flow, when kthread_stop is called before +calling the threadfn of rcpu->kthread, since the KTHREAD_SHOULD_STOP bit +of kthread has been set by kthread_stop, the threadfn of rcpu->kthread +will never be executed, and rcpu->refcnt will never be 0, which will +lead to the allocated rcpu, rcpu->queue and rcpu->queue->queue cannot be +released. + +Calling kthread_stop before executing kthread's threadfn will return +-EINTR. We can complete the release of memory resources in this state. 
+ +Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP") +Signed-off-by: Pu Lehui +Acked-by: Jesper Dangaard Brouer +Acked-by: Hou Tao +Link: https://lore.kernel.org/r/20230711115848.2701559-1-pulehui@huaweicloud.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/cpumap.c | 40 ++++++++++++++++++++++++---------------- + 1 file changed, 24 insertions(+), 16 deletions(-) + +diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c +index 8ec18faa74ac3..3da63be602d1c 100644 +--- a/kernel/bpf/cpumap.c ++++ b/kernel/bpf/cpumap.c +@@ -126,22 +126,6 @@ static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) + atomic_inc(&rcpu->refcnt); + } + +-/* called from workqueue, to workaround syscall using preempt_disable */ +-static void cpu_map_kthread_stop(struct work_struct *work) +-{ +- struct bpf_cpu_map_entry *rcpu; +- +- rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); +- +- /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, +- * as it waits until all in-flight call_rcu() callbacks complete. +- */ +- rcu_barrier(); +- +- /* kthread_stop will wake_up_process and wait for it to complete */ +- kthread_stop(rcpu->kthread); +-} +- + static void __cpu_map_ring_cleanup(struct ptr_ring *ring) + { + /* The tear-down procedure should have made sure that queue is +@@ -169,6 +153,30 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) + } + } + ++/* called from workqueue, to workaround syscall using preempt_disable */ ++static void cpu_map_kthread_stop(struct work_struct *work) ++{ ++ struct bpf_cpu_map_entry *rcpu; ++ int err; ++ ++ rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); ++ ++ /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, ++ * as it waits until all in-flight call_rcu() callbacks complete. 
++ */ ++ rcu_barrier(); ++ ++ /* kthread_stop will wake_up_process and wait for it to complete */ ++ err = kthread_stop(rcpu->kthread); ++ if (err) { ++ /* kthread_stop may be called before cpu_map_kthread_run ++ * is executed, so we need to release the memory related ++ * to rcpu. ++ */ ++ put_cpu_map_entry(rcpu); ++ } ++} ++ + static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, + struct list_head *listp, + struct xdp_cpumap_stats *stats) +-- +2.39.2 + diff --git a/queue-6.4/bpf-fix-max-stack-depth-check-for-async-callbacks.patch b/queue-6.4/bpf-fix-max-stack-depth-check-for-async-callbacks.patch new file mode 100644 index 00000000000..76fd0dd9603 --- /dev/null +++ b/queue-6.4/bpf-fix-max-stack-depth-check-for-async-callbacks.patch @@ -0,0 +1,59 @@ +From 13aba8cc9c46174b275c69e259ce56721c4fc8da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 20:17:29 +0530 +Subject: bpf: Fix max stack depth check for async callbacks + +From: Kumar Kartikeya Dwivedi + +[ Upstream commit 5415ccd50a8620c8cbaa32d6f18c946c453566f5 ] + +The check_max_stack_depth pass happens after the verifier's symbolic +execution, and attempts to walk the call graph of the BPF program, +ensuring that the stack usage stays within bounds for all possible call +chains. There are two cases to consider: bpf_pseudo_func and +bpf_pseudo_call. In the former case, the callback pointer is loaded into +a register, and is assumed that it is passed to some helper later which +calls it (however there is no way to be sure), but the check remains +conservative and accounts the stack usage anyway. For this particular +case, asynchronous callbacks are skipped as they execute asynchronously +when their corresponding event fires. + +The case of bpf_pseudo_call is simpler and we know that the call is +definitely made, hence the stack depth of the subprog is accounted for. + +However, the current check still skips an asynchronous callback even if +a bpf_pseudo_call was made for it. 
This is erroneous, as it will miss +accounting for the stack usage of the asynchronous callback, which can +be used to breach the maximum stack depth limit. + +Fix this by only skipping asynchronous callbacks when the instruction is +not a pseudo call to the subprog. + +Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.") +Signed-off-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/r/20230705144730.235802-2-memxor@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 30fabae47a07b..aac31e33323bb 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5450,8 +5450,9 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) + verbose(env, "verifier bug. subprog has tail_call and async cb\n"); + return -EFAULT; + } +- /* async callbacks don't increase bpf prog stack size */ +- continue; ++ /* async callbacks don't increase bpf prog stack size unless called directly */ ++ if (!bpf_pseudo_call(insn + i)) ++ continue; + } + i = next_insn; + +-- +2.39.2 + diff --git a/queue-6.4/drm-bridge-dw_hdmi-fix-connector-access-for-scdc.patch b/queue-6.4/drm-bridge-dw_hdmi-fix-connector-access-for-scdc.patch new file mode 100644 index 00000000000..9ab4155fc23 --- /dev/null +++ b/queue-6.4/drm-bridge-dw_hdmi-fix-connector-access-for-scdc.patch @@ -0,0 +1,81 @@ +From 55ccff99ae176eb0c945916524cfd9719d1abd4c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Jun 2023 13:31:53 +0100 +Subject: drm: bridge: dw_hdmi: fix connector access for scdc +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Adrián Larumbe + +[ Upstream commit 98703e4e061fb8715c7613cd227e32cdfd136b23 ] + +Commit 5d844091f237 ("drm/scdc-helper: Pimp SCDC debugs") changed the scdc +interface to pick up an i2c adapter from a connector 
instead. However, in +the case of dw-hdmi, the wrong connector was being used to pass i2c adapter +information, since dw-hdmi's embedded connector structure is only populated +when the bridge attachment callback explicitly asks for it. + +drm-meson is handling connector creation, so this won't happen, leading to +a NULL pointer dereference. + +Fix it by having scdc functions access dw-hdmi's current connector pointer +instead, which is assigned during the bridge enablement stage. + +Fixes: 5d844091f237 ("drm/scdc-helper: Pimp SCDC debugs") +Signed-off-by: Adrián Larumbe +Reported-by: Lukas F. Hartmann +Acked-by: Neil Armstrong +[narmstrong: moved Fixes tag before first S-o-b and added Reported-by tag] +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20230601123153.196867-1-adrian.larumbe@collabora.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +index 603bb3c51027b..3b40e0fdca5cb 100644 +--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c ++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +@@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi, + /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */ + if (dw_hdmi_support_scdc(hdmi, display)) { + if (mtmdsclock > HDMI14_MAX_TMDSCLK) +- drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 1); ++ drm_scdc_set_high_tmds_clock_ratio(hdmi->curr_conn, 1); + else +- drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 0); ++ drm_scdc_set_high_tmds_clock_ratio(hdmi->curr_conn, 0); + } + } + EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio); +@@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, + min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION)); + + /* Enabled Scrambling in the Sink */ +- drm_scdc_set_scrambling(&hdmi->connector, 1); ++ 
drm_scdc_set_scrambling(hdmi->curr_conn, 1); + + /* + * To activate the scrambler feature, you must ensure +@@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, + hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL); + hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ, + HDMI_MC_SWRSTZ); +- drm_scdc_set_scrambling(&hdmi->connector, 0); ++ drm_scdc_set_scrambling(hdmi->curr_conn, 0); + } + } + +@@ -3553,6 +3553,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, + hdmi->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID + | DRM_BRIDGE_OP_HPD; + hdmi->bridge.interlace_allowed = true; ++ hdmi->bridge.ddc = hdmi->ddc; + #ifdef CONFIG_OF + hdmi->bridge.of_node = pdev->dev.of_node; + #endif +-- +2.39.2 + diff --git a/queue-6.4/drm-bridge-ti-sn65dsi86-fix-auxiliary-bus-lifetime.patch b/queue-6.4/drm-bridge-ti-sn65dsi86-fix-auxiliary-bus-lifetime.patch new file mode 100644 index 00000000000..0f6890212fa --- /dev/null +++ b/queue-6.4/drm-bridge-ti-sn65dsi86-fix-auxiliary-bus-lifetime.patch @@ -0,0 +1,112 @@ +From 270b8bee9af7ad5cd07b9d7e5e88c3dfaa772065 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Jun 2023 06:58:13 -0700 +Subject: drm/bridge: ti-sn65dsi86: Fix auxiliary bus lifetime + +From: Douglas Anderson + +[ Upstream commit 7aa83fbd712a6f08ffa67890061f26d140c2a84f ] + +Memory for the "struct device" for any given device isn't supposed to +be released until the device's release() is called. This is important +because someone might be holding a kobject reference to the "struct +device" and might try to access one of its members even after any +other cleanup/uninitialization has happened. + +Code analysis of ti-sn65dsi86 shows that this isn't quite right. When +the code was written, it was believed that we could rely on the fact +that the child devices would all be freed before the parent devices +and thus we didn't need to worry about a release() function. 
While I +still believe that the parent's "struct device" is guaranteed to +outlive the child's "struct device" (because the child holds a kobject +reference to the parent), the parent's "devm" allocated memory is a +different story. That appears to be freed much earlier. + +Let's make this better for ti-sn65dsi86 by allocating each auxiliary +with kzalloc and then free that memory in the release(). + +Fixes: bf73537f411b ("drm/bridge: ti-sn65dsi86: Break GPIO and MIPI-to-eDP bridge into sub-drivers") +Suggested-by: Stephen Boyd +Reviewed-by: Stephen Boyd +Signed-off-by: Douglas Anderson +Link: https://patchwork.freedesktop.org/patch/msgid/20230613065812.v2.1.I24b838a5b4151fb32bccd6f36397998ea2df9fbb@changeid +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/bridge/ti-sn65dsi86.c | 35 +++++++++++++++++---------- + 1 file changed, 22 insertions(+), 13 deletions(-) + +diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c +index 4676cf2900dfd..3c8fd6ea6d6a4 100644 +--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c ++++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c +@@ -170,10 +170,10 @@ + * @pwm_refclk_freq: Cache for the reference clock input to the PWM. + */ + struct ti_sn65dsi86 { +- struct auxiliary_device bridge_aux; +- struct auxiliary_device gpio_aux; +- struct auxiliary_device aux_aux; +- struct auxiliary_device pwm_aux; ++ struct auxiliary_device *bridge_aux; ++ struct auxiliary_device *gpio_aux; ++ struct auxiliary_device *aux_aux; ++ struct auxiliary_device *pwm_aux; + + struct device *dev; + struct regmap *regmap; +@@ -468,27 +468,34 @@ static void ti_sn65dsi86_delete_aux(void *data) + auxiliary_device_delete(data); + } + +-/* +- * AUX bus docs say that a non-NULL release is mandatory, but it makes no +- * sense for the model used here where all of the aux devices are allocated +- * in the single shared structure. We'll use this noop as a workaround. 
+- */ +-static void ti_sn65dsi86_noop(struct device *dev) {} ++static void ti_sn65dsi86_aux_device_release(struct device *dev) ++{ ++ struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); ++ ++ kfree(aux); ++} + + static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, +- struct auxiliary_device *aux, ++ struct auxiliary_device **aux_out, + const char *name) + { + struct device *dev = pdata->dev; ++ struct auxiliary_device *aux; + int ret; + ++ aux = kzalloc(sizeof(*aux), GFP_KERNEL); ++ if (!aux) ++ return -ENOMEM; ++ + aux->name = name; + aux->dev.parent = dev; +- aux->dev.release = ti_sn65dsi86_noop; ++ aux->dev.release = ti_sn65dsi86_aux_device_release; + device_set_of_node_from_dev(&aux->dev, dev); + ret = auxiliary_device_init(aux); +- if (ret) ++ if (ret) { ++ kfree(aux); + return ret; ++ } + ret = devm_add_action_or_reset(dev, ti_sn65dsi86_uninit_aux, aux); + if (ret) + return ret; +@@ -497,6 +504,8 @@ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, + if (ret) + return ret; + ret = devm_add_action_or_reset(dev, ti_sn65dsi86_delete_aux, aux); ++ if (!ret) ++ *aux_out = aux; + + return ret; + } +-- +2.39.2 + diff --git a/queue-6.4/drm-fbdev-dma-fix-documented-default-preferred_bpp-v.patch b/queue-6.4/drm-fbdev-dma-fix-documented-default-preferred_bpp-v.patch new file mode 100644 index 00000000000..45dae98556f --- /dev/null +++ b/queue-6.4/drm-fbdev-dma-fix-documented-default-preferred_bpp-v.patch @@ -0,0 +1,42 @@ +From 9874576143edd04da17ddc4c1e193e5a747512f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 17:30:31 +0200 +Subject: drm/fbdev-dma: Fix documented default preferred_bpp value + +From: Geert Uytterhoeven + +[ Upstream commit 15008052b34efaa86c1d56190ac73c4bf8c462f9 ] + +As of commit 6c80a93be62d398e ("drm/fb-helper: Initialize fb-helper's +preferred BPP in prepare function"), the preferred_bpp parameter of +drm_fb_helper_prepare() defaults to 32 instead of 
+drm_mode_config.preferred_depth. Hence this also applies to +drm_fbdev_dma_setup(), which just passes its own preferred_bpp +parameter. + +Fixes: b79fe9abd58bab73 ("drm/fbdev-dma: Implement fbdev emulation for GEM DMA helpers") +Signed-off-by: Geert Uytterhoeven +Reviewed-by: Thomas Zimmermann +Signed-off-by: Thomas Zimmermann +Link: https://patchwork.freedesktop.org/patch/msgid/91f093ffe436a9f94d58fb2bfbc1407f1ebe8bb0.1688656591.git.geert+renesas@glider.be +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/drm_fbdev_dma.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c +index 728deffcc0d92..e85cdf69cd6c4 100644 +--- a/drivers/gpu/drm/drm_fbdev_dma.c ++++ b/drivers/gpu/drm/drm_fbdev_dma.c +@@ -218,7 +218,7 @@ static const struct drm_client_funcs drm_fbdev_dma_client_funcs = { + * drm_fbdev_dma_setup() - Setup fbdev emulation for GEM DMA helpers + * @dev: DRM device + * @preferred_bpp: Preferred bits per pixel for the device. +- * @dev->mode_config.preferred_depth is used if this is zero. ++ * 32 is used if this is zero. + * + * This function sets up fbdev emulation for GEM DMA drivers that support + * dumb buffers with a virtual address and that can be mmap'ed. 
+-- +2.39.2 + diff --git a/queue-6.4/drm-i915-don-t-preserve-dpll_hw_state-for-slave-crtc.patch b/queue-6.4/drm-i915-don-t-preserve-dpll_hw_state-for-slave-crtc.patch new file mode 100644 index 00000000000..1f9856602e4 --- /dev/null +++ b/queue-6.4/drm-i915-don-t-preserve-dpll_hw_state-for-slave-crtc.patch @@ -0,0 +1,41 @@ +From c33a440dd21cd1cbd590b5f9cc48a1f556c59732 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 17:10:17 +0300 +Subject: drm/i915: Don't preserve dpll_hw_state for slave crtc in Bigjoiner +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Stanislav Lisovskiy + +[ Upstream commit 5c413188c68da0e4bffc93de1c80257e20741e69 ] + +If we are using Bigjoiner dpll_hw_state is supposed to be exactly +same as for master crtc, so no need to save its state for slave crtc. + +Signed-off-by: Stanislav Lisovskiy +Fixes: 0ff0e219d9b8 ("drm/i915: Compute clocks earlier") +Reviewed-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20230628141017.18937-1-stanislav.lisovskiy@intel.com +(cherry picked from commit cbaf758809952c95ec00e796695049babb08bb60) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/display/intel_display.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c +index 7749f95d5d02a..a805b57f3d912 100644 +--- a/drivers/gpu/drm/i915/display/intel_display.c ++++ b/drivers/gpu/drm/i915/display/intel_display.c +@@ -4968,7 +4968,6 @@ copy_bigjoiner_crtc_state_modeset(struct intel_atomic_state *state, + saved_state->uapi = slave_crtc_state->uapi; + saved_state->scaler_state = slave_crtc_state->scaler_state; + saved_state->shared_dpll = slave_crtc_state->shared_dpll; +- saved_state->dpll_hw_state = slave_crtc_state->dpll_hw_state; + saved_state->crc_enabled = slave_crtc_state->crc_enabled; + + intel_crtc_free_hw_state(slave_crtc_state); +-- 
+2.39.2 + diff --git a/queue-6.4/drm-i915-fix-one-wrong-caching-mode-enum-usage.patch b/queue-6.4/drm-i915-fix-one-wrong-caching-mode-enum-usage.patch new file mode 100644 index 00000000000..1a23b03525c --- /dev/null +++ b/queue-6.4/drm-i915-fix-one-wrong-caching-mode-enum-usage.patch @@ -0,0 +1,45 @@ +From 1b7eae674754c36f9db7a4d8ba556602591973ca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 13:55:03 +0100 +Subject: drm/i915: Fix one wrong caching mode enum usage + +From: Tvrtko Ursulin + +[ Upstream commit 113899c2669dff148b2a5bea4780123811aecc13 ] + +Commit a4d86249c773 ("drm/i915/gt: Provide a utility to create a scratch +buffer") mistakenly passed in uapi I915_CACHING_CACHED as argument to +i915_gem_object_set_cache_coherency(), which actually takes internal +enum i915_cache_level. + +No functional issue since the value matches I915_CACHE_LLC (1 == 1), which +is the intended caching mode, but let's clean it up nevertheless. + +Signed-off-by: Tvrtko Ursulin +Fixes: a4d86249c773 ("drm/i915/gt: Provide a utility to create a scratch buffer") +Cc: Daniele Ceraolo Spurio +Reviewed-by: Tejas Upadhyay +Link: https://patchwork.freedesktop.org/patch/msgid/20230707125503.3965817-1-tvrtko.ursulin@linux.intel.com +(cherry picked from commit 49c60b2f0867ac36fd54d513882a48431aeccae7) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c +index 4f436ba7a3c83..123b82f29a1bf 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gtt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gtt.c +@@ -625,7 +625,7 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size) + if (IS_ERR(obj)) + return ERR_CAST(obj); + +- i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); ++ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, 
vm, NULL); + if (IS_ERR(vma)) { +-- +2.39.2 + diff --git a/queue-6.4/drm-nouveau-acr-abort-loading-acr-if-no-firmware-was.patch b/queue-6.4/drm-nouveau-acr-abort-loading-acr-if-no-firmware-was.patch new file mode 100644 index 00000000000..8bbcb55f035 --- /dev/null +++ b/queue-6.4/drm-nouveau-acr-abort-loading-acr-if-no-firmware-was.patch @@ -0,0 +1,38 @@ +From 68f7b5d7a40cfd63e381066ace1770379c8a7c51 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 May 2023 22:18:38 +0200 +Subject: drm/nouveau/acr: Abort loading ACR if no firmware was found + +From: Karol Herbst + +[ Upstream commit 938a06c8b7913455073506c33ae3bff029c3c4ef ] + +This fixes a NULL pointer access inside nvkm_acr_oneinit in case necessary +firmware files couldn't be loaded. + +Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/212 +Fixes: 4b569ded09fd ("drm/nouveau/acr/ga102: initial support") +Signed-off-by: Karol Herbst +Reviewed-by: Dave Airlie +Link: https://patchwork.freedesktop.org/patch/msgid/20230522201838.1496622-1-kherbst@redhat.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c +index 795f3a649b122..9b8ca4e898f90 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c +@@ -224,7 +224,7 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev) + u64 falcons; + int ret, i; + +- if (list_empty(&acr->hsfw)) { ++ if (list_empty(&acr->hsfw) || !acr->func || !acr->func->wpr_layout) { + nvkm_debug(subdev, "No HSFW(s)\n"); + nvkm_acr_cleanup(acr); + return 0; +-- +2.39.2 + diff --git a/queue-6.4/drm-nouveau-bring-back-blit-subchannel-for-pre-nv50-.patch b/queue-6.4/drm-nouveau-bring-back-blit-subchannel-for-pre-nv50-.patch new file mode 100644 index 00000000000..dbb48294d8d --- /dev/null +++ 
b/queue-6.4/drm-nouveau-bring-back-blit-subchannel-for-pre-nv50-.patch @@ -0,0 +1,93 @@ +From 6baa2918216778a5c148247d43684e5a2f83e6d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 May 2023 11:10:52 +0200 +Subject: drm/nouveau: bring back blit subchannel for pre nv50 GPUs + +From: Karol Herbst + +[ Upstream commit 835a65f51790e1f72b1ab106ec89db9ac15b47d6 ] + +1ba6113a90a0 removed a lot of the kernel GPU channel, but method 0x128 +was important as otherwise the GPU spams us with `CACHE_ERROR` messages. + +We use the blit subchannel inside our vblank handling, so we should keep +at least this part. + +v2: Only do it for NV11+ GPUs + +Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/201 +Fixes: 4a16dd9d18a0 ("drm/nouveau/kms: switch to drm fbdev helpers") +Signed-off-by: Karol Herbst +Reviewed-by: Ben Skeggs +Link: https://patchwork.freedesktop.org/patch/msgid/20230526091052.2169044-1-kherbst@redhat.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/nouveau_chan.c | 1 + + drivers/gpu/drm/nouveau/nouveau_chan.h | 1 + + drivers/gpu/drm/nouveau/nouveau_drm.c | 20 +++++++++++++++++--- + 3 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c +index e648ecd0c1a03..3dfbc374478e6 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_chan.c ++++ b/drivers/gpu/drm/nouveau/nouveau_chan.c +@@ -90,6 +90,7 @@ nouveau_channel_del(struct nouveau_channel **pchan) + if (cli) + nouveau_svmm_part(chan->vmm->svmm, chan->inst); + ++ nvif_object_dtor(&chan->blit); + nvif_object_dtor(&chan->nvsw); + nvif_object_dtor(&chan->gart); + nvif_object_dtor(&chan->vram); +diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h +index e06a8ffed31a8..bad7466bd0d59 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_chan.h ++++ b/drivers/gpu/drm/nouveau/nouveau_chan.h +@@ -53,6 +53,7 @@ struct nouveau_channel { + u32 user_put; + + struct nvif_object user; 
++ struct nvif_object blit; + + struct nvif_event kill; + atomic_t killed; +diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c +index 7aac9384600ed..40fb9a8349180 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_drm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_drm.c +@@ -375,15 +375,29 @@ nouveau_accel_gr_init(struct nouveau_drm *drm) + ret = nvif_object_ctor(&drm->channel->user, "drmNvsw", + NVDRM_NVSW, nouveau_abi16_swclass(drm), + NULL, 0, &drm->channel->nvsw); ++ ++ if (ret == 0 && device->info.chipset >= 0x11) { ++ ret = nvif_object_ctor(&drm->channel->user, "drmBlit", ++ 0x005f, 0x009f, ++ NULL, 0, &drm->channel->blit); ++ } ++ + if (ret == 0) { + struct nvif_push *push = drm->channel->chan.push; +- ret = PUSH_WAIT(push, 2); +- if (ret == 0) ++ ret = PUSH_WAIT(push, 8); ++ if (ret == 0) { ++ if (device->info.chipset >= 0x11) { ++ PUSH_NVSQ(push, NV05F, 0x0000, drm->channel->blit.handle); ++ PUSH_NVSQ(push, NV09F, 0x0120, 0, ++ 0x0124, 1, ++ 0x0128, 2); ++ } + PUSH_NVSQ(push, NV_SW, 0x0000, drm->channel->nvsw.handle); ++ } + } + + if (ret) { +- NV_ERROR(drm, "failed to allocate sw class, %d\n", ret); ++ NV_ERROR(drm, "failed to allocate sw or blit class, %d\n", ret); + nouveau_accel_gr_fini(drm); + return; + } +-- +2.39.2 + diff --git a/queue-6.4/drm-nouveau-disp-fix-hdmi-on-gt215.patch b/queue-6.4/drm-nouveau-disp-fix-hdmi-on-gt215.patch new file mode 100644 index 00000000000..5334d54403b --- /dev/null +++ b/queue-6.4/drm-nouveau-disp-fix-hdmi-on-gt215.patch @@ -0,0 +1,37 @@ +From d92dc097c5c959d8a0645d0fdc7f502852665962 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 23:22:46 +0200 +Subject: drm/nouveau/disp: fix HDMI on gt215+ + +From: Karol Herbst + +[ Upstream commit d94303699921bda8141ad33554ae55b615ddd149 ] + +Cc: Ben Skeggs +Cc: Lyude Paul +Fixes: f530bc60a30b ("drm/nouveau/disp: move HDMI config into acquire + infoframe methods") +Signed-off-by: Karol Herbst +Reviewed-by: Ben Skeggs +Link: 
https://patchwork.freedesktop.org/patch/msgid/20230628212248.3798605-1-kherbst@redhat.com +Signed-off-by: Karol Herbst +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c +index a2c7c6f83dcdb..506ffbe7b8421 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c +@@ -125,7 +125,7 @@ gt215_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 siz + pack_hdmi_infoframe(&avi, data, size); + + nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000); +- if (size) ++ if (!size) + return; + + nvkm_wr32(device, 0x61c528 + soff, avi.header); +-- +2.39.2 + diff --git a/queue-6.4/drm-nouveau-disp-g94-enable-hdmi.patch b/queue-6.4/drm-nouveau-disp-g94-enable-hdmi.patch new file mode 100644 index 00000000000..b9fd97a181d --- /dev/null +++ b/queue-6.4/drm-nouveau-disp-g94-enable-hdmi.patch @@ -0,0 +1,36 @@ +From c4673406b30a28d2fa8ed21b37962828adf965e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Jun 2023 18:06:45 +0200 +Subject: drm/nouveau/disp/g94: enable HDMI + +From: Karol Herbst + +[ Upstream commit c177872cb056e0b499af4717d8d1977017fd53df ] + +Cc: Ben Skeggs +Cc: Lyude Paul +Fixes: f530bc60a30b ("drm/nouveau/disp: move HDMI config into acquire + infoframe methods") +Signed-off-by: Karol Herbst +Reviewed-by: Ben Skeggs +Link: https://patchwork.freedesktop.org/patch/msgid/20230630160645.3984596-1-kherbst@redhat.com +Signed-off-by: Karol Herbst +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c +index a4853c4e5ee3a..67ef889a0c5f4 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c ++++ 
b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c +@@ -295,6 +295,7 @@ g94_sor = { + .clock = nv50_sor_clock, + .war_2 = g94_sor_war_2, + .war_3 = g94_sor_war_3, ++ .hdmi = &g84_sor_hdmi, + .dp = &g94_sor_dp, + }; + +-- +2.39.2 + diff --git a/queue-6.4/drm-panel-simple-add-connector_type-for-innolux_at04.patch b/queue-6.4/drm-panel-simple-add-connector_type-for-innolux_at04.patch new file mode 100644 index 00000000000..db25f0745c3 --- /dev/null +++ b/queue-6.4/drm-panel-simple-add-connector_type-for-innolux_at04.patch @@ -0,0 +1,39 @@ +From 6ca5c4bfdbae1b72a46946af102d9677577fcd6d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 08:22:02 -0300 +Subject: drm/panel: simple: Add connector_type for innolux_at043tn24 + +From: Fabio Estevam + +[ Upstream commit 2c56a751845ddfd3078ebe79981aaaa182629163 ] + +The innolux at043tn24 display is a parallel LCD. Pass the 'connector_type' +information to avoid the following warning: + +panel-simple panel: Specify missing connector_type + +Signed-off-by: Fabio Estevam +Fixes: 41bcceb4de9c ("drm/panel: simple: Add support for Innolux AT043TN24") +Reviewed-by: Sam Ravnborg +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20230620112202.654981-1-festevam@gmail.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panel/panel-simple.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c +index d8efbcee9bc12..1927fef9aed67 100644 +--- a/drivers/gpu/drm/panel/panel-simple.c ++++ b/drivers/gpu/drm/panel/panel-simple.c +@@ -2117,6 +2117,7 @@ static const struct panel_desc innolux_at043tn24 = { + .height = 54, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, ++ .connector_type = DRM_MODE_CONNECTOR_DPI, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, + }; + +-- +2.39.2 + diff --git a/queue-6.4/drm-panel-simple-add-powertip-ph800480t013-drm_displ.patch 
b/queue-6.4/drm-panel-simple-add-powertip-ph800480t013-drm_displ.patch new file mode 100644 index 00000000000..99cfa6582f3 --- /dev/null +++ b/queue-6.4/drm-panel-simple-add-powertip-ph800480t013-drm_displ.patch @@ -0,0 +1,38 @@ +From e0bf3f1aa9d658c8219ebdd882c6aacb1bdf32ba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 22:16:02 +0200 +Subject: drm/panel: simple: Add Powertip PH800480T013 drm_display_mode flags + +From: Marek Vasut + +[ Upstream commit 1c519980aced3da1fae37c1339cf43b24eccdee7 ] + +Add missing drm_display_mode DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC +flags. Those are used by various bridges in the pipeline to correctly +configure its sync signals polarity. + +Fixes: d69de69f2be1 ("drm/panel: simple: Add Powertip PH800480T013 panel") +Signed-off-by: Marek Vasut +Reviewed-by: Sam Ravnborg +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20230615201602.565948-1-marex@denx.de +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panel/panel-simple.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c +index 1927fef9aed67..e02249b212c2a 100644 +--- a/drivers/gpu/drm/panel/panel-simple.c ++++ b/drivers/gpu/drm/panel/panel-simple.c +@@ -3110,6 +3110,7 @@ static const struct drm_display_mode powertip_ph800480t013_idf02_mode = { + .vsync_start = 480 + 49, + .vsync_end = 480 + 49 + 2, + .vtotal = 480 + 49 + 2 + 22, ++ .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, + }; + + static const struct panel_desc powertip_ph800480t013_idf02 = { +-- +2.39.2 + diff --git a/queue-6.4/erofs-avoid-infinite-loop-in-z_erofs_do_read_page-wh.patch b/queue-6.4/erofs-avoid-infinite-loop-in-z_erofs_do_read_page-wh.patch new file mode 100644 index 00000000000..c6cf620791c --- /dev/null +++ b/queue-6.4/erofs-avoid-infinite-loop-in-z_erofs_do_read_page-wh.patch @@ -0,0 +1,54 @@ +From 179c93cb4147df032e78c950833127e4fe6344e5 Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 17:34:10 +0800 +Subject: erofs: avoid infinite loop in z_erofs_do_read_page() when reading + beyond EOF + +From: Chunhai Guo + +[ Upstream commit 8191213a5835b0317c5e4d0d337ae1ae00c75253 ] + +z_erofs_do_read_page() may loop infinitely due to the inappropriate +truncation in the below statement. Since the offset is 64 bits and min_t() +truncates the result to 32 bits. The solution is to replace unsigned int +with a 64-bit type, such as erofs_off_t. + cur = end - min_t(unsigned int, offset + end - map->m_la, end); + + - For example: + - offset = 0x400160000 + - end = 0x370 + - map->m_la = 0x160370 + - offset + end - map->m_la = 0x400000000 + - offset + end - map->m_la = 0x00000000 (truncated as unsigned int) + - Expected result: + - cur = 0 + - Actual result: + - cur = 0x370 + +Signed-off-by: Chunhai Guo +Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") +Reviewed-by: Gao Xiang +Reviewed-by: Chao Yu +Link: https://lore.kernel.org/r/20230710093410.44071-1-guochunhai@vivo.com +Signed-off-by: Gao Xiang +Signed-off-by: Sasha Levin +--- + fs/erofs/zdata.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c +index bedfff5d45faf..997ca4b32e87f 100644 +--- a/fs/erofs/zdata.c ++++ b/fs/erofs/zdata.c +@@ -990,7 +990,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, + */ + tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); + +- cur = end - min_t(unsigned int, offset + end - map->m_la, end); ++ cur = end - min_t(erofs_off_t, offset + end - map->m_la, end); + if (!(map->m_flags & EROFS_MAP_MAPPED)) { + zero_user_segment(page, cur, end); + goto next_part; +-- +2.39.2 + diff --git a/queue-6.4/erofs-avoid-useless-loops-in-z_erofs_pcluster_readmo.patch b/queue-6.4/erofs-avoid-useless-loops-in-z_erofs_pcluster_readmo.patch new file mode 100644 index 00000000000..0746f68d54e --- /dev/null +++ 
b/queue-6.4/erofs-avoid-useless-loops-in-z_erofs_pcluster_readmo.patch @@ -0,0 +1,46 @@ +From 201f9e4e49bcb83a199bb05c153545ff504140bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 12:25:31 +0800 +Subject: erofs: avoid useless loops in z_erofs_pcluster_readmore() when + reading beyond EOF + +From: Chunhai Guo + +[ Upstream commit 936aa701d82d397c2d1afcd18ce2c739471d978d ] + +z_erofs_pcluster_readmore() may take a long time to loop when the page +offset is large enough, which is unnecessary and should be prevented. + +For example, when the following case is encountered, it will loop 4691368 +times, taking about 27 seconds: + - offset = 19217289215 + - inode_size = 1442672 + +Signed-off-by: Chunhai Guo +Fixes: 386292919c25 ("erofs: introduce readmore decompression strategy") +Reviewed-by: Gao Xiang +Reviewed-by: Yue Hu +Reviewed-by: Chao Yu +Link: https://lore.kernel.org/r/20230710042531.28761-1-guochunhai@vivo.com +Signed-off-by: Gao Xiang +Signed-off-by: Sasha Levin +--- + fs/erofs/zdata.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c +index 502893e3da010..bedfff5d45faf 100644 +--- a/fs/erofs/zdata.c ++++ b/fs/erofs/zdata.c +@@ -1807,7 +1807,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, + } + + cur = map->m_la + map->m_llen - 1; +- while (cur >= end) { ++ while ((cur >= end) && (cur < i_size_read(inode))) { + pgoff_t index = cur >> PAGE_SHIFT; + struct page *page; + +-- +2.39.2 + diff --git a/queue-6.4/erofs-fix-fsdax-unavailability-for-chunk-based-regul.patch b/queue-6.4/erofs-fix-fsdax-unavailability-for-chunk-based-regul.patch new file mode 100644 index 00000000000..9ac828578ca --- /dev/null +++ b/queue-6.4/erofs-fix-fsdax-unavailability-for-chunk-based-regul.patch @@ -0,0 +1,42 @@ +From 625c8bd6b37ff491ca8ee641313b1693e0a5d5f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 14:21:30 +0800 +Subject: erofs: fix fsdax 
unavailability for chunk-based regular files + +From: Xin Yin + +[ Upstream commit 18bddc5b67038722cb88fcf51fbf41a0277092cb ] + +DAX can be used to share page cache between VMs, reducing guest memory +overhead. And chunk based data format is widely used for VM and +container image. So enable dax support for it, make erofs better used +for VM scenarios. + +Fixes: c5aa903a59db ("erofs: support reading chunk-based uncompressed files") +Signed-off-by: Xin Yin +Reviewed-by: Gao Xiang +Reviewed-by: Chao Yu +Link: https://lore.kernel.org/r/20230711062130.7860-1-yinxin.x@bytedance.com +Signed-off-by: Gao Xiang +Signed-off-by: Sasha Levin +--- + fs/erofs/inode.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c +index d70b12b81507f..e12592727a546 100644 +--- a/fs/erofs/inode.c ++++ b/fs/erofs/inode.c +@@ -183,7 +183,8 @@ static void *erofs_read_inode(struct erofs_buf *buf, + + inode->i_flags &= ~S_DAX; + if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && +- vi->datalayout == EROFS_INODE_FLAT_PLAIN) ++ (vi->datalayout == EROFS_INODE_FLAT_PLAIN || ++ vi->datalayout == EROFS_INODE_CHUNK_BASED)) + inode->i_flags |= S_DAX; + + if (!nblks) +-- +2.39.2 + diff --git a/queue-6.4/fprobe-add-unlock-to-match-a-succeeded-ftrace_test_r.patch b/queue-6.4/fprobe-add-unlock-to-match-a-succeeded-ftrace_test_r.patch new file mode 100644 index 00000000000..bda7ba8a935 --- /dev/null +++ b/queue-6.4/fprobe-add-unlock-to-match-a-succeeded-ftrace_test_r.patch @@ -0,0 +1,50 @@ +From 9ea6b366f8d472e6814dbc9aad4e5bfb1d00ac39 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Jul 2023 17:23:36 +0800 +Subject: fprobe: add unlock to match a succeeded ftrace_test_recursion_trylock + +From: Ze Gao + +[ Upstream commit 5f0c584daf7464f04114c65dd07269ee2bfedc13 ] + +Unlock ftrace recursion lock when fprobe_kprobe_handler() is failed +because of some running kprobe. 
+ +Link: https://lore.kernel.org/all/20230703092336.268371-1-zegao@tencent.com/ + +Fixes: 3cc4e2c5fbae ("fprobe: make fprobe_kprobe_handler recursion free") +Reported-by: Yafang +Closes: https://lore.kernel.org/linux-trace-kernel/CALOAHbC6UpfFOOibdDiC7xFc5YFUgZnk3MZ=3Ny6we=AcrNbew@mail.gmail.com/ +Signed-off-by: Ze Gao +Acked-by: Masami Hiramatsu (Google) +Acked-by: Yafang Shao +Reviewed-by: Steven Rostedt (Google) +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/fprobe.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c +index 18d36842faf57..93b3e361bb97a 100644 +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -102,12 +102,14 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip, + + if (unlikely(kprobe_running())) { + fp->nmissed++; +- return; ++ goto recursion_unlock; + } + + kprobe_busy_begin(); + __fprobe_handler(ip, parent_ip, ops, fregs); + kprobe_busy_end(); ++ ++recursion_unlock: + ftrace_test_recursion_unlock(bit); + } + +-- +2.39.2 + diff --git a/queue-6.4/gve-set-default-duplex-configuration-to-full.patch b/queue-6.4/gve-set-default-duplex-configuration-to-full.patch new file mode 100644 index 00000000000..3517d138933 --- /dev/null +++ b/queue-6.4/gve-set-default-duplex-configuration-to-full.patch @@ -0,0 +1,43 @@ +From 756748806f6ce7d1f3efe286826e4813f7725296 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 12:41:28 +0800 +Subject: gve: Set default duplex configuration to full + +From: Junfeng Guo + +[ Upstream commit 0503efeadbf6bb8bf24397613a73b67e665eac5f ] + +Current duplex mode was unset in the driver, resulting in the default +parameter being set to 0, which corresponds to half duplex. It might +mislead users to have incorrect expectation about the driver's +transmission capabilities. 
+Set the default duplex configuration to full, as the driver runs in +full duplex mode at this point. + +Fixes: 7e074d5a76ca ("gve: Enable Link Speed Reporting in the driver.") +Signed-off-by: Junfeng Guo +Reviewed-by: Leon Romanovsky +Message-ID: <20230706044128.2726747-1-junfeng.guo@intel.com> +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c +index cfd4b8d284d12..50162ec9424df 100644 +--- a/drivers/net/ethernet/google/gve/gve_ethtool.c ++++ b/drivers/net/ethernet/google/gve/gve_ethtool.c +@@ -590,6 +590,9 @@ static int gve_get_link_ksettings(struct net_device *netdev, + err = gve_adminq_report_link_speed(priv); + + cmd->base.speed = priv->link_speed; ++ ++ cmd->base.duplex = DUPLEX_FULL; ++ + return err; + } + +-- +2.39.2 + diff --git a/queue-6.4/hid-hyperv-avoid-struct-memcpy-overrun-warning.patch b/queue-6.4/hid-hyperv-avoid-struct-memcpy-overrun-warning.patch new file mode 100644 index 00000000000..48fb7d1f04f --- /dev/null +++ b/queue-6.4/hid-hyperv-avoid-struct-memcpy-overrun-warning.patch @@ -0,0 +1,65 @@ +From b4bc4dd1ce538c2050276f4c029b3d91ea87604f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 16:02:24 +0200 +Subject: HID: hyperv: avoid struct memcpy overrun warning + +From: Arnd Bergmann + +[ Upstream commit 5f151364b1da6bd217632fd4ee8cc24eaf66a497 ] + +A previous patch addressed the fortified memcpy warning for most +builds, but I still see this one with gcc-9: + +In file included from include/linux/string.h:254, + from drivers/hid/hid-hyperv.c:8: +In function 'fortify_memcpy_chk', + inlined from 'mousevsc_on_receive' at drivers/hid/hid-hyperv.c:272:3: +include/linux/fortify-string.h:583:4: error: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe 
use struct_group()? [-Werror=attribute-warning] + 583 | __write_overflow_field(p_size_field, size); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +My guess is that the WARN_ON() itself is what confuses gcc, so it no +longer sees that there is a correct range check. Rework the code in a +way that helps readability and avoids the warning. + +Fixes: 542f25a94471 ("HID: hyperv: Replace one-element array with flexible-array member") +Signed-off-by: Arnd Bergmann +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20230705140242.844167-1-arnd@kernel.org +Signed-off-by: Benjamin Tissoires +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-hyperv.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c +index 49d4a26895e76..f33485d83d24f 100644 +--- a/drivers/hid/hid-hyperv.c ++++ b/drivers/hid/hid-hyperv.c +@@ -258,19 +258,17 @@ static void mousevsc_on_receive(struct hv_device *device, + + switch (hid_msg_hdr->type) { + case SYNTH_HID_PROTOCOL_RESPONSE: ++ len = struct_size(pipe_msg, data, pipe_msg->size); ++ + /* + * While it will be impossible for us to protect against + * malicious/buggy hypervisor/host, add a check here to + * ensure we don't corrupt memory. 
+ */ +- if (struct_size(pipe_msg, data, pipe_msg->size) +- > sizeof(struct mousevsc_prt_msg)) { +- WARN_ON(1); ++ if (WARN_ON(len > sizeof(struct mousevsc_prt_msg))) + break; +- } + +- memcpy(&input_dev->protocol_resp, pipe_msg, +- struct_size(pipe_msg, data, pipe_msg->size)); ++ memcpy(&input_dev->protocol_resp, pipe_msg, len); + complete(&input_dev->wait_event); + break; + +-- +2.39.2 + diff --git a/queue-6.4/ice-fix-max_rate-check-while-configuring-tx-rate-lim.patch b/queue-6.4/ice-fix-max_rate-check-while-configuring-tx-rate-lim.patch new file mode 100644 index 00000000000..5b57d1a65c1 --- /dev/null +++ b/queue-6.4/ice-fix-max_rate-check-while-configuring-tx-rate-lim.patch @@ -0,0 +1,86 @@ +From c426f5ca59f9bd7765ca00379db289f3b8213478 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Jun 2023 17:40:23 -0700 +Subject: ice: Fix max_rate check while configuring TX rate limits + +From: Sridhar Samudrala + +[ Upstream commit 5f16da6ee6ac32e6c8098bc4cfcc4f170694f9da ] + +Remove incorrect check in ice_validate_mqprio_opt() that limits +filter configuration when sum of max_rates of all TCs exceeds +the link speed. The max rate of each TC is unrelated to value +used by other TCs and is valid as long as it is less than link +speed. 
+ +Fixes: fbc7b27af0f9 ("ice: enable ndo_setup_tc support for mqprio_qdisc") +Signed-off-by: Sridhar Samudrala +Signed-off-by: Sudheer Mogilappagari +Tested-by: Bharathi Sreenivas +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_main.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index fcc027c938fda..eef7c1224887a 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -8114,10 +8114,10 @@ static int + ice_validate_mqprio_qopt(struct ice_vsi *vsi, + struct tc_mqprio_qopt_offload *mqprio_qopt) + { +- u64 sum_max_rate = 0, sum_min_rate = 0; + int non_power_of_2_qcount = 0; + struct ice_pf *pf = vsi->back; + int max_rss_q_cnt = 0; ++ u64 sum_min_rate = 0; + struct device *dev; + int i, speed; + u8 num_tc; +@@ -8133,6 +8133,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, + dev = ice_pf_to_dev(pf); + vsi->ch_rss_size = 0; + num_tc = mqprio_qopt->qopt.num_tc; ++ speed = ice_get_link_speed_kbps(vsi); + + for (i = 0; num_tc; i++) { + int qcount = mqprio_qopt->qopt.count[i]; +@@ -8173,7 +8174,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, + */ + max_rate = mqprio_qopt->max_rate[i]; + max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); +- sum_max_rate += max_rate; + + /* min_rate is minimum guaranteed rate and it can't be zero */ + min_rate = mqprio_qopt->min_rate[i]; +@@ -8186,6 +8186,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, + return -EINVAL; + } + ++ if (max_rate && max_rate > speed) { ++ dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n", ++ i, max_rate, speed); ++ return -EINVAL; ++ } ++ + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); + if (rem) { + dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", +@@ -8223,12 +8229,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, + (mqprio_qopt->qopt.offset[i] + 
mqprio_qopt->qopt.count[i])) + return -EINVAL; + +- speed = ice_get_link_speed_kbps(vsi); +- if (sum_max_rate && sum_max_rate > (u64)speed) { +- dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", +- sum_max_rate, speed); +- return -EINVAL; +- } + if (sum_min_rate && sum_min_rate > (u64)speed) { + dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", + sum_min_rate, speed); +-- +2.39.2 + diff --git a/queue-6.4/ice-fix-tx-queue-rate-limit-when-tcs-are-configured.patch b/queue-6.4/ice-fix-tx-queue-rate-limit-when-tcs-are-configured.patch new file mode 100644 index 00000000000..96d134658ee --- /dev/null +++ b/queue-6.4/ice-fix-tx-queue-rate-limit-when-tcs-are-configured.patch @@ -0,0 +1,123 @@ +From 1f0c01f03b4c0b4ebc5b97bf3e0679fc1e9b7c3b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Jun 2023 17:40:24 -0700 +Subject: ice: Fix tx queue rate limit when TCs are configured + +From: Sridhar Samudrala + +[ Upstream commit 479cdfe388a04a16fdd127f3e9e9e019e45e5573 ] + +Configuring tx_maxrate via sysfs interface +/sys/class/net/eth0/queues/tx-1/tx_maxrate was not working when +TCs are configured because always main VSI was being used. Fix by +using correct VSI in ice_set_tx_maxrate when TCs are configured. 
+ +Fixes: 1ddef455f4a8 ("ice: Add NDO callback to set the maximum per-queue bitrate") +Signed-off-by: Sridhar Samudrala +Signed-off-by: Sudheer Mogilappagari +Tested-by: Bharathi Sreenivas +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++++ + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 22 ++++++++++----------- + drivers/net/ethernet/intel/ice/ice_tc_lib.h | 1 + + 3 files changed, 19 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index eef7c1224887a..1277e0a044ee4 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -5969,6 +5969,13 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) + q_handle = vsi->tx_rings[queue_index]->q_handle; + tc = ice_dcb_get_tc(vsi, queue_index); + ++ vsi = ice_locate_vsi_using_queue(vsi, queue_index); ++ if (!vsi) { ++ netdev_err(netdev, "Invalid VSI for given queue %d\n", ++ queue_index); ++ return -EINVAL; ++ } ++ + /* Set BW back to default, when user set maxrate to 0 */ + if (!maxrate) + status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc, +diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +index d1a31f236d26a..8578dc1cb967d 100644 +--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c +@@ -735,17 +735,16 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) + /** + * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action) + * @vsi: Pointer to VSI +- * @tc_fltr: Pointer to tc_flower_filter ++ * @queue: Queue index + * +- * Locate the VSI using specified queue. When ADQ is not enabled, always +- * return input VSI, otherwise locate corresponding VSI based on per channel +- * offset and qcount ++ * Locate the VSI using specified "queue". 
When ADQ is not enabled, ++ * always return input VSI, otherwise locate corresponding ++ * VSI based on per channel "offset" and "qcount" + */ +-static struct ice_vsi * +-ice_locate_vsi_using_queue(struct ice_vsi *vsi, +- struct ice_tc_flower_fltr *tc_fltr) ++struct ice_vsi * ++ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue) + { +- int num_tc, tc, queue; ++ int num_tc, tc; + + /* if ADQ is not active, passed VSI is the candidate VSI */ + if (!ice_is_adq_active(vsi->back)) +@@ -755,7 +754,6 @@ ice_locate_vsi_using_queue(struct ice_vsi *vsi, + * upon queue number) + */ + num_tc = vsi->mqprio_qopt.qopt.num_tc; +- queue = tc_fltr->action.fwd.q.queue; + + for (tc = 0; tc < num_tc; tc++) { + int qcount = vsi->mqprio_qopt.qopt.count[tc]; +@@ -797,6 +795,7 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) + struct ice_pf *pf = vsi->back; + struct device *dev; + u32 tc_class; ++ int q; + + dev = ice_pf_to_dev(pf); + +@@ -825,7 +824,8 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) + /* Determine destination VSI even though the action is + * FWD_TO_QUEUE, because QUEUE is associated with VSI + */ +- dest_vsi = tc_fltr->dest_vsi; ++ q = tc_fltr->action.fwd.q.queue; ++ dest_vsi = ice_locate_vsi_using_queue(vsi, q); + break; + default: + dev_err(dev, +@@ -1702,7 +1702,7 @@ ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, + /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare + * ADQ switch filter + */ +- ch_vsi = ice_locate_vsi_using_queue(vsi, fltr); ++ ch_vsi = ice_locate_vsi_using_queue(vsi, fltr->action.fwd.q.queue); + if (!ch_vsi) + return -EINVAL; + fltr->dest_vsi = ch_vsi; +diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h +index 8d5e22ac7023c..189c73d885356 100644 +--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h ++++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h +@@ -203,6 +203,7 @@ static 
inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) + return pf->num_dmac_chnl_fltrs; + } + ++struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue); + int + ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower); +-- +2.39.2 + diff --git a/queue-6.4/icmp6-fix-null-ptr-deref-of-ip6_null_entry-rt6i_idev.patch b/queue-6.4/icmp6-fix-null-ptr-deref-of-ip6_null_entry-rt6i_idev.patch new file mode 100644 index 00000000000..ca6c596b525 --- /dev/null +++ b/queue-6.4/icmp6-fix-null-ptr-deref-of-ip6_null_entry-rt6i_idev.patch @@ -0,0 +1,145 @@ +From a22ee92cac3b600e237a63c814daa71e2083eb91 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 18:43:27 -0700 +Subject: icmp6: Fix null-ptr-deref of ip6_null_entry->rt6i_idev in + icmp6_dev(). + +From: Kuniyuki Iwashima + +[ Upstream commit 2aaa8a15de73874847d62eb595c6683bface80fd ] + +With some IPv6 Ext Hdr (RPL, SRv6, etc.), we can send a packet that +has the link-local address as src and dst IP and will be forwarded to +an external IP in the IPv6 Ext Hdr. + +For example, the script below generates a packet whose src IP is the +link-local address and dst is updated to 11::. + + # for f in $(find /proc/sys/net/ -name *seg6_enabled*); do echo 1 > $f; done + # python3 + >>> from socket import * + >>> from scapy.all import * + >>> + >>> SRC_ADDR = DST_ADDR = "fe80::5054:ff:fe12:3456" + >>> + >>> pkt = IPv6(src=SRC_ADDR, dst=DST_ADDR) + >>> pkt /= IPv6ExtHdrSegmentRouting(type=4, addresses=["11::", "22::"], segleft=1) + >>> + >>> sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW) + >>> sk.sendto(bytes(pkt), (DST_ADDR, 0)) + +For such a packet, we call ip6_route_input() to look up a route for the +next destination in these three functions depending on the header type. + + * ipv6_rthdr_rcv() + * ipv6_rpl_srh_rcv() + * ipv6_srh_rcv() + +If no route is found, ip6_null_entry is set to skb, and the following +dst_input(skb) calls ip6_pkt_drop(). 
+ +Finally, in icmp6_dev(), we dereference skb_rt6_info(skb)->rt6i_idev->dev +as the input device is the loopback interface. Then, we have to check if +skb_rt6_info(skb)->rt6i_idev is NULL or not to avoid NULL pointer deref +for ip6_null_entry. + +BUG: kernel NULL pointer dereference, address: 0000000000000000 + PF: supervisor read access in kernel mode + PF: error_code(0x0000) - not-present page +PGD 0 P4D 0 +Oops: 0000 [#1] PREEMPT SMP PTI +CPU: 0 PID: 157 Comm: python3 Not tainted 6.4.0-11996-gb121d614371c #35 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503) +Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01 +RSP: 0018:ffffc90000003c70 EFLAGS: 00000286 +RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0 +RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18 +RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001 +R10: ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10 +R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0 +FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0 +PKRU: 55555554 +Call Trace: + + ip6_pkt_drop (net/ipv6/route.c:4513) + ipv6_rthdr_rcv (net/ipv6/exthdrs.c:640 net/ipv6/exthdrs.c:686) + ip6_protocol_deliver_rcu (net/ipv6/ip6_input.c:437 (discriminator 5)) + ip6_input_finish (./include/linux/rcupdate.h:781 net/ipv6/ip6_input.c:483) + __netif_receive_skb_one_core (net/core/dev.c:5455) + process_backlog (./include/linux/rcupdate.h:781 net/core/dev.c:5895) + __napi_poll (net/core/dev.c:6460) + net_rx_action (net/core/dev.c:6529 net/core/dev.c:6660) + __do_softirq 
(./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) + do_softirq (kernel/softirq.c:454 kernel/softirq.c:441) + + + __local_bh_enable_ip (kernel/softirq.c:381) + __dev_queue_xmit (net/core/dev.c:4231) + ip6_finish_output2 (./include/net/neighbour.h:544 net/ipv6/ip6_output.c:135) + rawv6_sendmsg (./include/net/dst.h:458 ./include/linux/netfilter.h:303 net/ipv6/raw.c:656 net/ipv6/raw.c:914) + sock_sendmsg (net/socket.c:725 net/socket.c:748) + __sys_sendto (net/socket.c:2134) + __x64_sys_sendto (net/socket.c:2146 net/socket.c:2142 net/socket.c:2142) + do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) +RIP: 0033:0x7f9dc751baea +Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 +RSP: 002b:00007ffe98712c38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 00007ffe98712cf8 RCX: 00007f9dc751baea +RDX: 0000000000000060 RSI: 00007f9dc6460b90 RDI: 0000000000000003 +RBP: 00007f9dc56e8be0 R08: 00007ffe98712d70 R09: 000000000000001c +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007f9dc6af5d1b + +Modules linked in: +CR2: 0000000000000000 + ---[ end trace 0000000000000000 ]--- +RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503) +Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01 +RSP: 0018:ffffc90000003c70 EFLAGS: 00000286 +RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0 +RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18 +RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001 +R10: 
ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10 +R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0 +FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0 +PKRU: 55555554 +Kernel panic - not syncing: Fatal exception in interrupt +Kernel Offset: disabled + +Fixes: 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address") +Reported-by: Wang Yufen +Closes: https://lore.kernel.org/netdev/c41403a9-c2f6-3b7e-0c96-e1901e605cd0@huawei.com/ +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: David Ahern +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/icmp.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c +index 9edf1f45b1ed6..65fa5014bc85e 100644 +--- a/net/ipv6/icmp.c ++++ b/net/ipv6/icmp.c +@@ -424,7 +424,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb) + if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { + const struct rt6_info *rt6 = skb_rt6_info(skb); + +- if (rt6) ++ /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.), ++ * and ip6_null_entry could be set to skb if no route is found. 
++ */ ++ if (rt6 && rt6->rt6i_idev) + dev = rt6->rt6i_idev->dev; + } + +-- +2.39.2 + diff --git a/queue-6.4/igc-add-condition-for-qbv_config_change_errors-count.patch b/queue-6.4/igc-add-condition-for-qbv_config_change_errors-count.patch new file mode 100644 index 00000000000..958e18aa681 --- /dev/null +++ b/queue-6.4/igc-add-condition-for-qbv_config_change_errors-count.patch @@ -0,0 +1,66 @@ +From b34edb600747de56af3d2abd046a7ba09ca8a345 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 15 May 2023 14:03:36 +0800 +Subject: igc: Add condition for qbv_config_change_errors counter + +From: Muhammad Husaini Zulkifli + +[ Upstream commit ed89b74d2dc920cb61d3094e0e97ec8775b13086 ] + +Add condition to increase the qbv counter during taprio qbv +configuration only. + +There might be a case when TC already been setup then user configure +the ETF/CBS qdisc and this counter will increase if no condition above. + +Fixes: ae4fe4698300 ("igc: Add qbv_config_change_errors counter") +Signed-off-by: Muhammad Husaini Zulkifli +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc.h | 1 + + drivers/net/ethernet/intel/igc/igc_main.c | 2 ++ + drivers/net/ethernet/intel/igc/igc_tsn.c | 1 + + 3 files changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h +index 9dc9b982a7ea6..9902f726f06a9 100644 +--- a/drivers/net/ethernet/intel/igc/igc.h ++++ b/drivers/net/ethernet/intel/igc/igc.h +@@ -184,6 +184,7 @@ struct igc_adapter { + u32 max_frame_size; + u32 min_frame_size; + ++ int tc_setup_type; + ktime_t base_time; + ktime_t cycle_time; + bool qbv_enable; +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 5f2e8bcd75973..a8815ccf7887d 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -6295,6 +6295,8 @@ static int igc_setup_tc(struct net_device 
*dev, enum tc_setup_type type, + { + struct igc_adapter *adapter = netdev_priv(dev); + ++ adapter->tc_setup_type = type; ++ + switch (type) { + case TC_QUERY_CAPS: + return igc_tc_query_caps(adapter, type_data); +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c +index 94a2b0dfb54d4..6b299b83e7ef2 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -249,6 +249,7 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) + * Gate Control List (GCL) is running. + */ + if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && ++ (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && + tsn_mode_reconfig) + adapter->qbv_config_change_errors++; + } else { +-- +2.39.2 + diff --git a/queue-6.4/igc-add-igc_xdp_buff-wrapper-for-xdp_buff-in-driver.patch b/queue-6.4/igc-add-igc_xdp_buff-wrapper-for-xdp_buff-in-driver.patch new file mode 100644 index 00000000000..639a98e562e --- /dev/null +++ b/queue-6.4/igc-add-igc_xdp_buff-wrapper-for-xdp_buff-in-driver.patch @@ -0,0 +1,104 @@ +From 8c665525c75977297efb957a7a7e51b89b5ca546 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Apr 2023 15:30:47 +0200 +Subject: igc: Add igc_xdp_buff wrapper for xdp_buff in driver + +From: Jesper Dangaard Brouer + +[ Upstream commit 73b7123de0cfa4f6609677e927ab02cb05b593c2 ] + +Driver specific metadata data for XDP-hints kfuncs are propagated via tail +extending the struct xdp_buff with a locally scoped driver struct. + +Zero-Copy AF_XDP/XSK does similar tricks via struct xdp_buff_xsk. This +xdp_buff_xsk struct contains a CB area (24 bytes) that can be used for +extending the locally scoped driver into. The XSK_CHECK_PRIV_TYPE define +catch size violations build time. + +The changes needed for AF_XDP zero-copy in igc_clean_rx_irq_zc() +is done in next patch, because the member rx_desc isn't available +at this point. 
+ +Signed-off-by: Jesper Dangaard Brouer +Signed-off-by: Daniel Borkmann +Acked-by: Song Yoong Siang +Link: https://lore.kernel.org/bpf/168182464779.616355.3761989884165609387.stgit@firesoul +Stable-dep-of: 175c241288c0 ("igc: Fix TX Hang issue when QBV Gate is closed") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc.h | 5 +++++ + drivers/net/ethernet/intel/igc/igc_main.c | 16 +++++++++------- + 2 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h +index 9902f726f06a9..3bb48840a249e 100644 +--- a/drivers/net/ethernet/intel/igc/igc.h ++++ b/drivers/net/ethernet/intel/igc/igc.h +@@ -502,6 +502,11 @@ struct igc_rx_buffer { + }; + }; + ++/* context wrapper around xdp_buff to provide access to descriptor metadata */ ++struct igc_xdp_buff { ++ struct xdp_buff xdp; ++}; ++ + struct igc_q_vector { + struct igc_adapter *adapter; /* backlink */ + void __iomem *itr_register; +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index b131c8f2b03df..c6169357f72fc 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -2246,6 +2246,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) + if (!count) + return ok; + ++ XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); ++ + desc = IGC_RX_DESC(ring, i); + bi = &ring->rx_buffer_info[i]; + i -= ring->count; +@@ -2530,8 +2532,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) + union igc_adv_rx_desc *rx_desc; + struct igc_rx_buffer *rx_buffer; + unsigned int size, truesize; ++ struct igc_xdp_buff ctx; + ktime_t timestamp = 0; +- struct xdp_buff xdp; + int pkt_offset = 0; + void *pktbuf; + +@@ -2565,13 +2567,13 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) + } + + if (!skb) { +- xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); +- 
xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), ++ xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); ++ xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), + igc_rx_offset(rx_ring) + pkt_offset, + size, true); +- xdp_buff_clear_frags_flag(&xdp); ++ xdp_buff_clear_frags_flag(&ctx.xdp); + +- skb = igc_xdp_run_prog(adapter, &xdp); ++ skb = igc_xdp_run_prog(adapter, &ctx.xdp); + } + + if (IS_ERR(skb)) { +@@ -2593,9 +2595,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) + } else if (skb) + igc_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) +- skb = igc_build_skb(rx_ring, rx_buffer, &xdp); ++ skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); + else +- skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, ++ skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp, + timestamp); + + /* exit if we failed to retrieve a buffer */ +-- +2.39.2 + diff --git a/queue-6.4/igc-add-xdp-hints-kfuncs-for-rx-hash.patch b/queue-6.4/igc-add-xdp-hints-kfuncs-for-rx-hash.patch new file mode 100644 index 00000000000..1f854268990 --- /dev/null +++ b/queue-6.4/igc-add-xdp-hints-kfuncs-for-rx-hash.patch @@ -0,0 +1,145 @@ +From 8f7e2ee37d15e7153091b9b0842b2e9cab18e3c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Apr 2023 15:30:52 +0200 +Subject: igc: Add XDP hints kfuncs for RX hash + +From: Jesper Dangaard Brouer + +[ Upstream commit 8416814fffa9cfa74c18da149f522dd9e1850987 ] + +This implements XDP hints kfunc for RX-hash (xmo_rx_hash). +The HW rss hash type is handled via mapping table. + +This igc driver (default config) does L3 hashing for UDP packets +(excludes UDP src/dest ports in hash calc). Meaning RSS hash type is +L3 based. Tested that the igc_rss_type_num for UDP is either +IGC_RSS_TYPE_HASH_IPV4 or IGC_RSS_TYPE_HASH_IPV6. + +This patch also updates AF_XDP zero-copy function igc_clean_rx_irq_zc() +to use the xdp_buff wrapper struct igc_xdp_buff. 
+ +Signed-off-by: Jesper Dangaard Brouer +Signed-off-by: Daniel Borkmann +Acked-by: Song Yoong Siang +Link: https://lore.kernel.org/bpf/168182465285.616355.2701740913376314790.stgit@firesoul +Stable-dep-of: 175c241288c0 ("igc: Fix TX Hang issue when QBV Gate is closed") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc.h | 1 + + drivers/net/ethernet/intel/igc/igc_main.c | 53 +++++++++++++++++++++++ + 2 files changed, 54 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h +index 3bb48840a249e..f09c6a65e3ab8 100644 +--- a/drivers/net/ethernet/intel/igc/igc.h ++++ b/drivers/net/ethernet/intel/igc/igc.h +@@ -505,6 +505,7 @@ struct igc_rx_buffer { + /* context wrapper around xdp_buff to provide access to descriptor metadata */ + struct igc_xdp_buff { + struct xdp_buff xdp; ++ union igc_adv_rx_desc *rx_desc; + }; + + struct igc_q_vector { +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index c6169357f72fc..c0e21701e7817 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -2572,6 +2572,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) + igc_rx_offset(rx_ring) + pkt_offset, + size, true); + xdp_buff_clear_frags_flag(&ctx.xdp); ++ ctx.rx_desc = rx_desc; + + skb = igc_xdp_run_prog(adapter, &ctx.xdp); + } +@@ -2698,6 +2699,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, + napi_gro_receive(&q_vector->napi, skb); + } + ++static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) ++{ ++ /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. 
The ++ * igc_xdp_buff shares its layout with xdp_buff_xsk and private ++ * igc_xdp_buff fields fall into xdp_buff_xsk->cb ++ */ ++ return (struct igc_xdp_buff *)xdp; ++} ++ + static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) + { + struct igc_adapter *adapter = q_vector->adapter; +@@ -2716,6 +2726,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) + while (likely(total_packets < budget)) { + union igc_adv_rx_desc *desc; + struct igc_rx_buffer *bi; ++ struct igc_xdp_buff *ctx; + ktime_t timestamp = 0; + unsigned int size; + int res; +@@ -2733,6 +2744,9 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) + + bi = &ring->rx_buffer_info[ntc]; + ++ ctx = xsk_buff_to_igc_ctx(bi->xdp); ++ ctx->rx_desc = desc; ++ + if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { + timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, + bi->xdp->data); +@@ -6490,6 +6504,44 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) + return value; + } + ++/* Mapping HW RSS Type to enum xdp_rss_hash_type */ ++static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { ++ [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, ++ [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, ++ [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, ++ [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, ++ [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, ++ [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, ++ [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, ++ [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, ++ [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, ++ [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, ++ [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ ++ [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ ++ [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ ++ [13] = XDP_RSS_TYPE_NONE, ++ [14] = XDP_RSS_TYPE_NONE, ++ [15] = 
XDP_RSS_TYPE_NONE, ++}; ++ ++static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, ++ enum xdp_rss_hash_type *rss_type) ++{ ++ const struct igc_xdp_buff *ctx = (void *)_ctx; ++ ++ if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) ++ return -ENODATA; ++ ++ *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); ++ *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; ++ ++ return 0; ++} ++ ++static const struct xdp_metadata_ops igc_xdp_metadata_ops = { ++ .xmo_rx_hash = igc_xdp_rx_hash, ++}; ++ + /** + * igc_probe - Device Initialization Routine + * @pdev: PCI device information struct +@@ -6563,6 +6615,7 @@ static int igc_probe(struct pci_dev *pdev, + hw->hw_addr = adapter->io_addr; + + netdev->netdev_ops = &igc_netdev_ops; ++ netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; + igc_ethtool_set_ops(netdev); + netdev->watchdog_timeo = 5 * HZ; + +-- +2.39.2 + diff --git a/queue-6.4/igc-do-not-enable-taprio-offload-for-invalid-argumen.patch b/queue-6.4/igc-do-not-enable-taprio-offload-for-invalid-argumen.patch new file mode 100644 index 00000000000..b7e158530c6 --- /dev/null +++ b/queue-6.4/igc-do-not-enable-taprio-offload-for-invalid-argumen.patch @@ -0,0 +1,109 @@ +From 2c4ff73ed926e50f603809bb4611a7f59b76753e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Jun 2023 16:07:10 +0200 +Subject: igc: Do not enable taprio offload for invalid arguments + +From: Florian Kauer + +[ Upstream commit 82ff5f29b7377d614f0c01fd74b5d0cb225f0adc ] + +Only set adapter->taprio_offload_enable after validating the arguments. +Otherwise, it stays set even if the offload was not enabled. +Since the subsequent code does not get executed in case of invalid +arguments, it will not be read at first. +However, by activating and then deactivating another offload +(e.g. ETF/TX launchtime offload), taprio_offload_enable is read +and erroneously keeps the offload feature of the NIC enabled. 
+ +This can be reproduced as follows: + + # TAPRIO offload (flags == 0x2) and negative base-time leading to expected -ERANGE + sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ + num_tc 1 \ + map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ + queues 1@0 \ + base-time -1000 \ + sched-entry S 01 300000 \ + flags 0x2 + + # IGC_TQAVCTRL is 0x0 as expected (iomem=relaxed for reading register) + sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 + + # Activate ETF offload + sudo tc qdisc replace dev enp1s0 parent root handle 6666 mqprio \ + num_tc 3 \ + map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \ + queues 1@0 1@1 2@2 \ + hw 0 + sudo tc qdisc add dev enp1s0 parent 6666:1 etf \ + clockid CLOCK_TAI \ + delta 500000 \ + offload + + # IGC_TQAVCTRL is 0x9 as expected + sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 + + # Deactivate ETF offload again + sudo tc qdisc delete dev enp1s0 parent 6666:1 + + # IGC_TQAVCTRL should now be 0x0 again, but is observed as 0x9 + sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 + +Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") +Signed-off-by: Florian Kauer +Reviewed-by: Kurt Kanzenbach +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 18 ++++++------------ + 1 file changed, 6 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 6bed12224120f..f051ca733af1b 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -6090,6 +6090,7 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) + + adapter->base_time = 0; + adapter->cycle_time = NSEC_PER_SEC; ++ adapter->taprio_offload_enable = false; + adapter->qbv_config_change_errors = 0; + adapter->qbv_transition = false; + adapter->qbv_count = 0; +@@ -6117,20 +6118,12 @@ static int 
igc_save_qbv_schedule(struct igc_adapter *adapter, + size_t n; + int i; + +- switch (qopt->cmd) { +- case TAPRIO_CMD_REPLACE: +- adapter->taprio_offload_enable = true; +- break; +- case TAPRIO_CMD_DESTROY: +- adapter->taprio_offload_enable = false; +- break; +- default: +- return -EOPNOTSUPP; +- } +- +- if (!adapter->taprio_offload_enable) ++ if (qopt->cmd == TAPRIO_CMD_DESTROY) + return igc_tsn_clear_schedule(adapter); + ++ if (qopt->cmd != TAPRIO_CMD_REPLACE) ++ return -EOPNOTSUPP; ++ + if (qopt->base_time < 0) + return -ERANGE; + +@@ -6142,6 +6135,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + + adapter->cycle_time = qopt->cycle_time; + adapter->base_time = qopt->base_time; ++ adapter->taprio_offload_enable = true; + + igc_ptp_read(adapter, &now); + +-- +2.39.2 + diff --git a/queue-6.4/igc-fix-inserting-of-empty-frame-for-launchtime.patch b/queue-6.4/igc-fix-inserting-of-empty-frame-for-launchtime.patch new file mode 100644 index 00000000000..20fa4e6c94a --- /dev/null +++ b/queue-6.4/igc-fix-inserting-of-empty-frame-for-launchtime.patch @@ -0,0 +1,128 @@ +From 6b68b0e96e9c61cab6fdc99d116c0f3373e355f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Jun 2023 16:07:14 +0200 +Subject: igc: Fix inserting of empty frame for launchtime + +From: Florian Kauer + +[ Upstream commit 0bcc62858d6ba62cbade957d69745e6adeed5f3d ] + +The insertion of an empty frame was introduced with +commit db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") +in order to ensure that the current cycle has at least one packet if +there is some packet to be scheduled for the next cycle. + +However, the current implementation does not properly check if +a packet is already scheduled for the current cycle. 
Currently, +an empty packet is always inserted if and only if +txtime >= end_of_cycle && txtime > last_tx_cycle +but since last_tx_cycle is always either the end of the current +cycle (end_of_cycle) or the end of a previous cycle, the +second part (txtime > last_tx_cycle) is always true unless +txtime == last_tx_cycle. + +What actually needs to be checked here is if the last_tx_cycle +was already written within the current cycle, so an empty frame +should only be inserted if and only if +txtime >= end_of_cycle && end_of_cycle > last_tx_cycle. + +This patch does not only avoid an unnecessary insertion, but it +can actually be harmful to insert an empty packet if packets +are already scheduled in the current cycle, because it can lead +to a situation where the empty packet is actually processed +as the first packet in the upcoming cycle shifting the packet +with the first_flag even one cycle into the future, finally leading +to a TX hang. + +The TX hang can be reproduced on a i225 with: + + sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ + num_tc 1 \ + map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ + queues 1@0 \ + base-time 0 \ + sched-entry S 01 300000 \ + flags 0x1 \ + txtime-delay 500000 \ + clockid CLOCK_TAI + sudo tc qdisc replace dev enp1s0 parent 100:1 etf \ + clockid CLOCK_TAI \ + delta 500000 \ + offload \ + skip_sock_check + +and traffic generator + + sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns + +with traffic.cfg + + #define ETH_P_IP 0x0800 + + { + /* Ethernet Header */ + 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed + 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed + const16(ETH_P_IP), + + /* IPv4 Header */ + 0b01000101, 0, # IPv4 version, IHL, TOS + const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) + const16(2), # IPv4 ident + 0b01000000, 0, # IPv4 flags, fragmentation off + 64, # IPv4 TTL + 17, # Protocol UDP + csumip(14, 33), # IPv4 checksum + + /* UDP Header */ + 10, 0, 
48, 1, # IP Src - adapt as needed + 10, 0, 48, 10, # IP Dest - adapt as needed + const16(5555), # UDP Src Port + const16(6666), # UDP Dest Port + const16(1008), # UDP length (UDP header 8 bytes + payload length) + csumudp(14, 34), # UDP checksum + + /* Payload */ + fill('W', 1000), + } + +and the observed message with that is for example + + igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang + Tx Queue <0> + TDH <32> + TDT <3c> + next_to_use <3c> + next_to_clean <32> + buffer_info[next_to_clean] + time_stamp + next_to_watch <00000000632a1828> + jiffies + desc.status <1048000> + +Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") +Signed-off-by: Florian Kauer +Reviewed-by: Kurt Kanzenbach +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 96a2f6e6f6b8a..44aa4342cbbb5 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -1029,7 +1029,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, + *first_flag = true; + ring->last_ff_cycle = baset_est; + +- if (ktime_compare(txtime, ring->last_tx_cycle) > 0) ++ if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) + *insert_empty = true; + } + } +-- +2.39.2 + diff --git a/queue-6.4/igc-fix-launchtime-before-start-of-cycle.patch b/queue-6.4/igc-fix-launchtime-before-start-of-cycle.patch new file mode 100644 index 00000000000..e9b033a1744 --- /dev/null +++ b/queue-6.4/igc-fix-launchtime-before-start-of-cycle.patch @@ -0,0 +1,46 @@ +From eb725bbffeb2dd4b8cf1e08c265041dc518ca66b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Jun 2023 16:07:13 +0200 +Subject: igc: Fix launchtime before start of cycle + +From: Florian Kauer + +[ Upstream commit 
c1bca9ac0bcb355be11354c2e68bc7bf31f5ac5a ] + +It is possible (verified on a running system) that frames are processed +by igc_tx_launchtime with a txtime before the start of the cycle +(baset_est). + +However, the result of txtime - baset_est is written into a u32, +leading to a wrap around to a positive number. The following +launchtime > 0 check will only branch to executing launchtime = 0 +if launchtime is already 0. + +Fix it by using a s32 before checking launchtime > 0. + +Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") +Signed-off-by: Florian Kauer +Reviewed-by: Kurt Kanzenbach +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 97eb3c390de9a..96a2f6e6f6b8a 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -1016,7 +1016,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, + ktime_t base_time = adapter->base_time; + ktime_t now = ktime_get_clocktai(); + ktime_t baset_est, end_of_cycle; +- u32 launchtime; ++ s32 launchtime; + s64 n; + + n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); +-- +2.39.2 + diff --git a/queue-6.4/igc-fix-tx-hang-issue-when-qbv-gate-is-closed.patch b/queue-6.4/igc-fix-tx-hang-issue-when-qbv-gate-is-closed.patch new file mode 100644 index 00000000000..37fe891ae56 --- /dev/null +++ b/queue-6.4/igc-fix-tx-hang-issue-when-qbv-gate-is-closed.patch @@ -0,0 +1,316 @@ +From 42ea27f8a5e10b6150f8d50d0ee4ea639c8892e7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 3 Jun 2023 20:59:34 +0800 +Subject: igc: Fix TX Hang issue when QBV Gate is closed + +From: Muhammad Husaini Zulkifli + +[ Upstream commit 175c241288c09f81eb7b44d65c1ef6045efa4d1a ] + +If a user schedules a Gate Control List 
(GCL) to close one of +the QBV gates while also transmitting a packet to that closed gate, +a TX hang will happen. The HW does not drop any packet when the gate +is closed and keeps queuing packets in the HW TX FIFO until the gate is re-opened. +This patch implements the solution to drop the packet for the closed +gate. + +This patch will also reset the adapter to perform SW initialization +for each 1st Gate Control List (GCL) to avoid a hang. +This is due to the HW design, where changing to TSN transmit mode +requires SW initialization. Intel Discrete I225/6 transmit mode +cannot be changed when in dynamic mode according to Software User +Manual Section 7.5.2.1. Subsequent Gate Control List (GCL) operations +will proceed without a reset, as they already are in TSN Mode. + +Steps to reproduce: + +DUT: +1) Configure the GCL with a certain gate closed. + +BASE=$(date +%s%N) +tc qdisc replace dev $IFACE parent root handle 100 taprio \ + num_tc 4 \ + map 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ + queues 1@0 1@1 1@2 1@3 \ + base-time $BASE \ + sched-entry S 0x8 500000 \ + sched-entry S 0x4 500000 \ + flags 0x2 + +2) Transmit a packet to the closed gate. You may use the udp_tai +application to transmit a UDP packet to any of the closed gates.
+ +./udp_tai -i -P 100000 -p 90 -c 1 -t <0/1> -u 30004 + +Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading") +Co-developed-by: Tan Tee Min +Signed-off-by: Tan Tee Min +Tested-by: Chwee Lin Choong +Signed-off-by: Muhammad Husaini Zulkifli +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc.h | 6 +++ + drivers/net/ethernet/intel/igc/igc_main.c | 58 +++++++++++++++++++++-- + drivers/net/ethernet/intel/igc/igc_tsn.c | 41 ++++++++++------ + 3 files changed, 87 insertions(+), 18 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h +index f09c6a65e3ab8..c0a07af36cb23 100644 +--- a/drivers/net/ethernet/intel/igc/igc.h ++++ b/drivers/net/ethernet/intel/igc/igc.h +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #include "igc_hw.h" + +@@ -101,6 +102,8 @@ struct igc_ring { + u32 start_time; + u32 end_time; + u32 max_sdu; ++ bool oper_gate_closed; /* Operating gate. True if the TX Queue is closed */ ++ bool admin_gate_closed; /* Future gate. 
True if the TX Queue will be closed */ + + /* CBS parameters */ + bool cbs_enable; /* indicates if CBS is enabled */ +@@ -160,6 +163,7 @@ struct igc_adapter { + struct timer_list watchdog_timer; + struct timer_list dma_err_timer; + struct timer_list phy_info_timer; ++ struct hrtimer hrtimer; + + u32 wol; + u32 en_mng_pt; +@@ -189,6 +193,8 @@ struct igc_adapter { + ktime_t cycle_time; + bool qbv_enable; + u32 qbv_config_change_errors; ++ bool qbv_transition; ++ unsigned int qbv_count; + + /* OS defined structs */ + struct pci_dev *pdev; +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index c0e21701e7817..826556e609800 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -1572,6 +1572,9 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + first->bytecount = skb->len; + first->gso_segs = 1; + ++ if (adapter->qbv_transition || tx_ring->oper_gate_closed) ++ goto out_drop; ++ + if (tx_ring->max_sdu > 0) { + u32 max_sdu = 0; + +@@ -3004,8 +3007,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) + time_after(jiffies, tx_buffer->time_stamp + + (adapter->tx_timeout_factor * HZ)) && + !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && +- (rd32(IGC_TDH(tx_ring->reg_idx)) != +- readl(tx_ring->tail))) { ++ (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && ++ !tx_ring->oper_gate_closed) { + /* detected Tx unit hang */ + netdev_err(tx_ring->netdev, + "Detected Tx Unit Hang\n" +@@ -6095,6 +6098,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) + adapter->base_time = 0; + adapter->cycle_time = NSEC_PER_SEC; + adapter->qbv_config_change_errors = 0; ++ adapter->qbv_transition = false; ++ adapter->qbv_count = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; +@@ -6102,6 +6107,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) + ring->start_time = 0; 
+ ring->end_time = NSEC_PER_SEC; + ring->max_sdu = 0; ++ ring->oper_gate_closed = false; ++ ring->admin_gate_closed = false; + } + + return 0; +@@ -6113,6 +6120,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + bool queue_configured[IGC_MAX_TX_QUEUES] = { }; + struct igc_hw *hw = &adapter->hw; + u32 start_time = 0, end_time = 0; ++ struct timespec64 now; + size_t n; + int i; + +@@ -6133,6 +6141,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + adapter->cycle_time = qopt->cycle_time; + adapter->base_time = qopt->base_time; + ++ igc_ptp_read(adapter, &now); ++ + for (n = 0; n < qopt->num_entries; n++) { + struct tc_taprio_sched_entry *e = &qopt->entries[n]; + +@@ -6167,7 +6177,10 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + ring->start_time = start_time; + ring->end_time = end_time; + +- queue_configured[i] = true; ++ if (ring->start_time >= adapter->cycle_time) ++ queue_configured[i] = false; ++ else ++ queue_configured[i] = true; + } + + start_time += e->interval; +@@ -6177,8 +6190,20 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + * If not, set the start and end time to be end time. 
+ */ + for (i = 0; i < adapter->num_tx_queues; i++) { ++ struct igc_ring *ring = adapter->tx_ring[i]; ++ ++ if (!is_base_time_past(qopt->base_time, &now)) { ++ ring->admin_gate_closed = false; ++ } else { ++ ring->oper_gate_closed = false; ++ ring->admin_gate_closed = false; ++ } ++ + if (!queue_configured[i]) { +- struct igc_ring *ring = adapter->tx_ring[i]; ++ if (!is_base_time_past(qopt->base_time, &now)) ++ ring->admin_gate_closed = true; ++ else ++ ring->oper_gate_closed = true; + + ring->start_time = end_time; + ring->end_time = end_time; +@@ -6542,6 +6567,27 @@ static const struct xdp_metadata_ops igc_xdp_metadata_ops = { + .xmo_rx_hash = igc_xdp_rx_hash, + }; + ++static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) ++{ ++ struct igc_adapter *adapter = container_of(timer, struct igc_adapter, ++ hrtimer); ++ unsigned int i; ++ ++ adapter->qbv_transition = true; ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ struct igc_ring *tx_ring = adapter->tx_ring[i]; ++ ++ if (tx_ring->admin_gate_closed) { ++ tx_ring->admin_gate_closed = false; ++ tx_ring->oper_gate_closed = true; ++ } else { ++ tx_ring->oper_gate_closed = false; ++ } ++ } ++ adapter->qbv_transition = false; ++ return HRTIMER_NORESTART; ++} ++ + /** + * igc_probe - Device Initialization Routine + * @pdev: PCI device information struct +@@ -6720,6 +6766,9 @@ static int igc_probe(struct pci_dev *pdev, + INIT_WORK(&adapter->reset_task, igc_reset_task); + INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); + ++ hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ adapter->hrtimer.function = &igc_qbv_scheduling_timer; ++ + /* Initialize link properties that are user-changeable */ + adapter->fc_autoneg = true; + hw->mac.autoneg = true; +@@ -6823,6 +6872,7 @@ static void igc_remove(struct pci_dev *pdev) + + cancel_work_sync(&adapter->reset_task); + cancel_work_sync(&adapter->watchdog_task); ++ hrtimer_cancel(&adapter->hrtimer); + + /* Release control of h/w to 
f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c +index 6b299b83e7ef2..3cdb0c9887283 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -114,7 +114,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) + static int igc_tsn_enable_offload(struct igc_adapter *adapter) + { + struct igc_hw *hw = &adapter->hw; +- bool tsn_mode_reconfig = false; + u32 tqavctrl, baset_l, baset_h; + u32 sec, nsec, cycle; + ktime_t base_time, systim; +@@ -228,11 +227,10 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) + + tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; + +- if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN) +- tsn_mode_reconfig = true; +- + tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + ++ adapter->qbv_count++; ++ + cycle = adapter->cycle_time; + base_time = adapter->base_time; + +@@ -250,17 +248,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) + */ + if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && +- tsn_mode_reconfig) ++ (adapter->qbv_count > 1)) + adapter->qbv_config_change_errors++; + } else { +- /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit +- * has to be configured before the cycle time and base time. +- * Tx won't hang if there is a GCL is already running, +- * so in this case we don't need to set FutScdDis. +- */ +- if (igc_is_device_id_i226(hw) && +- !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) +- tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; ++ if (igc_is_device_id_i226(hw)) { ++ ktime_t adjust_time, expires_time; ++ ++ /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit ++ * has to be configured before the cycle time and base time. 
++ * Tx won't hang if a GCL is already running, ++ * so in this case we don't need to set FutScdDis. ++ */ ++ if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) ++ tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; ++ ++ nsec = rd32(IGC_SYSTIML); ++ sec = rd32(IGC_SYSTIMH); ++ systim = ktime_set(sec, nsec); ++ ++ adjust_time = adapter->base_time; ++ expires_time = ktime_sub_ns(adjust_time, systim); ++ hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL); ++ } + } + + wr32(IGC_TQAVCTRL, tqavctrl); +@@ -306,7 +315,11 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter) + { + struct igc_hw *hw = &adapter->hw; + +- if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) { ++ /* Per I225/6 HW Design Section 7.5.2.1, transmit mode ++ * cannot be changed dynamically. Require reset the adapter. ++ */ ++ if (netif_running(adapter->netdev) && ++ (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { + schedule_work(&adapter->reset_task); + return 0; + } +-- +2.39.2 + diff --git a/queue-6.4/igc-handle-already-enabled-taprio-offload-for-baseti.patch b/queue-6.4/igc-handle-already-enabled-taprio-offload-for-baseti.patch new file mode 100644 index 00000000000..038e2f7b08a --- /dev/null +++ b/queue-6.4/igc-handle-already-enabled-taprio-offload-for-baseti.patch @@ -0,0 +1,62 @@ +From 5b7662266cd1b15b3162ed4f1c1bf7509118f885 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Jun 2023 16:07:11 +0200 +Subject: igc: Handle already enabled taprio offload for basetime 0 + +From: Florian Kauer + +[ Upstream commit e5d88c53d03f8df864776431175d08c053645f50 ] + +Since commit e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") +it is possible to enable taprio offload with a basetime of 0. +However, the check if taprio offload is already enabled (and thus -EALREADY +should be returned for igc_save_qbv_schedule) still relied on +adapter->base_time > 0. 
+ +This can be reproduced as follows: + + # TAPRIO offload (flags == 0x2) and base-time = 0 + sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ + num_tc 1 \ + map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ + queues 1@0 \ + base-time 0 \ + sched-entry S 01 300000 \ + flags 0x2 + + # The second call should fail with "Error: Device failed to setup taprio offload." + # But that only happens if base-time was != 0 + sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ + num_tc 1 \ + map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ + queues 1@0 \ + base-time 0 \ + sched-entry S 01 300000 \ + flags 0x2 + +Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") +Signed-off-by: Florian Kauer +Reviewed-by: Kurt Kanzenbach +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index f051ca733af1b..97eb3c390de9a 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -6127,7 +6127,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + if (qopt->base_time < 0) + return -ERANGE; + +- if (igc_is_device_id_i225(hw) && adapter->base_time) ++ if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) + return -EALREADY; + + if (!validate_schedule(adapter, qopt)) +-- +2.39.2 + diff --git a/queue-6.4/igc-handle-pps-start-time-programming-for-past-time-.patch b/queue-6.4/igc-handle-pps-start-time-programming-for-past-time-.patch new file mode 100644 index 00000000000..2bcbc72c982 --- /dev/null +++ b/queue-6.4/igc-handle-pps-start-time-programming-for-past-time-.patch @@ -0,0 +1,109 @@ +From e831b1688eeeb1e282688d82ce7d06c77c59889d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 12:00:43 +0530 +Subject: igc: Handle PPS 
start time programming for past time values + +From: Aravindhan Gunasekaran + +[ Upstream commit 84a192e46106355de1a314d709e657231d4b1026 ] + +I225/6 hardware can be programmed to start PPS output once +the time in Target Time registers is reached. The time +programmed in these registers should always be into future. +Only then PPS output is triggered when SYSTIM register +reaches the programmed value. There are two modes in i225/6 +hardware to program PPS, pulse and clock mode. + +There were issues reported where PPS is not generated when +start time is in past. + +Example 1, "echo 0 0 0 2 0 > /sys/class/ptp/ptp0/period" + +In the current implementation, a value of '0' is programmed +into Target time registers and PPS output is in pulse mode. +Eventually an interrupt which is triggered upon SYSTIM +register reaching Target time is not fired. Thus no PPS +output is generated. + +Example 2, "echo 0 0 0 1 0 > /sys/class/ptp/ptp0/period" + +Above case, a value of '0' is programmed into Target time +registers and PPS output is in clock mode. Here, HW tries to +catch-up the current time by incrementing Target Time +register. This catch-up time seem to vary according to +programmed PPS period time as per the HW design. In my +experiments, the delay ranged between few tens of seconds to +few minutes. The PPS output is only generated after the +Target time register reaches current time. + +In my experiments, I also observed PPS stopped working with +below test and could not recover until module is removed and +loaded again. + +1) echo 0 0 1 0 > /sys/class/ptp/ptp1/period +2) echo 0 0 0 1 0 > /sys/class/ptp/ptp1/period +3) echo 0 0 0 1 0 > /sys/class/ptp/ptp1/period + +After this PPS did not work even if i re-program with proper +values. I could only get this back working by reloading the +driver. + +This patch takes care of calculating and programming +appropriate future time value into Target Time registers. 
+ +Fixes: 5e91c72e560c ("igc: Fix PPS delta between two synchronized end-points") +Signed-off-by: Aravindhan Gunasekaran +Reviewed-by: Muhammad Husaini Zulkifli +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_ptp.c | 25 +++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c +index 32ef112f8291a..f0b979a706552 100644 +--- a/drivers/net/ethernet/intel/igc/igc_ptp.c ++++ b/drivers/net/ethernet/intel/igc/igc_ptp.c +@@ -356,16 +356,35 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, + tsim &= ~IGC_TSICR_TT0; + } + if (on) { ++ struct timespec64 safe_start; + int i = rq->perout.index; + + igc_pin_perout(igc, i, pin, use_freq); +- igc->perout[i].start.tv_sec = rq->perout.start.sec; ++ igc_ptp_read(igc, &safe_start); ++ ++ /* PPS output start time is triggered by Target time(TT) ++ * register. Programming any past time value into TT ++ * register will cause PPS to never start. Need to make ++ * sure we program the TT register a time ahead in ++ * future. There isn't a stringent need to fire PPS out ++ * right away. Adding +2 seconds should take care of ++ * corner cases. Let's say if the SYSTIML is close to ++ * wrap up and the timer keeps ticking as we program the ++ * register, adding +2seconds is safe bet. ++ */ ++ safe_start.tv_sec += 2; ++ ++ if (rq->perout.start.sec < safe_start.tv_sec) ++ igc->perout[i].start.tv_sec = safe_start.tv_sec; ++ else ++ igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc->perout[i].start.tv_nsec = rq->perout.start.nsec; + igc->perout[i].period.tv_sec = ts.tv_sec; + igc->perout[i].period.tv_nsec = ts.tv_nsec; +- wr32(trgttimh, rq->perout.start.sec); ++ wr32(trgttimh, (u32)igc->perout[i].start.tv_sec); + /* For now, always select timer 0 as source. 
*/ +- wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); ++ wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec | ++ IGC_TT_IO_TIMER_SEL_SYSTIM0)); + if (use_freq) + wr32(freqout, ns); + tsauxc |= tsauxc_mask; +-- +2.39.2 + diff --git a/queue-6.4/igc-include-the-length-type-field-and-vlan-tag-in-qu.patch b/queue-6.4/igc-include-the-length-type-field-and-vlan-tag-in-qu.patch new file mode 100644 index 00000000000..b1af1cd350e --- /dev/null +++ b/queue-6.4/igc-include-the-length-type-field-and-vlan-tag-in-qu.patch @@ -0,0 +1,68 @@ +From 94e338939954b57bd1507d45a8dfdadaee968f5b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Jun 2023 11:28:42 +0800 +Subject: igc: Include the length/type field and VLAN tag in queueMaxSDU + +From: Tan Tee Min + +[ Upstream commit 25102893e409bc02761ab82dbcfa092006404790 ] + +IEEE 802.1Q does not have clear definitions of what constitutes an +SDU (Service Data Unit), but IEEE Std 802.3 clause 3.1.2 does define +the MAC service primitives and clause 3.2.7 does define the MAC Client +Data for Q-tagged frames. + +It shows that the mac_service_data_unit (MSDU) does NOT contain the +preamble, destination and source address, or FCS. The MSDU does contain +the length/type field, MAC client data, VLAN tag and any padding +data (prior to the FCS). + +Thus, the maximum 802.3 frame size that is allowed to be transmitted +should be QueueMaxSDU (MSDU) + 16 (6 byte SA + 6 byte DA + 4 byte FCS). 
+ +Fixes: 92a0dcb8427d ("igc: offload queue max SDU from tc-taprio") +Signed-off-by: Tan Tee Min +Reviewed-by: Muhammad Husaini Zulkifli +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 15 ++++----------- + 1 file changed, 4 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 826556e609800..e7bd2c60ee383 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -1575,16 +1575,9 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + if (adapter->qbv_transition || tx_ring->oper_gate_closed) + goto out_drop; + +- if (tx_ring->max_sdu > 0) { +- u32 max_sdu = 0; +- +- max_sdu = tx_ring->max_sdu + +- (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0); +- +- if (first->bytecount > max_sdu) { +- adapter->stats.txdrop++; +- goto out_drop; +- } ++ if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { ++ adapter->stats.txdrop++; ++ goto out_drop; + } + + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && +@@ -6215,7 +6208,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + struct net_device *dev = adapter->netdev; + + if (qopt->max_sdu[i]) +- ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len; ++ ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; + else + ring->max_sdu = 0; + } +-- +2.39.2 + diff --git a/queue-6.4/igc-no-strict-mode-in-pure-launchtime-cbs-offload.patch b/queue-6.4/igc-no-strict-mode-in-pure-launchtime-cbs-offload.patch new file mode 100644 index 00000000000..82372c609ed --- /dev/null +++ b/queue-6.4/igc-no-strict-mode-in-pure-launchtime-cbs-offload.patch @@ -0,0 +1,153 @@ +From 61956d16d51fe7d6d2b7ae9f849d0920cdfc2e00 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Jun 2023 16:07:12 +0200 +Subject: igc: No strict mode in pure launchtime/CBS offload + +From: 
Florian Kauer + +[ Upstream commit 8b86f10ab64eca0287ea8f7c94e9ad8b2e101c01 ] + +The flags IGC_TXQCTL_STRICT_CYCLE and IGC_TXQCTL_STRICT_END +prevent the packet transmission over slot and cycle boundaries. +This is important for taprio offload where the slots and +cycles correspond to the slots and cycles configured for the +network. + +However, the Qbv offload feature of the i225 is also used for +enabling TX launchtime / ETF offload. In that case, however, +the cycle has no meaning for the network and is only used +internally to adapt the base time register after a second has +passed. + +Enabling strict mode in this case would unnecessarily prevent +the transmission of certain packets (i.e. at the boundary of a +second) and thus interferes with the ETF qdisc that promises +transmission at a certain point in time. + +Similar to ETF, this also applies to CBS offload that also should +not be influenced by strict mode unless taprio offload would be +enabled at the same time. + +This fully reverts +commit d8f45be01dd9 ("igc: Use strict cycles for Qbv scheduling") +but its commit message only describes what was already implemented +before that commit. The difference to a plain revert of that commit +is that it now copes with the base_time = 0 case that was fixed with +commit e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") + +In particular, enabling strict mode leads to TX hang situations +under high traffic if taprio is applied WITHOUT taprio offload +but WITH ETF offload, e.g. 
as in + + sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ + num_tc 1 \ + map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ + queues 1@0 \ + base-time 0 \ + sched-entry S 01 300000 \ + flags 0x1 \ + txtime-delay 500000 \ + clockid CLOCK_TAI + sudo tc qdisc replace dev enp1s0 parent 100:1 etf \ + clockid CLOCK_TAI \ + delta 500000 \ + offload \ + skip_sock_check + +and traffic generator + + sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns + +with traffic.cfg + + #define ETH_P_IP 0x0800 + + { + /* Ethernet Header */ + 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed + 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed + const16(ETH_P_IP), + + /* IPv4 Header */ + 0b01000101, 0, # IPv4 version, IHL, TOS + const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) + const16(2), # IPv4 ident + 0b01000000, 0, # IPv4 flags, fragmentation off + 64, # IPv4 TTL + 17, # Protocol UDP + csumip(14, 33), # IPv4 checksum + + /* UDP Header */ + 10, 0, 48, 1, # IP Src - adapt as needed + 10, 0, 48, 10, # IP Dest - adapt as needed + const16(5555), # UDP Src Port + const16(6666), # UDP Dest Port + const16(1008), # UDP length (UDP header 8 bytes + payload length) + csumudp(14, 34), # UDP checksum + + /* Payload */ + fill('W', 1000), + } + +and the observed message with that is for example + + igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang + Tx Queue <0> + TDH + TDT + next_to_use + next_to_clean + buffer_info[next_to_clean] + time_stamp + next_to_watch <00000000245a4efb> + jiffies + desc.status <1048000> + +Fixes: d8f45be01dd9 ("igc: Use strict cycles for Qbv scheduling") +Signed-off-by: Florian Kauer +Reviewed-by: Kurt Kanzenbach +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_tsn.c | 24 ++++++++++++++++++++++-- + 1 file changed, 22 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c 
+index b76ebfc10b1d5..a9c08321aca90 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -132,8 +132,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) + wr32(IGC_STQT(i), ring->start_time); + wr32(IGC_ENDQT(i), ring->end_time); + +- txqctl |= IGC_TXQCTL_STRICT_CYCLE | +- IGC_TXQCTL_STRICT_END; ++ if (adapter->taprio_offload_enable) { ++ /* If taprio_offload_enable is set we are in "taprio" ++ * mode and we need to be strict about the ++ * cycles: only transmit a packet if it can be ++ * completed during that cycle. ++ * ++ * If taprio_offload_enable is NOT true when ++ * enabling TSN offload, the cycle should have ++ * no external effects, but is only used internally ++ * to adapt the base time register after a second ++ * has passed. ++ * ++ * Enabling strict mode in this case would ++ * unnecessarily prevent the transmission of ++ * certain packets (i.e. at the boundary of a ++ * second) and thus interfere with the launchtime ++ * feature that promises transmission at a ++ * certain point in time. ++ */ ++ txqctl |= IGC_TXQCTL_STRICT_CYCLE | ++ IGC_TXQCTL_STRICT_END; ++ } + + if (ring->launchtime_enable) + txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT; +-- +2.39.2 + diff --git a/queue-6.4/igc-remove-delay-during-tx-ring-configuration.patch b/queue-6.4/igc-remove-delay-during-tx-ring-configuration.patch new file mode 100644 index 00000000000..78b46171113 --- /dev/null +++ b/queue-6.4/igc-remove-delay-during-tx-ring-configuration.patch @@ -0,0 +1,46 @@ +From 73bfe462e0a94fef7a5dc2bb51a4d33dd891aa8e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 May 2023 08:18:12 +0800 +Subject: igc: Remove delay during TX ring configuration + +From: Muhammad Husaini Zulkifli + +[ Upstream commit cca28ceac7c7857bc2d313777017585aef00bcc4 ] + +Remove unnecessary delay during the TX ring configuration. +This will cause delay, especially during link down and +link up activity. 
+ +Furthermore, old SKUs such as the I225 will call the reset_adapter +to reset the controller during TSN mode Gate Control List (GCL) +setting. This will add more time to the configuration of the +real-time use case. + +This delay is not mentioned in the Software User Manual. +It might have been ported from legacy code I210 in the past. + +Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") +Signed-off-by: Muhammad Husaini Zulkifli +Acked-by: Sasha Neftin +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index a8815ccf7887d..b131c8f2b03df 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -711,7 +711,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, + /* disable the queue */ + wr32(IGC_TXDCTL(reg_idx), 0); + wrfl(); +- mdelay(10); + + wr32(IGC_TDLEN(reg_idx), + ring->count * sizeof(union igc_adv_tx_desc)); +-- +2.39.2 + diff --git a/queue-6.4/igc-rename-qbv_enable-to-taprio_offload_enable.patch b/queue-6.4/igc-rename-qbv_enable-to-taprio_offload_enable.patch new file mode 100644 index 00000000000..a8a95707462 --- /dev/null +++ b/queue-6.4/igc-rename-qbv_enable-to-taprio_offload_enable.patch @@ -0,0 +1,87 @@ +From d1a187de552f05b69cea2eec119c49c6b8d52d27 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Jun 2023 16:07:09 +0200 +Subject: igc: Rename qbv_enable to taprio_offload_enable + +From: Florian Kauer + +[ Upstream commit 8046063df887bee35c002224267ba46f41be7cf6 ] + +In the current implementation the flags adapter->qbv_enable +and IGC_FLAG_TSN_QBV_ENABLED have a similar name, but do not +have the same meaning. The first one is used only to indicate +taprio offload (i.e. 
when igc_save_qbv_schedule was called), +while the second one corresponds to the Qbv mode of the hardware. +However, the second one is also used to support the TX launchtime +feature, i.e. ETF qdisc offload. This leads to situations where +adapter->qbv_enable is false, but the flag IGC_FLAG_TSN_QBV_ENABLED +is set. This is prone to confusion. + +The rename should reduce this confusion. Since it is a pure +rename, it has no impact on functionality. + +Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") +Signed-off-by: Florian Kauer +Reviewed-by: Kurt Kanzenbach +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc.h | 2 +- + drivers/net/ethernet/intel/igc/igc_main.c | 6 +++--- + drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h +index c0a07af36cb23..345d3a4e8ed44 100644 +--- a/drivers/net/ethernet/intel/igc/igc.h ++++ b/drivers/net/ethernet/intel/igc/igc.h +@@ -191,7 +191,7 @@ struct igc_adapter { + int tc_setup_type; + ktime_t base_time; + ktime_t cycle_time; +- bool qbv_enable; ++ bool taprio_offload_enable; + u32 qbv_config_change_errors; + bool qbv_transition; + unsigned int qbv_count; +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index ae986e44a4718..6bed12224120f 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -6119,16 +6119,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: +- adapter->qbv_enable = true; ++ adapter->taprio_offload_enable = true; + break; + case TAPRIO_CMD_DESTROY: +- adapter->qbv_enable = false; ++ adapter->taprio_offload_enable = false; + break; + default: + return -EOPNOTSUPP; + } + +- if (!adapter->qbv_enable) ++ if 
(!adapter->taprio_offload_enable) + return igc_tsn_clear_schedule(adapter); + + if (qopt->base_time < 0) +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c +index 3cdb0c9887283..b76ebfc10b1d5 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -37,7 +37,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) + { + unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; + +- if (adapter->qbv_enable) ++ if (adapter->taprio_offload_enable) + new_flags |= IGC_FLAG_TSN_QBV_ENABLED; + + if (is_any_launchtime(adapter)) +-- +2.39.2 + diff --git a/queue-6.4/igc-set-tp-bit-in-supported-and-advertising-fields-o.patch b/queue-6.4/igc-set-tp-bit-in-supported-and-advertising-fields-o.patch new file mode 100644 index 00000000000..d05c14e2102 --- /dev/null +++ b/queue-6.4/igc-set-tp-bit-in-supported-and-advertising-fields-o.patch @@ -0,0 +1,39 @@ +From 1d3082ab17cf6c7fcee80d799b5321636a266645 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Jun 2023 11:09:01 -0700 +Subject: igc: set TP bit in 'supported' and 'advertising' fields of + ethtool_link_ksettings + +From: Prasad Koya + +[ Upstream commit 9ac3fc2f42e5ffa1e927dcbffb71b15fa81459e2 ] + +set TP bit in the 'supported' and 'advertising' fields. i225/226 parts +only support twisted pair copper. 
+ +Fixes: 8c5ad0dae93c ("igc: Add ethtool support") +Signed-off-by: Prasad Koya +Acked-by: Sasha Neftin +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c +index 0e2cb00622d1a..93bce729be76a 100644 +--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c ++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c +@@ -1708,6 +1708,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, + /* twisted pair */ + cmd->base.port = PORT_TP; + cmd->base.phy_address = hw->phy.addr; ++ ethtool_link_ksettings_add_link_mode(cmd, supported, TP); ++ ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); + + /* advertising link modes */ + if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) +-- +2.39.2 + diff --git a/queue-6.4/ionic-remove-warn_on-to-prevent-panic_on_warn.patch b/queue-6.4/ionic-remove-warn_on-to-prevent-panic_on_warn.patch new file mode 100644 index 00000000000..544ea4c1e5d --- /dev/null +++ b/queue-6.4/ionic-remove-warn_on-to-prevent-panic_on_warn.patch @@ -0,0 +1,42 @@ +From 17988b459e31fad01e0ae817b91651d5da7ccab1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 11:20:06 -0700 +Subject: ionic: remove WARN_ON to prevent panic_on_warn + +From: Nitya Sunkad + +[ Upstream commit abfb2a58a5377ebab717d4362d6180f901b6e5c1 ] + +Remove unnecessary early code development check and the WARN_ON +that it uses. The irq alloc and free paths have long been +cleaned up and this check shouldn't have stuck around so long. + +Fixes: 77ceb68e29cc ("ionic: Add notifyq support") +Signed-off-by: Nitya Sunkad +Signed-off-by: Shannon Nelson +Reviewed-by: Jacob Keller +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +index 957027e546b30..e03a94f2469ab 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +@@ -474,11 +474,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif) + static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, + struct ionic_qcq *n_qcq) + { +- if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) { +- ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index); +- n_qcq->flags &= ~IONIC_QCQ_F_INTR; +- } +- + n_qcq->intr.vector = src_qcq->intr.vector; + n_qcq->intr.index = src_qcq->intr.index; + n_qcq->napi_qcq = src_qcq->napi_qcq; +-- +2.39.2 + diff --git a/queue-6.4/ipv6-addrconf-fix-a-potential-refcount-underflow-for.patch b/queue-6.4/ipv6-addrconf-fix-a-potential-refcount-underflow-for.patch new file mode 100644 index 00000000000..6c3f8960773 --- /dev/null +++ b/queue-6.4/ipv6-addrconf-fix-a-potential-refcount-underflow-for.patch @@ -0,0 +1,53 @@ +From b5b491a50e6aee6415aa41b22d508b32edea7c17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Jul 2023 14:59:10 +0800 +Subject: ipv6/addrconf: fix a potential refcount underflow for idev + +From: Ziyang Xuan + +[ Upstream commit 06a0716949c22e2aefb648526580671197151acc ] + +Now in addrconf_mod_rs_timer(), reference idev depends on whether +rs_timer is not pending. Then modify rs_timer timeout. + +There is a time gap in [1], during which if the pending rs_timer +becomes not pending. It will miss to hold idev, but the rs_timer +is activated. Thus rs_timer callback function addrconf_rs_timer() +will be executed and put idev later without holding idev. A refcount +underflow issue for idev can be caused by this. 
+ + if (!timer_pending(&idev->rs_timer)) + in6_dev_hold(idev); + <--------------[1] + mod_timer(&idev->rs_timer, jiffies + when); + +To fix the issue, hold idev if mod_timer() returns 0. + +Fixes: b7b1bfce0bb6 ("ipv6: split duplicate address detection and router solicitation timer") +Suggested-by: Eric Dumazet +Signed-off-by: Ziyang Xuan +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/addrconf.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 3797917237d03..5affca8e2f53a 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -318,9 +318,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) + static void addrconf_mod_rs_timer(struct inet6_dev *idev, + unsigned long when) + { +- if (!timer_pending(&idev->rs_timer)) ++ if (!mod_timer(&idev->rs_timer, jiffies + when)) + in6_dev_hold(idev); +- mod_timer(&idev->rs_timer, jiffies + when); + } + + static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, +-- +2.39.2 + diff --git a/queue-6.4/kernel-trace-fix-cleanup-logic-of-enable_trace_eprob.patch b/queue-6.4/kernel-trace-fix-cleanup-logic-of-enable_trace_eprob.patch new file mode 100644 index 00000000000..46b7d02f3a2 --- /dev/null +++ b/queue-6.4/kernel-trace-fix-cleanup-logic-of-enable_trace_eprob.patch @@ -0,0 +1,71 @@ +From 8653c27f9ad4d0d299c6fdc3694facc1e45869ba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Jul 2023 07:28:53 +0300 +Subject: kernel/trace: Fix cleanup logic of enable_trace_eprobe + +From: Tzvetomir Stoyanov (VMware) + +[ Upstream commit cf0a624dc706c306294c14e6b3e7694702f25191 ] + +The enable_trace_eprobe() function enables all event probes, attached +to the given trace probe. If an error occurs in enabling one of the event +probes, all others should be rolled back. There is a bug in that roll +back logic - instead of all event probes, only the failed one is +disabled. 
+ +Link: https://lore.kernel.org/all/20230703042853.1427493-1-tz.stoyanov@gmail.com/ + +Reported-by: Dan Carpenter +Fixes: 7491e2c44278 ("tracing: Add a probe that attaches to trace events") +Signed-off-by: Tzvetomir Stoyanov (VMware) +Acked-by: Masami Hiramatsu (Google) +Reviewed-by: Steven Rostedt (Google) +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_eprobe.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c +index 67e854979d53e..3f04f0ffe0d70 100644 +--- a/kernel/trace/trace_eprobe.c ++++ b/kernel/trace/trace_eprobe.c +@@ -675,6 +675,7 @@ static int enable_trace_eprobe(struct trace_event_call *call, + struct trace_eprobe *ep; + bool enabled; + int ret = 0; ++ int cnt = 0; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) +@@ -698,12 +699,25 @@ static int enable_trace_eprobe(struct trace_event_call *call, + if (ret) + break; + enabled = true; ++ cnt++; + } + + if (ret) { + /* Failed to enable one of them. Roll back all */ +- if (enabled) +- disable_eprobe(ep, file->tr); ++ if (enabled) { ++ /* ++ * It's a bug if one failed for something other than memory ++ * not being available but another eprobe succeeded. 
++ */ ++ WARN_ON_ONCE(ret != -ENOMEM); ++ ++ list_for_each_entry(pos, trace_probe_probe_list(tp), list) { ++ ep = container_of(pos, struct trace_eprobe, tp); ++ disable_eprobe(ep, file->tr); ++ if (!--cnt) ++ break; ++ } ++ } + if (file) + trace_probe_remove_file(tp, file); + else +-- +2.39.2 + diff --git a/queue-6.4/net-bgmac-postpone-turning-irqs-off-to-avoid-soc-han.patch b/queue-6.4/net-bgmac-postpone-turning-irqs-off-to-avoid-soc-han.patch new file mode 100644 index 00000000000..ff8137d92dc --- /dev/null +++ b/queue-6.4/net-bgmac-postpone-turning-irqs-off-to-avoid-soc-han.patch @@ -0,0 +1,55 @@ +From da4d7c5cc9088c82294ef515fadc245827befeda Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 08:53:25 +0200 +Subject: net: bgmac: postpone turning IRQs off to avoid SoC hangs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Rafał Miłecki + +[ Upstream commit e7731194fdf085f46d58b1adccfddbd0dfee4873 ] + +Turning IRQs off is done by accessing Ethernet controller registers. +That can't be done until device's clock is enabled. It results in a SoC +hang otherwise. + +This bug remained unnoticed for years as most bootloaders keep all +Ethernet interfaces turned on. It seems to only affect a niche SoC +family BCM47189. It has two Ethernet controllers but CFE bootloader uses +only the first one. + +Fixes: 34322615cbaa ("net: bgmac: Mask interrupts during probe") +Signed-off-by: Rafał Miłecki +Reviewed-by: Michal Kubiak +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bgmac.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c +index 1761df8fb7f96..10c7c232cc4ec 100644 +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -1492,8 +1492,6 @@ int bgmac_enet_probe(struct bgmac *bgmac) + + bgmac->in_init = true; + +- bgmac_chip_intrs_off(bgmac); +- + net_dev->irq = bgmac->irq; + SET_NETDEV_DEV(net_dev, bgmac->dev); + dev_set_drvdata(bgmac->dev, bgmac); +@@ -1511,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) + */ + bgmac_clk_enable(bgmac, 0); + ++ bgmac_chip_intrs_off(bgmac); ++ + /* This seems to be fixing IRQ by assigning OOB #6 to the core */ + if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { + if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) +-- +2.39.2 + diff --git a/queue-6.4/net-dsa-felix-make-vsc9959_tas_guard_bands_update-vi.patch b/queue-6.4/net-dsa-felix-make-vsc9959_tas_guard_bands_update-vi.patch new file mode 100644 index 00000000000..42691567394 --- /dev/null +++ b/queue-6.4/net-dsa-felix-make-vsc9959_tas_guard_bands_update-vi.patch @@ -0,0 +1,100 @@ +From 9c24e69609c9264256560c1ac114ed3bb51aed5d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 13:44:21 +0300 +Subject: net: dsa: felix: make vsc9959_tas_guard_bands_update() visible to + ocelot->ops + +From: Vladimir Oltean + +[ Upstream commit c60819149b637d0f9f7f66e110d2a0d90a3993ea ] + +In a future change we will need to make +ocelot_port_update_active_preemptible_tcs() call +vsc9959_tas_guard_bands_update(), but that is currently not possible, +since the ocelot switch lib does not have access to functions private to +the DSA wrapper. + +Move the pointer to vsc9959_tas_guard_bands_update() from felix->info +(which is private to the DSA driver) to ocelot->ops (which is also +visible to the ocelot switch lib). 
+ +Signed-off-by: Vladimir Oltean +Message-ID: <20230705104422.49025-3-vladimir.oltean@nxp.com> +Signed-off-by: Jakub Kicinski +Stable-dep-of: c6efb4ae387c ("net: mscc: ocelot: fix oversize frame dropping for preemptible TCs") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/ocelot/felix.c | 5 ++--- + drivers/net/dsa/ocelot/felix.h | 1 - + drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +- + include/soc/mscc/ocelot.h | 1 + + 4 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c +index 70c0e2b1936b3..8348da2b3c97a 100644 +--- a/drivers/net/dsa/ocelot/felix.c ++++ b/drivers/net/dsa/ocelot/felix.c +@@ -1786,14 +1786,13 @@ static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu) + { + struct ocelot *ocelot = ds->priv; + struct ocelot_port *ocelot_port = ocelot->ports[port]; +- struct felix *felix = ocelot_to_felix(ocelot); + + ocelot_port_set_maxlen(ocelot, port, new_mtu); + + mutex_lock(&ocelot->tas_lock); + +- if (ocelot_port->taprio && felix->info->tas_guard_bands_update) +- felix->info->tas_guard_bands_update(ocelot, port); ++ if (ocelot_port->taprio && ocelot->ops->tas_guard_bands_update) ++ ocelot->ops->tas_guard_bands_update(ocelot, port); + + mutex_unlock(&ocelot->tas_lock); + +diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h +index 96008c046da53..1d4befe7cfe8e 100644 +--- a/drivers/net/dsa/ocelot/felix.h ++++ b/drivers/net/dsa/ocelot/felix.h +@@ -57,7 +57,6 @@ struct felix_info { + void (*mdio_bus_free)(struct ocelot *ocelot); + int (*port_setup_tc)(struct dsa_switch *ds, int port, + enum tc_setup_type type, void *type_data); +- void (*tas_guard_bands_update)(struct ocelot *ocelot, int port); + void (*port_sched_speed_set)(struct ocelot *ocelot, int port, + u32 speed); + void (*phylink_mac_config)(struct ocelot *ocelot, int port, +diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c +index 
d172a3e9736c4..219fb672a68d7 100644 +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -2600,6 +2600,7 @@ static const struct ocelot_ops vsc9959_ops = { + .cut_through_fwd = vsc9959_cut_through_fwd, + .tas_clock_adjust = vsc9959_tas_clock_adjust, + .update_stats = vsc9959_update_stats, ++ .tas_guard_bands_update = vsc9959_tas_guard_bands_update, + }; + + static const struct felix_info felix_info_vsc9959 = { +@@ -2625,7 +2626,6 @@ static const struct felix_info felix_info_vsc9959 = { + .port_modes = vsc9959_port_modes, + .port_setup_tc = vsc9959_port_setup_tc, + .port_sched_speed_set = vsc9959_sched_speed_set, +- .tas_guard_bands_update = vsc9959_tas_guard_bands_update, + }; + + /* The INTB interrupt is shared between for PTP TX timestamp availability +diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h +index 22aae505c813b..85a726fb006ca 100644 +--- a/include/soc/mscc/ocelot.h ++++ b/include/soc/mscc/ocelot.h +@@ -663,6 +663,7 @@ struct ocelot_ops { + struct flow_stats *stats); + void (*cut_through_fwd)(struct ocelot *ocelot); + void (*tas_clock_adjust)(struct ocelot *ocelot); ++ void (*tas_guard_bands_update)(struct ocelot *ocelot, int port); + void (*update_stats)(struct ocelot *ocelot); + }; + +-- +2.39.2 + diff --git a/queue-6.4/net-dsa-qca8k-add-check-for-skb_copy.patch b/queue-6.4/net-dsa-qca8k-add-check-for-skb_copy.patch new file mode 100644 index 00000000000..b7f1d32bdf7 --- /dev/null +++ b/queue-6.4/net-dsa-qca8k-add-check-for-skb_copy.patch @@ -0,0 +1,38 @@ +From b2967cb2e10684574c536ff370379d49e4f2d27b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 09:39:07 +0800 +Subject: net: dsa: qca8k: Add check for skb_copy + +From: Jiasheng Jiang + +[ Upstream commit 87355b7c3da9bfd81935caba0ab763355147f7b0 ] + +Add check for the return value of skb_copy in order to avoid NULL pointer +dereference. 
+ +Fixes: 2cd548566384 ("net: dsa: qca8k: add support for phy read/write with mgmt Ethernet") +Signed-off-by: Jiasheng Jiang +Reviewed-by: Pavan Chebbi +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/qca/qca8k-8xxx.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c +index 6d5ac7588a691..d775a14784f7e 100644 +--- a/drivers/net/dsa/qca/qca8k-8xxx.c ++++ b/drivers/net/dsa/qca/qca8k-8xxx.c +@@ -588,6 +588,9 @@ qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data, + bool ack; + int ret; + ++ if (!skb) ++ return -ENOMEM; ++ + reinit_completion(&mgmt_eth_data->rw_done); + + /* Increment seq_num and set it in the copy pkt */ +-- +2.39.2 + diff --git a/queue-6.4/net-dsa-removed-unneeded-of_node_put-in-felix_parse_.patch b/queue-6.4/net-dsa-removed-unneeded-of_node_put-in-felix_parse_.patch new file mode 100644 index 00000000000..a8e7b48a6b1 --- /dev/null +++ b/queue-6.4/net-dsa-removed-unneeded-of_node_put-in-felix_parse_.patch @@ -0,0 +1,38 @@ +From e63a1d795279609a3479ddd881e2263853d8e98e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 11:18:59 +0800 +Subject: net: dsa: Removed unneeded of_node_put in felix_parse_ports_node + +From: Lu Hongfei + +[ Upstream commit 04499f28b40bfc24f20b0e2331008bb90a54a6cf ] + +Remove unnecessary of_node_put from the continue path to prevent +child node from being released twice, which could avoid resource +leak or other unexpected issues. 
+ +Signed-off-by: Lu Hongfei +Reviewed-by: Vladimir Oltean +Fixes: de879a016a94 ("net: dsa: felix: add functionality when not all ports are supported") +Link: https://lore.kernel.org/r/20230710031859.36784-1-luhongfei@vivo.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/ocelot/felix.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c +index 8348da2b3c97a..d78b4bd4787e8 100644 +--- a/drivers/net/dsa/ocelot/felix.c ++++ b/drivers/net/dsa/ocelot/felix.c +@@ -1286,7 +1286,6 @@ static int felix_parse_ports_node(struct felix *felix, + if (err < 0) { + dev_info(dev, "Unsupported PHY mode %s on port %d\n", + phy_modes(phy_mode), port); +- of_node_put(child); + + /* Leave port_phy_modes[port] = 0, which is also + * PHY_INTERFACE_MODE_NA. This will perform a +-- +2.39.2 + diff --git a/queue-6.4/net-fec-increase-the-size-of-tx-ring-and-update-tx_w.patch b/queue-6.4/net-fec-increase-the-size-of-tx-ring-and-update-tx_w.patch new file mode 100644 index 00000000000..23dfde6a422 --- /dev/null +++ b/queue-6.4/net-fec-increase-the-size-of-tx-ring-and-update-tx_w.patch @@ -0,0 +1,83 @@ +From 04b9c610dfeb96ae1fa770e036bf8e8d46994305 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 16:10:11 +0800 +Subject: net: fec: increase the size of tx ring and update tx_wake_threshold + +From: Wei Fang + +[ Upstream commit 56b3c6ba53d0e9649ea5e4089b39cadde13aaef8 ] + +When the XDP feature is enabled and with heavy XDP frames to be +transmitted, there is a considerable probability that available +tx BDs are insufficient. This will lead to some XDP frames to be +discarded and the "NOT enough BD for SG!" error log will appear +in the console (as shown below). + +[ 160.013112] fec 30be0000.ethernet eth0: NOT enough BD for SG! +[ 160.023116] fec 30be0000.ethernet eth0: NOT enough BD for SG! +[ 160.028926] fec 30be0000.ethernet eth0: NOT enough BD for SG! 
+[ 160.038946] fec 30be0000.ethernet eth0: NOT enough BD for SG! +[ 160.044758] fec 30be0000.ethernet eth0: NOT enough BD for SG! + +In the case of heavy XDP traffic, sometimes the speed of recycling +tx BDs may be slower than the speed of sending XDP frames. There +may be several specific reasons, such as the interrupt not being +responded to in time, the efficiency of the NAPI callback function being +too low because all the queues (tx queues and rx queues) share the +same NAPI, and so on. + +After trying various methods, I think that increasing the size of the tx +BD ring is simple and effective. Maybe the best solution is to +allocate a NAPI for each queue to improve the efficiency of the NAPI +callback, but this change is a bit big and I didn't try this method. +Perhaps this method will be implemented in a future patch. + +This patch also updates the tx_wake_threshold of tx ring which is +related to the size of tx ring in the previous logic. Otherwise, +the tx_wake_threshold will be too high (403 BDs), which is more +likely to impact the slow path in the case of heavy XDP traffic, +because XDP path and slow path share the tx BD rings. According +to Jakub's suggestion, the tx_wake_threshold is at least equal to +tx_stop_threshold + 2 * MAX_SKB_FRAGS, if a queue of hundreds of +entries is overflowing, we should be able to apply a hysteresis +of a few tens of entries. 
+ +Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support") +Signed-off-by: Wei Fang +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/fec.h | 2 +- + drivers/net/ethernet/freescale/fec_main.c | 3 +-- + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h +index 8c0226d061fec..63a053dea819d 100644 +--- a/drivers/net/ethernet/freescale/fec.h ++++ b/drivers/net/ethernet/freescale/fec.h +@@ -355,7 +355,7 @@ struct bufdesc_ex { + #define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) + #define FEC_ENET_TX_FRSIZE 2048 + #define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) +-#define TX_RING_SIZE 512 /* Must be power of two */ ++#define TX_RING_SIZE 1024 /* Must be power of two */ + #define TX_RING_MOD_MASK 511 /* for this to work */ + + #define BD_ENET_RX_INT 0x00800000 +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index e6ed36e5daefa..7659888a96917 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -3347,8 +3347,7 @@ static int fec_enet_alloc_queue(struct net_device *ndev) + fep->total_tx_ring_size += fep->tx_queue[i]->bd.ring_size; + + txq->tx_stop_threshold = FEC_MAX_SKB_DESCS; +- txq->tx_wake_threshold = +- (txq->bd.ring_size - txq->tx_stop_threshold) / 2; ++ txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS; + + txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev, + txq->bd.ring_size * TSO_HEADER_SIZE, +-- +2.39.2 + diff --git a/queue-6.4/net-fec-recycle-pages-for-transmitted-xdp-frames.patch b/queue-6.4/net-fec-recycle-pages-for-transmitted-xdp-frames.patch new file mode 100644 index 00000000000..3aaac7af248 --- /dev/null +++ b/queue-6.4/net-fec-recycle-pages-for-transmitted-xdp-frames.patch @@ -0,0 +1,297 @@ +From 9d13d0210d9db3c4192c3ce53045e1fe7e5217d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Thu, 6 Jul 2023 16:10:10 +0800 +Subject: net: fec: recycle pages for transmitted XDP frames + +From: Wei Fang + +[ Upstream commit 20f797399035a8052dbd7297fdbe094079a9482e ] + +Once the XDP frames have been successfully transmitted through the +ndo_xdp_xmit() interface, it's the driver responsibility to free +the frames so that the page_pool can recycle the pages and reuse +them. However, this action is not implemented in the fec driver. +This leads to a user-visible problem that the console will print +the following warning log. + +[ 157.568851] page_pool_release_retry() stalled pool shutdown 1389 inflight 60 sec +[ 217.983446] page_pool_release_retry() stalled pool shutdown 1389 inflight 120 sec +[ 278.399006] page_pool_release_retry() stalled pool shutdown 1389 inflight 181 sec +[ 338.812885] page_pool_release_retry() stalled pool shutdown 1389 inflight 241 sec +[ 399.226946] page_pool_release_retry() stalled pool shutdown 1389 inflight 302 sec + +Therefore, to solve this issue, we free XDP frames via xdp_return_frame() +while cleaning the tx BD ring. 
+ +Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support") +Signed-off-by: Wei Fang +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/fec.h | 15 ++- + drivers/net/ethernet/freescale/fec_main.c | 148 +++++++++++++++------- + 2 files changed, 115 insertions(+), 48 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h +index 9939ccafb5566..8c0226d061fec 100644 +--- a/drivers/net/ethernet/freescale/fec.h ++++ b/drivers/net/ethernet/freescale/fec.h +@@ -544,10 +544,23 @@ enum { + XDP_STATS_TOTAL, + }; + ++enum fec_txbuf_type { ++ FEC_TXBUF_T_SKB, ++ FEC_TXBUF_T_XDP_NDO, ++}; ++ ++struct fec_tx_buffer { ++ union { ++ struct sk_buff *skb; ++ struct xdp_frame *xdp; ++ }; ++ enum fec_txbuf_type type; ++}; ++ + struct fec_enet_priv_tx_q { + struct bufdesc_prop bd; + unsigned char *tx_bounce[TX_RING_SIZE]; +- struct sk_buff *tx_skbuff[TX_RING_SIZE]; ++ struct fec_tx_buffer tx_buf[TX_RING_SIZE]; + + unsigned short tx_stop_threshold; + unsigned short tx_wake_threshold; +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index 40d71be45f604..e6ed36e5daefa 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -397,7 +397,7 @@ static void fec_dump(struct net_device *ndev) + fec16_to_cpu(bdp->cbd_sc), + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), +- txq->tx_skbuff[index]); ++ txq->tx_buf[index].skb); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); + index++; + } while (bdp != txq->bd.base); +@@ -654,7 +654,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, + + index = fec_enet_get_bd_index(last_bdp, &txq->bd); + /* Save skb pointer */ +- txq->tx_skbuff[index] = skb; ++ txq->tx_buf[index].skb = skb; + + /* Make sure the updates to rest of the descriptor are performed before + * transferring ownership. 
+@@ -672,9 +672,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, + + skb_tx_timestamp(skb); + +- /* Make sure the update to bdp and tx_skbuff are performed before +- * txq->bd.cur. +- */ ++ /* Make sure the update to bdp is performed before txq->bd.cur. */ + wmb(); + txq->bd.cur = bdp; + +@@ -862,7 +860,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, + } + + /* Save skb pointer */ +- txq->tx_skbuff[index] = skb; ++ txq->tx_buf[index].skb = skb; + + skb_tx_timestamp(skb); + txq->bd.cur = bdp; +@@ -952,16 +950,33 @@ static void fec_enet_bd_init(struct net_device *dev) + for (i = 0; i < txq->bd.ring_size; i++) { + /* Initialize the BD for every fragment in the page. */ + bdp->cbd_sc = cpu_to_fec16(0); +- if (bdp->cbd_bufaddr && +- !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) +- dma_unmap_single(&fep->pdev->dev, +- fec32_to_cpu(bdp->cbd_bufaddr), +- fec16_to_cpu(bdp->cbd_datlen), +- DMA_TO_DEVICE); +- if (txq->tx_skbuff[i]) { +- dev_kfree_skb_any(txq->tx_skbuff[i]); +- txq->tx_skbuff[i] = NULL; ++ if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { ++ if (bdp->cbd_bufaddr && ++ !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) ++ dma_unmap_single(&fep->pdev->dev, ++ fec32_to_cpu(bdp->cbd_bufaddr), ++ fec16_to_cpu(bdp->cbd_datlen), ++ DMA_TO_DEVICE); ++ if (txq->tx_buf[i].skb) { ++ dev_kfree_skb_any(txq->tx_buf[i].skb); ++ txq->tx_buf[i].skb = NULL; ++ } ++ } else { ++ if (bdp->cbd_bufaddr) ++ dma_unmap_single(&fep->pdev->dev, ++ fec32_to_cpu(bdp->cbd_bufaddr), ++ fec16_to_cpu(bdp->cbd_datlen), ++ DMA_TO_DEVICE); ++ ++ if (txq->tx_buf[i].xdp) { ++ xdp_return_frame(txq->tx_buf[i].xdp); ++ txq->tx_buf[i].xdp = NULL; ++ } ++ ++ /* restore default tx buffer type: FEC_TXBUF_T_SKB */ ++ txq->tx_buf[i].type = FEC_TXBUF_T_SKB; + } ++ + bdp->cbd_bufaddr = cpu_to_fec32(0); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); + } +@@ -1360,6 +1375,7 @@ static void + fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) + { + struct 
fec_enet_private *fep; ++ struct xdp_frame *xdpf; + struct bufdesc *bdp; + unsigned short status; + struct sk_buff *skb; +@@ -1387,16 +1403,31 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) + + index = fec_enet_get_bd_index(bdp, &txq->bd); + +- skb = txq->tx_skbuff[index]; +- txq->tx_skbuff[index] = NULL; +- if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) +- dma_unmap_single(&fep->pdev->dev, +- fec32_to_cpu(bdp->cbd_bufaddr), +- fec16_to_cpu(bdp->cbd_datlen), +- DMA_TO_DEVICE); +- bdp->cbd_bufaddr = cpu_to_fec32(0); +- if (!skb) +- goto skb_done; ++ if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { ++ skb = txq->tx_buf[index].skb; ++ txq->tx_buf[index].skb = NULL; ++ if (bdp->cbd_bufaddr && ++ !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) ++ dma_unmap_single(&fep->pdev->dev, ++ fec32_to_cpu(bdp->cbd_bufaddr), ++ fec16_to_cpu(bdp->cbd_datlen), ++ DMA_TO_DEVICE); ++ bdp->cbd_bufaddr = cpu_to_fec32(0); ++ if (!skb) ++ goto tx_buf_done; ++ } else { ++ xdpf = txq->tx_buf[index].xdp; ++ if (bdp->cbd_bufaddr) ++ dma_unmap_single(&fep->pdev->dev, ++ fec32_to_cpu(bdp->cbd_bufaddr), ++ fec16_to_cpu(bdp->cbd_datlen), ++ DMA_TO_DEVICE); ++ bdp->cbd_bufaddr = cpu_to_fec32(0); ++ if (!xdpf) { ++ txq->tx_buf[index].type = FEC_TXBUF_T_SKB; ++ goto tx_buf_done; ++ } ++ } + + /* Check for errors. */ + if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | +@@ -1415,21 +1446,11 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) + ndev->stats.tx_carrier_errors++; + } else { + ndev->stats.tx_packets++; +- ndev->stats.tx_bytes += skb->len; +- } + +- /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who +- * are to time stamp the packet, so we still need to check time +- * stamping enabled flag. 
+- */ +- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && +- fep->hwts_tx_en) && +- fep->bufdesc_ex) { +- struct skb_shared_hwtstamps shhwtstamps; +- struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; +- +- fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); +- skb_tstamp_tx(skb, &shhwtstamps); ++ if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) ++ ndev->stats.tx_bytes += skb->len; ++ else ++ ndev->stats.tx_bytes += xdpf->len; + } + + /* Deferred means some collisions occurred during transmit, +@@ -1438,10 +1459,32 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) + if (status & BD_ENET_TX_DEF) + ndev->stats.collisions++; + +- /* Free the sk buffer associated with this last transmit */ +- dev_kfree_skb_any(skb); +-skb_done: +- /* Make sure the update to bdp and tx_skbuff are performed ++ if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { ++ /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who ++ * are to time stamp the packet, so we still need to check time ++ * stamping enabled flag. 
++ */ ++ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && ++ fep->hwts_tx_en) && fep->bufdesc_ex) { ++ struct skb_shared_hwtstamps shhwtstamps; ++ struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; ++ ++ fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); ++ skb_tstamp_tx(skb, &shhwtstamps); ++ } ++ ++ /* Free the sk buffer associated with this last transmit */ ++ dev_kfree_skb_any(skb); ++ } else { ++ xdp_return_frame(xdpf); ++ ++ txq->tx_buf[index].xdp = NULL; ++ /* restore default tx buffer type: FEC_TXBUF_T_SKB */ ++ txq->tx_buf[index].type = FEC_TXBUF_T_SKB; ++ } ++ ++tx_buf_done: ++ /* Make sure the update to bdp and tx_buf are performed + * before dirty_tx + */ + wmb(); +@@ -3247,9 +3290,19 @@ static void fec_enet_free_buffers(struct net_device *ndev) + for (i = 0; i < txq->bd.ring_size; i++) { + kfree(txq->tx_bounce[i]); + txq->tx_bounce[i] = NULL; +- skb = txq->tx_skbuff[i]; +- txq->tx_skbuff[i] = NULL; +- dev_kfree_skb(skb); ++ ++ if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { ++ skb = txq->tx_buf[i].skb; ++ txq->tx_buf[i].skb = NULL; ++ dev_kfree_skb(skb); ++ } else { ++ if (txq->tx_buf[i].xdp) { ++ xdp_return_frame(txq->tx_buf[i].xdp); ++ txq->tx_buf[i].xdp = NULL; ++ } ++ ++ txq->tx_buf[i].type = FEC_TXBUF_T_SKB; ++ } + } + } + } +@@ -3809,7 +3862,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, + ebdp->cbd_esc = cpu_to_fec32(estatus); + } + +- txq->tx_skbuff[index] = NULL; ++ txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO; ++ txq->tx_buf[index].xdp = frame; + + /* Make sure the updates to rest of the descriptor are performed before + * transferring ownership. 
+-- +2.39.2 + diff --git a/queue-6.4/net-fec-remove-last_bdp-from-fec_enet_txq_xmit_frame.patch b/queue-6.4/net-fec-remove-last_bdp-from-fec_enet_txq_xmit_frame.patch new file mode 100644 index 00000000000..d2476079809 --- /dev/null +++ b/queue-6.4/net-fec-remove-last_bdp-from-fec_enet_txq_xmit_frame.patch @@ -0,0 +1,64 @@ +From b9d9804caf6a9776a5772f3c89ce55fe04f60fdc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 May 2023 10:26:15 +0800 +Subject: net: fec: remove last_bdp from fec_enet_txq_xmit_frame() + +From: Wei Fang + +[ Upstream commit bc638eabfed90fdc798fd5765e67e41abea76152 ] + +The last_bdp is initialized to bdp, and both last_bdp and bdp are +not changed. That is to say that last_bdp and bdp are always equal. +So bdp can be used directly. + +Signed-off-by: Wei Fang +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230529022615.669589-1-wei.fang@nxp.com +Signed-off-by: Paolo Abeni +Stable-dep-of: 20f797399035 ("net: fec: recycle pages for transmitted XDP frames") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/fec_main.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index c08331f7da7b3..40d71be45f604 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -3770,7 +3770,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, + struct xdp_frame *frame) + { + unsigned int index, status, estatus; +- struct bufdesc *bdp, *last_bdp; ++ struct bufdesc *bdp; + dma_addr_t dma_addr; + int entries_free; + +@@ -3782,7 +3782,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, + + /* Fill in a Tx ring entry */ + bdp = txq->bd.cur; +- last_bdp = bdp; + status = fec16_to_cpu(bdp->cbd_sc); + status &= ~BD_ENET_TX_STATS; + +@@ -3810,7 +3809,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, + ebdp->cbd_esc = 
cpu_to_fec32(estatus); + } + +- index = fec_enet_get_bd_index(last_bdp, &txq->bd); + txq->tx_skbuff[index] = NULL; + + /* Make sure the updates to rest of the descriptor are performed before +@@ -3825,7 +3823,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, + bdp->cbd_sc = cpu_to_fec16(status); + + /* If this was the last BD in the ring, start at the beginning again. */ +- bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd); ++ bdp = fec_enet_get_nextdesc(bdp, &txq->bd); + + /* Make sure the update to bdp are performed before txq->bd.cur. */ + dma_wmb(); +-- +2.39.2 + diff --git a/queue-6.4/net-fec-remove-useless-fec_enet_reset_skb.patch b/queue-6.4/net-fec-remove-useless-fec_enet_reset_skb.patch new file mode 100644 index 00000000000..e5305f47ff5 --- /dev/null +++ b/queue-6.4/net-fec-remove-useless-fec_enet_reset_skb.patch @@ -0,0 +1,66 @@ +From f09a2cd78c7abcfe06f26e33813370a6caced0b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 May 2023 10:01:13 +0800 +Subject: net: fec: remove useless fec_enet_reset_skb() + +From: Wei Fang + +[ Upstream commit 2ae9c66b04554bf5b3eeaab8c12a0bfb9f28ebde ] + +This patch is a cleanup for fec driver. The fec_enet_reset_skb() +is used to free skb buffers for tx queues and is only invoked in +fec_restart(). However, fec_enet_bd_init() also resets skb buffers +and is invoked in fec_restart() too. So fec_enet_reset_skb() is +redundant and useless. + +Signed-off-by: Wei Fang +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Stable-dep-of: 20f797399035 ("net: fec: recycle pages for transmitted XDP frames") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/fec_main.c | 21 --------------------- + 1 file changed, 21 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index 38e5b5abe067c..c08331f7da7b3 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -1011,24 +1011,6 @@ static void fec_enet_enable_ring(struct net_device *ndev) + } + } + +-static void fec_enet_reset_skb(struct net_device *ndev) +-{ +- struct fec_enet_private *fep = netdev_priv(ndev); +- struct fec_enet_priv_tx_q *txq; +- int i, j; +- +- for (i = 0; i < fep->num_tx_queues; i++) { +- txq = fep->tx_queue[i]; +- +- for (j = 0; j < txq->bd.ring_size; j++) { +- if (txq->tx_skbuff[j]) { +- dev_kfree_skb_any(txq->tx_skbuff[j]); +- txq->tx_skbuff[j] = NULL; +- } +- } +- } +-} +- + /* + * This function is called to start or restart the FEC during a link + * change, transmit timeout, or to reconfigure the FEC. The network +@@ -1071,9 +1053,6 @@ fec_restart(struct net_device *ndev) + + fec_enet_enable_ring(ndev); + +- /* Reset tx SKB buffers. 
*/ +- fec_enet_reset_skb(ndev); +- + /* Enable MII mode */ + if (fep->full_duplex == DUPLEX_FULL) { + /* FD enable */ +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5-query-hca_cap_2-only-when-supported.patch b/queue-6.4/net-mlx5-query-hca_cap_2-only-when-supported.patch new file mode 100644 index 00000000000..98ec5bc2a1f --- /dev/null +++ b/queue-6.4/net-mlx5-query-hca_cap_2-only-when-supported.patch @@ -0,0 +1,42 @@ +From a34d2ad002d2b201833052ecee081b78e238b9e3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 14:07:03 +0300 +Subject: net/mlx5: Query hca_cap_2 only when supported + +From: Maher Sanalla + +[ Upstream commit 6496357aa5f710eec96f91345b9da1b37c3231f6 ] + +On vport enable, where fw's hca caps are queried, the driver queries +hca_caps_2 without checking if fw truly supports them, causing a false +failure of vfs vport load and blocking SRIOV enablement on old devices +such as CX4 where hca_caps_2 support is missing. + +Thus, add a check for the said caps support before accessing them. 
+ +Fixes: e5b9642a33be ("net/mlx5: E-Switch, Implement devlink port function cmds to control migratable") +Signed-off-by: Maher Sanalla +Reviewed-by: Shay Drory +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +index 901c53751b0aa..f81c6d8d5e0f4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -800,6 +800,9 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + ++ if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2)) ++ goto out_free; ++ + memset(query_ctx, 0, query_out_sz); + err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, + MLX5_CAP_GENERAL_2); +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5-register-a-unique-thermal-zone-per-device.patch b/queue-6.4/net-mlx5-register-a-unique-thermal-zone-per-device.patch new file mode 100644 index 00000000000..2d0e06e08b5 --- /dev/null +++ b/queue-6.4/net-mlx5-register-a-unique-thermal-zone-per-device.patch @@ -0,0 +1,84 @@ +From 52acc5f90291c9483639677ab04e86981279f2ca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Jun 2023 20:36:41 -0700 +Subject: net/mlx5: Register a unique thermal zone per device + +From: Saeed Mahameed + +[ Upstream commit 631079e08aa4a20b73e70de4cf457886194f029f ] + +Prior to this patch only one "mlx5" thermal zone could have been +registered regardless of the number of individual mlx5 devices in the +system. + +To fix this setup a unique name per device to register its own thermal +zone. + +In order to not register a thermal zone for a virtual device (VF/SF) add +a check for PF device type. 
+ +The new name is a concatenation between "mlx5_" and "<PCI_DEV_NAME>", which +will also help associating a thermal zone with its PCI device. + +$ lspci | grep ConnectX +00:04.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx] +00:05.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx] + +$ cat /sys/devices/virtual/thermal/thermal_zone0/type +mlx5_0000:00:04.0 +$ cat /sys/devices/virtual/thermal/thermal_zone1/type +mlx5_0000:00:05.0 + +Fixes: c1fef618d611 ("net/mlx5: Implement thermal zone") +CC: Sandipan Patra +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/thermal.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c +index e47fa6fb836f1..89a22ff04cb60 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c +@@ -68,14 +68,19 @@ static struct thermal_zone_device_ops mlx5_thermal_ops = { + + int mlx5_thermal_init(struct mlx5_core_dev *mdev) + { ++ char data[THERMAL_NAME_LENGTH]; + struct mlx5_thermal *thermal; +- struct thermal_zone_device *tzd; +- const char *data = "mlx5"; ++ int err; + +- tzd = thermal_zone_get_zone_by_name(data); +- if (!IS_ERR(tzd)) ++ if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev)) + return 0; + ++ err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device)); ++ if (err < 0 || err >= sizeof(data)) { ++ mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err); ++ return -EINVAL; ++ } ++ + thermal = kzalloc(sizeof(*thermal), GFP_KERNEL); + if (!thermal) + return -ENOMEM; +@@ -88,10 +93,10 @@ int mlx5_thermal_init(struct mlx5_core_dev *mdev) + &mlx5_thermal_ops, + NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); + if (IS_ERR(thermal->tzdev)) { +- dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n", +- data, 
PTR_ERR(thermal->tzdev)); ++ err = PTR_ERR(thermal->tzdev); ++ mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err); + kfree(thermal); +- return -EINVAL; ++ return err; + } + + mdev->thermal = thermal; +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5e-check-for-not_ready-flag-state-after-locki.patch b/queue-6.4/net-mlx5e-check-for-not_ready-flag-state-after-locki.patch new file mode 100644 index 00000000000..ea3817a13f2 --- /dev/null +++ b/queue-6.4/net-mlx5e-check-for-not_ready-flag-state-after-locki.patch @@ -0,0 +1,133 @@ +From ff417c1989674958a9b8ca169580be9bde998850 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Jun 2023 09:32:10 +0200 +Subject: net/mlx5e: Check for NOT_READY flag state after locking + +From: Vlad Buslov + +[ Upstream commit 65e64640e97c0f223e77f9ea69b5a46186b93470 ] + +Currently the check for NOT_READY flag is performed before obtaining the +necessary lock. This opens a possibility for race condition when the flow +is concurrently removed from unready_flows list by the workqueue task, +which causes a double-removal from the list and a crash[0]. Fix the issue +by moving the flag check inside the section protected by +uplink_priv->unready_flows_lock mutex. 
+ +[0]: +[44376.389654] general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP +[44376.391665] CPU: 7 PID: 59123 Comm: tc Not tainted 6.4.0-rc4+ #1 +[44376.392984] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[44376.395342] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] +[44376.396857] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 +[44376.399167] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 +[44376.399680] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 +[44376.400337] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 +[44376.401001] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 +[44376.401663] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 +[44376.402342] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 +[44376.402999] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 +[44376.403787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[44376.404343] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 +[44376.405004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[44376.405665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[44376.406339] Call Trace: +[44376.406651] +[44376.406939] ? die_addr+0x33/0x90 +[44376.407311] ? exc_general_protection+0x192/0x390 +[44376.407795] ? asm_exc_general_protection+0x22/0x30 +[44376.408292] ? 
mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] +[44376.408876] __mlx5e_tc_del_fdb_peer_flow+0xbc/0xe0 [mlx5_core] +[44376.409482] mlx5e_tc_del_flow+0x42/0x210 [mlx5_core] +[44376.410055] mlx5e_flow_put+0x25/0x50 [mlx5_core] +[44376.410529] mlx5e_delete_flower+0x24b/0x350 [mlx5_core] +[44376.411043] tc_setup_cb_reoffload+0x22/0x80 +[44376.411462] fl_reoffload+0x261/0x2f0 [cls_flower] +[44376.411907] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] +[44376.412481] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] +[44376.413044] tcf_block_playback_offloads+0x76/0x170 +[44376.413497] tcf_block_unbind+0x7b/0xd0 +[44376.413881] tcf_block_setup+0x17d/0x1c0 +[44376.414269] tcf_block_offload_cmd.isra.0+0xf1/0x130 +[44376.414725] tcf_block_offload_unbind+0x43/0x70 +[44376.415153] __tcf_block_put+0x82/0x150 +[44376.415532] ingress_destroy+0x22/0x30 [sch_ingress] +[44376.415986] qdisc_destroy+0x3b/0xd0 +[44376.416343] qdisc_graft+0x4d0/0x620 +[44376.416706] tc_get_qdisc+0x1c9/0x3b0 +[44376.417074] rtnetlink_rcv_msg+0x29c/0x390 +[44376.419978] ? rep_movs_alternative+0x3a/0xa0 +[44376.420399] ? rtnl_calcit.isra.0+0x120/0x120 +[44376.420813] netlink_rcv_skb+0x54/0x100 +[44376.421192] netlink_unicast+0x1f6/0x2c0 +[44376.421573] netlink_sendmsg+0x232/0x4a0 +[44376.421980] sock_sendmsg+0x38/0x60 +[44376.422328] ____sys_sendmsg+0x1d0/0x1e0 +[44376.422709] ? copy_msghdr_from_user+0x6d/0xa0 +[44376.423127] ___sys_sendmsg+0x80/0xc0 +[44376.423495] ? 
___sys_recvmsg+0x8b/0xc0 +[44376.423869] __sys_sendmsg+0x51/0x90 +[44376.424226] do_syscall_64+0x3d/0x90 +[44376.424587] entry_SYSCALL_64_after_hwframe+0x46/0xb0 +[44376.425046] RIP: 0033:0x7f045134f887 +[44376.425403] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 +[44376.426914] RSP: 002b:00007ffd63a82b98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[44376.427592] RAX: ffffffffffffffda RBX: 000000006481955f RCX: 00007f045134f887 +[44376.428195] RDX: 0000000000000000 RSI: 00007ffd63a82c00 RDI: 0000000000000003 +[44376.428796] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 +[44376.429404] R10: 00007f0451208708 R11: 0000000000000246 R12: 0000000000000001 +[44376.430039] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400 +[44376.430644] +[44376.430907] Modules linked in: mlx5_ib mlx5_core act_mirred act_tunnel_key cls_flower vxlan dummy sch_ingress openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_g +ss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: mlx5_core] +[44376.433936] ---[ end trace 0000000000000000 ]--- +[44376.434373] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] +[44376.434951] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 +[44376.436452] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 +[44376.436924] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 +[44376.437530] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 +[44376.438179] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 
0000000000000001 +[44376.438786] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 +[44376.439393] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 +[44376.439998] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 +[44376.440714] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[44376.441225] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 +[44376.441843] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[44376.442471] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +Fixes: ad86755b18d5 ("net/mlx5e: Protect unready flows with dedicated lock") +Signed-off-by: Vlad Buslov +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +index b9b1da751a3b8..ed05ac8ae1de5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1639,7 +1639,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); +- unready_flow_del(flow); ++ if (flow_flag_test(flow, NOT_READY)) ++ unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); + } + +@@ -1932,8 +1933,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, + esw_attr = attr->esw_attr; + mlx5e_put_flow_tunnel_id(flow); + +- if (flow_flag_test(flow, NOT_READY)) +- remove_unready_flow(flow); ++ remove_unready_flow(flow); + + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5e-fix-double-free-in-mlx5e_destroy_flow_tabl.patch b/queue-6.4/net-mlx5e-fix-double-free-in-mlx5e_destroy_flow_tabl.patch new file mode 100644 index 
00000000000..2375796c99a --- /dev/null +++ b/queue-6.4/net-mlx5e-fix-double-free-in-mlx5e_destroy_flow_tabl.patch @@ -0,0 +1,38 @@ +From d7e4e0b298f7e025a8d1d477793ef1fc0ddad78a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 08:59:34 +0800 +Subject: net/mlx5e: fix double free in mlx5e_destroy_flow_table + +From: Zhengchao Shao + +[ Upstream commit 884abe45a9014d0de2e6edb0630dfd64f23f1d1b ] + +In function accel_fs_tcp_create_groups(), when the ft->g memory is +successfully allocated but the 'in' memory fails to be allocated, the +memory pointed to by ft->g is released once. And in function +accel_fs_tcp_create_table, mlx5e_destroy_flow_table is called to release +the memory pointed to by ft->g again. This will cause double free problem. + +Fixes: c062d52ac24c ("net/mlx5e: Receive flow steering framework for accelerated TCP flows") +Signed-off-by: Zhengchao Shao +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +index 88a5aed9d6781..c7d191f66ad1b 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +@@ -190,6 +190,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); ++ ft->g = NULL; + kvfree(in); + return -ENOMEM; + } +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_fs_tt_redirect_an.patch b/queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_fs_tt_redirect_an.patch new file mode 100644 index 00000000000..652c792b696 --- /dev/null +++ b/queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_fs_tt_redirect_an.patch @@ -0,0 +1,51 @@ +From 98b9d970ea12c3f1cdc033d3db131647cf868f8c Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Fri, 30 Jun 2023 09:49:02 +0800 +Subject: net/mlx5e: fix memory leak in mlx5e_fs_tt_redirect_any_create + +From: Zhengchao Shao + +[ Upstream commit 3250affdc658557a41df9c5fb567723e421f8bf2 ] + +The memory pointed to by the fs->any pointer is not freed in the error +path of mlx5e_fs_tt_redirect_any_create, which can lead to a memory leak. +Fix by freeing the memory in the error path, thereby making the error path +identical to mlx5e_fs_tt_redirect_any_destroy(). + +Fixes: 0f575c20bf06 ("net/mlx5e: Introduce Flow Steering ANY API") +Signed-off-by: Zhengchao Shao +Reviewed-by: Simon Horman +Reviewed-by: Rahul Rameshbabu +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +index 03cb79adf912f..be83ad9db82a4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +@@ -594,7 +594,7 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) + + err = fs_any_create_table(fs); + if (err) +- return err; ++ goto err_free_any; + + err = fs_any_enable(fs); + if (err) +@@ -606,8 +606,8 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) + + err_destroy_table: + fs_any_destroy_table(fs_any); +- +- kfree(fs_any); ++err_free_any: + mlx5e_fs_set_any(fs, NULL); ++ kfree(fs_any); + return err; + } +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_ptp_open.patch b/queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_ptp_open.patch new file mode 100644 index 00000000000..5fdef13faa9 --- /dev/null +++ b/queue-6.4/net-mlx5e-fix-memory-leak-in-mlx5e_ptp_open.patch @@ -0,0 +1,44 @@ +From c7fe10d9e258be469ad14d38f7e422c7c5b53fc7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 
Jun 2023 09:49:03 +0800 +Subject: net/mlx5e: fix memory leak in mlx5e_ptp_open + +From: Zhengchao Shao + +[ Upstream commit d543b649ffe58a0cb4b6948b3305069c5980a1fa ] + +When kvzalloc_node or kvzalloc failed in mlx5e_ptp_open, the memory +pointed by "c" or "cparams" is not freed, which can lead to a memory +leak. Fix by freeing the array in the error path. + +Fixes: 145e5637d941 ("net/mlx5e: Add TX PTP port object support") +Signed-off-by: Zhengchao Shao +Reviewed-by: Rahul Rameshbabu +Reviewed-by: Gal Pressman +Reviewed-by: Simon Horman +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +index 3cbebfba582bd..b0b429a0321ed 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +@@ -729,8 +729,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); + cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); +- if (!c || !cparams) +- return -ENOMEM; ++ if (!c || !cparams) { ++ err = -ENOMEM; ++ goto err_free; ++ } + + c->priv = priv; + c->mdev = priv->mdev; +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5e-rx-fix-flush-and-close-release-flow-of-reg.patch b/queue-6.4/net-mlx5e-rx-fix-flush-and-close-release-flow-of-reg.patch new file mode 100644 index 00000000000..53066e5e8db --- /dev/null +++ b/queue-6.4/net-mlx5e-rx-fix-flush-and-close-release-flow-of-reg.patch @@ -0,0 +1,52 @@ +From f7c41e83186a2321edeb0617b48d595a94c0f87b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 May 2023 21:18:53 +0300 +Subject: net/mlx5e: RX, Fix flush and close release flow of regular rq for + legacy rq + +From: Dragos Tatulea + +[ Upstream commit 
2e2d1965794d22fbe86df45bf4f933216743577d ] + +Regular (non-XSK) RQs get flushed on XSK setup and re-activated on XSK +close. If the same regular RQ is closed (a config change for example) +soon after the XSK close, a double release occurs because the missing +wqes get released a second time. + +Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") +Signed-off-by: Dragos Tatulea +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +index 69634829558e2..111f6a4a64b64 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -390,10 +390,18 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) + { + struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); + +- if (rq->xsk_pool) ++ if (rq->xsk_pool) { + mlx5e_xsk_free_rx_wqe(wi); +- else ++ } else { + mlx5e_free_rx_wqe(rq, wi); ++ ++ /* Avoid a second release of the wqe pages: dealloc is called ++ * for the same missing wqes on regular RQ flush and on regular ++ * RQ close. This happens when XSK RQs come into play. 
++ */ ++ for (int i = 0; i < rq->wqe.info.num_frags; i++, wi++) ++ wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); ++ } + } + + static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5e-rx-fix-page_pool-page-fragment-tracking-fo.patch b/queue-6.4/net-mlx5e-rx-fix-page_pool-page-fragment-tracking-fo.patch new file mode 100644 index 00000000000..e2f5dfee88f --- /dev/null +++ b/queue-6.4/net-mlx5e-rx-fix-page_pool-page-fragment-tracking-fo.patch @@ -0,0 +1,124 @@ +From 59ad05f1f9dbd8088f1ca0e32352df75c7c095a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 May 2023 21:18:49 +0300 +Subject: net/mlx5e: RX, Fix page_pool page fragment tracking for XDP + +From: Dragos Tatulea + +[ Upstream commit 7abd955a58fb0fcd4e756fa2065c03ae488fcfa7 ] + +Currently mlx5e releases pages directly to the page_pool for XDP_TX and +does page fragment counting for XDP_REDIRECT. RX pages from the +page_pool are leaking on XDP_REDIRECT because the xdp core will release +only one fragment out of MLX5E_PAGECNT_BIAS_MAX and subsequently the page +is marked as "skip release" which avoids the driver release. + +A fix would be to take an extra fragment for XDP_REDIRECT and not set the +"skip release" bit so that the release on the driver side can handle the +remaining bias fragments. But this would be a shortsighted solution. +Instead, this patch converges the two XDP paths (XDP_TX and XDP_REDIRECT) to +always do fragment tracking. The "skip release" bit is no longer +necessary for XDP. 
+ +Fixes: 6f5742846053 ("net/mlx5e: RX, Enable skb page recycling through the page_pool") +Signed-off-by: Dragos Tatulea +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 3 +- + .../net/ethernet/mellanox/mlx5/core/en_rx.c | 32 +++++++------------ + 2 files changed, 13 insertions(+), 22 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +index f0e6095809faf..40589cebb7730 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +@@ -662,8 +662,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, + /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) + * as we know this is a page_pool page. + */ +- page_pool_put_defragged_page(page->pp, +- page, -1, true); ++ page_pool_recycle_direct(page->pp, page); + } while (++n < num); + + break; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +index 111f6a4a64b64..08e08489f4220 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -1753,11 +1753,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi + + prog = rcu_dereference(rq->xdp_prog); + if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { +- if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { ++ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + struct mlx5e_wqe_frag_info *pwi; + + for (pwi = head_wi; pwi < wi; pwi++) +- pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); ++ pwi->frag_page->frags++; + } + return NULL; /* page/packet was consumed by XDP */ + } +@@ -1827,12 +1827,8 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) + rq, wi, cqe, cqe_bcnt); + if (!skb) { + /* probably for XDP */ +- if 
(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { +- /* do not return page to cache, +- * it will be returned on XDP_TX completion. +- */ +- wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); +- } ++ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) ++ wi->frag_page->frags++; + goto wq_cyc_pop; + } + +@@ -1878,12 +1874,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) + rq, wi, cqe, cqe_bcnt); + if (!skb) { + /* probably for XDP */ +- if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { +- /* do not return page to cache, +- * it will be returned on XDP_TX completion. +- */ +- wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); +- } ++ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) ++ wi->frag_page->frags++; + goto wq_cyc_pop; + } + +@@ -2062,12 +2054,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w + if (prog) { + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { +- int i; ++ struct mlx5e_frag_page *pfp; ++ ++ for (pfp = head_page; pfp < frag_page; pfp++) ++ pfp->frags++; + +- for (i = 0; i < sinfo->nr_frags; i++) +- /* non-atomic */ +- __set_bit(page_idx + i, wi->skip_release_bitmap); +- return NULL; ++ wi->linear_page.frags++; + } + mlx5e_page_release_fragmented(rq, &wi->linear_page); + return NULL; /* page/packet was consumed by XDP */ +@@ -2165,7 +2157,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + cqe_bcnt, &mxbuf); + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) +- __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ ++ frag_page->frags++; + return NULL; /* page/packet was consumed by XDP */ + } + +-- +2.39.2 + diff --git a/queue-6.4/net-mlx5e-tc-ct-offload-ct-clear-only-once.patch b/queue-6.4/net-mlx5e-tc-ct-offload-ct-clear-only-once.patch new file mode 100644 index 
00000000000..596630e0950 --- /dev/null +++ b/queue-6.4/net-mlx5e-tc-ct-offload-ct-clear-only-once.patch @@ -0,0 +1,92 @@ +From e31e91b5b2ccba22c8394e1c6632516613d95bbb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 4 Jun 2023 12:45:38 +0300 +Subject: net/mlx5e: TC, CT: Offload ct clear only once + +From: Yevgeny Kliteynik + +[ Upstream commit f7a485115ad4cfc560833942014bf791abf1f827 ] + +Non-clear CT action causes a flow rule split, while CT clear action +doesn't and is just a header-rewrite to the current flow rule. +But ct offload is done in post_parse and is per ct action instance, +so ct clear offload is parsed multiple times, while it is deleted only once. + +Fix this by post_parsing the ct action only once per flow attribute +(which is per flow rule) by using an offloaded ct_attr flag. + +Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action") +Signed-off-by: Paul Blakey +Signed-off-by: Yevgeny Kliteynik +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 14 +++++++++++--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 1 + + 2 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +index a254e728ac954..fadfa8b50bebe 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +@@ -1545,7 +1545,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + + attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ + attr->ct_attr.zone = act->ct.zone; +- attr->ct_attr.nf_ft = act->ct.flow_table; ++ if (!(act->ct.action & TCA_CT_ACT_CLEAR)) ++ attr->ct_attr.nf_ft = act->ct.flow_table; + attr->ct_attr.act_miss_cookie = act->miss_cookie; + + return 0; +@@ -1990,6 +1991,9 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att + if (!priv) + return
-EOPNOTSUPP; + ++ if (attr->ct_attr.offloaded) ++ return 0; ++ + if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { + err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts, + 0, 0, 0, 0); +@@ -1999,11 +2003,15 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + +- if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ ++ if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */ ++ attr->ct_attr.offloaded = true; + return 0; ++ } + + mutex_lock(&priv->control_lock); + err = __mlx5_tc_ct_flow_offload(priv, attr); ++ if (!err) ++ attr->ct_attr.offloaded = true; + mutex_unlock(&priv->control_lock); + + return err; +@@ -2021,7 +2029,7 @@ void + mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr) + { +- if (!attr->ct_attr.ft) /* no ct action, return */ ++ if (!attr->ct_attr.offloaded) /* no ct action, return */ + return; + if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ + return; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +index 8e9316fa46d4b..b66c5f98067f7 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +@@ -29,6 +29,7 @@ struct mlx5_ct_attr { + u32 ct_labels_id; + u32 act_miss_mapping; + u64 act_miss_cookie; ++ bool offloaded; + struct mlx5_ct_ft *ft; + }; + +-- +2.39.2 + diff --git a/queue-6.4/net-mscc-ocelot-fix-oversize-frame-dropping-for-pree.patch b/queue-6.4/net-mscc-ocelot-fix-oversize-frame-dropping-for-pree.patch new file mode 100644 index 00000000000..50a9344544c --- /dev/null +++ b/queue-6.4/net-mscc-ocelot-fix-oversize-frame-dropping-for-pree.patch @@ -0,0 +1,133 @@ +From fea32e0ddf5b2d1794d710488bd9e12dea65d7ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 
13:44:22 +0300 +Subject: net: mscc: ocelot: fix oversize frame dropping for preemptible TCs + +From: Vladimir Oltean + +[ Upstream commit c6efb4ae387c79bf0d4da286108c810b7b40de3c ] + +This switch implements Hold/Release in a strange way, with no control +from the user as required by IEEE 802.1Q-2018 through Set-And-Hold-MAC +and Set-And-Release-MAC, but rather, it emits HOLD requests implicitly +based on the schedule. + +Namely, when the gate of a preemptible TC is about to close (actually +QSYS::PREEMPTION_CFG.HOLD_ADVANCE octet times in advance of this event), +the QSYS seems to emit a HOLD request pulse towards the MAC which +preempts the currently transmitted packet, and further packets are held +back in the queue system. + +This allows large frames to be squeezed through small time slots, +because HOLD requests initiated by the gate events result in the frame +being segmented in multiple fragments, the bit time of which is equal to +the size of the time slot. + +It has been reported that the vsc9959_tas_guard_bands_update() logic +breaks this, because it doesn't take preemptible TCs into account, and +enables oversized frame dropping when the time slot doesn't allow a full +MTU to be sent, but it does allow 2*minFragSize to be sent (128B). +Packets larger than 128B are dropped instead of being sent in multiple +fragments. + +Confusingly, the manual says: + +| For guard band, SDU calculation of a traffic class of a port, if +| preemption is enabled (through 'QSYS::PREEMPTION_CFG.P_QUEUES') then +| QSYS::PREEMPTION_CFG.HOLD_ADVANCE is used, otherwise +| QSYS::QMAXSDU_CFG_*.QMAXSDU_* is used. + +but this only refers to the static guard band durations, and the +QMAXSDU_CFG_* registers have dual purpose - the other being oversized +frame dropping, which takes place irrespective of whether frames are +preemptible or express. 
+ +So, to fix the problem, we need to call vsc9959_tas_guard_bands_update() +from ocelot_port_update_active_preemptible_tcs(), and modify the guard +band logic to consider a different (lower) oversize limit for +preemptible traffic classes. + +Fixes: 403ffc2c34de ("net: mscc: ocelot: add support for preemptible traffic classes") +Signed-off-by: Vladimir Oltean +Message-ID: <20230705104422.49025-4-vladimir.oltean@nxp.com> +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/ocelot/felix_vsc9959.c | 21 +++++++++++++++++---- + drivers/net/ethernet/mscc/ocelot_mm.c | 7 +++++-- + 2 files changed, 22 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c +index 219fb672a68d7..bd11f9fb95e54 100644 +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -1221,11 +1221,13 @@ static u32 vsc9959_tas_tc_max_sdu(struct tc_taprio_qopt_offload *taprio, int tc) + static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ struct ocelot_mm_state *mm = &ocelot->mm[port]; + struct tc_taprio_qopt_offload *taprio; + u64 min_gate_len[OCELOT_NUM_TC]; ++ u32 val, maxlen, add_frag_size; ++ u64 needed_min_frag_time_ps; + int speed, picos_per_byte; + u64 needed_bit_time_ps; +- u32 val, maxlen; + u8 tas_speed; + int tc; + +@@ -1265,9 +1267,18 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + */ + needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte; + ++ /* Preemptible TCs don't need to pass a full MTU, the port will ++ * automatically emit a HOLD request when a preemptible TC gate closes ++ */ ++ val = ocelot_read_rix(ocelot, QSYS_PREEMPTION_CFG, port); ++ add_frag_size = QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(val); ++ needed_min_frag_time_ps = picos_per_byte * ++ (u64)(24 + 2 * ethtool_mm_frag_size_add_to_min(add_frag_size)); ++ + 
dev_dbg(ocelot->dev, +- "port %d: max frame size %d needs %llu ps at speed %d\n", +- port, maxlen, needed_bit_time_ps, speed); ++ "port %d: max frame size %d needs %llu ps, %llu ps for mPackets at speed %d\n", ++ port, maxlen, needed_bit_time_ps, needed_min_frag_time_ps, ++ speed); + + vsc9959_tas_min_gate_lengths(taprio, min_gate_len); + +@@ -1281,7 +1292,9 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + remaining_gate_len_ps = + vsc9959_tas_remaining_gate_len_ps(min_gate_len[tc]); + +- if (remaining_gate_len_ps > needed_bit_time_ps) { ++ if ((mm->active_preemptible_tcs & BIT(tc)) ? ++ remaining_gate_len_ps > needed_min_frag_time_ps : ++ remaining_gate_len_ps > needed_bit_time_ps) { + /* Setting QMAXSDU_CFG to 0 disables oversized frame + * dropping. + */ +diff --git a/drivers/net/ethernet/mscc/ocelot_mm.c b/drivers/net/ethernet/mscc/ocelot_mm.c +index fb3145118d686..99b29d1e62449 100644 +--- a/drivers/net/ethernet/mscc/ocelot_mm.c ++++ b/drivers/net/ethernet/mscc/ocelot_mm.c +@@ -67,10 +67,13 @@ void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port) + val = mm->preemptible_tcs; + + /* Cut through switching doesn't work for preemptible priorities, +- * so first make sure it is disabled. ++ * so first make sure it is disabled. Also, changing the preemptible ++ * TCs affects the oversized frame dropping logic, so that needs to be ++ * re-triggered. And since tas_guard_bands_update() also implicitly ++ * calls cut_through_fwd(), we don't need to explicitly call it. 
+ */ + mm->active_preemptible_tcs = val; +- ocelot->ops->cut_through_fwd(ocelot); ++ ocelot->ops->tas_guard_bands_update(ocelot, port); + + dev_dbg(ocelot->dev, + "port %d %s/%s, MM TX %s, preemptible TCs 0x%x, active 0x%x\n", +-- +2.39.2 + diff --git a/queue-6.4/net-mvneta-fix-txq_map-in-case-of-txq_number-1.patch b/queue-6.4/net-mvneta-fix-txq_map-in-case-of-txq_number-1.patch new file mode 100644 index 00000000000..9d3a6bdf6e1 --- /dev/null +++ b/queue-6.4/net-mvneta-fix-txq_map-in-case-of-txq_number-1.patch @@ -0,0 +1,48 @@ +From bbbecf69e697b454d49b4534c8578fac9b3928f1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 07:37:12 +0200 +Subject: net: mvneta: fix txq_map in case of txq_number==1 + +From: Klaus Kudielka + +[ Upstream commit 21327f81db6337c8843ce755b01523c7d3df715b ] + +If we boot with mvneta.txq_number=1, the txq_map is set incorrectly: +MVNETA_CPU_TXQ_ACCESS(1) refers to TX queue 1, but only TX queue 0 is +initialized. Fix this. + +Fixes: 50bf8cb6fc9c ("net: mvneta: Configure XPS support") +Signed-off-by: Klaus Kudielka +Reviewed-by: Michal Kubiak +Link: https://lore.kernel.org/r/20230705053712.3914-1-klaus.kudielka@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/mvneta.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c +index 2cad76d0a50ef..4401fad31fb98 100644 +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -1505,7 +1505,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) + */ + if (txq_number == 1) + txq_map = (cpu == pp->rxq_def) ? +- MVNETA_CPU_TXQ_ACCESS(1) : 0; ++ MVNETA_CPU_TXQ_ACCESS(0) : 0; + + } else { + txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; +@@ -4295,7 +4295,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) + */ + if (txq_number == 1) + txq_map = (cpu == elected_cpu) ? 
+- MVNETA_CPU_TXQ_ACCESS(1) : 0; ++ MVNETA_CPU_TXQ_ACCESS(0) : 0; + else + txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & + MVNETA_CPU_TXQ_ACCESS_ALL_MASK; +-- +2.39.2 + diff --git a/queue-6.4/net-prevent-skb-corruption-on-frag-list-segmentation.patch b/queue-6.4/net-prevent-skb-corruption-on-frag-list-segmentation.patch new file mode 100644 index 00000000000..197960b446d --- /dev/null +++ b/queue-6.4/net-prevent-skb-corruption-on-frag-list-segmentation.patch @@ -0,0 +1,102 @@ +From e38314d12545f455b99b83eca2878449aefaef47 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 10:11:10 +0200 +Subject: net: prevent skb corruption on frag list segmentation + +From: Paolo Abeni + +[ Upstream commit c329b261afe71197d9da83c1f18eb45a7e97e089 ] + +Ian reported several skb corruptions triggered by rx-gro-list, +collecting different oops alike: + +[ 62.624003] BUG: kernel NULL pointer dereference, address: 00000000000000c0 +[ 62.631083] #PF: supervisor read access in kernel mode +[ 62.636312] #PF: error_code(0x0000) - not-present page +[ 62.641541] PGD 0 P4D 0 +[ 62.644174] Oops: 0000 [#1] PREEMPT SMP NOPTI +[ 62.648629] CPU: 1 PID: 913 Comm: napi/eno2-79 Not tainted 6.4.0 #364 +[ 62.655162] Hardware name: Supermicro Super Server/A2SDi-12C-HLN4F, BIOS 1.7a 10/13/2022 +[ 62.663344] RIP: 0010:__udp_gso_segment (./include/linux/skbuff.h:2858 +./include/linux/udp.h:23 net/ipv4/udp_offload.c:228 net/ipv4/udp_offload.c:261 +net/ipv4/udp_offload.c:277) +[ 62.687193] RSP: 0018:ffffbd3a83b4f868 EFLAGS: 00010246 +[ 62.692515] RAX: 00000000000000ce RBX: 0000000000000000 RCX: 0000000000000000 +[ 62.699743] RDX: ffffa124def8a000 RSI: 0000000000000079 RDI: ffffa125952a14d4 +[ 62.706970] RBP: ffffa124def8a000 R08: 0000000000000022 R09: 00002000001558c9 +[ 62.714199] R10: 0000000000000000 R11: 00000000be554639 R12: 00000000000000e2 +[ 62.721426] R13: ffffa125952a1400 R14: ffffa125952a1400 R15: 00002000001558c9 +[ 62.728654] FS: 0000000000000000(0000) GS:ffffa127efa40000(0000) 
+knlGS:0000000000000000 +[ 62.736852] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 62.742702] CR2: 00000000000000c0 CR3: 00000001034b0000 CR4: 00000000003526e0 +[ 62.749948] Call Trace: +[ 62.752498] +[ 62.779267] inet_gso_segment (net/ipv4/af_inet.c:1398) +[ 62.787605] skb_mac_gso_segment (net/core/gro.c:141) +[ 62.791906] __skb_gso_segment (net/core/dev.c:3403 (discriminator 2)) +[ 62.800492] validate_xmit_skb (./include/linux/netdevice.h:4862 +net/core/dev.c:3659) +[ 62.804695] validate_xmit_skb_list (net/core/dev.c:3710) +[ 62.809158] sch_direct_xmit (net/sched/sch_generic.c:330) +[ 62.813198] __dev_queue_xmit (net/core/dev.c:3805 net/core/dev.c:4210) +net/netfilter/core.c:626) +[ 62.821093] br_dev_queue_push_xmit (net/bridge/br_forward.c:55) +[ 62.825652] maybe_deliver (net/bridge/br_forward.c:193) +[ 62.829420] br_flood (net/bridge/br_forward.c:233) +[ 62.832758] br_handle_frame_finish (net/bridge/br_input.c:215) +[ 62.837403] br_handle_frame (net/bridge/br_input.c:298 +net/bridge/br_input.c:416) +[ 62.851417] __netif_receive_skb_core.constprop.0 (net/core/dev.c:5387) +[ 62.866114] __netif_receive_skb_list_core (net/core/dev.c:5570) +[ 62.871367] netif_receive_skb_list_internal (net/core/dev.c:5638 +net/core/dev.c:5727) +[ 62.876795] napi_complete_done (./include/linux/list.h:37 +./include/net/gro.h:434 ./include/net/gro.h:429 net/core/dev.c:6067) +[ 62.881004] ixgbe_poll (drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:3191) +[ 62.893534] __napi_poll (net/core/dev.c:6498) +[ 62.897133] napi_threaded_poll (./include/linux/netpoll.h:89 +net/core/dev.c:6640) +[ 62.905276] kthread (kernel/kthread.c:379) +[ 62.913435] ret_from_fork (arch/x86/entry/entry_64.S:314) +[ 62.917119] + +In the critical scenario, rx-gro-list GRO-ed packets are fed, via a +bridge, both to the local input path and to an egress device (tun). + +The segmentation of such packets unsafely writes to the cloned skbs +with shared heads. 
+ +This change addresses the issue by uncloning as needed the +to-be-segmented skbs. + +Reported-by: Ian Kumlien +Tested-by: Ian Kumlien +Fixes: 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.") +Signed-off-by: Paolo Abeni +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/core/skbuff.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index cea28d30abb55..1b6a1d99869dc 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4270,6 +4270,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, + + skb_push(skb, -skb_network_offset(skb) + offset); + ++ /* Ensure the head is writeable before touching the shared info */ ++ err = skb_unclone(skb, GFP_ATOMIC); ++ if (err) ++ goto err_linearize; ++ + skb_shinfo(skb)->frag_list = NULL; + + while (list_skb) { +-- +2.39.2 + diff --git a/queue-6.4/net-sched-cls_fw-fix-improper-refcount-update-leads-.patch b/queue-6.4/net-sched-cls_fw-fix-improper-refcount-update-leads-.patch new file mode 100644 index 00000000000..8be08683961 --- /dev/null +++ b/queue-6.4/net-sched-cls_fw-fix-improper-refcount-update-leads-.patch @@ -0,0 +1,62 @@ +From f6a83e813f95e57de0c82a8c14e364de598faf03 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 12:15:30 -0400 +Subject: net/sched: cls_fw: Fix improper refcount update leads to + use-after-free + +From: M A Ramdhan + +[ Upstream commit 0323bce598eea038714f941ce2b22541c46d488f ] + +In the event of a failure in tcf_change_indev(), fw_set_parms() will +immediately return an error after incrementing or decrementing +reference counter in tcf_bind_filter(). If an attacker can drive the +reference counter to zero and get the reference freed, this leads to a +use-after-free. + +In order to prevent this, move the point of possible failure above the +point where TCA_FW_CLASSID is handled.
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: M A Ramdhan +Signed-off-by: M A Ramdhan +Acked-by: Jamal Hadi Salim +Reviewed-by: Pedro Tammela +Message-ID: <20230705161530.52003-1-ramdhan@starlabs.sg> +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_fw.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c +index ae9439a6c56c9..8641f80593179 100644 +--- a/net/sched/cls_fw.c ++++ b/net/sched/cls_fw.c +@@ -212,11 +212,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, + if (err < 0) + return err; + +- if (tb[TCA_FW_CLASSID]) { +- f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); +- tcf_bind_filter(tp, &f->res, base); +- } +- + if (tb[TCA_FW_INDEV]) { + int ret; + ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack); +@@ -233,6 +228,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, + } else if (head->mask != 0xFFFFFFFF) + return err; + ++ if (tb[TCA_FW_CLASSID]) { ++ f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); ++ tcf_bind_filter(tp, &f->res, base); ++ } ++ + return 0; + } + +-- +2.39.2 + diff --git a/queue-6.4/net-sched-flower-ensure-both-minimum-and-maximum-por.patch b/queue-6.4/net-sched-flower-ensure-both-minimum-and-maximum-por.patch new file mode 100644 index 00000000000..ddcc34c0b9c --- /dev/null +++ b/queue-6.4/net-sched-flower-ensure-both-minimum-and-maximum-por.patch @@ -0,0 +1,82 @@ +From deadee03222c098c10faf927753a1bb2e00a2762 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 10:08:09 +0300 +Subject: net/sched: flower: Ensure both minimum and maximum ports are + specified + +From: Ido Schimmel + +[ Upstream commit d3f87278bcb80bd7f9519669d928b43320363d4f ] + +The kernel does not currently validate that both the minimum and maximum +ports of a port range are specified. 
This can lead user space to think +that a filter matching on a port range was successfully added, when in +fact it was not. For example, with a patched (buggy) iproute2 that only +sends the minimum port, the following commands do not return an error: + + # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass + + # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass + + # tc filter show dev swp1 ingress + filter protocol ip pref 1 flower chain 0 + filter protocol ip pref 1 flower chain 0 handle 0x1 + eth_type ipv4 + ip_proto udp + not_in_hw + action order 1: gact action pass + random type none pass val 0 + index 1 ref 1 bind 1 + + filter protocol ip pref 1 flower chain 0 handle 0x2 + eth_type ipv4 + ip_proto udp + not_in_hw + action order 1: gact action pass + random type none pass val 0 + index 2 ref 1 bind 1 + +Fix by returning an error unless both ports are specified: + + # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass + Error: Both min and max source ports must be specified. + We have an error talking to the kernel + + # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass + Error: Both min and max destination ports must be specified. + We have an error talking to the kernel + +Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges") +Signed-off-by: Ido Schimmel +Reviewed-by: Petr Machata +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_flower.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c +index 815c3e416bc54..652158f612fc2 100644 +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -799,6 +799,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, + TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src, + TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src)); + ++ if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) { ++ NL_SET_ERR_MSG(extack, ++ "Both min and max destination ports must be specified"); ++ return -EINVAL; ++ } ++ if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) { ++ NL_SET_ERR_MSG(extack, ++ "Both min and max source ports must be specified"); ++ return -EINVAL; ++ } + if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && + ntohs(key->tp_range.tp_max.dst) <= + ntohs(key->tp_range.tp_min.dst)) { +-- +2.39.2 + diff --git a/queue-6.4/net-sched-make-psched_mtu-rtnl-less-safe.patch b/queue-6.4/net-sched-make-psched_mtu-rtnl-less-safe.patch new file mode 100644 index 00000000000..d93417c2bc6 --- /dev/null +++ b/queue-6.4/net-sched-make-psched_mtu-rtnl-less-safe.patch @@ -0,0 +1,49 @@ +From 10d76bd0d19f2b3e7cf085d812e9f2ba3263efee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 23:16:34 -0300 +Subject: net/sched: make psched_mtu() RTNL-less safe + +From: Pedro Tammela + +[ Upstream commit 150e33e62c1fa4af5aaab02776b6c3812711d478 ] + +Eric Dumazet says[1]: +------- +Speaking of psched_mtu(), I see that net/sched/sch_pie.c is using it +without holding RTNL, so dev->mtu can be changed underneath. +KCSAN could issue a warning. +------- + +Annotate dev->mtu with READ_ONCE() so KCSAN doesn't issue a warning.
+ +[1] https://lore.kernel.org/all/CANn89iJoJO5VtaJ-2=_d2aOQhb0Xw8iBT_Cxqp2HyuS-zj6azw@mail.gmail.com/ + +v1 -> v2: Fix commit message + +Fixes: d4b36210c2e6 ("net: pkt_sched: PIE AQM scheme") +Suggested-by: Eric Dumazet +Signed-off-by: Pedro Tammela +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230711021634.561598-1-pctammela@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/pkt_sched.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h +index 7dba1c3a7b801..2465d1e79d10e 100644 +--- a/include/net/pkt_sched.h ++++ b/include/net/pkt_sched.h +@@ -134,7 +134,7 @@ extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + */ + static inline unsigned int psched_mtu(const struct net_device *dev) + { +- return dev->mtu + dev->hard_header_len; ++ return READ_ONCE(dev->mtu) + dev->hard_header_len; + } + + static inline struct net *qdisc_net(struct Qdisc *q) +-- +2.39.2 + diff --git a/queue-6.4/net-sched-sch_qfq-account-for-stab-overhead-in-qfq_e.patch b/queue-6.4/net-sched-sch_qfq-account-for-stab-overhead-in-qfq_e.patch new file mode 100644 index 00000000000..a60164fad47 --- /dev/null +++ b/queue-6.4/net-sched-sch_qfq-account-for-stab-overhead-in-qfq_e.patch @@ -0,0 +1,96 @@ +From 6a853a023604a3d01a17866ce9b4d5a5355edc97 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 18:01:02 -0300 +Subject: net/sched: sch_qfq: account for stab overhead in qfq_enqueue + +From: Pedro Tammela + +[ Upstream commit 3e337087c3b5805fe0b8a46ba622a962880b5d64 ] + +Lion says: +------- +In the QFQ scheduler a similar issue to CVE-2023-31436 +persists. + +Consider the following code in net/sched/sch_qfq.c: + +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + unsigned int len = qdisc_pkt_len(skb), gso_segs; + + // ... 
+ + if (unlikely(cl->agg->lmax < len)) { + pr_debug("qfq: increasing maxpkt from %u to %u for class %u", + cl->agg->lmax, len, cl->common.classid); + err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); + if (err) { + cl->qstats.drops++; + return qdisc_drop(skb, sch, to_free); + } + + // ... + + } + +Similarly to CVE-2023-31436, "lmax" is increased without any bounds +checks according to the packet length "len". Usually this would not +impose a problem because packet sizes are naturally limited. + +This is however not the actual packet length, rather the +"qdisc_pkt_len(skb)" which might apply size transformations according to +"struct qdisc_size_table" as created by "qdisc_get_stab()" in +net/sched/sch_api.c if the TCA_STAB option was set when modifying the qdisc. + +A user may choose virtually any size using such a table. + +As a result the same issue as in CVE-2023-31436 can occur, allowing heap +out-of-bounds read / writes in the kmalloc-8192 cache. +------- + +We can create the issue with the following commands: + +tc qdisc add dev $DEV root handle 1: stab mtu 2048 tsize 512 mpu 0 \ +overhead 999999999 linklayer ethernet qfq +tc class add dev $DEV parent 1: classid 1:1 htb rate 6mbit burst 15k +tc filter add dev $DEV parent 1: matchall classid 1:1 +ping -I $DEV 1.1.1.2 + +This is caused by incorrectly assuming that qdisc_pkt_len() returns a +length within the QFQ_MIN_LMAX < len < QFQ_MAX_LMAX. 
+ +Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost") +Reported-by: Lion +Reviewed-by: Eric Dumazet +Signed-off-by: Jamal Hadi Salim +Signed-off-by: Pedro Tammela +Reviewed-by: Simon Horman +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/sch_qfq.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c +index 63a5b277c117f..befaf74b33caa 100644 +--- a/net/sched/sch_qfq.c ++++ b/net/sched/sch_qfq.c +@@ -381,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, + u32 lmax) + { + struct qfq_sched *q = qdisc_priv(sch); +- struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight); ++ struct qfq_aggregate *new_agg; + ++ /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */ ++ if (lmax > QFQ_MAX_LMAX) ++ return -EINVAL; ++ ++ new_agg = qfq_find_agg(q, lmax, weight); + if (new_agg == NULL) { /* create new aggregate */ + new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC); + if (new_agg == NULL) +-- +2.39.2 + diff --git a/queue-6.4/net-sched-sch_qfq-reintroduce-lmax-bound-check-for-m.patch b/queue-6.4/net-sched-sch_qfq-reintroduce-lmax-bound-check-for-m.patch new file mode 100644 index 00000000000..57fad1fd183 --- /dev/null +++ b/queue-6.4/net-sched-sch_qfq-reintroduce-lmax-bound-check-for-m.patch @@ -0,0 +1,52 @@ +From 995657ed88cb063b7ecfad81292ee1ca3ae6f64e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 18:01:00 -0300 +Subject: net/sched: sch_qfq: reintroduce lmax bound check for MTU + +From: Pedro Tammela + +[ Upstream commit 158810b261d02fc7dd92ca9c392d8f8a211a2401 ] + +25369891fcef deletes a check for the case where no 'lmax' is +specified which 3037933448f6 previously fixed as 'lmax' +could be set to the device's MTU without any bound checking +against QFQ_MIN_LMAX and QFQ_MAX_LMAX. Therefore, reintroduce the check.
+ +Fixes: 25369891fcef ("net/sched: sch_qfq: refactor parsing of netlink parameters") +Acked-by: Jamal Hadi Salim +Reviewed-by: Eric Dumazet +Signed-off-by: Pedro Tammela +Reviewed-by: Simon Horman +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/sch_qfq.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c +index dfd9a99e62570..63a5b277c117f 100644 +--- a/net/sched/sch_qfq.c ++++ b/net/sched/sch_qfq.c +@@ -423,10 +423,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + else + weight = 1; + +- if (tb[TCA_QFQ_LMAX]) ++ if (tb[TCA_QFQ_LMAX]) { + lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); +- else ++ } else { ++ /* MTU size is user controlled */ + lmax = psched_mtu(qdisc_dev(sch)); ++ if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) { ++ NL_SET_ERR_MSG_MOD(extack, ++ "MTU size out of bounds for qfq"); ++ return -EINVAL; ++ } ++ } + + inv_w = ONE_FP / weight; + weight = ONE_FP / inv_w; +-- +2.39.2 + diff --git a/queue-6.4/net-sched-taprio-replace-tc_taprio_qopt_offload-enab.patch b/queue-6.4/net-sched-taprio-replace-tc_taprio_qopt_offload-enab.patch new file mode 100644 index 00000000000..5849dee6534 --- /dev/null +++ b/queue-6.4/net-sched-taprio-replace-tc_taprio_qopt_offload-enab.patch @@ -0,0 +1,390 @@ +From b56c299a48f29f6799850dcc1ba7c0497b531ed9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 May 2023 12:19:45 +0300 +Subject: net/sched: taprio: replace tc_taprio_qopt_offload :: enable with a + "cmd" enum + +From: Vladimir Oltean + +[ Upstream commit 2d800bc500fb3fb07a0fb42e2d0a1356fb9e1e8f ] + +Inspired from struct flow_cls_offload :: cmd, in order for taprio to be +able to report statistics (which is future work), it seems that we need +to drill one step further with the ndo_setup_tc(TC_SETUP_QDISC_TAPRIO) +multiplexing, and pass the command as part of the common portion of the +muxed structure. 
+ +Since we already have an "enable" variable in tc_taprio_qopt_offload, +refactor all drivers to check for "cmd" instead of "enable", and reject +every other command except "replace" and "destroy" - to be future proof. + +Signed-off-by: Vladimir Oltean +Reviewed-by: Horatiu Vultur # for lan966x +Acked-by: Kurt Kanzenbach # hellcreek +Reviewed-by: Muhammad Husaini Zulkifli +Reviewed-by: Gerhard Engleder +Signed-off-by: David S. Miller +Stable-dep-of: 8046063df887 ("igc: Rename qbv_enable to taprio_offload_enable") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/hirschmann/hellcreek.c | 14 +++++++++----- + drivers/net/dsa/ocelot/felix_vsc9959.c | 4 +++- + drivers/net/dsa/sja1105/sja1105_tas.c | 7 +++++-- + drivers/net/ethernet/engleder/tsnep_selftests.c | 12 ++++++------ + drivers/net/ethernet/engleder/tsnep_tc.c | 4 +++- + drivers/net/ethernet/freescale/enetc/enetc_qos.c | 6 +++++- + drivers/net/ethernet/intel/igc/igc_main.c | 13 +++++++++++-- + .../net/ethernet/microchip/lan966x/lan966x_tc.c | 10 ++++++++-- + drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 7 +++++-- + drivers/net/ethernet/ti/am65-cpsw-qos.c | 11 ++++++++--- + include/net/pkt_sched.h | 7 ++++++- + net/sched/sch_taprio.c | 4 ++-- + 12 files changed, 71 insertions(+), 28 deletions(-) + +diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c +index 595a548bb0a80..af50001ccdd4e 100644 +--- a/drivers/net/dsa/hirschmann/hellcreek.c ++++ b/drivers/net/dsa/hirschmann/hellcreek.c +@@ -1885,13 +1885,17 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port, + case TC_SETUP_QDISC_TAPRIO: { + struct tc_taprio_qopt_offload *taprio = type_data; + +- if (!hellcreek_validate_schedule(hellcreek, taprio)) +- return -EOPNOTSUPP; ++ switch (taprio->cmd) { ++ case TAPRIO_CMD_REPLACE: ++ if (!hellcreek_validate_schedule(hellcreek, taprio)) ++ return -EOPNOTSUPP; + +- if (taprio->enable) + return hellcreek_port_set_schedule(ds, port, taprio); +- +- return 
hellcreek_port_del_schedule(ds, port); ++ case TAPRIO_CMD_DESTROY: ++ return hellcreek_port_del_schedule(ds, port); ++ default: ++ return -EOPNOTSUPP; ++ } + } + default: + return -EOPNOTSUPP; +diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c +index bd11f9fb95e54..772f8b817390b 100644 +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -1436,7 +1436,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, + + mutex_lock(&ocelot->tas_lock); + +- if (!taprio->enable) { ++ if (taprio->cmd == TAPRIO_CMD_DESTROY) { + ocelot_port_mqprio(ocelot, port, &taprio->mqprio); + ocelot_rmw_rix(ocelot, 0, QSYS_TAG_CONFIG_ENABLE, + QSYS_TAG_CONFIG, port); +@@ -1448,6 +1448,8 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, + + mutex_unlock(&ocelot->tas_lock); + return 0; ++ } else if (taprio->cmd != TAPRIO_CMD_REPLACE) { ++ return -EOPNOTSUPP; + } + + ret = ocelot_port_mqprio(ocelot, port, &taprio->mqprio); +diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c +index e6153848a9509..d7818710bc028 100644 +--- a/drivers/net/dsa/sja1105/sja1105_tas.c ++++ b/drivers/net/dsa/sja1105/sja1105_tas.c +@@ -516,10 +516,11 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port, + /* Can't change an already configured port (must delete qdisc first). + * Can't delete the qdisc from an unconfigured port. 
+ */ +- if (!!tas_data->offload[port] == admin->enable) ++ if ((!!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_REPLACE) || ++ (!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_DESTROY)) + return -EINVAL; + +- if (!admin->enable) { ++ if (admin->cmd == TAPRIO_CMD_DESTROY) { + taprio_offload_free(tas_data->offload[port]); + tas_data->offload[port] = NULL; + +@@ -528,6 +529,8 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port, + return rc; + + return sja1105_static_config_reload(priv, SJA1105_SCHEDULING); ++ } else if (admin->cmd != TAPRIO_CMD_REPLACE) { ++ return -EOPNOTSUPP; + } + + /* The cycle time extension is the amount of time the last cycle from +diff --git a/drivers/net/ethernet/engleder/tsnep_selftests.c b/drivers/net/ethernet/engleder/tsnep_selftests.c +index 1581d6b222320..8a9145f93147c 100644 +--- a/drivers/net/ethernet/engleder/tsnep_selftests.c ++++ b/drivers/net/ethernet/engleder/tsnep_selftests.c +@@ -329,7 +329,7 @@ static bool disable_taprio(struct tsnep_adapter *adapter) + int retval; + + memset(&qopt, 0, sizeof(qopt)); +- qopt.enable = 0; ++ qopt.cmd = TAPRIO_CMD_DESTROY; + retval = tsnep_tc_setup(adapter->netdev, TC_SETUP_QDISC_TAPRIO, &qopt); + if (retval) + return false; +@@ -360,7 +360,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) + for (i = 0; i < 255; i++) + qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; + +- qopt->enable = 1; ++ qopt->cmd = TAPRIO_CMD_REPLACE; + qopt->base_time = ktime_set(0, 0); + qopt->cycle_time = 1500000; + qopt->cycle_time_extension = 0; +@@ -382,7 +382,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) + if (!run_taprio(adapter, qopt, 100)) + goto failed; + +- qopt->enable = 1; ++ qopt->cmd = TAPRIO_CMD_REPLACE; + qopt->base_time = ktime_set(0, 0); + qopt->cycle_time = 411854; + qopt->cycle_time_extension = 0; +@@ -406,7 +406,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) + if (!run_taprio(adapter, qopt, 100)) + goto failed; + +- 
qopt->enable = 1; ++ qopt->cmd = TAPRIO_CMD_REPLACE; + qopt->base_time = ktime_set(0, 0); + delay_base_time(adapter, qopt, 12); + qopt->cycle_time = 125000; +@@ -457,7 +457,7 @@ static bool tsnep_test_taprio_change(struct tsnep_adapter *adapter) + for (i = 0; i < 255; i++) + qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; + +- qopt->enable = 1; ++ qopt->cmd = TAPRIO_CMD_REPLACE; + qopt->base_time = ktime_set(0, 0); + qopt->cycle_time = 100000; + qopt->cycle_time_extension = 0; +@@ -610,7 +610,7 @@ static bool tsnep_test_taprio_extension(struct tsnep_adapter *adapter) + for (i = 0; i < 255; i++) + qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; + +- qopt->enable = 1; ++ qopt->cmd = TAPRIO_CMD_REPLACE; + qopt->base_time = ktime_set(0, 0); + qopt->cycle_time = 100000; + qopt->cycle_time_extension = 50000; +diff --git a/drivers/net/ethernet/engleder/tsnep_tc.c b/drivers/net/ethernet/engleder/tsnep_tc.c +index d083e6684f120..745b191a55402 100644 +--- a/drivers/net/ethernet/engleder/tsnep_tc.c ++++ b/drivers/net/ethernet/engleder/tsnep_tc.c +@@ -325,7 +325,7 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, + if (!adapter->gate_control) + return -EOPNOTSUPP; + +- if (!qopt->enable) { ++ if (qopt->cmd == TAPRIO_CMD_DESTROY) { + /* disable gate control if active */ + mutex_lock(&adapter->gate_control_lock); + +@@ -337,6 +337,8 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, + mutex_unlock(&adapter->gate_control_lock); + + return 0; ++ } else if (qopt->cmd != TAPRIO_CMD_REPLACE) { ++ return -EOPNOTSUPP; + } + + retval = tsnep_validate_gcl(qopt); +diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c +index 126007ab70f61..dfec50106106f 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c +@@ -65,7 +65,7 @@ static int enetc_setup_taprio(struct net_device *ndev, + gcl_len = admin_conf->num_entries; + + tge = enetc_rd(hw, 
ENETC_PTGCR); +- if (!admin_conf->enable) { ++ if (admin_conf->cmd == TAPRIO_CMD_DESTROY) { + enetc_wr(hw, ENETC_PTGCR, tge & ~ENETC_PTGCR_TGE); + enetc_reset_ptcmsdur(hw); + +@@ -138,6 +138,10 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int err, i; + ++ if (taprio->cmd != TAPRIO_CMD_REPLACE && ++ taprio->cmd != TAPRIO_CMD_DESTROY) ++ return -EOPNOTSUPP; ++ + /* TSD and Qbv are mutually exclusive in hardware */ + for (i = 0; i < priv->num_tx_rings; i++) + if (priv->tx_ring[i]->tsd_enable) +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index e7bd2c60ee383..ae986e44a4718 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -6117,9 +6117,18 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + size_t n; + int i; + +- adapter->qbv_enable = qopt->enable; ++ switch (qopt->cmd) { ++ case TAPRIO_CMD_REPLACE: ++ adapter->qbv_enable = true; ++ break; ++ case TAPRIO_CMD_DESTROY: ++ adapter->qbv_enable = false; ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } + +- if (!qopt->enable) ++ if (!adapter->qbv_enable) + return igc_tsn_clear_schedule(adapter); + + if (qopt->base_time < 0) +diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c +index cf0cc7562d042..ee652f2d23595 100644 +--- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c ++++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c +@@ -21,8 +21,14 @@ static int lan966x_tc_setup_qdisc_mqprio(struct lan966x_port *port, + static int lan966x_tc_setup_qdisc_taprio(struct lan966x_port *port, + struct tc_taprio_qopt_offload *taprio) + { +- return taprio->enable ? 
lan966x_taprio_add(port, taprio) : +- lan966x_taprio_del(port); ++ switch (taprio->cmd) { ++ case TAPRIO_CMD_REPLACE: ++ return lan966x_taprio_add(port, taprio); ++ case TAPRIO_CMD_DESTROY: ++ return lan966x_taprio_del(port); ++ default: ++ return -EOPNOTSUPP; ++ } + } + + static int lan966x_tc_setup_qdisc_tbf(struct lan966x_port *port, +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +index 9d55226479b4a..ac41ef4cbd2f0 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +@@ -966,8 +966,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, + return -EOPNOTSUPP; + } + +- if (!qopt->enable) ++ if (qopt->cmd == TAPRIO_CMD_DESTROY) + goto disable; ++ else if (qopt->cmd != TAPRIO_CMD_REPLACE) ++ return -EOPNOTSUPP; ++ + if (qopt->num_entries >= dep) + return -EINVAL; + if (!qopt->cycle_time) +@@ -988,7 +991,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, + + mutex_lock(&priv->plat->est->lock); + priv->plat->est->gcl_size = size; +- priv->plat->est->enable = qopt->enable; ++ priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE; + mutex_unlock(&priv->plat->est->lock); + + for (i = 0; i < size; i++) { +diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c +index 3a908db6e5b22..eced87fa261c9 100644 +--- a/drivers/net/ethernet/ti/am65-cpsw-qos.c ++++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c +@@ -450,7 +450,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, + + am65_cpsw_est_update_state(ndev); + +- if (!est_new->taprio.enable) { ++ if (est_new->taprio.cmd == TAPRIO_CMD_DESTROY) { + am65_cpsw_stop_est(ndev); + return ret; + } +@@ -476,7 +476,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, + am65_cpsw_est_set_sched_list(ndev, est_new); + am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf); + +- am65_cpsw_est_set(ndev, est_new->taprio.enable); ++ 
am65_cpsw_est_set(ndev, est_new->taprio.cmd == TAPRIO_CMD_REPLACE); + + if (tact == TACT_PROG) { + ret = am65_cpsw_timer_set(ndev, est_new); +@@ -520,7 +520,7 @@ static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data) + am65_cpsw_cp_taprio(taprio, &est_new->taprio); + ret = am65_cpsw_configure_taprio(ndev, est_new); + if (!ret) { +- if (taprio->enable) { ++ if (taprio->cmd == TAPRIO_CMD_REPLACE) { + devm_kfree(&ndev->dev, port->qos.est_admin); + + port->qos.est_admin = est_new; +@@ -564,8 +564,13 @@ static void am65_cpsw_est_link_up(struct net_device *ndev, int link_speed) + static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data) + { + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); ++ struct tc_taprio_qopt_offload *taprio = type_data; + struct am65_cpsw_common *common = port->common; + ++ if (taprio->cmd != TAPRIO_CMD_REPLACE && ++ taprio->cmd != TAPRIO_CMD_DESTROY) ++ return -EOPNOTSUPP; ++ + if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) + return -ENODEV; + +diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h +index 5722931d83d43..7dba1c3a7b801 100644 +--- a/include/net/pkt_sched.h ++++ b/include/net/pkt_sched.h +@@ -187,6 +187,11 @@ struct tc_taprio_caps { + bool broken_mqprio:1; + }; + ++enum tc_taprio_qopt_cmd { ++ TAPRIO_CMD_REPLACE, ++ TAPRIO_CMD_DESTROY, ++}; ++ + struct tc_taprio_sched_entry { + u8 command; /* TC_TAPRIO_CMD_* */ + +@@ -198,7 +203,7 @@ struct tc_taprio_sched_entry { + struct tc_taprio_qopt_offload { + struct tc_mqprio_qopt_offload mqprio; + struct netlink_ext_ack *extack; +- u8 enable; ++ enum tc_taprio_qopt_cmd cmd; + ktime_t base_time; + u64 cycle_time; + u64 cycle_time_extension; +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index cf0e61ed92253..4caf80ddc6721 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -1527,7 +1527,7 @@ static int taprio_enable_offload(struct net_device *dev, + "Not enough memory for enabling offload mode"); + return -ENOMEM; 
+ } +- offload->enable = 1; ++ offload->cmd = TAPRIO_CMD_REPLACE; + offload->extack = extack; + mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt); + offload->mqprio.extack = extack; +@@ -1575,7 +1575,7 @@ static int taprio_disable_offload(struct net_device *dev, + "Not enough memory to disable offload mode"); + return -ENOMEM; + } +- offload->enable = 0; ++ offload->cmd = TAPRIO_CMD_DESTROY; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err < 0) { +-- +2.39.2 + diff --git a/queue-6.4/net-txgbe-fix-eeprom-calculation-error.patch b/queue-6.4/net-txgbe-fix-eeprom-calculation-error.patch new file mode 100644 index 00000000000..c4f34d05a4a --- /dev/null +++ b/queue-6.4/net-txgbe-fix-eeprom-calculation-error.patch @@ -0,0 +1,40 @@ +From a1a847163ec939ce74bf72f6a3e83a252842afac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 14:34:14 +0800 +Subject: net: txgbe: fix eeprom calculation error + +From: Jiawen Wu + +[ Upstream commit aa846677a9fb19a0f2c58154c140398aa92a87ba ] + +For some device types like TXGBE_ID_XAUI, *checksum computed in +txgbe_calc_eeprom_checksum() is larger than TXGBE_EEPROM_SUM. Remove the +limit on the size of *checksum. 
+ +Fixes: 049fe5365324 ("net: txgbe: Add operations to interact with firmware") +Fixes: 5e2ea7801fac ("net: txgbe: Fix unsigned comparison to zero in txgbe_calc_eeprom_checksum()") +Signed-off-by: Jiawen Wu +Link: https://lore.kernel.org/r/20230711063414.3311-1-jiawenwu@trustnetic.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +index ebc46f3be0569..fc37af2e71ffc 100644 +--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c ++++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +@@ -196,9 +196,6 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) + if (eeprom_ptrs) + kvfree(eeprom_ptrs); + +- if (*checksum > TXGBE_EEPROM_SUM) +- return -EINVAL; +- + *checksum = TXGBE_EEPROM_SUM - *checksum; + + return 0; +-- +2.39.2 + diff --git a/queue-6.4/netdevsim-fix-uninitialized-data-in-nsim_dev_trap_fa.patch b/queue-6.4/netdevsim-fix-uninitialized-data-in-nsim_dev_trap_fa.patch new file mode 100644 index 00000000000..70c14f43322 --- /dev/null +++ b/queue-6.4/netdevsim-fix-uninitialized-data-in-nsim_dev_trap_fa.patch @@ -0,0 +1,55 @@ +From 9904cc769d6876b45e8f23b7ac006d00e56bb023 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 11:52:26 +0300 +Subject: netdevsim: fix uninitialized data in nsim_dev_trap_fa_cookie_write() + +From: Dan Carpenter + +[ Upstream commit f72207a5c0dbaaf6921cf9a6c0d2fd0bc249ea78 ] + +The simple_write_to_buffer() function is designed to handle partial +writes. It returns negatives on error, otherwise it returns the number +of bytes that were able to be copied. This code doesn't check the +return properly. We only know that the first byte is written, the rest +of the buffer might be uninitialized. + +There is no need to use the simple_write_to_buffer() function. 
+Partial writes are prohibited by the "if (*ppos != 0)" check at the +start of the function. Just use memdup_user() and copy the whole +buffer. + +Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata") +Signed-off-by: Dan Carpenter +Reviewed-by: Pavan Chebbi +Reviewed-by: Ido Schimmel +Link: https://lore.kernel.org/r/7c1f950b-3a7d-4252-82a6-876e53078ef7@moroto.mountain +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/netdevsim/dev.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c +index 6045bece2654d..b4d3b9cde8bd6 100644 +--- a/drivers/net/netdevsim/dev.c ++++ b/drivers/net/netdevsim/dev.c +@@ -184,13 +184,10 @@ static ssize_t nsim_dev_trap_fa_cookie_write(struct file *file, + cookie_len = (count - 1) / 2; + if ((count - 1) % 2) + return -EINVAL; +- buf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); +- if (!buf) +- return -ENOMEM; + +- ret = simple_write_to_buffer(buf, count, ppos, data, count); +- if (ret < 0) +- goto free_buf; ++ buf = memdup_user(data, count); ++ if (IS_ERR(buf)) ++ return PTR_ERR(buf); + + fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len, + GFP_KERNEL | __GFP_NOWARN); +-- +2.39.2 + diff --git a/queue-6.4/netfilter-conntrack-don-t-fold-port-numbers-into-add.patch b/queue-6.4/netfilter-conntrack-don-t-fold-port-numbers-into-add.patch new file mode 100644 index 00000000000..263f24f07ed --- /dev/null +++ b/queue-6.4/netfilter-conntrack-don-t-fold-port-numbers-into-add.patch @@ -0,0 +1,94 @@ +From a0f10b653153e7204eca71f671829ee8f377403a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Jul 2023 12:25:23 +0200 +Subject: netfilter: conntrack: don't fold port numbers into addresses before + hashing + +From: Florian Westphal + +[ Upstream commit eaf9e7192ec9af2fbf1b6eb2299dd0feca6c5f7e ] + +Originally this used jhash2() over tuple and folded the zone id, +the pernet hash value, destination port and l4 
protocol number into the +32bit seed value. + +When the switch to siphash was done, I used an on-stack temporary +buffer to build a suitable key to be hashed via siphash(). + +But this showed up as performance regression, so I got rid of +the temporary copy and collected to-be-hashed data in 4 u64 variables. + +This makes it easy to build tuples that produce the same hash, which isn't +desirable even though chain lengths are limited. + +Switch back to plain siphash, but just like with jhash2(), take advantage +of the fact that most of to-be-hashed data is already in a suitable order. + +Use an empty struct as annotation in 'struct nf_conntrack_tuple' to mark +last member that can be used as hash input. + +The only remaining data that isn't present in the tuple structure are the +zone identifier and the pernet hash: fold those into the key. + +Fixes: d2c806abcf0b ("netfilter: conntrack: use siphash_4u64") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_conntrack_tuple.h | 3 +++ + net/netfilter/nf_conntrack_core.c | 20 +++++++------------- + 2 files changed, 10 insertions(+), 13 deletions(-) + +diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h +index 9334371c94e2b..f7dd950ff2509 100644 +--- a/include/net/netfilter/nf_conntrack_tuple.h ++++ b/include/net/netfilter/nf_conntrack_tuple.h +@@ -67,6 +67,9 @@ struct nf_conntrack_tuple { + /* The protocol. 
*/ + u_int8_t protonum; + ++ /* The direction must be ignored for the tuplehash */ ++ struct { } __nfct_hash_offsetend; ++ + /* The direction (for tuplehash) */ + u_int8_t dir; + } dst; +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index d119f1d4c2fc8..992393102d5f5 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -211,24 +211,18 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, + unsigned int zoneid, + const struct net *net) + { +- u64 a, b, c, d; ++ siphash_key_t key; + + get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); + +- /* The direction must be ignored, handle usable tuplehash members manually */ +- a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3]; +- b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3]; ++ key = nf_conntrack_hash_rnd; + +- c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16; +- c |= tuple->dst.protonum; ++ key.key[0] ^= zoneid; ++ key.key[1] ^= net_hash_mix(net); + +- d = (u64)zoneid << 32 | net_hash_mix(net); +- +- /* IPv4: u3.all[1,2,3] == 0 */ +- c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2]; +- d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2]; +- +- return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd); ++ return siphash((void *)tuple, ++ offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend), ++ &key); + } + + static u32 scale_hash(u32 hash) +-- +2.39.2 + diff --git a/queue-6.4/netfilter-nf_tables-report-use-refcount-overflow.patch b/queue-6.4/netfilter-nf_tables-report-use-refcount-overflow.patch new file mode 100644 index 00000000000..94bf990a231 --- /dev/null +++ b/queue-6.4/netfilter-nf_tables-report-use-refcount-overflow.patch @@ -0,0 +1,752 @@ +From 6020f819a7c3532db9f038c037830fd7e15d9058 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 16:24:27 +0200 +Subject: netfilter: nf_tables: report use 
refcount overflow + +From: Pablo Neira Ayuso + +[ Upstream commit 1689f25924ada8fe14a4a82c38925d04994c7142 ] + +Overflow use refcount checks are not complete. + +Add helper function to deal with object reference counter tracking. +Report -EMFILE in case UINT_MAX is reached. + +nft_use_dec() splats in case that reference counter underflows, +which should not ever happen. + +Add nft_use_inc_restore() and nft_use_dec_restore() which are used +to restore reference counter from error and abort paths. + +Use u32 in nft_flowtable and nft_object since helper functions cannot +work on bitfields. + +Remove the few early incomplete checks now that the helper functions +are in place and used to check for refcount overflow. + +Fixes: 96518518cc41 ("netfilter: add nftables") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 31 +++++- + net/netfilter/nf_tables_api.c | 163 ++++++++++++++++++------------ + net/netfilter/nft_flow_offload.c | 6 +- + net/netfilter/nft_immediate.c | 8 +- + net/netfilter/nft_objref.c | 8 +- + 5 files changed, 141 insertions(+), 75 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index ee47d7143d99f..1b0beb8f08aee 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1211,6 +1211,29 @@ int __nft_release_basechain(struct nft_ctx *ctx); + + unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); + ++static inline bool nft_use_inc(u32 *use) ++{ ++ if (*use == UINT_MAX) ++ return false; ++ ++ (*use)++; ++ ++ return true; ++} ++ ++static inline void nft_use_dec(u32 *use) ++{ ++ WARN_ON_ONCE((*use)-- == 0); ++} ++ ++/* For error and abort path: restore use counter to previous state. 
*/ ++static inline void nft_use_inc_restore(u32 *use) ++{ ++ WARN_ON_ONCE(!nft_use_inc(use)); ++} ++ ++#define nft_use_dec_restore nft_use_dec ++ + /** + * struct nft_table - nf_tables table + * +@@ -1296,8 +1319,8 @@ struct nft_object { + struct list_head list; + struct rhlist_head rhlhead; + struct nft_object_hash_key key; +- u32 genmask:2, +- use:30; ++ u32 genmask:2; ++ u32 use; + u64 handle; + u16 udlen; + u8 *udata; +@@ -1399,8 +1422,8 @@ struct nft_flowtable { + char *name; + int hooknum; + int ops_len; +- u32 genmask:2, +- use:30; ++ u32 genmask:2; ++ u32 use; + u64 handle; + /* runtime data below here */ + struct list_head hook_list ____cacheline_aligned; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 79719e8cda799..18546f9b2a63a 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -253,8 +253,10 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) + if (chain->bound) + return -EBUSY; + ++ if (!nft_use_inc(&chain->use)) ++ return -EMFILE; ++ + chain->bound = true; +- chain->use++; + nft_chain_trans_bind(ctx, chain); + + return 0; +@@ -437,7 +439,7 @@ static int nft_delchain(struct nft_ctx *ctx) + if (IS_ERR(trans)) + return PTR_ERR(trans); + +- ctx->table->use--; ++ nft_use_dec(&ctx->table->use); + nft_deactivate_next(ctx->net, ctx->chain); + + return 0; +@@ -476,7 +478,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) + /* You cannot delete the same rule twice */ + if (nft_is_active_next(ctx->net, rule)) { + nft_deactivate_next(ctx->net, rule); +- ctx->chain->use--; ++ nft_use_dec(&ctx->chain->use); + return 0; + } + return -ENOENT; +@@ -643,7 +645,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) + nft_map_deactivate(ctx, set); + + nft_deactivate_next(ctx->net, set); +- ctx->table->use--; ++ nft_use_dec(&ctx->table->use); + + return err; + } +@@ -675,7 +677,7 @@ static int nft_delobj(struct nft_ctx *ctx, 
struct nft_object *obj) + return err; + + nft_deactivate_next(ctx->net, obj); +- ctx->table->use--; ++ nft_use_dec(&ctx->table->use); + + return err; + } +@@ -710,7 +712,7 @@ static int nft_delflowtable(struct nft_ctx *ctx, + return err; + + nft_deactivate_next(ctx->net, flowtable); +- ctx->table->use--; ++ nft_use_dec(&ctx->table->use); + + return err; + } +@@ -2395,9 +2397,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + struct nft_chain *chain; + int err; + +- if (table->use == UINT_MAX) +- return -EOVERFLOW; +- + if (nla[NFTA_CHAIN_HOOK]) { + struct nft_stats __percpu *stats = NULL; + struct nft_chain_hook hook = {}; +@@ -2493,6 +2492,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + if (err < 0) + goto err_destroy_chain; + ++ if (!nft_use_inc(&table->use)) { ++ err = -EMFILE; ++ goto err_use; ++ } ++ + trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); +@@ -2509,10 +2513,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + goto err_unregister_hook; + } + +- table->use++; +- + return 0; ++ + err_unregister_hook: ++ nft_use_dec_restore(&table->use); ++err_use: + nf_tables_unregister_hook(net, table, chain); + err_destroy_chain: + nf_tables_chain_destroy(ctx); +@@ -3841,9 +3846,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, + return -EINVAL; + handle = nf_tables_alloc_handle(table); + +- if (chain->use == UINT_MAX) +- return -EOVERFLOW; +- + if (nla[NFTA_RULE_POSITION]) { + pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); + old_rule = __nft_rule_lookup(chain, pos_handle); +@@ -3937,6 +3939,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, + } + } + ++ if (!nft_use_inc(&chain->use)) { ++ err = -EMFILE; ++ goto err_release_rule; ++ } ++ + if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { + err = nft_delrule(&ctx, old_rule); + if (err < 0) +@@ 
-3968,7 +3975,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, + } + } + kvfree(expr_info); +- chain->use++; + + if (flow) + nft_trans_flow_rule(trans) = flow; +@@ -3979,6 +3985,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, + return 0; + + err_destroy_flow_rule: ++ nft_use_dec_restore(&chain->use); + if (flow) + nft_flow_rule_destroy(flow); + err_release_rule: +@@ -5015,9 +5022,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + alloc_size = sizeof(*set) + size + udlen; + if (alloc_size < size || alloc_size > INT_MAX) + return -ENOMEM; ++ ++ if (!nft_use_inc(&table->use)) ++ return -EMFILE; ++ + set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT); +- if (!set) +- return -ENOMEM; ++ if (!set) { ++ err = -ENOMEM; ++ goto err_alloc; ++ } + + name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT); + if (!name) { +@@ -5075,7 +5088,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + goto err_set_expr_alloc; + + list_add_tail_rcu(&set->list, &table->sets); +- table->use++; ++ + return 0; + + err_set_expr_alloc: +@@ -5087,6 +5100,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + kfree(set->name); + err_set_name: + kvfree(set); ++err_alloc: ++ nft_use_dec_restore(&table->use); ++ + return err; + } + +@@ -5225,9 +5241,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *i; + struct nft_set_iter iter; + +- if (set->use == UINT_MAX) +- return -EOVERFLOW; +- + if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) + return -EBUSY; + +@@ -5255,10 +5268,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + return iter.err; + } + bind: ++ if (!nft_use_inc(&set->use)) ++ return -EMFILE; ++ + binding->chain = ctx->chain; + list_add_tail_rcu(&binding->list, &set->bindings); + nft_set_trans_bind(ctx, set); +- set->use++; + + 
return 0; + } +@@ -5332,7 +5347,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) + nft_clear(ctx->net, set); + } + +- set->use++; ++ nft_use_inc_restore(&set->use); + } + EXPORT_SYMBOL_GPL(nf_tables_activate_set); + +@@ -5348,7 +5363,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + else + list_del_rcu(&binding->list); + +- set->use--; ++ nft_use_dec(&set->use); + break; + case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) { +@@ -5357,7 +5372,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + + nft_deactivate_next(ctx->net, set); + } +- set->use--; ++ nft_use_dec(&set->use); + return; + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: +@@ -5365,7 +5380,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + +- set->use--; ++ nft_use_dec(&set->use); + fallthrough; + default: + nf_tables_unbind_set(ctx, set, binding, +@@ -6134,7 +6149,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, + nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) +- (*nft_set_ext_obj(ext))->use--; ++ nft_use_dec(&(*nft_set_ext_obj(ext))->use); + kfree(elem); + } + EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +@@ -6636,8 +6651,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + set->objtype, genmask); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); ++ obj = NULL; + goto err_parse_key_end; + } ++ ++ if (!nft_use_inc(&obj->use)) { ++ err = -EMFILE; ++ obj = NULL; ++ goto err_parse_key_end; ++ } ++ + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + if (err < 0) + goto err_parse_key_end; +@@ -6706,10 +6729,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + if (flags) + *nft_set_ext_flags(ext) = flags; + +- if (obj) { ++ if (obj) + *nft_set_ext_obj(ext) = obj; +- 
obj->use++; +- } ++ + if (ulen > 0) { + if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) { + err = -EINVAL; +@@ -6774,12 +6796,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + kfree(trans); + err_elem_free: + nf_tables_set_elem_destroy(ctx, set, elem.priv); +- if (obj) +- obj->use--; + err_parse_data: + if (nla[NFTA_SET_ELEM_DATA] != NULL) + nft_data_release(&elem.data.val, desc.type); + err_parse_key_end: ++ if (obj) ++ nft_use_dec_restore(&obj->use); ++ + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); + err_parse_key: + nft_data_release(&elem.key.val, NFT_DATA_VALUE); +@@ -6859,7 +6882,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; +- chain->use++; ++ nft_use_inc_restore(&chain->use); + break; + } + } +@@ -6874,7 +6897,7 @@ static void nft_setelem_data_activate(const struct net *net, + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_hold(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) +- (*nft_set_ext_obj(ext))->use++; ++ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); + } + + static void nft_setelem_data_deactivate(const struct net *net, +@@ -6886,7 +6909,7 @@ static void nft_setelem_data_deactivate(const struct net *net, + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_release(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) +- (*nft_set_ext_obj(ext))->use--; ++ nft_use_dec(&(*nft_set_ext_obj(ext))->use); + } + + static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, +@@ -7429,9 +7452,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, + + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + ++ if (!nft_use_inc(&table->use)) ++ return -EMFILE; ++ + type = nft_obj_type_get(net, objtype); +- if (IS_ERR(type)) +- return PTR_ERR(type); ++ if (IS_ERR(type)) { ++ 
err = PTR_ERR(type); ++ goto err_type; ++ } + + obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); + if (IS_ERR(obj)) { +@@ -7465,7 +7493,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, + goto err_obj_ht; + + list_add_tail_rcu(&obj->list, &table->objects); +- table->use++; ++ + return 0; + err_obj_ht: + /* queued in transaction log */ +@@ -7481,6 +7509,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, + kfree(obj); + err_init: + module_put(type->owner); ++err_type: ++ nft_use_dec_restore(&table->use); ++ + return err; + } + +@@ -7882,7 +7913,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, + case NFT_TRANS_PREPARE: + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: +- flowtable->use--; ++ nft_use_dec(&flowtable->use); + fallthrough; + default: + return; +@@ -8236,9 +8267,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb, + + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + ++ if (!nft_use_inc(&table->use)) ++ return -EMFILE; ++ + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT); +- if (!flowtable) +- return -ENOMEM; ++ if (!flowtable) { ++ err = -ENOMEM; ++ goto flowtable_alloc; ++ } + + flowtable->table = table; + flowtable->handle = nf_tables_alloc_handle(table); +@@ -8293,7 +8329,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb, + goto err5; + + list_add_tail_rcu(&flowtable->list, &table->flowtables); +- table->use++; + + return 0; + err5: +@@ -8310,6 +8345,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, + kfree(flowtable->name); + err1: + kfree(flowtable); ++flowtable_alloc: ++ nft_use_dec_restore(&table->use); ++ + return err; + } + +@@ -9680,7 +9718,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) + */ + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !list_empty(&nft_trans_set(trans)->bindings)) +- trans->ctx.table->use--; ++ nft_use_dec(&trans->ctx.table->use); + } + 
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_NEWSET, GFP_KERNEL); +@@ -9910,7 +9948,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nft_trans_destroy(trans); + break; + } +- trans->ctx.table->use--; ++ nft_use_dec_restore(&trans->ctx.table->use); + nft_chain_del(trans->ctx.chain); + nf_tables_unregister_hook(trans->ctx.net, + trans->ctx.table, +@@ -9923,7 +9961,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + list_splice(&nft_trans_chain_hooks(trans), + &nft_trans_basechain(trans)->hook_list); + } else { +- trans->ctx.table->use++; ++ nft_use_inc_restore(&trans->ctx.table->use); + nft_clear(trans->ctx.net, trans->ctx.chain); + } + nft_trans_destroy(trans); +@@ -9933,7 +9971,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nft_trans_destroy(trans); + break; + } +- trans->ctx.chain->use--; ++ nft_use_dec_restore(&trans->ctx.chain->use); + list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), +@@ -9943,7 +9981,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + break; + case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: +- trans->ctx.chain->use++; ++ nft_use_inc_restore(&trans->ctx.chain->use); + nft_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) +@@ -9956,7 +9994,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nft_trans_destroy(trans); + break; + } +- trans->ctx.table->use--; ++ nft_use_dec_restore(&trans->ctx.table->use); + if (nft_trans_set_bound(trans)) { + nft_trans_destroy(trans); + break; +@@ -9965,7 +10003,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + break; + case NFT_MSG_DELSET: + case NFT_MSG_DESTROYSET: +- trans->ctx.table->use++; ++ 
nft_use_inc_restore(&trans->ctx.table->use); + nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); +@@ -10009,13 +10047,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); + nft_trans_destroy(trans); + } else { +- trans->ctx.table->use--; ++ nft_use_dec_restore(&trans->ctx.table->use); + nft_obj_del(nft_trans_obj(trans)); + } + break; + case NFT_MSG_DELOBJ: + case NFT_MSG_DESTROYOBJ: +- trans->ctx.table->use++; ++ nft_use_inc_restore(&trans->ctx.table->use); + nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_trans_destroy(trans); + break; +@@ -10024,7 +10062,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nft_unregister_flowtable_net_hooks(net, + &nft_trans_flowtable_hooks(trans)); + } else { +- trans->ctx.table->use--; ++ nft_use_dec_restore(&trans->ctx.table->use); + list_del_rcu(&nft_trans_flowtable(trans)->list); + nft_unregister_flowtable_net_hooks(net, + &nft_trans_flowtable(trans)->hook_list); +@@ -10036,7 +10074,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + list_splice(&nft_trans_flowtable_hooks(trans), + &nft_trans_flowtable(trans)->hook_list); + } else { +- trans->ctx.table->use++; ++ nft_use_inc_restore(&trans->ctx.table->use); + nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + } + nft_trans_destroy(trans); +@@ -10486,8 +10524,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + if (desc->flags & NFT_DATA_DESC_SETELEM && + chain->flags & NFT_CHAIN_BINDING) + return -EINVAL; ++ if (!nft_use_inc(&chain->use)) ++ return -EMFILE; + +- chain->use++; + data->verdict.chain = chain; + break; + } +@@ -10505,7 +10544,7 @@ static void nft_verdict_uninit(const struct nft_data *data) + case NFT_JUMP: + case NFT_GOTO: + chain = 
data->verdict.chain; +- chain->use--; ++ nft_use_dec(&chain->use); + break; + } + } +@@ -10674,11 +10713,11 @@ int __nft_release_basechain(struct nft_ctx *ctx) + nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); + list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { + list_del(&rule->list); +- ctx->chain->use--; ++ nft_use_dec(&ctx->chain->use); + nf_tables_rule_release(ctx, rule); + } + nft_chain_del(ctx->chain); +- ctx->table->use--; ++ nft_use_dec(&ctx->table->use); + nf_tables_chain_destroy(ctx); + + return 0; +@@ -10728,18 +10767,18 @@ static void __nft_release_table(struct net *net, struct nft_table *table) + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); +- chain->use--; ++ nft_use_dec(&chain->use); + nf_tables_rule_release(&ctx, rule); + } + } + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { + list_del(&flowtable->list); +- table->use--; ++ nft_use_dec(&table->use); + nf_tables_flowtable_destroy(flowtable); + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); +- table->use--; ++ nft_use_dec(&table->use); + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + +@@ -10747,13 +10786,13 @@ static void __nft_release_table(struct net *net, struct nft_table *table) + } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + nft_obj_del(obj); +- table->use--; ++ nft_use_dec(&table->use); + nft_obj_destroy(&ctx, obj); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + ctx.chain = chain; + nft_chain_del(chain); +- table->use--; ++ nft_use_dec(&table->use); + nf_tables_chain_destroy(&ctx); + } + nf_tables_table_destroy(&ctx); +diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c +index e860d8fe0e5e2..03159c6c6c4b6 100644 +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -404,8 +404,10 @@ static int 
nft_flow_offload_init(const struct nft_ctx *ctx, + if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + ++ if (!nft_use_inc(&flowtable->use)) ++ return -EMFILE; ++ + priv->flowtable = flowtable; +- flowtable->use++; + + return nf_ct_netns_get(ctx->net, ctx->family); + } +@@ -424,7 +426,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx, + { + struct nft_flow_offload *priv = nft_expr_priv(expr); + +- priv->flowtable->use++; ++ nft_use_inc_restore(&priv->flowtable->use); + } + + static void nft_flow_offload_destroy(const struct nft_ctx *ctx, +diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c +index 3d76ebfe8939b..407d7197f75bb 100644 +--- a/net/netfilter/nft_immediate.c ++++ b/net/netfilter/nft_immediate.c +@@ -159,7 +159,7 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, + default: + nft_chain_del(chain); + chain->bound = false; +- chain->table->use--; ++ nft_use_dec(&chain->table->use); + break; + } + break; +@@ -198,7 +198,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, + * let the transaction records release this chain and its rules. 
+ */ + if (chain->bound) { +- chain->use--; ++ nft_use_dec(&chain->use); + break; + } + +@@ -206,9 +206,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, + chain_ctx = *ctx; + chain_ctx.chain = chain; + +- chain->use--; ++ nft_use_dec(&chain->use); + list_for_each_entry_safe(rule, n, &chain->rules, list) { +- chain->use--; ++ nft_use_dec(&chain->use); + list_del(&rule->list); + nf_tables_rule_destroy(&chain_ctx, rule); + } +diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c +index a48dd5b5d45b1..509011b1ef597 100644 +--- a/net/netfilter/nft_objref.c ++++ b/net/netfilter/nft_objref.c +@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx, + if (IS_ERR(obj)) + return -ENOENT; + ++ if (!nft_use_inc(&obj->use)) ++ return -EMFILE; ++ + nft_objref_priv(expr) = obj; +- obj->use++; + + return 0; + } +@@ -72,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx, + if (phase == NFT_TRANS_COMMIT) + return; + +- obj->use--; ++ nft_use_dec(&obj->use); + } + + static void nft_objref_activate(const struct nft_ctx *ctx, +@@ -80,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx, + { + struct nft_object *obj = nft_objref_priv(expr); + +- obj->use++; ++ nft_use_inc_restore(&obj->use); + } + + static const struct nft_expr_ops nft_objref_ops = { +-- +2.39.2 + diff --git a/queue-6.4/ntb-amd-fix-error-handling-in-amd_ntb_pci_driver_ini.patch b/queue-6.4/ntb-amd-fix-error-handling-in-amd_ntb_pci_driver_ini.patch new file mode 100644 index 00000000000..e23079bc635 --- /dev/null +++ b/queue-6.4/ntb-amd-fix-error-handling-in-amd_ntb_pci_driver_ini.patch @@ -0,0 +1,64 @@ +From 7e2c2a25a65dd8ef5fe16a5b71182c1181093d7c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 5 Nov 2022 09:43:09 +0000 +Subject: NTB: amd: Fix error handling in amd_ntb_pci_driver_init() + +From: Yuan Can + +[ Upstream commit 98af0a33c1101c29b3ce4f0cf4715fd927c717f9 ] + +A problem about ntb_hw_amd create debugfs failed is 
triggered with the +following log given: + + [ 618.431232] AMD(R) PCI-E Non-Transparent Bridge Driver 1.0 + [ 618.433284] debugfs: Directory 'ntb_hw_amd' with parent '/' already present! + +The reason is that amd_ntb_pci_driver_init() returns pci_register_driver() +directly without checking its return value, if pci_register_driver() +failed, it returns without destroy the newly created debugfs, resulting +the debugfs of ntb_hw_amd can never be created later. + + amd_ntb_pci_driver_init() + debugfs_create_dir() # create debugfs directory + pci_register_driver() + driver_register() + bus_add_driver() + priv = kzalloc(...) # OOM happened + # return without destroy debugfs directory + +Fix by removing debugfs when pci_register_driver() returns error. + +Fixes: a1b3695820aa ("NTB: Add support for AMD PCI-Express Non-Transparent Bridge") +Signed-off-by: Yuan Can +Signed-off-by: Jon Mason +Signed-off-by: Sasha Levin +--- + drivers/ntb/hw/amd/ntb_hw_amd.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c +index 04550b1f984c6..730f2103b91d1 100644 +--- a/drivers/ntb/hw/amd/ntb_hw_amd.c ++++ b/drivers/ntb/hw/amd/ntb_hw_amd.c +@@ -1338,12 +1338,17 @@ static struct pci_driver amd_ntb_pci_driver = { + + static int __init amd_ntb_pci_driver_init(void) + { ++ int ret; + pr_info("%s %s\n", NTB_DESC, NTB_VER); + + if (debugfs_initialized()) + debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + +- return pci_register_driver(&amd_ntb_pci_driver); ++ ret = pci_register_driver(&amd_ntb_pci_driver); ++ if (ret) ++ debugfs_remove_recursive(debugfs_dir); ++ ++ return ret; + } + module_init(amd_ntb_pci_driver_init); + +-- +2.39.2 + diff --git a/queue-6.4/ntb-idt-fix-error-handling-in-idt_pci_driver_init.patch b/queue-6.4/ntb-idt-fix-error-handling-in-idt_pci_driver_init.patch new file mode 100644 index 00000000000..4f60eb29f71 --- /dev/null +++ 
b/queue-6.4/ntb-idt-fix-error-handling-in-idt_pci_driver_init.patch @@ -0,0 +1,66 @@ +From 180209186f21d1030d210c3ead622260d62483cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 5 Nov 2022 09:43:01 +0000 +Subject: ntb: idt: Fix error handling in idt_pci_driver_init() + +From: Yuan Can + +[ Upstream commit c012968259b451dc4db407f2310fe131eaefd800 ] + +A problem about ntb_hw_idt create debugfs failed is triggered with the +following log given: + + [ 1236.637636] IDT PCI-E Non-Transparent Bridge Driver 2.0 + [ 1236.639292] debugfs: Directory 'ntb_hw_idt' with parent '/' already present! + +The reason is that idt_pci_driver_init() returns pci_register_driver() +directly without checking its return value, if pci_register_driver() +failed, it returns without destroy the newly created debugfs, resulting +the debugfs of ntb_hw_idt can never be created later. + + idt_pci_driver_init() + debugfs_create_dir() # create debugfs directory + pci_register_driver() + driver_register() + bus_add_driver() + priv = kzalloc(...) # OOM happened + # return without destroy debugfs directory + +Fix by removing debugfs when pci_register_driver() returns error. 
+ +Fixes: bf2a952d31d2 ("NTB: Add IDT 89HPESxNTx PCIe-switches support") +Signed-off-by: Yuan Can +Signed-off-by: Jon Mason +Signed-off-by: Sasha Levin +--- + drivers/ntb/hw/idt/ntb_hw_idt.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c +index 0ed6f809ff2ee..51799fccf8404 100644 +--- a/drivers/ntb/hw/idt/ntb_hw_idt.c ++++ b/drivers/ntb/hw/idt/ntb_hw_idt.c +@@ -2891,6 +2891,7 @@ static struct pci_driver idt_pci_driver = { + + static int __init idt_pci_driver_init(void) + { ++ int ret; + pr_info("%s %s\n", NTB_DESC, NTB_VER); + + /* Create the top DebugFS directory if the FS is initialized */ +@@ -2898,7 +2899,11 @@ static int __init idt_pci_driver_init(void) + dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + /* Register the NTB hardware driver to handle the PCI device */ +- return pci_register_driver(&idt_pci_driver); ++ ret = pci_register_driver(&idt_pci_driver); ++ if (ret) ++ debugfs_remove_recursive(dbgfs_topdir); ++ ++ return ret; + } + module_init(idt_pci_driver_init); + +-- +2.39.2 + diff --git a/queue-6.4/ntb-intel-fix-error-handling-in-intel_ntb_pci_driver.patch b/queue-6.4/ntb-intel-fix-error-handling-in-intel_ntb_pci_driver.patch new file mode 100644 index 00000000000..2ef3e24bb70 --- /dev/null +++ b/queue-6.4/ntb-intel-fix-error-handling-in-intel_ntb_pci_driver.patch @@ -0,0 +1,65 @@ +From 7b9f5f4f4ba4a81054b4298375bdf38a3828e4c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 5 Nov 2022 09:43:22 +0000 +Subject: ntb: intel: Fix error handling in intel_ntb_pci_driver_init() + +From: Yuan Can + +[ Upstream commit 4c3c796aca02883ad35bb117468938cc4022ca41 ] + +A problem about ntb_hw_intel create debugfs failed is triggered with the +following log given: + + [ 273.112733] Intel(R) PCI-E Non-Transparent Bridge Driver 2.0 + [ 273.115342] debugfs: Directory 'ntb_hw_intel' with parent '/' already present! 
+ +The reason is that intel_ntb_pci_driver_init() returns +pci_register_driver() directly without checking its return value, if +pci_register_driver() failed, it returns without destroy the newly created +debugfs, resulting the debugfs of ntb_hw_intel can never be created later. + + intel_ntb_pci_driver_init() + debugfs_create_dir() # create debugfs directory + pci_register_driver() + driver_register() + bus_add_driver() + priv = kzalloc(...) # OOM happened + # return without destroy debugfs directory + +Fix by removing debugfs when pci_register_driver() returns error. + +Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") +Signed-off-by: Yuan Can +Acked-by: Dave Jiang +Signed-off-by: Jon Mason +Signed-off-by: Sasha Levin +--- + drivers/ntb/hw/intel/ntb_hw_gen1.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c +index 84772013812bf..60a4ebc7bf35a 100644 +--- a/drivers/ntb/hw/intel/ntb_hw_gen1.c ++++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c +@@ -2064,12 +2064,17 @@ static struct pci_driver intel_ntb_pci_driver = { + + static int __init intel_ntb_pci_driver_init(void) + { ++ int ret; + pr_info("%s %s\n", NTB_DESC, NTB_VER); + + if (debugfs_initialized()) + debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + +- return pci_register_driver(&intel_ntb_pci_driver); ++ ret = pci_register_driver(&intel_ntb_pci_driver); ++ if (ret) ++ debugfs_remove_recursive(debugfs_dir); ++ ++ return ret; + } + module_init(intel_ntb_pci_driver_init); + +-- +2.39.2 + diff --git a/queue-6.4/ntb-ntb_tool-add-check-for-devm_kcalloc.patch b/queue-6.4/ntb-ntb_tool-add-check-for-devm_kcalloc.patch new file mode 100644 index 00000000000..64acc436822 --- /dev/null +++ b/queue-6.4/ntb-ntb_tool-add-check-for-devm_kcalloc.patch @@ -0,0 +1,39 @@ +From 2f49422fef2e45f2bafb822a8c585bf41de7b492 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Nov 2022 11:32:44 +0800 
+Subject: NTB: ntb_tool: Add check for devm_kcalloc + +From: Jiasheng Jiang + +[ Upstream commit 2790143f09938776a3b4f69685b380bae8fd06c7 ] + +As the devm_kcalloc may return NULL pointer, +it should be better to add check for the return +value, as same as the others. + +Fixes: 7f46c8b3a552 ("NTB: ntb_tool: Add full multi-port NTB API support") +Signed-off-by: Jiasheng Jiang +Reviewed-by: Serge Semin +Reviewed-by: Dave Jiang +Signed-off-by: Jon Mason +Signed-off-by: Sasha Levin +--- + drivers/ntb/test/ntb_tool.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c +index 5ee0afa621a95..eeeb4b1c97d2c 100644 +--- a/drivers/ntb/test/ntb_tool.c ++++ b/drivers/ntb/test/ntb_tool.c +@@ -998,6 +998,8 @@ static int tool_init_mws(struct tool_ctx *tc) + tc->peers[pidx].outmws = + devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmw_cnt, + sizeof(*tc->peers[pidx].outmws), GFP_KERNEL); ++ if (tc->peers[pidx].outmws == NULL) ++ return -ENOMEM; + + for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) { + tc->peers[pidx].outmws[widx].pidx = pidx; +-- +2.39.2 + diff --git a/queue-6.4/ntb-ntb_transport-fix-possible-memory-leak-while-dev.patch b/queue-6.4/ntb-ntb_transport-fix-possible-memory-leak-while-dev.patch new file mode 100644 index 00000000000..403d266e9d0 --- /dev/null +++ b/queue-6.4/ntb-ntb_transport-fix-possible-memory-leak-while-dev.patch @@ -0,0 +1,42 @@ +From 1be077967706a5225aa70459dc1f29e59f16d173 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Nov 2022 23:19:17 +0800 +Subject: NTB: ntb_transport: fix possible memory leak while device_register() + fails + +From: Yang Yingliang + +[ Upstream commit 8623ccbfc55d962e19a3537652803676ad7acb90 ] + +If device_register() returns error, the name allocated by +dev_set_name() need be freed. As comment of device_register() +says, it should use put_device() to give up the reference in +the error path. 
So fix this by calling put_device(), then the +name can be freed in kobject_cleanup(), and client_dev is freed +in ntb_transport_client_release(). + +Fixes: fce8a7bb5b4b ("PCI-Express Non-Transparent Bridge Support") +Signed-off-by: Yang Yingliang +Reviewed-by: Dave Jiang +Signed-off-by: Jon Mason +Signed-off-by: Sasha Levin +--- + drivers/ntb/ntb_transport.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c +index a9b97ebc71ac5..2abd2235bbcab 100644 +--- a/drivers/ntb/ntb_transport.c ++++ b/drivers/ntb/ntb_transport.c +@@ -410,7 +410,7 @@ int ntb_transport_register_client_dev(char *device_name) + + rc = device_register(dev); + if (rc) { +- kfree(client_dev); ++ put_device(dev); + goto err; + } + +-- +2.39.2 + diff --git a/queue-6.4/nvme-fix-the-nvme_id_ns_nvm_sts_mask-definition.patch b/queue-6.4/nvme-fix-the-nvme_id_ns_nvm_sts_mask-definition.patch new file mode 100644 index 00000000000..42ce8dcb216 --- /dev/null +++ b/queue-6.4/nvme-fix-the-nvme_id_ns_nvm_sts_mask-definition.patch @@ -0,0 +1,36 @@ +From 3b568c0eb266f839b2b2d7a1072cb7c86cb4e780 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Jun 2023 18:08:05 +0530 +Subject: nvme: fix the NVME_ID_NS_NVM_STS_MASK definition + +From: Ankit Kumar + +[ Upstream commit b938e6603660652dc3db66d3c915fbfed3bce21d ] + +As per NVMe command set specification 1.0c Storage tag size is 7 bits. 
+ +Fixes: 4020aad85c67 ("nvme: add support for enhanced metadata") +Signed-off-by: Ankit Kumar +Reviewed-by: Kanchan Joshi +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + include/linux/nvme.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/nvme.h b/include/linux/nvme.h +index 779507ac750b8..2819d6c3a6b5d 100644 +--- a/include/linux/nvme.h ++++ b/include/linux/nvme.h +@@ -473,7 +473,7 @@ struct nvme_id_ns_nvm { + }; + + enum { +- NVME_ID_NS_NVM_STS_MASK = 0x3f, ++ NVME_ID_NS_NVM_STS_MASK = 0x7f, + NVME_ID_NS_NVM_GUARD_SHIFT = 7, + NVME_ID_NS_NVM_GUARD_MASK = 0x3, + }; +-- +2.39.2 + diff --git a/queue-6.4/nvme-pci-fix-dma-direction-of-unmapping-integrity-da.patch b/queue-6.4/nvme-pci-fix-dma-direction-of-unmapping-integrity-da.patch new file mode 100644 index 00000000000..06d3b11e4bc --- /dev/null +++ b/queue-6.4/nvme-pci-fix-dma-direction-of-unmapping-integrity-da.patch @@ -0,0 +1,40 @@ +From 65b3c7307116caccad741bb3f16b88a32e5423ac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 17:26:20 +0800 +Subject: nvme-pci: fix DMA direction of unmapping integrity data + +From: Ming Lei + +[ Upstream commit b8f6446b6853768cb99e7c201bddce69ca60c15e ] + +DMA direction should be taken in dma_unmap_page() for unmapping integrity +data. + +Fix this DMA direction, and reported in Guangwu's test. 
+ +Reported-by: Guangwu Zhang +Fixes: 4aedb705437f ("nvme-pci: split metadata handling from nvme_map_data / nvme_unmap_data") +Signed-off-by: Ming Lei +Reviewed-by: Christoph Hellwig +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/pci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 492f319ebdf37..5b5303f0e2c20 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -968,7 +968,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req) + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + + dma_unmap_page(dev->dev, iod->meta_dma, +- rq_integrity_vec(req)->bv_len, rq_data_dir(req)); ++ rq_integrity_vec(req)->bv_len, rq_dma_dir(req)); + } + + if (blk_rq_nr_phys_segments(req)) +-- +2.39.2 + diff --git a/queue-6.4/octeontx2-af-move-validation-of-ptp-pointer-before-i.patch b/queue-6.4/octeontx2-af-move-validation-of-ptp-pointer-before-i.patch new file mode 100644 index 00000000000..f118880422a --- /dev/null +++ b/queue-6.4/octeontx2-af-move-validation-of-ptp-pointer-before-i.patch @@ -0,0 +1,110 @@ +From a369d12b3781cc00b48dea406925dc0c01d8c53e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 13:59:36 +0530 +Subject: octeontx2-af: Move validation of ptp pointer before its usage + +From: Sai Krishna + +[ Upstream commit 7709fbd4922c197efabda03660d93e48a3e80323 ] + +Moved PTP pointer validation before its use to avoid smatch warning. +Also used kzalloc/kfree instead of devm_kzalloc/devm_kfree. + +Fixes: 2ef4e45d99b1 ("octeontx2-af: Add PTP PPS Errata workaround on CN10K silicon") +Signed-off-by: Naveen Mamindlapalli +Signed-off-by: Sunil Goutham +Signed-off-by: Sai Krishna +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../net/ethernet/marvell/octeontx2/af/ptp.c | 19 +++++++++---------- + .../net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- + 2 files changed, 10 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +index 3411e2e47d46b..0ee420a489fc4 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +@@ -208,7 +208,7 @@ struct ptp *ptp_get(void) + /* Check driver is bound to PTP block */ + if (!ptp) + ptp = ERR_PTR(-EPROBE_DEFER); +- else ++ else if (!IS_ERR(ptp)) + pci_dev_get(ptp->pdev); + + return ptp; +@@ -388,11 +388,10 @@ static int ptp_extts_on(struct ptp *ptp, int on) + static int ptp_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) + { +- struct device *dev = &pdev->dev; + struct ptp *ptp; + int err; + +- ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL); ++ ptp = kzalloc(sizeof(*ptp), GFP_KERNEL); + if (!ptp) { + err = -ENOMEM; + goto error; +@@ -428,20 +427,19 @@ static int ptp_probe(struct pci_dev *pdev, + return 0; + + error_free: +- devm_kfree(dev, ptp); ++ kfree(ptp); + + error: + /* For `ptp_get()` we need to differentiate between the case + * when the core has not tried to probe this device and the case when +- * the probe failed. In the later case we pretend that the +- * initialization was successful and keep the error in ++ * the probe failed. In the later case we keep the error in + * `dev->driver_data`. 
+ */ + pci_set_drvdata(pdev, ERR_PTR(err)); + if (!first_ptp_block) + first_ptp_block = ERR_PTR(err); + +- return 0; ++ return err; + } + + static void ptp_remove(struct pci_dev *pdev) +@@ -449,16 +447,17 @@ static void ptp_remove(struct pci_dev *pdev) + struct ptp *ptp = pci_get_drvdata(pdev); + u64 clock_cfg; + +- if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) +- hrtimer_cancel(&ptp->hrtimer); +- + if (IS_ERR_OR_NULL(ptp)) + return; + ++ if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) ++ hrtimer_cancel(&ptp->hrtimer); ++ + /* Disable PTP clock */ + clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); + clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN; + writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); ++ kfree(ptp); + } + + static const struct pci_device_id ptp_id_table[] = { +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +index b26b013216933..73932e2755bca 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +@@ -3253,7 +3253,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) + rvu->ptp = ptp_get(); + if (IS_ERR(rvu->ptp)) { + err = PTR_ERR(rvu->ptp); +- if (err == -EPROBE_DEFER) ++ if (err) + goto err_release_regions; + rvu->ptp = NULL; + } +-- +2.39.2 + diff --git a/queue-6.4/octeontx2-af-promisc-enable-disable-through-mbox.patch b/queue-6.4/octeontx2-af-promisc-enable-disable-through-mbox.patch new file mode 100644 index 00000000000..6a94e575127 --- /dev/null +++ b/queue-6.4/octeontx2-af-promisc-enable-disable-through-mbox.patch @@ -0,0 +1,118 @@ +From 0ab9b97172a9cb991254a7818274eb8296aacd12 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 09:57:05 +0530 +Subject: octeontx2-af: Promisc enable/disable through mbox + +From: Ratheesh Kannoth + +[ Upstream commit af42088bdaf292060b8d8a00d8644ca7b2b3f2d1 ] + +In legacy silicon, promiscuous mode is only modified +through CGX mbox 
messages. In CN10KB silicon, it is modified +from CGX mbox and NIX. This breaks legacy application +behaviour. Fix this by removing call from NIX. + +Fixes: d6c9784baf59 ("octeontx2-af: Invoke exact match functions if supported") +Signed-off-by: Ratheesh Kannoth +Reviewed-by: Leon Romanovsky +Reviewed-by: Michal Kubiak +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/af/rvu_nix.c | 11 ++------- + .../marvell/octeontx2/af/rvu_npc_hash.c | 23 +++++++++++++++++-- + 2 files changed, 23 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +index f01d057ad025a..8cdf91a5bf44f 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +@@ -3815,21 +3815,14 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req, + } + + /* install/uninstall promisc entry */ +- if (promisc) { ++ if (promisc) + rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, + pfvf->rx_chan_base, + pfvf->rx_chan_cnt); +- +- if (rvu_npc_exact_has_match_table(rvu)) +- rvu_npc_exact_promisc_enable(rvu, pcifunc); +- } else { ++ else + if (!nix_rx_multicast) + rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false); + +- if (rvu_npc_exact_has_match_table(rvu)) +- rvu_npc_exact_promisc_disable(rvu, pcifunc); +- } +- + return 0; + } + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +index 9f11c1e407373..6fe67f3a7f6f1 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +@@ -1164,8 +1164,10 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i + { + struct npc_exact_table *table; + u16 *cnt, old_cnt; ++ bool promisc; + + table = rvu->hw->table; ++ promisc = 
table->promisc_mode[drop_mcam_idx]; + + cnt = &table->cnt_cmd_rules[drop_mcam_idx]; + old_cnt = *cnt; +@@ -1177,13 +1179,18 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i + + *enable_or_disable_cam = false; + +- /* If all rules are deleted, disable cam */ ++ if (promisc) ++ goto done; ++ ++ /* If all rules are deleted and not already in promisc mode; ++ * disable cam ++ */ + if (!*cnt && val < 0) { + *enable_or_disable_cam = true; + goto done; + } + +- /* If rule got added, enable cam */ ++ /* If rule got added and not already in promisc mode; enable cam */ + if (!old_cnt && val > 0) { + *enable_or_disable_cam = true; + goto done; +@@ -1462,6 +1469,12 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) + *promisc = false; + mutex_unlock(&table->lock); + ++ /* Enable drop rule */ ++ rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, ++ true); ++ ++ dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d)\n", ++ __func__, cgx_id, lmac_id); + return 0; + } + +@@ -1503,6 +1516,12 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) + *promisc = true; + mutex_unlock(&table->lock); + ++ /* disable drop rule */ ++ rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, ++ false); ++ ++ dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d)\n", ++ __func__, cgx_id, lmac_id); + return 0; + } + +-- +2.39.2 + diff --git a/queue-6.4/octeontx2-pf-add-additional-check-for-mcam-rules.patch b/queue-6.4/octeontx2-pf-add-additional-check-for-mcam-rules.patch new file mode 100644 index 00000000000..bf71c674b35 --- /dev/null +++ b/queue-6.4/octeontx2-pf-add-additional-check-for-mcam-rules.patch @@ -0,0 +1,71 @@ +From ea03ebec26393aa8d6ce2220cad7c458bcde9935 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 16:00:27 +0530 +Subject: octeontx2-pf: Add additional check for MCAM rules + +From: Suman Ghosh + +[ Upstream commit 8278ee2a2646b9acf747317895e47a640ba933c9 ] 
+ +Due to hardware limitation, MCAM drop rule with +ether_type == 802.1Q and vlan_id == 0 is not supported. Hence rejecting +such rules. + +Fixes: dce677da57c0 ("octeontx2-pf: Add vlan-etype to ntuple filters") +Signed-off-by: Suman Ghosh +Link: https://lore.kernel.org/r/20230710103027.2244139-1-sumang@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/nic/otx2_flows.c | 8 ++++++++ + .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 15 +++++++++++++++ + 2 files changed, 23 insertions(+) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +index 10e11262d48a0..2d7713a1a1539 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +@@ -872,6 +872,14 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, + return -EINVAL; + + vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype); ++ ++ /* Drop rule with vlan_etype == 802.1Q ++ * and vlan_id == 0 is not supported ++ */ ++ if (vlan_etype == ETH_P_8021Q && !fsp->m_ext.vlan_tci && ++ fsp->ring_cookie == RX_CLS_FLOW_DISC) ++ return -EINVAL; ++ + /* Only ETH_P_8021Q and ETH_P_802AD types supported */ + if (vlan_etype != ETH_P_8021Q && + vlan_etype != ETH_P_8021AD) +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +index 8392f63e433fc..293bd3f29b077 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +@@ -604,6 +604,21 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, + return -EOPNOTSUPP; + } + ++ if (!match.mask->vlan_id) { ++ struct flow_action_entry *act; ++ int i; ++ ++ flow_action_for_each(i, act, &rule->action) { ++ if (act->id == FLOW_ACTION_DROP) { ++ netdev_err(nic->netdev, ++ "vlan tpid 0x%x with vlan_id %d is not 
supported for DROP rule.\n", ++ ntohs(match.key->vlan_tpid), ++ match.key->vlan_id); ++ return -EOPNOTSUPP; ++ } ++ } ++ } ++ + if (match.mask->vlan_id || + match.mask->vlan_dei || + match.mask->vlan_priority) { +-- +2.39.2 + diff --git a/queue-6.4/openrisc-union-fpcsr-and-oldmask-in-sigcontext-to-un.patch b/queue-6.4/openrisc-union-fpcsr-and-oldmask-in-sigcontext-to-un.patch new file mode 100644 index 00000000000..cdd46db0c18 --- /dev/null +++ b/queue-6.4/openrisc-union-fpcsr-and-oldmask-in-sigcontext-to-un.patch @@ -0,0 +1,75 @@ +From 696ca48d335bae7910f2e1abae3078b90a63c6aa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 17:54:40 +0100 +Subject: openrisc: Union fpcsr and oldmask in sigcontext to unbreak userspace + ABI + +From: Stafford Horne + +[ Upstream commit dceaafd668812115037fc13a1893d068b7b880f5 ] + +With commit 27267655c531 ("openrisc: Support floating point user api") I +added an entry to the struct sigcontext which caused an unwanted change +to the userspace ABI. + +To fix this we use the previously unused oldmask field space for the +floating point fpcsr state. We do this with a union to restore the ABI +back to the pre kernel v6.4 ABI and keep API compatibility. + +This does mean if there is some code somewhere that is setting oldmask +in an OpenRISC specific userspace sighandler it would end up setting the +floating point register status, but I think it's unlikely as oldmask was +never functional before. 
+ +Fixes: 27267655c531 ("openrisc: Support floating point user api") +Reported-by: Szabolcs Nagy +Closes: https://lore.kernel.org/openrisc/20230626213840.GA1236108@port70.net/ +Signed-off-by: Stafford Horne +Signed-off-by: Sasha Levin +--- + arch/openrisc/include/uapi/asm/sigcontext.h | 6 ++++-- + arch/openrisc/kernel/signal.c | 4 ++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/arch/openrisc/include/uapi/asm/sigcontext.h b/arch/openrisc/include/uapi/asm/sigcontext.h +index ca585e4af6b8e..e7ffb58ff58fb 100644 +--- a/arch/openrisc/include/uapi/asm/sigcontext.h ++++ b/arch/openrisc/include/uapi/asm/sigcontext.h +@@ -28,8 +28,10 @@ + + struct sigcontext { + struct user_regs_struct regs; /* needs to be first */ +- struct __or1k_fpu_state fpu; +- unsigned long oldmask; ++ union { ++ unsigned long fpcsr; ++ unsigned long oldmask; /* unused */ ++ }; + }; + + #endif /* __ASM_OPENRISC_SIGCONTEXT_H */ +diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c +index 4664a18f0787d..2e7257a433ff4 100644 +--- a/arch/openrisc/kernel/signal.c ++++ b/arch/openrisc/kernel/signal.c +@@ -50,7 +50,7 @@ static int restore_sigcontext(struct pt_regs *regs, + err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)); + err |= __copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long)); + err |= __copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long)); +- err |= __copy_from_user(&regs->fpcsr, &sc->fpu.fpcsr, sizeof(unsigned long)); ++ err |= __copy_from_user(&regs->fpcsr, &sc->fpcsr, sizeof(unsigned long)); + + /* make sure the SM-bit is cleared so user-mode cannot fool us */ + regs->sr &= ~SPR_SR_SM; +@@ -113,7 +113,7 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) + err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long)); + err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long)); + err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long)); +- err |=
__copy_to_user(&sc->fpu.fpcsr, &regs->fpcsr, sizeof(unsigned long)); ++ err |= __copy_to_user(&sc->fpcsr, &regs->fpcsr, sizeof(unsigned long)); + + return err; + } +-- +2.39.2 + diff --git a/queue-6.4/platform-x86-wmi-break-possible-infinite-loop-when-p.patch b/queue-6.4/platform-x86-wmi-break-possible-infinite-loop-when-p.patch new file mode 100644 index 00000000000..29b7fc1fa7e --- /dev/null +++ b/queue-6.4/platform-x86-wmi-break-possible-infinite-loop-when-p.patch @@ -0,0 +1,84 @@ +From fe769e2ae656339ebc5ea9b89b775402633f4d2d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Jun 2023 18:11:54 +0300 +Subject: platform/x86: wmi: Break possible infinite loop when parsing GUID + +From: Andy Shevchenko + +[ Upstream commit 028e6e204ace1f080cfeacd72c50397eb8ae8883 ] + +The while-loop may break on one of the two conditions, either ID string +is empty or GUID matches. The second one, may never be reached if the +parsed string is not correct GUID. In such a case the loop will never +advance to check the next ID. + +Break possible infinite loop by factoring out guid_parse_and_compare() +helper which may be moved to the generic header for everyone later on +and preventing from similar mistake in the future. + +Interestingly that firstly it appeared when WMI was turned into a bus +driver, but later when duplicated GUIDs were checked, the while-loop +has been replaced by for-loop and hence no mistake made again.
+ +Fixes: a48e23385fcf ("platform/x86: wmi: add context pointer field to struct wmi_device_id") +Fixes: 844af950da94 ("platform/x86: wmi: Turn WMI into a bus driver") +Signed-off-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20230621151155.78279-1-andriy.shevchenko@linux.intel.com +Tested-by: Armin Wolf +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/wmi.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c +index d81319a502efc..e1a3bfeeed529 100644 +--- a/drivers/platform/x86/wmi.c ++++ b/drivers/platform/x86/wmi.c +@@ -136,6 +136,16 @@ static acpi_status find_guid(const char *guid_string, struct wmi_block **out) + return AE_NOT_FOUND; + } + ++static bool guid_parse_and_compare(const char *string, const guid_t *guid) ++{ ++ guid_t guid_input; ++ ++ if (guid_parse(string, &guid_input)) ++ return false; ++ ++ return guid_equal(&guid_input, guid); ++} ++ + static const void *find_guid_context(struct wmi_block *wblock, + struct wmi_driver *wdriver) + { +@@ -146,11 +156,7 @@ static const void *find_guid_context(struct wmi_block *wblock, + return NULL; + + while (*id->guid_string) { +- guid_t guid_input; +- +- if (guid_parse(id->guid_string, &guid_input)) +- continue; +- if (guid_equal(&wblock->gblock.guid, &guid_input)) ++ if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid)) + return id->context; + id++; + } +@@ -827,11 +833,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver) + return 0; + + while (*id->guid_string) { +- guid_t driver_guid; +- +- if (WARN_ON(guid_parse(id->guid_string, &driver_guid))) +- continue; +- if (guid_equal(&driver_guid, &wblock->gblock.guid)) ++ if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid)) + return 1; + + id++; +-- +2.39.2 + diff --git a/queue-6.4/riscv-bpf-fix-inconsistent-jit-image-generation.patch 
b/queue-6.4/riscv-bpf-fix-inconsistent-jit-image-generation.patch new file mode 100644 index 00000000000..1a0e56b9f64 --- /dev/null +++ b/queue-6.4/riscv-bpf-fix-inconsistent-jit-image-generation.patch @@ -0,0 +1,137 @@ +From 7c62c3b0f47ff2d49c0b22727b5753f5b0e98584 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 09:41:31 +0200 +Subject: riscv, bpf: Fix inconsistent JIT image generation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +[ Upstream commit c56fb2aab23505bb7160d06097c8de100b82b851 ] + +In order to generate the prologue and epilogue, the BPF JIT needs to +know which registers that are clobbered. Therefore, the during +pre-final passes, the prologue is generated after the body of the +program body-prologue-epilogue. Then, in the final pass, a proper +prologue-body-epilogue JITted image is generated. + +This scheme has worked most of the time. However, for some large +programs with many jumps, e.g. the test_kmod.sh BPF selftest with +hardening enabled (blinding constants), this has shown to be +incorrect. For the final pass, when the proper prologue-body-epilogue +is generated, the image has not converged. This will lead to that the +final image will have incorrect jump offsets. The following is an +excerpt from an incorrect image: + + | ... + | 3b8: 00c50663 beq a0,a2,3c4 <.text+0x3c4> + | 3bc: 0020e317 auipc t1,0x20e + | 3c0: 49630067 jalr zero,1174(t1) # 20e852 <.text+0x20e852> + | ... + | 20e84c: 8796 c.mv a5,t0 + | 20e84e: 6422 c.ldsp s0,8(sp) # Epilogue start + | 20e850: 6141 c.addi16sp sp,16 + | 20e852: 853e c.mv a0,a5 # Incorrect jump target + | 20e854: 8082 c.jr ra + +The image has shrunk, and the epilogue offset is incorrect in the +final pass. + +Correct the problem by always generating proper prologue-body-epilogue +outputs, which means that the first pass will only generate the body +to track what registers that are touched. 
+ +Fixes: 2353ecc6f91f ("bpf, riscv: add BPF JIT for RV64G") +Signed-off-by: Björn Töpel +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20230710074131.19596-1-bjorn@kernel.org +Signed-off-by: Sasha Levin +--- + arch/riscv/net/bpf_jit.h | 6 +++--- + arch/riscv/net/bpf_jit_core.c | 19 +++++++++++++------ + 2 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h +index bf9802a63061d..2717f54904287 100644 +--- a/arch/riscv/net/bpf_jit.h ++++ b/arch/riscv/net/bpf_jit.h +@@ -69,7 +69,7 @@ struct rv_jit_context { + struct bpf_prog *prog; + u16 *insns; /* RV insns */ + int ninsns; +- int body_len; ++ int prologue_len; + int epilogue_offset; + int *offset; /* BPF to RV */ + int nexentries; +@@ -216,8 +216,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx) + int from, to; + + off++; /* BPF branch is from PC+1, RV is from PC */ +- from = (insn > 0) ? ctx->offset[insn - 1] : 0; +- to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; ++ from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len; ++ to = (insn + off > 0) ? 
ctx->offset[insn + off - 1] : ctx->prologue_len; + return ninsns_rvoff(to - from); + } + +diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c +index 737baf8715da7..7a26a3e1c73cf 100644 +--- a/arch/riscv/net/bpf_jit_core.c ++++ b/arch/riscv/net/bpf_jit_core.c +@@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) + unsigned int prog_size = 0, extable_size = 0; + bool tmp_blinded = false, extra_pass = false; + struct bpf_prog *tmp, *orig_prog = prog; +- int pass = 0, prev_ninsns = 0, prologue_len, i; ++ int pass = 0, prev_ninsns = 0, i; + struct rv_jit_data *jit_data; + struct rv_jit_context *ctx; + +@@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) + prog = orig_prog; + goto out_offset; + } ++ ++ if (build_body(ctx, extra_pass, NULL)) { ++ prog = orig_prog; ++ goto out_offset; ++ } ++ + for (i = 0; i < prog->len; i++) { + prev_ninsns += 32; + ctx->offset[i] = prev_ninsns; +@@ -91,12 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) + for (i = 0; i < NR_JIT_ITERATIONS; i++) { + pass++; + ctx->ninsns = 0; ++ ++ bpf_jit_build_prologue(ctx); ++ ctx->prologue_len = ctx->ninsns; ++ + if (build_body(ctx, extra_pass, ctx->offset)) { + prog = orig_prog; + goto out_offset; + } +- ctx->body_len = ctx->ninsns; +- bpf_jit_build_prologue(ctx); ++ + ctx->epilogue_offset = ctx->ninsns; + bpf_jit_build_epilogue(ctx); + +@@ -162,10 +171,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) + + if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(jit_data->header); +- prologue_len = ctx->epilogue_offset - ctx->body_len; + for (i = 0; i < prog->len; i++) +- ctx->offset[i] = ninsns_rvoff(prologue_len + +- ctx->offset[i]); ++ ctx->offset[i] = ninsns_rvoff(ctx->offset[i]); + bpf_prog_fill_jited_linfo(prog, ctx->offset); + out_offset: + kfree(ctx->offset); +-- +2.39.2 + diff --git a/queue-6.4/riscv-mm-fix-truncation-warning-on-rv32.patch 
b/queue-6.4/riscv-mm-fix-truncation-warning-on-rv32.patch new file mode 100644 index 00000000000..7075d9bf6bc --- /dev/null +++ b/queue-6.4/riscv-mm-fix-truncation-warning-on-rv32.patch @@ -0,0 +1,46 @@ +From 0711941654500c97465899b2d6b84b346d829f42 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 01:10:36 +0800 +Subject: riscv: mm: fix truncation warning on RV32 + +From: Jisheng Zhang + +[ Upstream commit b690e266dae2f85f4dfea21fa6a05e3500a51054 ] + +lkp reports below sparse warning when building for RV32: +arch/riscv/mm/init.c:1204:48: sparse: warning: cast truncates bits from +constant value (100000000 becomes 0) + +IMO, the reason we didn't see this truncates bug in real world is "0" +means MEMBLOCK_ALLOC_ACCESSIBLE in memblock and there's no RV32 HW +with more than 4GB memory. + +Fix it anyway to make sparse happy. + +Fixes: decf89f86ecd ("riscv: try to allocate crashkern region from 32bit addressible memory") +Signed-off-by: Jisheng Zhang +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202306080034.SLiCiOMn-lkp@intel.com/ +Link: https://lore.kernel.org/r/20230709171036.1906-1-jszhang@kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/mm/init.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c +index 1306149aad57a..93e7bb9f67fd4 100644 +--- a/arch/riscv/mm/init.c ++++ b/arch/riscv/mm/init.c +@@ -1346,7 +1346,7 @@ static void __init reserve_crashkernel(void) + */ + crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, + search_start, +- min(search_end, (unsigned long) SZ_4G)); ++ min(search_end, (unsigned long)(SZ_4G - 1))); + if (crash_base == 0) { + /* Try again without restricting region to 32bit addressible memory */ + crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, +-- +2.39.2 + diff --git a/queue-6.4/s390-ism-do-not-unregister-clients-with-registered-d.patch 
b/queue-6.4/s390-ism-do-not-unregister-clients-with-registered-d.patch new file mode 100644 index 00000000000..e584f41f927 --- /dev/null +++ b/queue-6.4/s390-ism-do-not-unregister-clients-with-registered-d.patch @@ -0,0 +1,74 @@ +From 8c9160ca105b8a4c5e9cc7c58c474696aba788c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 12:56:22 +0200 +Subject: s390/ism: Do not unregister clients with registered DMBs + +From: Niklas Schnelle + +[ Upstream commit 266deeea34ffd28c6b6a63edf2af9b5a07161c24 ] + +When ism_unregister_client() is called but the client still has DMBs +registered it returns -EBUSY and prints an error. This only happens +after the client has already been unregistered however. This is +unexpected as the unregister claims to have failed. Furthermore as this +implies a client bug a WARN() is more appropriate. Thus move the +deregistration after the check and use WARN(). + +Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration") +Signed-off-by: Niklas Schnelle +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/s390/net/ism_drv.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c +index d65571b3d5cad..6db5cf7e901f9 100644 +--- a/drivers/s390/net/ism_drv.c ++++ b/drivers/s390/net/ism_drv.c +@@ -96,29 +96,32 @@ int ism_unregister_client(struct ism_client *client) + int rc = 0; + + mutex_lock(&ism_dev_list.mutex); +- mutex_lock(&clients_lock); +- clients[client->id] = NULL; +- if (client->id + 1 == max_client) +- max_client--; +- mutex_unlock(&clients_lock); + list_for_each_entry(ism, &ism_dev_list.list, list) { + spin_lock_irqsave(&ism->lock, flags); + /* Stop forwarding IRQs and events */ + ism->subs[client->id] = NULL; + for (int i = 0; i < ISM_NR_DMBS; ++i) { + if (ism->sba_client_arr[i] == client->id) { +- pr_err("%s: attempt to unregister client '%s'" +- "with registered dmb(s)\n", __func__, +- client->name); ++ WARN(1, "%s: attempt to unregister '%s' with registered dmb(s)\n", ++ __func__, client->name); + rc = -EBUSY; +- goto out; ++ goto err_reg_dmb; + } + } + spin_unlock_irqrestore(&ism->lock, flags); + } +-out: + mutex_unlock(&ism_dev_list.mutex); + ++ mutex_lock(&clients_lock); ++ clients[client->id] = NULL; ++ if (client->id + 1 == max_client) ++ max_client--; ++ mutex_unlock(&clients_lock); ++ return rc; ++ ++err_reg_dmb: ++ spin_unlock_irqrestore(&ism->lock, flags); ++ mutex_unlock(&ism_dev_list.mutex); + return rc; + } + EXPORT_SYMBOL_GPL(ism_unregister_client); +-- +2.39.2 + diff --git a/queue-6.4/s390-ism-fix-and-simplify-add-remove-callback-handli.patch b/queue-6.4/s390-ism-fix-and-simplify-add-remove-callback-handli.patch new file mode 100644 index 00000000000..9c19dccb173 --- /dev/null +++ b/queue-6.4/s390-ism-fix-and-simplify-add-remove-callback-handli.patch @@ -0,0 +1,242 @@ +From 82e901cb9437e020d30429e55f1be39ce1457fc7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 12:56:21 +0200 
+Subject: s390/ism: Fix and simplify add()/remove() callback handling + +From: Niklas Schnelle + +[ Upstream commit 76631ffa2fd2d45bae5ad717eef716b94144e0e7 ] + +Previously the clients_lock was protecting the clients array against +concurrent addition/removal of clients but was also accessed from IRQ +context. This meant that it had to be a spinlock and that the add() and +remove() callbacks in which clients need to do allocation and take +mutexes can't be called under the clients_lock. To work around this these +callbacks were moved to workqueues. This not only introduced significant +complexity but is also subtly broken in at least one way. + +In ism_dev_init() and ism_dev_exit() clients[i]->tgt_ism is used to +communicate the added/removed ISM device to the work function. While +write access to client[i]->tgt_ism is protected by the clients_lock and +the code waits that there is no pending add/remove work before and after +setting clients[i]->tgt_ism this is not enough. The problem is that the +wait happens based on per ISM device counters. Thus a concurrent +ism_dev_init()/ism_dev_exit() for a different ISM device may overwrite +a clients[i]->tgt_ism between unlocking the clients_lock and the +subsequent wait for the work to finnish. + +Thankfully with the clients_lock no longer held in IRQ context it can be +turned into a mutex which can be held during the calls to add()/remove() +completely removing the need for the workqueues and the associated +broken housekeeping including the per ISM device counters and the +clients[i]->tgt_ism. + +Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration") +Signed-off-by: Niklas Schnelle +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/s390/net/ism_drv.c | 86 +++++++++++--------------------------- + include/linux/ism.h | 6 --- + 2 files changed, 24 insertions(+), 68 deletions(-) + +diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c +index 216eb4b386286..d65571b3d5cad 100644 +--- a/drivers/s390/net/ism_drv.c ++++ b/drivers/s390/net/ism_drv.c +@@ -36,7 +36,7 @@ static const struct smcd_ops ism_ops; + static struct ism_client *clients[MAX_CLIENTS]; /* use an array rather than */ + /* a list for fast mapping */ + static u8 max_client; +-static DEFINE_SPINLOCK(clients_lock); ++static DEFINE_MUTEX(clients_lock); + struct ism_dev_list { + struct list_head list; + struct mutex mutex; /* protects ism device list */ +@@ -59,11 +59,10 @@ static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism) + int ism_register_client(struct ism_client *client) + { + struct ism_dev *ism; +- unsigned long flags; + int i, rc = -ENOSPC; + + mutex_lock(&ism_dev_list.mutex); +- spin_lock_irqsave(&clients_lock, flags); ++ mutex_lock(&clients_lock); + for (i = 0; i < MAX_CLIENTS; ++i) { + if (!clients[i]) { + clients[i] = client; +@@ -74,7 +73,8 @@ int ism_register_client(struct ism_client *client) + break; + } + } +- spin_unlock_irqrestore(&clients_lock, flags); ++ mutex_unlock(&clients_lock); ++ + if (i < MAX_CLIENTS) { + /* initialize with all devices that we got so far */ + list_for_each_entry(ism, &ism_dev_list.list, list) { +@@ -96,11 +96,11 @@ int ism_unregister_client(struct ism_client *client) + int rc = 0; + + mutex_lock(&ism_dev_list.mutex); +- spin_lock_irqsave(&clients_lock, flags); ++ mutex_lock(&clients_lock); + clients[client->id] = NULL; + if (client->id + 1 == max_client) + max_client--; +- spin_unlock_irqrestore(&clients_lock, flags); ++ mutex_unlock(&clients_lock); + list_for_each_entry(ism, &ism_dev_list.list, list) { + spin_lock_irqsave(&ism->lock, flags); + /* Stop forwarding IRQs and events */ +@@ -571,21 +571,9 @@ 
static u64 ism_get_local_gid(struct ism_dev *ism) + return ism->local_gid; + } + +-static void ism_dev_add_work_func(struct work_struct *work) +-{ +- struct ism_client *client = container_of(work, struct ism_client, +- add_work); +- +- client->add(client->tgt_ism); +- ism_setup_forwarding(client, client->tgt_ism); +- atomic_dec(&client->tgt_ism->add_dev_cnt); +- wake_up(&client->tgt_ism->waitq); +-} +- + static int ism_dev_init(struct ism_dev *ism) + { + struct pci_dev *pdev = ism->pdev; +- unsigned long flags; + int i, ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); +@@ -618,25 +606,16 @@ static int ism_dev_init(struct ism_dev *ism) + /* hardware is V2 capable */ + ism_create_system_eid(); + +- init_waitqueue_head(&ism->waitq); +- atomic_set(&ism->free_clients_cnt, 0); +- atomic_set(&ism->add_dev_cnt, 0); +- +- wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt)); +- spin_lock_irqsave(&clients_lock, flags); +- for (i = 0; i < max_client; ++i) ++ mutex_lock(&ism_dev_list.mutex); ++ mutex_lock(&clients_lock); ++ for (i = 0; i < max_client; ++i) { + if (clients[i]) { +- INIT_WORK(&clients[i]->add_work, +- ism_dev_add_work_func); +- clients[i]->tgt_ism = ism; +- atomic_inc(&ism->add_dev_cnt); +- schedule_work(&clients[i]->add_work); ++ clients[i]->add(ism); ++ ism_setup_forwarding(clients[i], ism); + } +- spin_unlock_irqrestore(&clients_lock, flags); +- +- wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt)); ++ } ++ mutex_unlock(&clients_lock); + +- mutex_lock(&ism_dev_list.mutex); + list_add(&ism->list, &ism_dev_list.list); + mutex_unlock(&ism_dev_list.mutex); + +@@ -711,40 +690,24 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) + return ret; + } + +-static void ism_dev_remove_work_func(struct work_struct *work) +-{ +- struct ism_client *client = container_of(work, struct ism_client, +- remove_work); +- unsigned long flags; +- +- spin_lock_irqsave(&client->tgt_ism->lock, flags); +- client->tgt_ism->subs[client->id] 
= NULL; +- spin_unlock_irqrestore(&client->tgt_ism->lock, flags); +- client->remove(client->tgt_ism); +- atomic_dec(&client->tgt_ism->free_clients_cnt); +- wake_up(&client->tgt_ism->waitq); +-} +- +-/* Callers must hold ism_dev_list.mutex */ + static void ism_dev_exit(struct ism_dev *ism) + { + struct pci_dev *pdev = ism->pdev; + unsigned long flags; + int i; + +- wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt)); +- spin_lock_irqsave(&clients_lock, flags); ++ spin_lock_irqsave(&ism->lock, flags); + for (i = 0; i < max_client; ++i) +- if (clients[i]) { +- INIT_WORK(&clients[i]->remove_work, +- ism_dev_remove_work_func); +- clients[i]->tgt_ism = ism; +- atomic_inc(&ism->free_clients_cnt); +- schedule_work(&clients[i]->remove_work); +- } +- spin_unlock_irqrestore(&clients_lock, flags); ++ ism->subs[i] = NULL; ++ spin_unlock_irqrestore(&ism->lock, flags); + +- wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt)); ++ mutex_lock(&ism_dev_list.mutex); ++ mutex_lock(&clients_lock); ++ for (i = 0; i < max_client; ++i) { ++ if (clients[i]) ++ clients[i]->remove(ism); ++ } ++ mutex_unlock(&clients_lock); + + if (SYSTEM_EID.serial_number[0] != '0' || + SYSTEM_EID.type[0] != '0') +@@ -755,15 +718,14 @@ static void ism_dev_exit(struct ism_dev *ism) + kfree(ism->sba_client_arr); + pci_free_irq_vectors(pdev); + list_del_init(&ism->list); ++ mutex_unlock(&ism_dev_list.mutex); + } + + static void ism_remove(struct pci_dev *pdev) + { + struct ism_dev *ism = dev_get_drvdata(&pdev->dev); + +- mutex_lock(&ism_dev_list.mutex); + ism_dev_exit(ism); +- mutex_unlock(&ism_dev_list.mutex); + + pci_release_mem_regions(pdev); + pci_disable_device(pdev); +diff --git a/include/linux/ism.h b/include/linux/ism.h +index 5160d47e5ea9e..9a4c204df3da1 100644 +--- a/include/linux/ism.h ++++ b/include/linux/ism.h +@@ -45,9 +45,6 @@ struct ism_dev { + int ieq_idx; + + struct ism_client *subs[MAX_CLIENTS]; +- atomic_t free_clients_cnt; +- atomic_t add_dev_cnt; +- wait_queue_head_t 
waitq; + }; + + struct ism_event { +@@ -69,9 +66,6 @@ struct ism_client { + */ + void (*handle_irq)(struct ism_dev *dev, unsigned int bit, u16 dmbemask); + /* Private area - don't touch! */ +- struct work_struct remove_work; +- struct work_struct add_work; +- struct ism_dev *tgt_ism; + u8 id; + }; + +-- +2.39.2 + diff --git a/queue-6.4/s390-ism-fix-locking-for-forwarding-of-irqs-and-even.patch b/queue-6.4/s390-ism-fix-locking-for-forwarding-of-irqs-and-even.patch new file mode 100644 index 00000000000..ac08f139fab --- /dev/null +++ b/queue-6.4/s390-ism-fix-locking-for-forwarding-of-irqs-and-even.patch @@ -0,0 +1,209 @@ +From 120b472bc2a8e8531d07e4cbb70b758a9ee6a46c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 12:56:20 +0200 +Subject: s390/ism: Fix locking for forwarding of IRQs and events to clients + +From: Niklas Schnelle + +[ Upstream commit 6b5c13b591d753c6022fbd12f8c0c0a9a07fc065 ] + +The clients array references all registered clients and is protected by +the clients_lock. Besides its use as general list of clients the clients +array is accessed in ism_handle_irq() to forward ISM device events to +clients. + +While the clients_lock is taken in the IRQ handler when calling +handle_event() it is however incorrectly not held during the +client->handle_irq() call and for the preceding clients[] access leaving +it unprotected against concurrent client (un-)registration. + +Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb() +and ism_unregister_dmb() are not protected by any lock. This is +especially problematic as the client ID from the ism->sba_client_arr[] +is not checked against NO_CLIENT and neither is the client pointer +checked. + +Instead of expanding the use of the clients_lock further add a separate +array in struct ism_dev which references clients subscribed to the +device's events and IRQs. 
This array is protected by ism->lock which is +already taken in ism_handle_irq() and can be taken outside the IRQ +handler when adding/removing subscribers or the accessing +ism->sba_client_arr[]. This also means that the clients_lock is no +longer taken in IRQ context. + +Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration") +Signed-off-by: Niklas Schnelle +Reviewed-by: Alexandra Winter +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/s390/net/ism_drv.c | 44 +++++++++++++++++++++++++++++++------- + include/linux/ism.h | 1 + + 2 files changed, 37 insertions(+), 8 deletions(-) + +diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c +index c2096e4bba319..216eb4b386286 100644 +--- a/drivers/s390/net/ism_drv.c ++++ b/drivers/s390/net/ism_drv.c +@@ -47,6 +47,15 @@ static struct ism_dev_list ism_dev_list = { + .mutex = __MUTEX_INITIALIZER(ism_dev_list.mutex), + }; + ++static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ism->lock, flags); ++ ism->subs[client->id] = client; ++ spin_unlock_irqrestore(&ism->lock, flags); ++} ++ + int ism_register_client(struct ism_client *client) + { + struct ism_dev *ism; +@@ -71,6 +80,7 @@ int ism_register_client(struct ism_client *client) + list_for_each_entry(ism, &ism_dev_list.list, list) { + ism->priv[i] = NULL; + client->add(ism); ++ ism_setup_forwarding(client, ism); + } + } + mutex_unlock(&ism_dev_list.mutex); +@@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client) + max_client--; + spin_unlock_irqrestore(&clients_lock, flags); + list_for_each_entry(ism, &ism_dev_list.list, list) { ++ spin_lock_irqsave(&ism->lock, flags); ++ /* Stop forwarding IRQs and events */ ++ ism->subs[client->id] = NULL; + for (int i = 0; i < ISM_NR_DMBS; ++i) { + if (ism->sba_client_arr[i] == client->id) { + pr_err("%s: attempt to unregister client '%s'" +@@ -101,6 +114,7 @@ int 
ism_unregister_client(struct ism_client *client) + goto out; + } + } ++ spin_unlock_irqrestore(&ism->lock, flags); + } + out: + mutex_unlock(&ism_dev_list.mutex); +@@ -328,6 +342,7 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb, + struct ism_client *client) + { + union ism_reg_dmb cmd; ++ unsigned long flags; + int ret; + + ret = ism_alloc_dmb(ism, dmb); +@@ -351,7 +366,9 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb, + goto out; + } + dmb->dmb_tok = cmd.response.dmb_tok; ++ spin_lock_irqsave(&ism->lock, flags); + ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = client->id; ++ spin_unlock_irqrestore(&ism->lock, flags); + out: + return ret; + } +@@ -360,6 +377,7 @@ EXPORT_SYMBOL_GPL(ism_register_dmb); + int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb) + { + union ism_unreg_dmb cmd; ++ unsigned long flags; + int ret; + + memset(&cmd, 0, sizeof(cmd)); +@@ -368,7 +386,9 @@ int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb) + + cmd.request.dmb_tok = dmb->dmb_tok; + ++ spin_lock_irqsave(&ism->lock, flags); + ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = NO_CLIENT; ++ spin_unlock_irqrestore(&ism->lock, flags); + + ret = ism_cmd(ism, &cmd); + if (ret && ret != ISM_ERROR) +@@ -491,6 +511,7 @@ static u16 ism_get_chid(struct ism_dev *ism) + static void ism_handle_event(struct ism_dev *ism) + { + struct ism_event *entry; ++ struct ism_client *clt; + int i; + + while ((ism->ieq_idx + 1) != READ_ONCE(ism->ieq->header.idx)) { +@@ -499,21 +520,21 @@ static void ism_handle_event(struct ism_dev *ism) + + entry = &ism->ieq->entry[ism->ieq_idx]; + debug_event(ism_debug_info, 2, entry, sizeof(*entry)); +- spin_lock(&clients_lock); +- for (i = 0; i < max_client; ++i) +- if (clients[i]) +- clients[i]->handle_event(ism, entry); +- spin_unlock(&clients_lock); ++ for (i = 0; i < max_client; ++i) { ++ clt = ism->subs[i]; ++ if (clt) ++ clt->handle_event(ism, entry); ++ } + } + } + + static irqreturn_t 
ism_handle_irq(int irq, void *data) + { + struct ism_dev *ism = data; +- struct ism_client *clt; + unsigned long bit, end; + unsigned long *bv; + u16 dmbemask; ++ u8 client_id; + + bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET]; + end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET; +@@ -530,8 +551,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data) + dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET]; + ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0; + barrier(); +- clt = clients[ism->sba_client_arr[bit]]; +- clt->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask); ++ client_id = ism->sba_client_arr[bit]; ++ if (unlikely(client_id == NO_CLIENT || !ism->subs[client_id])) ++ continue; ++ ism->subs[client_id]->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask); + } + + if (ism->sba->e) { +@@ -554,6 +577,7 @@ static void ism_dev_add_work_func(struct work_struct *work) + add_work); + + client->add(client->tgt_ism); ++ ism_setup_forwarding(client, client->tgt_ism); + atomic_dec(&client->tgt_ism->add_dev_cnt); + wake_up(&client->tgt_ism->waitq); + } +@@ -691,7 +715,11 @@ static void ism_dev_remove_work_func(struct work_struct *work) + { + struct ism_client *client = container_of(work, struct ism_client, + remove_work); ++ unsigned long flags; + ++ spin_lock_irqsave(&client->tgt_ism->lock, flags); ++ client->tgt_ism->subs[client->id] = NULL; ++ spin_unlock_irqrestore(&client->tgt_ism->lock, flags); + client->remove(client->tgt_ism); + atomic_dec(&client->tgt_ism->free_clients_cnt); + wake_up(&client->tgt_ism->waitq); +diff --git a/include/linux/ism.h b/include/linux/ism.h +index ea2bcdae74012..5160d47e5ea9e 100644 +--- a/include/linux/ism.h ++++ b/include/linux/ism.h +@@ -44,6 +44,7 @@ struct ism_dev { + u64 local_gid; + int ieq_idx; + ++ struct ism_client *subs[MAX_CLIENTS]; + atomic_t free_clients_cnt; + atomic_t add_dev_cnt; + wait_queue_head_t waitq; +-- +2.39.2 + diff --git 
a/queue-6.4/scsi-qla2xxx-fix-error-code-in-qla2x00_start_sp.patch b/queue-6.4/scsi-qla2xxx-fix-error-code-in-qla2x00_start_sp.patch new file mode 100644 index 00000000000..482d423911b --- /dev/null +++ b/queue-6.4/scsi-qla2xxx-fix-error-code-in-qla2x00_start_sp.patch @@ -0,0 +1,38 @@ +From ba6f8103f9b47e25147279392d0190719036955e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Jun 2023 13:58:47 +0300 +Subject: scsi: qla2xxx: Fix error code in qla2x00_start_sp() + +From: Dan Carpenter + +[ Upstream commit e579b007eff3ff8d29d59d16214cd85fb9e573f7 ] + +This should be negative -EAGAIN instead of positive. The callers treat +non-zero error codes the same so it doesn't really impact runtime beyond +some trivial differences to debug output. + +Fixes: 80676d054e5a ("scsi: qla2xxx: Fix session cleanup hang") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/49866d28-4cfe-47b0-842b-78f110e61aab@moroto.mountain +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_iocb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c +index b9b3e6f80ea9b..1ed13199f27ce 100644 +--- a/drivers/scsi/qla2xxx/qla_iocb.c ++++ b/drivers/scsi/qla2xxx/qla_iocb.c +@@ -3892,7 +3892,7 @@ qla2x00_start_sp(srb_t *sp) + + pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + if (!pkt) { +- rval = EAGAIN; ++ rval = -EAGAIN; + ql_log(ql_log_warn, vha, 0x700c, + "qla2x00_alloc_iocbs failed.\n"); + goto done; +-- +2.39.2 + diff --git a/queue-6.4/scsi-ufs-ufs-mediatek-add-dependency-for-reset_contr.patch b/queue-6.4/scsi-ufs-ufs-mediatek-add-dependency-for-reset_contr.patch new file mode 100644 index 00000000000..2be6cf551bb --- /dev/null +++ b/queue-6.4/scsi-ufs-ufs-mediatek-add-dependency-for-reset_contr.patch @@ -0,0 +1,55 @@ +From 577164d03134b5cdfa2ba8c54c8c325857472318 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Jun 2023 22:23:48 -0700 +Subject: 
scsi: ufs: ufs-mediatek: Add dependency for RESET_CONTROLLER + +From: Randy Dunlap + +[ Upstream commit 89f7ef7f2b23b2a7b8ce346c23161916eae5b15c ] + +When RESET_CONTROLLER is not set, kconfig complains about missing +dependencies for RESET_TI_SYSCON, so add the missing dependency just as is +done above for SCSI_UFS_QCOM. + +Silences this kconfig warning: + +WARNING: unmet direct dependencies detected for RESET_TI_SYSCON + Depends on [n]: RESET_CONTROLLER [=n] && HAS_IOMEM [=y] + Selected by [m]: + - SCSI_UFS_MEDIATEK [=m] && SCSI_UFSHCD [=y] && SCSI_UFSHCD_PLATFORM [=y] && ARCH_MEDIATEK [=y] + +Fixes: de48898d0cb6 ("scsi: ufs-mediatek: Create reset control device_link") +Signed-off-by: Randy Dunlap +Link: lore.kernel.org/r/202306020859.1wHg9AaT-lkp@intel.com +Link: https://lore.kernel.org/r/20230701052348.28046-1-rdunlap@infradead.org +Cc: Stanley Chu +Cc: Peter Wang +Cc: Paul Gazzillo +Cc: Necip Fazil Yildiran +Cc: linux-scsi@vger.kernel.org +Cc: linux-arm-kernel@lists.infradead.org +Cc: linux-mediatek@lists.infradead.org +Cc: "James E.J. Bottomley" +Cc: "Martin K. Petersen" +Reported-by: kernel test robot +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/ufs/host/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/ufs/host/Kconfig b/drivers/ufs/host/Kconfig +index 8793e34335806..f11e98c9e6652 100644 +--- a/drivers/ufs/host/Kconfig ++++ b/drivers/ufs/host/Kconfig +@@ -72,6 +72,7 @@ config SCSI_UFS_QCOM + config SCSI_UFS_MEDIATEK + tristate "Mediatek specific hooks to UFS controller platform driver" + depends on SCSI_UFSHCD_PLATFORM && ARCH_MEDIATEK ++ depends on RESET_CONTROLLER + select PHY_MTK_UFS + select RESET_TI_SYSCON + help +-- +2.39.2 + diff --git a/queue-6.4/series b/queue-6.4/series new file mode 100644 index 00000000000..5fa083dae6a --- /dev/null +++ b/queue-6.4/series @@ -0,0 +1,98 @@ +drm-panel-simple-add-connector_type-for-innolux_at04.patch +drm-bridge-dw_hdmi-fix-connector-access-for-scdc.patch +drm-bridge-ti-sn65dsi86-fix-auxiliary-bus-lifetime.patch +swiotlb-always-set-the-number-of-areas-before-alloca.patch +swiotlb-reduce-the-number-of-areas-to-match-actual-m.patch +drm-panel-simple-add-powertip-ph800480t013-drm_displ.patch +xen-virtio-fix-null-deref-when-a-bridge-of-pci-root-.patch +netfilter-nf_tables-report-use-refcount-overflow.patch +netfilter-conntrack-don-t-fold-port-numbers-into-add.patch +ice-fix-max_rate-check-while-configuring-tx-rate-lim.patch +ice-fix-tx-queue-rate-limit-when-tcs-are-configured.patch +igc-add-condition-for-qbv_config_change_errors-count.patch +igc-remove-delay-during-tx-ring-configuration.patch +igc-add-igc_xdp_buff-wrapper-for-xdp_buff-in-driver.patch +igc-add-xdp-hints-kfuncs-for-rx-hash.patch +igc-fix-tx-hang-issue-when-qbv-gate-is-closed.patch +net-mlx5e-fix-double-free-in-mlx5e_destroy_flow_tabl.patch +net-mlx5e-fix-memory-leak-in-mlx5e_fs_tt_redirect_an.patch +net-mlx5e-fix-memory-leak-in-mlx5e_ptp_open.patch +net-mlx5e-rx-fix-flush-and-close-release-flow-of-reg.patch +net-mlx5-register-a-unique-thermal-zone-per-device.patch +net-mlx5e-check-for-not_ready-flag-state-after-locki.patch 
+net-mlx5e-tc-ct-offload-ct-clear-only-once.patch +net-mlx5-query-hca_cap_2-only-when-supported.patch +net-mlx5e-rx-fix-page_pool-page-fragment-tracking-fo.patch +igc-set-tp-bit-in-supported-and-advertising-fields-o.patch +igc-include-the-length-type-field-and-vlan-tag-in-qu.patch +igc-handle-pps-start-time-programming-for-past-time-.patch +blk-crypto-use-dynamic-lock-class-for-blk_crypto_pro.patch +scsi-qla2xxx-fix-error-code-in-qla2x00_start_sp.patch +scsi-ufs-ufs-mediatek-add-dependency-for-reset_contr.patch +bpf-fix-max-stack-depth-check-for-async-callbacks.patch +net-mvneta-fix-txq_map-in-case-of-txq_number-1.patch +net-dsa-felix-make-vsc9959_tas_guard_bands_update-vi.patch +net-mscc-ocelot-fix-oversize-frame-dropping-for-pree.patch +net-sched-cls_fw-fix-improper-refcount-update-leads-.patch +gve-set-default-duplex-configuration-to-full.patch +drm-fbdev-dma-fix-documented-default-preferred_bpp-v.patch +octeontx2-af-promisc-enable-disable-through-mbox.patch +octeontx2-af-move-validation-of-ptp-pointer-before-i.patch +ionic-remove-warn_on-to-prevent-panic_on_warn.patch +udp6-add-a-missing-call-into-udp_fail_queue_rcv_skb-.patch +net-bgmac-postpone-turning-irqs-off-to-avoid-soc-han.patch +net-prevent-skb-corruption-on-frag-list-segmentation.patch +s390-ism-fix-locking-for-forwarding-of-irqs-and-even.patch +s390-ism-fix-and-simplify-add-remove-callback-handli.patch +s390-ism-do-not-unregister-clients-with-registered-d.patch +icmp6-fix-null-ptr-deref-of-ip6_null_entry-rt6i_idev.patch +udp6-fix-udp6_ehashfn-typo.patch +ntb-idt-fix-error-handling-in-idt_pci_driver_init.patch +ntb-amd-fix-error-handling-in-amd_ntb_pci_driver_ini.patch +ntb-intel-fix-error-handling-in-intel_ntb_pci_driver.patch +ntb-ntb_transport-fix-possible-memory-leak-while-dev.patch +ntb-ntb_tool-add-check-for-devm_kcalloc.patch +ipv6-addrconf-fix-a-potential-refcount-underflow-for.patch +hid-hyperv-avoid-struct-memcpy-overrun-warning.patch +net-dsa-qca8k-add-check-for-skb_copy.patch 
+x86-fineibt-poison-endbr-at-0.patch +platform-x86-wmi-break-possible-infinite-loop-when-p.patch +net-sched-taprio-replace-tc_taprio_qopt_offload-enab.patch +igc-rename-qbv_enable-to-taprio_offload_enable.patch +igc-do-not-enable-taprio-offload-for-invalid-argumen.patch +igc-handle-already-enabled-taprio-offload-for-baseti.patch +kernel-trace-fix-cleanup-logic-of-enable_trace_eprob.patch +fprobe-add-unlock-to-match-a-succeeded-ftrace_test_r.patch +igc-no-strict-mode-in-pure-launchtime-cbs-offload.patch +igc-fix-launchtime-before-start-of-cycle.patch +igc-fix-inserting-of-empty-frame-for-launchtime.patch +nvme-fix-the-nvme_id_ns_nvm_sts_mask-definition.patch +openrisc-union-fpcsr-and-oldmask-in-sigcontext-to-un.patch +riscv-bpf-fix-inconsistent-jit-image-generation.patch +net-fec-remove-useless-fec_enet_reset_skb.patch +net-fec-remove-last_bdp-from-fec_enet_txq_xmit_frame.patch +net-fec-recycle-pages-for-transmitted-xdp-frames.patch +net-fec-increase-the-size-of-tx-ring-and-update-tx_w.patch +drm-i915-don-t-preserve-dpll_hw_state-for-slave-crtc.patch +drm-i915-fix-one-wrong-caching-mode-enum-usage.patch +net-dsa-removed-unneeded-of_node_put-in-felix_parse_.patch +octeontx2-pf-add-additional-check-for-mcam-rules.patch +erofs-avoid-useless-loops-in-z_erofs_pcluster_readmo.patch +erofs-avoid-infinite-loop-in-z_erofs_do_read_page-wh.patch +erofs-fix-fsdax-unavailability-for-chunk-based-regul.patch +wifi-airo-avoid-uninitialized-warning-in-airo_get_ra.patch +bpf-cpumap-fix-memory-leak-in-cpu_map_update_elem.patch +xdp-use-trusted-arguments-in-xdp-hints-kfuncs.patch +net-sched-flower-ensure-both-minimum-and-maximum-por.patch +riscv-mm-fix-truncation-warning-on-rv32.patch +drm-nouveau-disp-fix-hdmi-on-gt215.patch +drm-nouveau-disp-g94-enable-hdmi.patch +netdevsim-fix-uninitialized-data-in-nsim_dev_trap_fa.patch +drm-nouveau-acr-abort-loading-acr-if-no-firmware-was.patch +drm-nouveau-bring-back-blit-subchannel-for-pre-nv50-.patch 
+net-sched-make-psched_mtu-rtnl-less-safe.patch +net-txgbe-fix-eeprom-calculation-error.patch +wifi-rtw89-debug-fix-error-code-in-rtw89_debug_priv_.patch +net-sched-sch_qfq-reintroduce-lmax-bound-check-for-m.patch +net-sched-sch_qfq-account-for-stab-overhead-in-qfq_e.patch +nvme-pci-fix-dma-direction-of-unmapping-integrity-da.patch diff --git a/queue-6.4/swiotlb-always-set-the-number-of-areas-before-alloca.patch b/queue-6.4/swiotlb-always-set-the-number-of-areas-before-alloca.patch new file mode 100644 index 00000000000..642f69160a8 --- /dev/null +++ b/queue-6.4/swiotlb-always-set-the-number-of-areas-before-alloca.patch @@ -0,0 +1,85 @@ +From 3e0aead0125493b7ae55fcf63b7b58f4cd13b0aa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Jun 2023 15:01:03 +0200 +Subject: swiotlb: always set the number of areas before allocating the pool + +From: Petr Tesarik + +[ Upstream commit aabd12609f91155f26584508b01f548215cc3c0c ] + +The number of areas defaults to the number of possible CPUs. However, the +total number of slots may have to be increased after adjusting the number +of areas. Consequently, the number of areas must be determined before +allocating the memory pool. This is even explained with a comment in +swiotlb_init_remap(), but swiotlb_init_late() adjusts the number of areas +after slots are already allocated. The areas may end up being smaller than +IO_TLB_SEGSIZE, which breaks per-area locking. + +While fixing swiotlb_init_late(), move all relevant comments before the +definition of swiotlb_adjust_nareas() and convert them to kernel-doc. 
+ +Fixes: 20347fca71a3 ("swiotlb: split up the global swiotlb lock") +Signed-off-by: Petr Tesarik +Reviewed-by: Roberto Sassu +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + kernel/dma/swiotlb.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c +index af2e304c672c4..16f53d8c51bcf 100644 +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -115,9 +115,16 @@ static bool round_up_default_nslabs(void) + return true; + } + ++/** ++ * swiotlb_adjust_nareas() - adjust the number of areas and slots ++ * @nareas: Desired number of areas. Zero is treated as 1. ++ * ++ * Adjust the default number of areas in a memory pool. ++ * The default size of the memory pool may also change to meet minimum area ++ * size requirements. ++ */ + static void swiotlb_adjust_nareas(unsigned int nareas) + { +- /* use a single area when non is specified */ + if (!nareas) + nareas = 1; + else if (!is_power_of_2(nareas)) +@@ -298,10 +305,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, + if (swiotlb_force_disable) + return; + +- /* +- * default_nslabs maybe changed when adjust area number. +- * So allocate bounce buffer after adjusting area number. 
+- */ + if (!default_nareas) + swiotlb_adjust_nareas(num_possible_cpus()); + +@@ -363,6 +366,9 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, + if (swiotlb_force_disable) + return 0; + ++ if (!default_nareas) ++ swiotlb_adjust_nareas(num_possible_cpus()); ++ + retry: + order = get_order(nslabs << IO_TLB_SHIFT); + nslabs = SLABS_PER_PAGE << order; +@@ -397,9 +403,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, + (PAGE_SIZE << order) >> 20); + } + +- if (!default_nareas) +- swiotlb_adjust_nareas(num_possible_cpus()); +- + area_order = get_order(array_size(sizeof(*mem->areas), + default_nareas)); + mem->areas = (struct io_tlb_area *) +-- +2.39.2 + diff --git a/queue-6.4/swiotlb-reduce-the-number-of-areas-to-match-actual-m.patch b/queue-6.4/swiotlb-reduce-the-number-of-areas-to-match-actual-m.patch new file mode 100644 index 00000000000..1b1673f170a --- /dev/null +++ b/queue-6.4/swiotlb-reduce-the-number-of-areas-to-match-actual-m.patch @@ -0,0 +1,114 @@ +From 0745d7a9e77343a03937e00b50782642061849c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Jun 2023 15:01:04 +0200 +Subject: swiotlb: reduce the number of areas to match actual memory pool size + +From: Petr Tesarik + +[ Upstream commit 8ac04063354a01a484d2e55d20ed1958aa0d3392 ] + +Although the desired size of the SWIOTLB memory pool is increased in +swiotlb_adjust_nareas() to match the number of areas, the actual allocation +may be smaller, which may require reducing the number of areas. + +For example, Xen uses swiotlb_init_late(), which in turn uses the page +allocator. On x86, page size is 4 KiB and MAX_ORDER is 10 (1024 pages), +resulting in a maximum memory pool size of 4 MiB. This corresponds to 2048 +slots of 2 KiB each. The minimum area size is 128 (IO_TLB_SEGSIZE), +allowing at most 2048 / 128 = 16 areas. + +If num_possible_cpus() is greater than the maximum number of areas, areas +are smaller than IO_TLB_SEGSIZE and contiguous groups of free slots will +span multiple areas. 
When allocating and freeing slots, only one area will +be properly locked, causing race conditions on the unlocked slots and +ultimately data corruption, kernel hangs and crashes. + +Fixes: 20347fca71a3 ("swiotlb: split up the global swiotlb lock") +Signed-off-by: Petr Tesarik +Reviewed-by: Roberto Sassu +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + kernel/dma/swiotlb.c | 27 ++++++++++++++++++++++++--- + 1 file changed, 24 insertions(+), 3 deletions(-) + +diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c +index 16f53d8c51bcf..b1bbd6270ba79 100644 +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -138,6 +138,23 @@ static void swiotlb_adjust_nareas(unsigned int nareas) + (default_nslabs << IO_TLB_SHIFT) >> 20); + } + ++/** ++ * limit_nareas() - get the maximum number of areas for a given memory pool size ++ * @nareas: Desired number of areas. ++ * @nslots: Total number of slots in the memory pool. ++ * ++ * Limit the number of areas to the maximum possible number of areas in ++ * a memory pool of the given size. ++ * ++ * Return: Maximum possible number of areas. 
++ */ ++static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) ++{ ++ if (nslots < nareas * IO_TLB_SEGSIZE) ++ return nslots / IO_TLB_SEGSIZE; ++ return nareas; ++} ++ + static int __init + setup_io_tlb_npages(char *str) + { +@@ -297,6 +314,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, + { + struct io_tlb_mem *mem = &io_tlb_default_mem; + unsigned long nslabs; ++ unsigned int nareas; + size_t alloc_size; + void *tlb; + +@@ -309,10 +327,12 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, + swiotlb_adjust_nareas(num_possible_cpus()); + + nslabs = default_nslabs; ++ nareas = limit_nareas(default_nareas, nslabs); + while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { + if (nslabs <= IO_TLB_MIN_SLABS) + return; + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); ++ nareas = limit_nareas(nareas, nslabs); + } + + if (default_nslabs != nslabs) { +@@ -358,6 +378,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, + { + struct io_tlb_mem *mem = &io_tlb_default_mem; + unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); ++ unsigned int nareas; + unsigned char *vstart = NULL; + unsigned int order, area_order; + bool retried = false; +@@ -403,8 +424,8 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, + (PAGE_SIZE << order) >> 20); + } + +- area_order = get_order(array_size(sizeof(*mem->areas), +- default_nareas)); ++ nareas = limit_nareas(default_nareas, nslabs); ++ area_order = get_order(array_size(sizeof(*mem->areas), nareas)); + mem->areas = (struct io_tlb_area *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); + if (!mem->areas) +@@ -418,7 +439,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, + set_memory_decrypted((unsigned long)vstart, + (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); + swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true, +- default_nareas); ++ nareas); + + swiotlb_print_info(); + return 0; +-- +2.39.2 + diff --git 
a/queue-6.4/udp6-add-a-missing-call-into-udp_fail_queue_rcv_skb-.patch b/queue-6.4/udp6-add-a-missing-call-into-udp_fail_queue_rcv_skb-.patch new file mode 100644 index 00000000000..80cf831a62a --- /dev/null +++ b/queue-6.4/udp6-add-a-missing-call-into-udp_fail_queue_rcv_skb-.patch @@ -0,0 +1,57 @@ +From 8b9f1e208289fa8437d0478c29172a6ebf9f8da2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 21:39:20 -0700 +Subject: udp6: add a missing call into udp_fail_queue_rcv_skb tracepoint + +From: Ivan Babrou + +[ Upstream commit 8139dccd464aaee4a2c351506ff883733c6ca5a3 ] + +The tracepoint has existed for 12 years, but it only covered udp +over the legacy IPv4 protocol. Having it enabled for udp6 removes +the unnecessary difference in error visibility. + +Signed-off-by: Ivan Babrou +Fixes: 296f7ea75b45 ("udp: add tracepoints for queueing skb to rcvbuf") +Acked-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/core/net-traces.c | 2 ++ + net/ipv6/udp.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/net/core/net-traces.c b/net/core/net-traces.c +index 805b7385dd8da..6aef976bc1da2 100644 +--- a/net/core/net-traces.c ++++ b/net/core/net-traces.c +@@ -63,4 +63,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); + EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset); + EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum); + ++EXPORT_TRACEPOINT_SYMBOL_GPL(udp_fail_queue_rcv_skb); ++ + EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready); +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index e5a337e6b9705..debb98fb23c0b 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -680,6 +681,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) + } + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb_reason(skb, drop_reason); ++ trace_udp_fail_queue_rcv_skb(rc, sk); + return -1; + } + +-- +2.39.2 + diff --git 
a/queue-6.4/udp6-fix-udp6_ehashfn-typo.patch b/queue-6.4/udp6-fix-udp6_ehashfn-typo.patch new file mode 100644 index 00000000000..ccca51c2c3f --- /dev/null +++ b/queue-6.4/udp6-fix-udp6_ehashfn-typo.patch @@ -0,0 +1,40 @@ +From d63211de5fb52a2741b61eae43f615679bbddba7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Jul 2023 08:29:58 +0000 +Subject: udp6: fix udp6_ehashfn() typo + +From: Eric Dumazet + +[ Upstream commit 51d03e2f2203e76ed02d33fb5ffbb5fc85ffaf54 ] + +Amit Klein reported that udp6_ehash_secret was initialized but never used. + +Fixes: 1bbdceef1e53 ("inet: convert inet_ehash_secret and ipv6_hash_secret to net_get_random_once") +Reported-by: Amit Klein +Signed-off-by: Eric Dumazet +Cc: Willy Tarreau +Cc: Willem de Bruijn +Cc: David Ahern +Cc: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/udp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index debb98fb23c0b..d594a0425749b 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -91,7 +91,7 @@ static u32 udp6_ehashfn(const struct net *net, + fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); + + return __inet6_ehashfn(lhash, lport, fhash, fport, +- udp_ipv6_hash_secret + net_hash_mix(net)); ++ udp6_ehash_secret + net_hash_mix(net)); + } + + int udp_v6_get_port(struct sock *sk, unsigned short snum) +-- +2.39.2 + diff --git a/queue-6.4/wifi-airo-avoid-uninitialized-warning-in-airo_get_ra.patch b/queue-6.4/wifi-airo-avoid-uninitialized-warning-in-airo_get_ra.patch new file mode 100644 index 00000000000..6967b0fc66a --- /dev/null +++ b/queue-6.4/wifi-airo-avoid-uninitialized-warning-in-airo_get_ra.patch @@ -0,0 +1,47 @@ +From 2685e6ba08190492fa99432286f029afadcb06ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Jul 2023 06:31:54 -0700 +Subject: wifi: airo: avoid uninitialized warning in airo_get_rate() + +From: Randy Dunlap + +[ Upstream commit 
9373771aaed17f5c2c38485f785568abe3a9f8c1 ] + +Quieten a gcc (11.3.0) build error or warning by checking the function +call status and returning -EBUSY if the function call failed. +This is similar to what several other wireless drivers do for the +SIOCGIWRATE ioctl call when there is a locking problem. + +drivers/net/wireless/cisco/airo.c: error: 'status_rid.currentXmitRate' is used uninitialized [-Werror=uninitialized] + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Randy Dunlap +Reported-by: Geert Uytterhoeven +Link: https://lore.kernel.org/r/39abf2c7-24a-f167-91da-ed4c5435d1c4@linux-m68k.org +Link: https://lore.kernel.org/r/20230709133154.26206-1-rdunlap@infradead.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/cisco/airo.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c +index 7c4cc5f5e1eb4..dbd13f7aa3e6e 100644 +--- a/drivers/net/wireless/cisco/airo.c ++++ b/drivers/net/wireless/cisco/airo.c +@@ -6157,8 +6157,11 @@ static int airo_get_rate(struct net_device *dev, + struct iw_param *vwrq = &wrqu->bitrate; + struct airo_info *local = dev->ml_priv; + StatusRid status_rid; /* Card status info */ ++ int ret; + +- readStatusRid(local, &status_rid, 1); ++ ret = readStatusRid(local, &status_rid, 1); ++ if (ret) ++ return -EBUSY; + + vwrq->value = le16_to_cpu(status_rid.currentXmitRate) * 500000; + /* If more than one rate, set auto */ +-- +2.39.2 + diff --git a/queue-6.4/wifi-rtw89-debug-fix-error-code-in-rtw89_debug_priv_.patch b/queue-6.4/wifi-rtw89-debug-fix-error-code-in-rtw89_debug_priv_.patch new file mode 100644 index 00000000000..3461fb18da2 --- /dev/null +++ b/queue-6.4/wifi-rtw89-debug-fix-error-code-in-rtw89_debug_priv_.patch @@ -0,0 +1,51 @@ +From cf9f44752b619631f770e4dc300984d1dac80d17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jul 2023 10:45:00 +0800 +Subject: wifi: rtw89: debug: fix error 
code in rtw89_debug_priv_send_h2c_set() + +From: Zhang Shurong + +[ Upstream commit 4f4626cd049576af1276c7568d5b44eb3f7bb1b1 ] + +If there is a failure during rtw89_fw_h2c_raw() rtw89_debug_priv_send_h2c +should return negative error code instead of a positive value count. +Fix this bug by returning correct error code. + +Fixes: e3ec7017f6a2 ("rtw89: add Realtek 802.11ax driver") +Signed-off-by: Zhang Shurong +Acked-by: Ping-Ke Shih +Link: https://lore.kernel.org/r/tencent_AD09A61BC4DA92AD1EB0790F5C850E544D07@qq.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/realtek/rtw89/debug.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c +index 1e5b7a9987163..858494ddfb12e 100644 +--- a/drivers/net/wireless/realtek/rtw89/debug.c ++++ b/drivers/net/wireless/realtek/rtw89/debug.c +@@ -2998,17 +2998,18 @@ static ssize_t rtw89_debug_priv_send_h2c_set(struct file *filp, + struct rtw89_debugfs_priv *debugfs_priv = filp->private_data; + struct rtw89_dev *rtwdev = debugfs_priv->rtwdev; + u8 *h2c; ++ int ret; + u16 h2c_len = count / 2; + + h2c = rtw89_hex2bin_user(rtwdev, user_buf, count); + if (IS_ERR(h2c)) + return -EFAULT; + +- rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len); ++ ret = rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len); + + kfree(h2c); + +- return count; ++ return ret ? 
ret : count; + } + + static int +-- +2.39.2 + diff --git a/queue-6.4/x86-fineibt-poison-endbr-at-0.patch b/queue-6.4/x86-fineibt-poison-endbr-at-0.patch new file mode 100644 index 00000000000..40fbeec819e --- /dev/null +++ b/queue-6.4/x86-fineibt-poison-endbr-at-0.patch @@ -0,0 +1,89 @@ +From 79f7f4bdb8ae801346e94933d8c848c76e4ea88b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 21:35:48 +0200 +Subject: x86/fineibt: Poison ENDBR at +0 + +From: Peter Zijlstra + +[ Upstream commit 04505bbbbb15da950ea0239e328a76a3ad2376e0 ] + +Alyssa noticed that when building the kernel with CFI_CLANG+IBT and +booting on IBT enabled hardware to obtain FineIBT, the indirect +functions look like: + + __cfi_foo: + endbr64 + subl $hash, %r10d + jz 1f + ud2 + nop + 1: + foo: + endbr64 + +This is because the compiler generates code for kCFI+IBT. In that case +the caller does the hash check and will jump to +0, so there must be +an ENDBR there. The compiler doesn't know about FineIBT at all; also +it is possible to actually use kCFI+IBT when booting with 'cfi=kcfi' +on IBT enabled hardware. + +Having this second ENDBR however makes it possible to elide the CFI +check. Therefore, we should poison this second ENDBR when switching to +FineIBT mode. 
+ +Fixes: 931ab63664f0 ("x86/ibt: Implement FineIBT") +Reported-by: "Milburn, Alyssa" +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Kees Cook +Reviewed-by: Sami Tolvanen +Link: https://lore.kernel.org/r/20230615193722.194131053@infradead.org +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/alternative.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index f615e0cb6d932..4e2c70f88e05b 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -940,6 +940,17 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) + return 0; + } + ++static void cfi_rewrite_endbr(s32 *start, s32 *end) ++{ ++ s32 *s; ++ ++ for (s = start; s < end; s++) { ++ void *addr = (void *)s + *s; ++ ++ poison_endbr(addr+16, false); ++ } ++} ++ + /* .retpoline_sites */ + static int cfi_rand_callers(s32 *start, s32 *end) + { +@@ -1034,14 +1045,19 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + return; + + case CFI_FINEIBT: ++ /* place the FineIBT preamble at func()-16 */ + ret = cfi_rewrite_preamble(start_cfi, end_cfi); + if (ret) + goto err; + ++ /* rewrite the callers to target func()-16 */ + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + ++ /* now that nobody targets func()+0, remove ENDBR there */ ++ cfi_rewrite_endbr(start_cfi, end_cfi); ++ + if (builtin) + pr_info("Using FineIBT CFI\n"); + return; +-- +2.39.2 + diff --git a/queue-6.4/xdp-use-trusted-arguments-in-xdp-hints-kfuncs.patch b/queue-6.4/xdp-use-trusted-arguments-in-xdp-hints-kfuncs.patch new file mode 100644 index 00000000000..83ad713a5e5 --- /dev/null +++ b/queue-6.4/xdp-use-trusted-arguments-in-xdp-hints-kfuncs.patch @@ -0,0 +1,53 @@ +From ef7d0bc0116eed8b256212c207fd0aaa11f2a936 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 12:59:26 +0200 +Subject: xdp: use trusted arguments in XDP hints kfuncs + +From: Larysa Zaremba + +[ 
Upstream commit 2e06c57d66d3f6c26faa5f5b479fb3add34ce85a ] + +Currently, verifier does not reject XDP programs that pass NULL pointer to +hints functions. At the same time, this case is not handled in any driver +implementation (including veth). For example, changing + +bpf_xdp_metadata_rx_timestamp(ctx, &timestamp); + +to + +bpf_xdp_metadata_rx_timestamp(ctx, NULL); + +in xdp_metadata test successfully crashes the system. + +Add KF_TRUSTED_ARGS flag to hints kfunc definitions, so driver code +does not have to worry about getting invalid pointers. + +Fixes: 3d76a4d3d4e5 ("bpf: XDP metadata RX kfuncs") +Reported-by: Stanislav Fomichev +Closes: https://lore.kernel.org/bpf/ZKWo0BbpLfkZHbyE@google.com/ +Signed-off-by: Larysa Zaremba +Acked-by: Jesper Dangaard Brouer +Acked-by: Stanislav Fomichev +Link: https://lore.kernel.org/r/20230711105930.29170-1-larysa.zaremba@intel.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + net/core/xdp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/xdp.c b/net/core/xdp.c +index 41e5ca8643ec9..8362130bf085d 100644 +--- a/net/core/xdp.c ++++ b/net/core/xdp.c +@@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, + __diag_pop(); + + BTF_SET8_START(xdp_metadata_kfunc_ids) +-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0) ++#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) + XDP_METADATA_KFUNC_xxx + #undef XDP_METADATA_KFUNC + BTF_SET8_END(xdp_metadata_kfunc_ids) +-- +2.39.2 + diff --git a/queue-6.4/xen-virtio-fix-null-deref-when-a-bridge-of-pci-root-.patch b/queue-6.4/xen-virtio-fix-null-deref-when-a-bridge-of-pci-root-.patch new file mode 100644 index 00000000000..d1548ee2677 --- /dev/null +++ b/queue-6.4/xen-virtio-fix-null-deref-when-a-bridge-of-pci-root-.patch @@ -0,0 +1,90 @@ +From 2eedacb8d561774f8457305e7eb3c924ce6f9ca9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Jun 2023 15:12:13
+0200 +Subject: xen/virtio: Fix NULL deref when a bridge of PCI root bus has no + parent + +From: Petr Pavlu + +[ Upstream commit 21a235bce12361e64adfc2ef97e4ae2e51ad63d4 ] + +When attempting to run Xen on a QEMU/KVM virtual machine with virtio +devices (all x86_64), function xen_dt_get_node() crashes on accessing +bus->bridge->parent->of_node because a bridge of the PCI root bus has no +parent set: + +[ 1.694192][ T1] BUG: kernel NULL pointer dereference, address: 0000000000000288 +[ 1.695688][ T1] #PF: supervisor read access in kernel mode +[ 1.696297][ T1] #PF: error_code(0x0000) - not-present page +[ 1.696297][ T1] PGD 0 P4D 0 +[ 1.696297][ T1] Oops: 0000 [#1] PREEMPT SMP NOPTI +[ 1.696297][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.3.7-1-default #1 openSUSE Tumbleweed a577eae57964bb7e83477b5a5645a1781df990f0 +[ 1.696297][ T1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014 +[ 1.696297][ T1] RIP: e030:xen_virtio_restricted_mem_acc+0xd9/0x1c0 +[ 1.696297][ T1] Code: 45 0c 83 e8 c9 a3 ea ff 31 c0 eb d7 48 8b 87 40 ff ff ff 48 89 c2 48 8b 40 10 48 85 c0 75 f4 48 8b 82 10 01 00 00 48 8b 40 40 <48> 83 b8 88 02 00 00 00 0f 84 45 ff ff ff 66 90 31 c0 eb a5 48 89 +[ 1.696297][ T1] RSP: e02b:ffffc90040013cc8 EFLAGS: 00010246 +[ 1.696297][ T1] RAX: 0000000000000000 RBX: ffff888006c75000 RCX: 0000000000000029 +[ 1.696297][ T1] RDX: ffff888005ed1000 RSI: ffffc900400f100c RDI: ffff888005ee30d0 +[ 1.696297][ T1] RBP: ffff888006c75010 R08: 0000000000000001 R09: 0000000330000006 +[ 1.696297][ T1] R10: ffff888005850028 R11: 0000000000000002 R12: ffffffff830439a0 +[ 1.696297][ T1] R13: 0000000000000000 R14: ffff888005657900 R15: ffff888006e3e1e8 +[ 1.696297][ T1] FS: 0000000000000000(0000) GS:ffff88804a000000(0000) knlGS:0000000000000000 +[ 1.696297][ T1] CS: e030 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 1.696297][ T1] CR2: 0000000000000288 CR3: 0000000002e36000 CR4: 0000000000050660 +[ 1.696297][ T1] Call 
Trace: +[ 1.696297][ T1] <TASK> +[ 1.696297][ T1] virtio_features_ok+0x1b/0xd0 +[ 1.696297][ T1] virtio_dev_probe+0x19c/0x270 +[ 1.696297][ T1] really_probe+0x19b/0x3e0 +[ 1.696297][ T1] __driver_probe_device+0x78/0x160 +[ 1.696297][ T1] driver_probe_device+0x1f/0x90 +[ 1.696297][ T1] __driver_attach+0xd2/0x1c0 +[ 1.696297][ T1] bus_for_each_dev+0x74/0xc0 +[ 1.696297][ T1] bus_add_driver+0x116/0x220 +[ 1.696297][ T1] driver_register+0x59/0x100 +[ 1.696297][ T1] virtio_console_init+0x7f/0x110 +[ 1.696297][ T1] do_one_initcall+0x47/0x220 +[ 1.696297][ T1] kernel_init_freeable+0x328/0x480 +[ 1.696297][ T1] kernel_init+0x1a/0x1c0 +[ 1.696297][ T1] ret_from_fork+0x29/0x50 +[ 1.696297][ T1] </TASK> +[ 1.696297][ T1] Modules linked in: +[ 1.696297][ T1] CR2: 0000000000000288 +[ 1.696297][ T1] ---[ end trace 0000000000000000 ]--- + +The PCI root bus is in this case created from ACPI description via +acpi_pci_root_add() -> pci_acpi_scan_root() -> acpi_pci_root_create() -> +pci_create_root_bus() where the last function is called with +parent=NULL. It indicates that no parent is present and then +bus->bridge->parent is NULL too. + +Fix the problem by checking bus->bridge->parent in xen_dt_get_node() for +NULL first.
+ +Fixes: ef8ae384b4c9 ("xen/virtio: Handle PCI devices which Host controller is described in DT") +Signed-off-by: Petr Pavlu +Reviewed-by: Oleksandr Tyshchenko +Reviewed-by: Stefano Stabellini +Link: https://lore.kernel.org/r/20230621131214.9398-2-petr.pavlu@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Sasha Levin +--- + drivers/xen/grant-dma-ops.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c +index 9784a77fa3c99..76f6f26265a3b 100644 +--- a/drivers/xen/grant-dma-ops.c ++++ b/drivers/xen/grant-dma-ops.c +@@ -303,6 +303,8 @@ static struct device_node *xen_dt_get_node(struct device *dev) + while (!pci_is_root_bus(bus)) + bus = bus->parent; + ++ if (!bus->bridge->parent) ++ return NULL; + return of_node_get(bus->bridge->parent->of_node); + } + +-- +2.39.2 + -- 2.47.3