]> git.ipfire.org Git - thirdparty/openwrt.git/commitdiff
mac80211: ath11k: Fix DMA buffer allocation to resolve SWIOTLB issues 17751/head
authorTim Harvey <tharvey@gateworks.com>
Mon, 27 Jan 2025 22:44:27 +0000 (14:44 -0800)
committerRobert Marko <robimarko@gmail.com>
Tue, 1 Apr 2025 09:04:26 +0000 (11:04 +0200)
Backport two commits to resolve issues with ath1kk causing it to fail
driver registration on iommuless systems with DRAM outside of 32bit
addressing such as a 4GiB imx8mm:

commit 1bcd20981834 ("wifi: ath11k: Fix DMA buffer allocation
to resolve SWIOTLB issues")
commit eeadc6baf8b3 ("wifi: ath11k: Use dma_alloc_noncoherent
for rx_tid buffer allocation")

Signed-off-by: Tim Harvey <tharvey@gateworks.com>
Link: https://github.com/openwrt/openwrt/pull/17751
Signed-off-by: Robert Marko <robimarko@gmail.com>
package/kernel/mac80211/patches/ath11k/001-wifi-ath11k-Fix-DMA-buffer-allocation-to-resolve-SWIOTLB-issues.patch [new file with mode: 0644]
package/kernel/mac80211/patches/ath11k/002-wifi-ath11k-use-dma-alloc-noncoherent-for-rx-tid-buffer-allocation.patch [new file with mode: 0644]

diff --git a/package/kernel/mac80211/patches/ath11k/001-wifi-ath11k-Fix-DMA-buffer-allocation-to-resolve-SWIOTLB-issues.patch b/package/kernel/mac80211/patches/ath11k/001-wifi-ath11k-Fix-DMA-buffer-allocation-to-resolve-SWIOTLB-issues.patch
new file mode 100644 (file)
index 0000000..d5d80a3
--- /dev/null
@@ -0,0 +1,91 @@
+wifi: ath11k: Fix DMA buffer allocation to resolve SWIOTLB issues
+Currently, the driver allocates cacheable DMA buffers for rings like
+HAL_REO_DST and HAL_WBM2SW_RELEASE. The buffers for HAL_WBM2SW_RELEASE
+are large (1024 KiB), exceeding the SWIOTLB slot size of 256 KiB. This
+leads to "swiotlb buffer is full" error messages on systems without an
+IOMMU that use SWIOTLB, causing driver initialization failures. The driver
+calls dma_map_single() with these large buffers obtained from kzalloc(),
+resulting in ring initialization errors on systems without an IOMMU that
+use SWIOTLB.
+
+To address these issues, replace the flawed buffer allocation mechanism
+with the appropriate DMA API. Specifically, use dma_alloc_noncoherent()
+for cacheable DMA buffers, ensuring proper freeing of buffers with
+dma_free_noncoherent().
+
+Error log:
+[   10.194343] ath11k_pci 0000:04:00.0: swiotlb buffer is full (sz:1048583 bytes), total 32768 (slots), used 2529 (slots)
+[   10.194406] ath11k_pci 0000:04:00.0: failed to set up tcl_comp ring (0) :-12
+[   10.194781] ath11k_pci 0000:04:00.0: failed to init DP: -12
+
+Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
+
+Reported-by: Tim Harvey <tharvey@gateworks.com>
+Closes: https://lore.kernel.org/all/20241210041133.GA17116@lst.de/
+Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
+Tested-by: Tim Harvey <tharvey@gateworks.com>
+Link: https://patch.msgid.link/20250119164219.647059-2-quic_ppranees@quicinc.com
+Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
+--- a/drivers/net/wireless/ath/ath11k/dp.c
++++ b/drivers/net/wireless/ath/ath11k/dp.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: BSD-3-Clause-Clear
+ /*
+  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
++ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
+  */
+ #include <crypto/hash.h>
+@@ -104,14 +104,12 @@ void ath11k_dp_srng_cleanup(struct ath11
+       if (!ring->vaddr_unaligned)
+               return;
+-      if (ring->cached) {
+-              dma_unmap_single(ab->dev, ring->paddr_unaligned, ring->size,
+-                               DMA_FROM_DEVICE);
+-              kfree(ring->vaddr_unaligned);
+-      } else {
++      if (ring->cached)
++              dma_free_noncoherent(ab->dev, ring->size, ring->vaddr_unaligned,
++                                   ring->paddr_unaligned, DMA_FROM_DEVICE);
++      else
+               dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
+                                 ring->paddr_unaligned);
+-      }
+       ring->vaddr_unaligned = NULL;
+ }
+@@ -249,25 +247,14 @@ int ath11k_dp_srng_setup(struct ath11k_b
+               default:
+                       cached = false;
+               }
+-
+-              if (cached) {
+-                      ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
+-                      if (!ring->vaddr_unaligned)
+-                              return -ENOMEM;
+-
+-                      ring->paddr_unaligned = dma_map_single(ab->dev,
+-                                                             ring->vaddr_unaligned,
+-                                                             ring->size,
+-                                                             DMA_FROM_DEVICE);
+-                      if (dma_mapping_error(ab->dev, ring->paddr_unaligned)) {
+-                              kfree(ring->vaddr_unaligned);
+-                              ring->vaddr_unaligned = NULL;
+-                              return -ENOMEM;
+-                      }
+-              }
+       }
+-      if (!cached)
++      if (cached)
++              ring->vaddr_unaligned = dma_alloc_noncoherent(ab->dev, ring->size,
++                                                            &ring->paddr_unaligned,
++                                                            DMA_FROM_DEVICE,
++                                                            GFP_KERNEL);
++      else
+               ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
+                                                          &ring->paddr_unaligned,
+                                                          GFP_KERNEL);
diff --git a/package/kernel/mac80211/patches/ath11k/002-wifi-ath11k-use-dma-alloc-noncoherent-for-rx-tid-buffer-allocation.patch b/package/kernel/mac80211/patches/ath11k/002-wifi-ath11k-use-dma-alloc-noncoherent-for-rx-tid-buffer-allocation.patch
new file mode 100644 (file)
index 0000000..2257b06
--- /dev/null
@@ -0,0 +1,255 @@
+wifi: ath11k: Use dma_alloc_noncoherent for rx_tid buffer allocation
+
+Currently, the driver allocates cacheable DMA buffers for the rx_tid
+structure using kzalloc() and dma_map_single(). These buffers are
+long-lived and can persist for the lifetime of the peer, which is not
+advisable. Instead of using kzalloc() and dma_map_single() for allocating
+cacheable DMA buffers, utilize the dma_alloc_noncoherent() helper for the
+allocation of long-lived cacheable DMA buffers, such as the peer's rx_tid.
+Since dma_alloc_noncoherent() returns unaligned physical and virtual
+addresses, align them internally before use within the driver. This
+ensures proper allocation of non-coherent memory through the kernel
+helper.
+
+Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
+
+Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
+--- a/drivers/net/wireless/ath/ath11k/dp.h
++++ b/drivers/net/wireless/ath/ath11k/dp.h
+@@ -1,7 +1,7 @@
+ /* SPDX-License-Identifier: BSD-3-Clause-Clear */
+ /*
+  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
++ * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+  */
+ #ifndef ATH11K_DP_H
+@@ -20,7 +20,6 @@ struct ath11k_ext_irq_grp;
+ struct dp_rx_tid {
+       u8 tid;
+-      u32 *vaddr;
+       dma_addr_t paddr;
+       u32 size;
+       u32 ba_win_sz;
+@@ -37,6 +36,9 @@ struct dp_rx_tid {
+       /* Timer info related to fragments */
+       struct timer_list frag_timer;
+       struct ath11k_base *ab;
++      u32 *vaddr_unaligned;
++      dma_addr_t paddr_unaligned;
++      u32 unaligned_size;
+ };
+ #define DP_REO_DESC_FREE_THRESHOLD  64
+--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
++++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: BSD-3-Clause-Clear
+ /*
+  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
++ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
+  */
+ #include <linux/ieee80211.h>
+@@ -675,11 +675,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
+       list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) {
+               list_del(&cmd->list);
+               rx_tid = &cmd->data;
+-              if (rx_tid->vaddr) {
+-                      dma_unmap_single(ab->dev, rx_tid->paddr,
+-                                       rx_tid->size, DMA_BIDIRECTIONAL);
+-                      kfree(rx_tid->vaddr);
+-                      rx_tid->vaddr = NULL;
++              if (rx_tid->vaddr_unaligned) {
++                      dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++                                           rx_tid->vaddr_unaligned,
++                                           rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++                      rx_tid->vaddr_unaligned = NULL;
+               }
+               kfree(cmd);
+       }
+@@ -689,11 +689,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
+               list_del(&cmd_cache->list);
+               dp->reo_cmd_cache_flush_count--;
+               rx_tid = &cmd_cache->data;
+-              if (rx_tid->vaddr) {
+-                      dma_unmap_single(ab->dev, rx_tid->paddr,
+-                                       rx_tid->size, DMA_BIDIRECTIONAL);
+-                      kfree(rx_tid->vaddr);
+-                      rx_tid->vaddr = NULL;
++              if (rx_tid->vaddr_unaligned) {
++                      dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++                                           rx_tid->vaddr_unaligned,
++                                           rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++                      rx_tid->vaddr_unaligned = NULL;
+               }
+               kfree(cmd_cache);
+       }
+@@ -708,11 +708,11 @@ static void ath11k_dp_reo_cmd_free(struc
+       if (status != HAL_REO_CMD_SUCCESS)
+               ath11k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n",
+                           rx_tid->tid, status);
+-      if (rx_tid->vaddr) {
+-              dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size,
+-                               DMA_BIDIRECTIONAL);
+-              kfree(rx_tid->vaddr);
+-              rx_tid->vaddr = NULL;
++      if (rx_tid->vaddr_unaligned) {
++              dma_free_noncoherent(dp->ab->dev, rx_tid->unaligned_size,
++                                   rx_tid->vaddr_unaligned,
++                                   rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++              rx_tid->vaddr_unaligned = NULL;
+       }
+ }
+@@ -749,10 +749,10 @@ static void ath11k_dp_reo_cache_flush(st
+       if (ret) {
+               ath11k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n",
+                          rx_tid->tid, ret);
+-              dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
+-                               DMA_BIDIRECTIONAL);
+-              kfree(rx_tid->vaddr);
+-              rx_tid->vaddr = NULL;
++              dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++                                   rx_tid->vaddr_unaligned,
++                                   rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++              rx_tid->vaddr_unaligned = NULL;
+       }
+ }
+@@ -802,10 +802,10 @@ static void ath11k_dp_rx_tid_del_func(st
+       return;
+ free_desc:
+-      dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
+-                       DMA_BIDIRECTIONAL);
+-      kfree(rx_tid->vaddr);
+-      rx_tid->vaddr = NULL;
++      dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++                           rx_tid->vaddr_unaligned,
++                           rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++      rx_tid->vaddr_unaligned = NULL;
+ }
+ void ath11k_peer_rx_tid_delete(struct ath11k *ar,
+@@ -831,14 +831,16 @@ void ath11k_peer_rx_tid_delete(struct at
+               if (ret != -ESHUTDOWN)
+                       ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n",
+                                  tid, ret);
+-              dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size,
+-                               DMA_BIDIRECTIONAL);
+-              kfree(rx_tid->vaddr);
+-              rx_tid->vaddr = NULL;
++              dma_free_noncoherent(ar->ab->dev, rx_tid->unaligned_size,
++                                   rx_tid->vaddr_unaligned,
++                                   rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++              rx_tid->vaddr_unaligned = NULL;
+       }
+       rx_tid->paddr = 0;
++      rx_tid->paddr_unaligned = 0;
+       rx_tid->size = 0;
++      rx_tid->unaligned_size = 0;
+ }
+ static int ath11k_dp_rx_link_desc_return(struct ath11k_base *ab,
+@@ -982,10 +984,9 @@ static void ath11k_dp_rx_tid_mem_free(st
+       if (!rx_tid->active)
+               goto unlock_exit;
+-      dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
+-                       DMA_BIDIRECTIONAL);
+-      kfree(rx_tid->vaddr);
+-      rx_tid->vaddr = NULL;
++      dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, rx_tid->vaddr_unaligned,
++                           rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++      rx_tid->vaddr_unaligned = NULL;
+       rx_tid->active = false;
+@@ -1000,9 +1001,8 @@ int ath11k_peer_rx_tid_setup(struct ath1
+       struct ath11k_base *ab = ar->ab;
+       struct ath11k_peer *peer;
+       struct dp_rx_tid *rx_tid;
+-      u32 hw_desc_sz;
+-      u32 *addr_aligned;
+-      void *vaddr;
++      u32 hw_desc_sz, *vaddr;
++      void *vaddr_unaligned;
+       dma_addr_t paddr;
+       int ret;
+@@ -1050,37 +1050,34 @@ int ath11k_peer_rx_tid_setup(struct ath1
+       else
+               hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
+-      vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC);
+-      if (!vaddr) {
++      rx_tid->unaligned_size = hw_desc_sz + HAL_LINK_DESC_ALIGN - 1;
++      vaddr_unaligned = dma_alloc_noncoherent(ab->dev, rx_tid->unaligned_size, &paddr,
++                                              DMA_BIDIRECTIONAL, GFP_ATOMIC);
++      if (!vaddr_unaligned) {
+               spin_unlock_bh(&ab->base_lock);
+               return -ENOMEM;
+       }
+-      addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN);
+-
+-      ath11k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz,
+-                                 ssn, pn_type);
+-
+-      paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz,
+-                             DMA_BIDIRECTIONAL);
+-
+-      ret = dma_mapping_error(ab->dev, paddr);
+-      if (ret) {
+-              spin_unlock_bh(&ab->base_lock);
+-              ath11k_warn(ab, "failed to setup dma map for peer %pM rx tid %d: %d\n",
+-                          peer_mac, tid, ret);
+-              goto err_mem_free;
+-      }
+-
+-      rx_tid->vaddr = vaddr;
+-      rx_tid->paddr = paddr;
++      rx_tid->vaddr_unaligned = vaddr_unaligned;
++      vaddr = PTR_ALIGN(vaddr_unaligned, HAL_LINK_DESC_ALIGN);
++      rx_tid->paddr_unaligned = paddr;
++      rx_tid->paddr = rx_tid->paddr_unaligned + ((unsigned long)vaddr -
++                      (unsigned long)rx_tid->vaddr_unaligned);
++      ath11k_hal_reo_qdesc_setup(vaddr, tid, ba_win_sz, ssn, pn_type);
+       rx_tid->size = hw_desc_sz;
+       rx_tid->active = true;
++      /* After dma_alloc_noncoherent, vaddr is being modified for reo qdesc setup.
++       * Since these changes are not reflected in the device, driver now needs to
++       * explicitly call dma_sync_single_for_device.
++       */
++      dma_sync_single_for_device(ab->dev, rx_tid->paddr,
++                                 rx_tid->size,
++                                 DMA_TO_DEVICE);
+       spin_unlock_bh(&ab->base_lock);
+-      ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac,
+-                                                   paddr, tid, 1, ba_win_sz);
++      ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, rx_tid->paddr,
++                                                   tid, 1, ba_win_sz);
+       if (ret) {
+               ath11k_warn(ar->ab, "failed to setup rx reorder queue for peer %pM tid %d: %d\n",
+                           peer_mac, tid, ret);
+@@ -1088,12 +1085,6 @@ int ath11k_peer_rx_tid_setup(struct ath1
+       }
+       return ret;
+-
+-err_mem_free:
+-      kfree(rx_tid->vaddr);
+-      rx_tid->vaddr = NULL;
+-
+-      return ret;
+ }
+ int ath11k_dp_rx_ampdu_start(struct ath11k *ar,